diff --git a/acb_dirichlet.h b/acb_dirichlet.h
index a28d3756..4ce3ab8d 100644
--- a/acb_dirichlet.h
+++ b/acb_dirichlet.h
@@ -298,8 +298,12 @@ void acb_dirichlet_dft_pol(acb_ptr w, acb_srcptr v, slong len, slong prec);
void acb_dirichlet_dft_crt(acb_ptr w, acb_srcptr v, slong len, slong prec);
void acb_dirichlet_dft_cyc(acb_ptr w, acb_srcptr v, slong len, slong prec);
void acb_dirichlet_dft_rad2(acb_ptr v, int e, slong prec);
+void acb_dirichlet_dft_bluestein(acb_ptr w, acb_srcptr v, slong len, slong prec);
void acb_dirichlet_dft_prod(acb_ptr w, acb_srcptr v, slong * cyc, slong num, slong prec);
+void acb_dirichlet_dft_convol_naive(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, slong prec);
+void acb_dirichlet_dft_convol_rad2(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, slong prec);
+
void acb_dirichlet_dft_conrey(acb_ptr w, acb_srcptr v, const acb_dirichlet_group_t G, slong prec);
void acb_dirichlet_dft(acb_ptr w, acb_srcptr v, const acb_dirichlet_group_t G, slong prec);
@@ -347,6 +351,16 @@ acb_dirichlet_dft_rad2_struct;
typedef acb_dirichlet_dft_rad2_struct acb_dirichlet_dft_rad2_t[1];
+typedef struct /* data precomputed by acb_dirichlet_dft_bluestein_init for one transform length */
+{
+ slong n; /* transform length; also the number of entries in z */
+ acb_ptr z; /* z[k] = conjugate of entry (k^2 mod 2n) of the table returned by acb_dirichlet_vec_nth_roots(., 2n, .) */
+ acb_dirichlet_dft_rad2_t rad2; /* radix-2 context handed to the convolution step */
+}
+acb_dirichlet_dft_bluestein_struct;
+
+typedef acb_dirichlet_dft_bluestein_struct acb_dirichlet_dft_bluestein_t[1]; /* array of one, so a variable of this type decays to a pointer when passed */
+
typedef struct
{
slong n;
@@ -438,11 +452,12 @@ acb_dirichlet_dft_cyc_precomp(acb_ptr w, acb_srcptr v, const acb_dirichlet_dft_c
{
acb_dirichlet_dft_step(w, v, cyc->cyc, cyc->num, prec);
}
-void acb_dirichlet_dft_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong prec);
+void acb_dirichlet_dft_rad2_precomp(acb_ptr v, const acb_dirichlet_dft_rad2_t rad2, slong prec);
void acb_dirichlet_dft_crt_precomp(acb_ptr w, acb_srcptr v, const acb_dirichlet_dft_crt_t crt, slong prec);
void acb_dirichlet_dft_prod_precomp(acb_ptr w, acb_srcptr v, const acb_dirichlet_dft_prod_t prod, slong prec);
-void acb_dirichlet_dft_inverse_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong prec);
+void acb_dirichlet_dft_inverse_rad2_precomp(acb_ptr v, const acb_dirichlet_dft_rad2_t rad2, slong prec);
+void acb_dirichlet_dft_convol_rad2_precomp(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, const acb_dirichlet_dft_rad2_t rad2, slong prec);
void _acb_dirichlet_dft_precomp_init(acb_dirichlet_dft_pre_t pre, slong dv, acb_ptr z, slong dz, slong len, slong prec);
void acb_dirichlet_dft_precomp_init(acb_dirichlet_dft_pre_t pre, slong len, slong prec);
@@ -502,6 +517,15 @@ acb_dirichlet_dft_rad2_clear(acb_dirichlet_dft_rad2_t t)
_acb_vec_clear(t->z, t->nz);
}
+void acb_dirichlet_dft_bluestein_init(acb_dirichlet_dft_bluestein_t t, slong n, slong prec);
+void acb_dirichlet_dft_bluestein_precomp(acb_ptr w, acb_srcptr v, const acb_dirichlet_dft_bluestein_t t, slong prec);
+ACB_DIRICHLET_INLINE void /* releases the two members of t that own memory */
+acb_dirichlet_dft_bluestein_clear(acb_dirichlet_dft_bluestein_t t)
+{
+ _acb_vec_clear(t->z, t->n); /* z was allocated with t->n entries */
+ acb_dirichlet_dft_rad2_clear(t->rad2); /* embedded radix-2 context */
+}
+
void _acb_dirichlet_dft_crt_init(acb_dirichlet_dft_crt_t crt, slong dv, slong len, slong prec);
ACB_DIRICHLET_INLINE void
@@ -543,6 +567,18 @@ _acb_vec_kronecker_mul(acb_ptr z, acb_srcptr x, acb_srcptr y, slong len, slong p
acb_mul(z + k, x + k, y + k, prec);
}
+/* z[k] = conj(x[k])*y[k] for 0 <= k < len; z must not alias y, because z[k] is overwritten before y[k] is read */
+ACB_DIRICHLET_INLINE void
+acb_vec_kronecker_mul_conj(acb_ptr z, acb_srcptr x, acb_srcptr y, slong len, slong prec)
+{
+ slong k;
+ for (k = 0; k < len; k++)
+ {
+ acb_conj(z + k, x + k); /* stage conj(x[k]) in the output slot */
+ acb_mul(z + k, z + k, y + k, prec); /* then multiply that slot by y[k] */
+ }
+}
+
ACB_DIRICHLET_INLINE void
acb_vec_printd(acb_srcptr vec, slong len, slong digits)
{
diff --git a/acb_dirichlet/dft_bluestein.c b/acb_dirichlet/dft_bluestein.c
new file mode 100644
index 00000000..a73294ec
--- /dev/null
+++ b/acb_dirichlet/dft_bluestein.c
@@ -0,0 +1,67 @@
+/*
+ Copyright (C) 2016 Pascal Molin
+
+ This file is part of Arb.
+
+ Arb is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version. See <http://www.gnu.org/licenses/>.
+*/
+
+#include "acb_dirichlet.h"
+
+void
+acb_dirichlet_dft_bluestein_init(acb_dirichlet_dft_bluestein_t t, slong n, slong prec)
+{
+ /* Fill t for transforms of length n: a radix-2 context plus the n-entry table t->z. */
+ nmod_t n2;
+ slong k, k2;
+ acb_ptr z2n;
+ int e = n_clog(2 * n - 1, 2); /* presumably ceil(log2(2n-1)), so that 2^e >= 2n-1 as the padded convolution requires -- confirm against n_clog */
+ acb_dirichlet_dft_rad2_init(t->rad2, e, prec);
+ z2n = _acb_vec_init(2 * n); /* scratch table of 2n entries, freed at the end */
+ acb_dirichlet_vec_nth_roots(z2n, 2 * n, prec); /* presumably the 2n-th roots of unity -- helper not shown here */
+ nmod_init(&n2, 2 * n); /* the table index is tracked modulo 2n */
+ t->n = n;
+ t->z = _acb_vec_init(n);
+ for (k = 0, k2 = 0; k < n; k++)
+ {
+ acb_conj(t->z + k, z2n + k2); /* k2 equals k^2 mod 2n at this point */
+ k2 = nmod_add(k2, 2 * k + 1, n2); /* (k+1)^2 = k^2 + 2k + 1 */
+ }
+ _acb_vec_clear(z2n, 2 * n);
+}
+
+/* Length-n transform of v into w with the tables held in t, n = t->n.
+   t must have been filled by acb_dirichlet_dft_bluestein_init.
+   Steps: vz[k] = conj(z[k]) v[k]; wz = convolution of vz with z computed
+   through t->rad2; w[k] = conj(z[k]) wz[k]. Two temporary vectors of
+   length n are allocated and freed before returning; nothing is printed. */
+void
+acb_dirichlet_dft_bluestein_precomp(acb_ptr w, acb_srcptr v, const acb_dirichlet_dft_bluestein_t t, slong prec)
+{
+    slong n = t->n;
+    acb_ptr vz, wz, z;
+    z = t->z;
+    /* TODO: allocate directly length 2^e and pad */
+    vz = _acb_vec_init(n);
+    wz = _acb_vec_init(n);
+    /* twist the input by the conjugated table */
+    acb_vec_kronecker_mul_conj(vz, z, v, n, prec);
+    /* convolution of length n, evaluated on the t->rad2 grid */
+    acb_dirichlet_dft_convol_rad2_precomp(wz, vz, z, n, t->rad2, prec);
+    /* twist the output; w is written only here, after v was last read */
+    acb_vec_kronecker_mul_conj(w, z, wz, n, prec);
+    _acb_vec_clear(wz, n);
+    _acb_vec_clear(vz, n);
+}
+
+void /* one-shot wrapper: build the tables for len, transform v into w, release the tables */
+acb_dirichlet_dft_bluestein(acb_ptr w, acb_srcptr v, slong len, slong prec)
+{
+ acb_dirichlet_dft_bluestein_t t;
+ acb_dirichlet_dft_bluestein_init(t, len, prec); /* built from len and prec only */
+ acb_dirichlet_dft_bluestein_precomp(w, v, t, prec);
+ acb_dirichlet_dft_bluestein_clear(t);
+}
diff --git a/acb_dirichlet/dft_convol_fft.c b/acb_dirichlet/dft_convol_fft.c
index 23a3b72d..07a836bc 100644
--- a/acb_dirichlet/dft_convol_fft.c
+++ b/acb_dirichlet/dft_convol_fft.c
@@ -16,14 +16,25 @@ void
acb_dirichlet_dft_convol_pad(acb_ptr fp, acb_ptr gp, acb_srcptr f, acb_srcptr g, slong n, slong np)
{
slong k;
+ /* the wrapped tail of f lands in fp[np-n+1 .. np-1]; it must not run into fp[0 .. n-1] */
+ if (np < 2 * n - 1)
+ {
+ flint_printf("dft_convol_pad: overlapping padding %wd < 2*%wd-1\n", np, n);
+ abort();
+ }
+
for (k = 0; k < n; k++)
acb_set(gp + k, g + k);
+ for (; k < np; k++)
+ acb_zero(gp + k);
+
for (k = 0; k < n; k++)
acb_set(fp + k, f + k);
for (k = 1; k < n; k++)
acb_set(fp + np - k, f + n - k);
for (k = n; k <= np - n; k++)
acb_zero(fp + k);
+
}
void
@@ -37,22 +48,47 @@ acb_dirichlet_dft_inverse_cyc(acb_ptr w, acb_srcptr v, slong len, slong prec)
}
+/* Convolution of f and g (both of length len) into w, computed with the
+ precomputed radix-2 context rad2 of size np = rad2->n.
+ f and g are embedded into two scratch vectors of length np by
+ acb_dirichlet_dft_convol_pad, which aborts unless np >= 2*len-1.
+ Both scratch vectors are transformed in place, multiplied entrywise and
+ transformed back; the first len entries of the result are copied to w.
+ The scratch vectors are released before returning; nothing is printed. */
void
-acb_dirichlet_dft_convol_fft(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, slong prec)
+acb_dirichlet_dft_convol_rad2_precomp(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, const acb_dirichlet_dft_rad2_t rad2, slong prec)
{
- int e;
- slong k, np;
+ slong np;
acb_ptr fp, gp;
- acb_dirichlet_dft_rad2_t dft;
- e = n_clog(2 * len + 1, 2);
- acb_dirichlet_dft_rad2_init(dft, e, prec);
- np = dft->n;
+ np = rad2->n;
+
+ /* fp receives f wrapped around the end of the buffer, gp receives g followed by zeros */
fp = _acb_vec_init(np);
gp = _acb_vec_init(np);
acb_dirichlet_dft_convol_pad(fp, gp, f, g, len, np);
- acb_dirichlet_dft_rad2_precomp(fp, dft, prec);
- acb_dirichlet_dft_rad2_precomp(gp, dft, prec);
+
+ /* forward transforms, in place, sharing the caller's context */
+ acb_dirichlet_dft_rad2_precomp(fp, rad2, prec);
+ acb_dirichlet_dft_rad2_precomp(gp, rad2, prec);
+
+ /* gp <- gp * fp entrywise */
_acb_vec_kronecker_mul(gp, gp, fp, np, prec);
- acb_dirichlet_dft_inverse_rad2_precomp(gp, dft, prec);
- for (k = 0; k < len; k++)
- acb_set(w + k, gp + k);
+
+ /* inverse transform, in place */
+ acb_dirichlet_dft_inverse_rad2_precomp(gp, rad2, prec);
+
+ /* hand back the first len entries, then free both scratch vectors */
+ _acb_vec_set(w, gp, len);
+ _acb_vec_clear(fp, np);
+ _acb_vec_clear(gp, np);
+}
+
+void
+acb_dirichlet_dft_convol_rad2(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, slong prec)
+{
+ int e;
+ acb_dirichlet_dft_rad2_t dft;
+ e = n_clog(2 * len - 1, 2);
+ acb_dirichlet_dft_rad2_init(dft, e, prec);
+ acb_dirichlet_dft_convol_rad2_precomp(w, f, g, len, dft, prec);
+ acb_dirichlet_dft_rad2_clear(dft);
}
diff --git a/acb_dirichlet/dft_convol_naive.c b/acb_dirichlet/dft_convol_naive.c
index be7f2c8e..a11db829 100644
--- a/acb_dirichlet/dft_convol_naive.c
+++ b/acb_dirichlet/dft_convol_naive.c
@@ -24,8 +24,8 @@ acb_dirichlet_dft_convol_naive(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len,
gy = g;
acb_zero(wx);
for (y = 0; y <= x; y++)
- acb_addmul(wx, fx--, gy++, prec);
+ acb_addmul(wx, fx - y, g + y, prec);
for (; y < len; y++)
- acb_addmul(wx, f + x - y, g + y, prec);
+ acb_addmul(wx, fx + (len - y), g + y, prec);
}
}
diff --git a/acb_dirichlet/dft_rad2.c b/acb_dirichlet/dft_rad2.c
index bfd99ff1..061a6441 100644
--- a/acb_dirichlet/dft_rad2.c
+++ b/acb_dirichlet/dft_rad2.c
@@ -38,7 +38,7 @@ acb_dirichlet_dft_rad2_reorder(acb_ptr v, slong n)
/* remark: can use same rad2 with smaller power of 2 */
void
-acb_dirichlet_dft_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong prec)
+acb_dirichlet_dft_rad2_precomp(acb_ptr v, const acb_dirichlet_dft_rad2_t rad2, slong prec)
{
slong j, k, l;
slong n = rad2->n, nz = rad2->nz;
@@ -46,6 +46,8 @@ acb_dirichlet_dft_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong p
acb_t tmp;
acb_init(tmp);
+ acb_dirichlet_dft_rad2_reorder(v, n);
+
for (k = 1, l = nz; k < n; k <<= 1, l >>= 1)
for (p = v; p < vend; p += k)
for (j = 0; j < nz; j += l, p++)
@@ -59,7 +61,7 @@ acb_dirichlet_dft_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong p
}
void
-acb_dirichlet_dft_inverse_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong prec)
+acb_dirichlet_dft_inverse_rad2_precomp(acb_ptr v, const acb_dirichlet_dft_rad2_t rad2, slong prec)
{
slong k, n = rad2->n;
acb_dirichlet_dft_rad2_precomp(v, rad2, prec);
diff --git a/acb_dirichlet/test/t-convol.c b/acb_dirichlet/test/t-convol.c
new file mode 100644
index 00000000..df2a00fa
--- /dev/null
+++ b/acb_dirichlet/test/t-convol.c
@@ -0,0 +1,118 @@
+/*
+ Copyright (C) 2016 Pascal Molin
+
+ This file is part of Arb.
+
+ Arb is free software: you can redistribute it and/or modify it under
+ the terms of the GNU Lesser General Public License (LGPL) as published
+ by the Free Software Foundation; either version 2.1 of the License, or
+ (at your option) any later version. See <http://www.gnu.org/licenses/>.
+*/
+
+#include "acb_dirichlet.h"
+
+typedef void (*do_f) (acb_ptr z, acb_srcptr x, acb_srcptr y, slong len, slong prec);
+
+/* Abort with a dump unless w1 and w2 overlap entrywise and, where w1[i] is not exactly zero, both entries have >= 30 accurate bits. */
+void
+check_vec_eq_prec(acb_srcptr w1, acb_srcptr w2, slong len, slong prec, slong digits, ulong q, char f1[], char f2[])
+{
+    slong i;
+    for (i = 0; i < len; i++)
+    {
+        if (!acb_overlaps(w1 + i, w2 + i))
+        {
+            flint_printf("FAIL\n\n");
+            flint_printf("q = %wu, size = %wd\n", q, len);
+            flint_printf("\nDFT differ from index %wd / %wd \n", i, len);
+            flint_printf("\n%s =\n", f1);
+            acb_vec_printd_index(w1, len, digits);
+            flint_printf("\n%s =\n", f2);
+            acb_vec_printd_index(w2, len, digits);
+            flint_printf("\n\n");
+            abort();
+        }
+        else if (!acb_is_zero(w1+i) && (acb_rel_accuracy_bits(w1 + i) < 30
+                    || acb_rel_accuracy_bits(w2 + i) < 30))
+        {
+            flint_printf("FAIL\n\n");
+            flint_printf("q = %wu\n", q);
+            flint_printf("\nDFT inaccurate from index %wd / %wd \n", i, len);
+            flint_printf("\n%s =\n", f1);
+            acb_printd(w1 + i, digits);
+            flint_printf("\n%s =\n", f2);
+            acb_printd(w2 + i, digits);
+            flint_printf("\nerrors %wd & %wd [prec = %wd]\n",
+                    acb_rel_accuracy_bits(w1 + i),
+                    acb_rel_accuracy_bits(w2 + i), prec);
+            abort();
+        }
+    }
+}
+
+
+int main()
+{
+    /* Check the radix-2 convolution against the naive one on vectors of each length listed in q. */
+    slong k;
+    slong prec = 100, digits = 30;
+    slong nq = 13;
+    ulong q[13] = { 2, 3, 4, 5, 6, 23, 10, 15, 30, 59, 308, 335, 961};
+    flint_rand_t state;
+
+    slong f, nf = 2;
+    do_f func[2] = { acb_dirichlet_dft_convol_naive, acb_dirichlet_dft_convol_rad2 };
+    char * name[2] = { "naive", "rad2" };
+
+    flint_printf("convol....");
+    fflush(stdout);
+
+    flint_randinit(state);
+
+    for (k = 0; k < nq; k++)
+    {
+        slong i;
+        acb_ptr z1, z2, x, y;
+
+        z1 = _acb_vec_init(q[k]);
+        z2 = _acb_vec_init(q[k]);
+        x = _acb_vec_init(q[k]);
+        y = _acb_vec_init(q[k]);
+
+        for (i = 0; i < (slong) q[k]; i++)
+        {
+            acb_set_si(x + i, q[k] - i);
+            acb_set_si(y + i, i * i);
+            /*
+            acb_set_si(x + i, n_randint(state, q[k]));
+            acb_set_si(y + i, n_randint(state, q[k]));
+            */
+        }
+
+        for (f = 0; f < nf; f++)
+        {
+            /* func[0] fills the reference z1; every later function fills z2 and is compared with z1 */
+            acb_ptr z = (f == 0) ? z1 : z2;
+
+            func[f](z, x, y, q[k], prec);
+
+            if (f == 0)
+                continue;
+
+            check_vec_eq_prec(z1, z2, q[k], prec, digits, q[k], name[0], name[f]);
+
+        }
+
+        _acb_vec_clear(x, q[k]);
+        _acb_vec_clear(y, q[k]);
+        _acb_vec_clear(z1, q[k]);
+        _acb_vec_clear(z2, q[k]);
+    }
+
+    flint_randclear(state);
+    flint_cleanup();
+    flint_printf("PASS\n");
+    return EXIT_SUCCESS;
+}
+
+
diff --git a/acb_dirichlet/test/t-dft.c b/acb_dirichlet/test/t-dft.c
index 02fa7bb6..1e783277 100644
--- a/acb_dirichlet/test/t-dft.c
+++ b/acb_dirichlet/test/t-dft.c
@@ -60,9 +60,9 @@ int main()
ulong q[13] = { 2, 3, 4, 5, 6, 23, 10, 15, 30, 59, 308, 335, 961};
flint_rand_t state;
- slong f, nf = 3;
- do_f func[3] = { acb_dirichlet_dft_pol, acb_dirichlet_dft_cyc, acb_dirichlet_dft_crt };
- char * name[3] = { "pol", "cyc", "crt" };
+ slong f, nf = 4;
+ do_f func[4] = { acb_dirichlet_dft_pol, acb_dirichlet_dft_cyc, acb_dirichlet_dft_crt , acb_dirichlet_dft_bluestein };
+ char * name[4] = { "pol", "cyc", "crt", "bluestein" };
flint_printf("dft....");
fflush(stdout);
@@ -104,11 +104,14 @@ int main()
/* radix2 dft */
for (k = 1; k < 12; k++)
{
- slong n = 1 << k;
+ slong n = 1 << k, j;
acb_ptr v, w1, w2;
v = w2 = _acb_vec_init(n);
w1 = _acb_vec_init(n);
+ for (j = 0; j < n; j++)
+ acb_set_si(v + j, j); /* index by the loop variable j so that every entry of v is set */
+
acb_dirichlet_dft_pol(w1, v, n, prec);
acb_dirichlet_dft_rad2(v, k, prec);