bluestein dft, WIP

This commit is contained in:
Pascal 2016-10-06 08:52:22 +02:00
parent ae23aecd8f
commit 0f3eea07c2
7 changed files with 295 additions and 22 deletions

View file

@ -298,8 +298,12 @@ void acb_dirichlet_dft_pol(acb_ptr w, acb_srcptr v, slong len, slong prec);
void acb_dirichlet_dft_crt(acb_ptr w, acb_srcptr v, slong len, slong prec);
void acb_dirichlet_dft_cyc(acb_ptr w, acb_srcptr v, slong len, slong prec);
void acb_dirichlet_dft_rad2(acb_ptr v, int e, slong prec);
void acb_dirichlet_dft_bluestein(acb_ptr w, acb_srcptr v, slong len, slong prec);
void acb_dirichlet_dft_prod(acb_ptr w, acb_srcptr v, slong * cyc, slong num, slong prec);
void acb_dirichlet_dft_convol_naive(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, slong prec);
void acb_dirichlet_dft_convol_rad2(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, slong prec);
void acb_dirichlet_dft_conrey(acb_ptr w, acb_srcptr v, const acb_dirichlet_group_t G, slong prec);
void acb_dirichlet_dft(acb_ptr w, acb_srcptr v, const acb_dirichlet_group_t G, slong prec);
@ -347,6 +351,16 @@ acb_dirichlet_dft_rad2_struct;
typedef acb_dirichlet_dft_rad2_struct acb_dirichlet_dft_rad2_t[1];
/* Precomputed data for a Bluestein transform; filled in by
   acb_dirichlet_dft_bluestein_init and released by
   acb_dirichlet_dft_bluestein_clear. */
typedef struct
{
/* transform length; also the number of entries allocated in z */
slong n;
/* z[k] = conjugate of entry (k^2 mod 2n) of the table produced by
   acb_dirichlet_vec_nth_roots(., 2n, prec) */
acb_ptr z;
/* radix-2 transform data, initialised with exponent n_clog(2n - 1, 2);
   used for the convolution step */
acb_dirichlet_dft_rad2_t rad2;
}
acb_dirichlet_dft_bluestein_struct;
/* one-element array type, so that objects are passed by reference */
typedef acb_dirichlet_dft_bluestein_struct acb_dirichlet_dft_bluestein_t[1];
typedef struct
{
slong n;
@ -438,11 +452,12 @@ acb_dirichlet_dft_cyc_precomp(acb_ptr w, acb_srcptr v, const acb_dirichlet_dft_c
{
acb_dirichlet_dft_step(w, v, cyc->cyc, cyc->num, prec);
}
void acb_dirichlet_dft_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong prec);
void acb_dirichlet_dft_rad2_precomp(acb_ptr v, const acb_dirichlet_dft_rad2_t rad2, slong prec);
void acb_dirichlet_dft_crt_precomp(acb_ptr w, acb_srcptr v, const acb_dirichlet_dft_crt_t crt, slong prec);
void acb_dirichlet_dft_prod_precomp(acb_ptr w, acb_srcptr v, const acb_dirichlet_dft_prod_t prod, slong prec);
void acb_dirichlet_dft_inverse_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong prec);
void acb_dirichlet_dft_inverse_rad2_precomp(acb_ptr v, const acb_dirichlet_dft_rad2_t rad2, slong prec);
void acb_dirichlet_dft_convol_rad2_precomp(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, const acb_dirichlet_dft_rad2_t, slong prec);
void _acb_dirichlet_dft_precomp_init(acb_dirichlet_dft_pre_t pre, slong dv, acb_ptr z, slong dz, slong len, slong prec);
void acb_dirichlet_dft_precomp_init(acb_dirichlet_dft_pre_t pre, slong len, slong prec);
@ -502,6 +517,15 @@ acb_dirichlet_dft_rad2_clear(acb_dirichlet_dft_rad2_t t)
_acb_vec_clear(t->z, t->nz);
}
void acb_dirichlet_dft_bluestein_init(acb_dirichlet_dft_bluestein_t t, slong n, slong prec);
/* Free everything owned by a Bluestein precomputation: the embedded
   radix-2 data and the vector t->z of t->n entries. */
ACB_DIRICHLET_INLINE void
acb_dirichlet_dft_bluestein_clear(acb_dirichlet_dft_bluestein_t t)
{
    /* the two resources are independent of each other */
    acb_dirichlet_dft_rad2_clear(t->rad2);
    _acb_vec_clear(t->z, t->n);
}
void _acb_dirichlet_dft_crt_init(acb_dirichlet_dft_crt_t crt, slong dv, slong len, slong prec);
ACB_DIRICHLET_INLINE void
@ -543,6 +567,18 @@ _acb_vec_kronecker_mul(acb_ptr z, acb_srcptr x, acb_srcptr y, slong len, slong p
acb_mul(z + k, x + k, y + k, prec);
}
/* z[k] = conj(x[k]) * y[k] for 0 <= k < len, at precision prec.

   The conjugate is formed in a temporary rather than in z[k], so the
   result is still correct when z aliases y (writing conj(x[k]) into z[k]
   first would destroy y[k] before it is multiplied). */
ACB_DIRICHLET_INLINE void
acb_vec_kronecker_mul_conj(acb_ptr z, acb_srcptr x, acb_srcptr y, slong len, slong prec)
{
    slong k;
    acb_t xc;

    acb_init(xc);
    for (k = 0; k < len; k++)
    {
        acb_conj(xc, x + k);
        acb_mul(z + k, xc, y + k, prec);
    }
    acb_clear(xc);
}
ACB_DIRICHLET_INLINE void
acb_vec_printd(acb_srcptr vec, slong len, slong digits)
{

View file

@ -0,0 +1,67 @@
/*
Copyright (C) 2016 Pascal Molin
This file is part of Arb.
Arb is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See <http://www.gnu.org/licenses/>.
*/
#include "acb_dirichlet.h"
/* Set up the data needed by acb_dirichlet_dft_bluestein_precomp for
   length n at precision prec:
     - t->rad2 : radix-2 data with exponent e = n_clog(2n - 1, 2);
     - t->z    : n entries, z[k] = conj(r[k^2 mod 2n]) where r is the table
                 filled by acb_dirichlet_vec_nth_roots(r, 2n, prec)
                 (presumably the 2n-th roots of unity -- confirm against
                 that helper).
   The result must be released with acb_dirichlet_dft_bluestein_clear. */
void
acb_dirichlet_dft_bluestein_init(acb_dirichlet_dft_bluestein_t t, slong n, slong prec)
{
    const slong m = 2 * n;  /* size of the root table and modulus for k^2 */
    nmod_t mod;
    acb_ptr roots;
    slong i, sq;
    int e;

    e = n_clog(m - 1, 2);
    acb_dirichlet_dft_rad2_init(t->rad2, e, prec);

    roots = _acb_vec_init(m);
    acb_dirichlet_vec_nth_roots(roots, m, prec);
    nmod_init(&mod, m);

    t->n = n;
    t->z = _acb_vec_init(n);

    sq = 0;  /* invariant at the top of each iteration: sq = i^2 mod 2n */
    for (i = 0; i < n; i++)
    {
        acb_conj(t->z + i, roots + sq);
        /* (i + 1)^2 = i^2 + (2i + 1) */
        sq = nmod_add(sq, 2 * i + 1, mod);
    }

    _acb_vec_clear(roots, m);
}
/* Bluestein transform of v (n = t->n entries) into w, using the chirp
   t->z and the radix-2 data t->rad2 prepared by
   acb_dirichlet_dft_bluestein_init:

       vz[k] = conj(z[k]) * v[k]
       wz    = convolution of vz and z (acb_dirichlet_dft_convol_rad2_precomp)
       w[k]  = conj(z[k]) * wz[k]

   w and v may alias, since v is only read before w is written.

   The unconditional debug dumps to stdout that were here have been
   removed: a library routine must not print.

   NOTE(review): acb_dirichlet_dft_convol_pad extends its first operand
   periodically, so the helper appears to compute a cyclic convolution of
   length n, whereas the chirp in Bluestein's algorithm is extended
   symmetrically (z[-m] = z[m]). The two extensions seem to differ by a
   factor (-1)^n, so the result should be checked for odd n. */
void
acb_dirichlet_dft_bluestein_precomp(acb_ptr w, acb_srcptr v, const acb_dirichlet_dft_bluestein_t t, slong prec)
{
    slong n = t->n;
    acb_srcptr z = t->z;
    acb_ptr vz, wz;

    /* TODO: allocate directly length 2^e and pad */
    vz = _acb_vec_init(n);
    wz = _acb_vec_init(n);

    acb_vec_kronecker_mul_conj(vz, z, v, n, prec);
    acb_dirichlet_dft_convol_rad2_precomp(wz, vz, z, n, t->rad2, prec);
    acb_vec_kronecker_mul_conj(w, z, wz, n, prec);

    _acb_vec_clear(wz, n);
    _acb_vec_clear(vz, n);
}
/* One-shot Bluestein transform of v (len entries) into w: build the
   precomputed data for this length, apply it once, and release it. */
void
acb_dirichlet_dft_bluestein(acb_ptr w, acb_srcptr v, slong len, slong prec)
{
    acb_dirichlet_dft_bluestein_t pre;

    acb_dirichlet_dft_bluestein_init(pre, len, prec);
    acb_dirichlet_dft_bluestein_precomp(w, v, pre, prec);
    acb_dirichlet_dft_bluestein_clear(pre);
}

View file

@ -16,14 +16,25 @@ void
acb_dirichlet_dft_convol_pad(acb_ptr fp, acb_ptr gp, acb_srcptr f, acb_srcptr g, slong n, slong np)
{
    /* Expand the length-n inputs f and g into the length-np buffers fp, gp:
         gp = g[0..n-1] followed by zeros;
         fp = f[0..n-1], then zeros, then f[1..n-1] in the last n-1 slots
              (fp[np - k] = f[n - k] for 1 <= k < n).
       Requires np >= 2n - 1 so that the head and tail copies of f do not
       overlap; prints a message and aborts otherwise. */
    slong k;

    if (np < 2 * n - 1)
    {
        /* slong arguments need the %wd conversion of flint_printf;
           %ld is only correct where slong happens to be long */
        flint_printf("dft_convol_pad: overlapping padding %wd < 2*%wd-1\n", np, n);
        abort();
    }

    /* g, zero padded */
    for (k = 0; k < n; k++)
        acb_set(gp + k, g + k);
    for (; k < np; k++)
        acb_zero(gp + k);

    /* f at the front ... */
    for (k = 0; k < n; k++)
        acb_set(fp + k, f + k);
    /* ... f[1..n-1] wrapped to the end ... */
    for (k = 1; k < n; k++)
        acb_set(fp + np - k, f + n - k);
    /* ... and zeros in between (empty range when np = 2n - 1) */
    for (k = n; k <= np - n; k++)
        acb_zero(fp + k);
}
void
@ -37,22 +48,58 @@ acb_dirichlet_dft_inverse_cyc(acb_ptr w, acb_srcptr v, slong len, slong prec)
}
void
acb_dirichlet_dft_convol_fft(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, slong prec)
acb_dirichlet_dft_convol_rad2_precomp(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, const acb_dirichlet_dft_rad2_t rad2, slong prec)
{
int e;
slong k, np;
slong np;
acb_ptr fp, gp;
acb_dirichlet_dft_rad2_t dft;
e = n_clog(2 * len + 1, 2);
acb_dirichlet_dft_rad2_init(dft, e, prec);
np = dft->n;
np = rad2->n;
flint_printf("\nf\n");
acb_vec_printd_index(f, len, 10);
flint_printf("\ng\n");
acb_vec_printd_index(g, len, 10);
fp = _acb_vec_init(np);
gp = _acb_vec_init(np);
acb_dirichlet_dft_convol_pad(fp, gp, f, g, len, np);
acb_dirichlet_dft_rad2_precomp(fp, dft, prec);
acb_dirichlet_dft_rad2_precomp(gp, dft, prec);
flint_printf("\nF\n");
acb_vec_printd_index(fp, np, 10);
flint_printf("\nG\n");
acb_vec_printd_index(gp, np, 10);
acb_dirichlet_dft_rad2_precomp(fp, rad2, prec);
flint_printf("\nDFT F\n");
acb_vec_printd_index(fp, np, 10);
acb_dirichlet_dft_rad2_precomp(gp, rad2, prec);
flint_printf("\nDFT G\n");
acb_vec_printd_index(gp, np, 10);
_acb_vec_kronecker_mul(gp, gp, fp, np, prec);
acb_dirichlet_dft_inverse_rad2_precomp(gp, dft, prec);
for (k = 0; k < len; k++)
acb_set(w + k, gp + k);
flint_printf("\n(DFT F)(DFT G)=DFT(F*G)\n");
acb_vec_printd_index(gp, np, 10);
acb_dirichlet_dft_inverse_rad2_precomp(gp, rad2, prec);
flint_printf("\nF*G\n");
acb_vec_printd_index(gp, np, 10);
_acb_vec_set(w, gp, len);
_acb_vec_clear(fp, np);
_acb_vec_clear(gp, np);
}
/* Convolution of f and g (len entries each) into w through radix-2
   transforms. Builds a temporary radix-2 table with exponent
   n_clog(2 len - 1, 2), delegates to
   acb_dirichlet_dft_convol_rad2_precomp, then frees the table. */
void
acb_dirichlet_dft_convol_rad2(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, slong prec)
{
    acb_dirichlet_dft_rad2_t rad2;
    int e = n_clog(2 * len - 1, 2);

    acb_dirichlet_dft_rad2_init(rad2, e, prec);
    acb_dirichlet_dft_convol_rad2_precomp(w, f, g, len, rad2, prec);
    acb_dirichlet_dft_rad2_clear(rad2);
}

View file

@ -24,8 +24,8 @@ acb_dirichlet_dft_convol_naive(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len,
gy = g;
acb_zero(wx);
for (y = 0; y <= x; y++)
acb_addmul(wx, fx--, gy++, prec);
acb_addmul(wx, fx - y, g + y, prec);
for (; y < len; y++)
acb_addmul(wx, f + x - y, g + y, prec);
acb_addmul(wx, fx + (len - y), g + y, prec);
}
}

View file

@ -38,7 +38,7 @@ acb_dirichlet_dft_rad2_reorder(acb_ptr v, slong n)
/* remark: can use same rad2 with smaller power of 2 */
void
acb_dirichlet_dft_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong prec)
acb_dirichlet_dft_rad2_precomp(acb_ptr v, const acb_dirichlet_dft_rad2_t rad2, slong prec)
{
slong j, k, l;
slong n = rad2->n, nz = rad2->nz;
@ -46,6 +46,8 @@ acb_dirichlet_dft_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong p
acb_t tmp;
acb_init(tmp);
acb_dirichlet_dft_rad2_reorder(v, n);
for (k = 1, l = nz; k < n; k <<= 1, l >>= 1)
for (p = v; p < vend; p += k)
for (j = 0; j < nz; j += l, p++)
@ -59,7 +61,7 @@ acb_dirichlet_dft_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong p
}
void
acb_dirichlet_dft_inverse_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong prec)
acb_dirichlet_dft_inverse_rad2_precomp(acb_ptr v, const acb_dirichlet_dft_rad2_t rad2, slong prec)
{
slong k, n = rad2->n;
acb_dirichlet_dft_rad2_precomp(v, rad2, prec);

View file

@ -0,0 +1,118 @@
/*
Copyright (C) 2016 Pascal Molin
This file is part of Arb.
Arb is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See <http://www.gnu.org/licenses/>.
*/
#include "acb_dirichlet.h"
typedef void (*do_f) (acb_ptr z, acb_srcptr x, acb_srcptr y, slong len, slong prec);
/* Compare two result vectors entry by entry and abort with a diagnostic
   on the first problem found.

   w1, w2  vectors of len entries to compare
   prec    working precision, only reported in the diagnostic
   digits  number of digits used when printing entries
   q       problem size, only reported in the diagnostic
   f1, f2  names of the routines that produced w1 and w2

   An entry fails if the two balls do not overlap, or if w1[i] is nonzero
   and either ball has fewer than 30 bits of relative accuracy.

   slong values are printed with %wd (the flint_printf conversion for
   slong); the accuracy diagnostic labels the entries with f1/f2 like the
   mismatch diagnostic does. */
void
check_vec_eq_prec(acb_srcptr w1, acb_srcptr w2, slong len, slong prec, slong digits, ulong q, char f1[], char f2[])
{
    slong i;

    for (i = 0; i < len; i++)
    {
        if (!acb_overlaps(w1 + i, w2 + i))
        {
            flint_printf("FAIL\n\n");
            flint_printf("q = %wu, size = %wd\n", q, len);
            flint_printf("\nDFT differ from index %wd / %wd \n", i, len);
            flint_printf("\n%s =\n", f1);
            acb_vec_printd_index(w1, len, digits);
            flint_printf("\n%s =\n", f2);
            acb_vec_printd_index(w2, len, digits);
            flint_printf("\n\n");
            abort();
        }
        else if (!acb_is_zero(w1 + i) && (acb_rel_accuracy_bits(w1 + i) < 30
                    || acb_rel_accuracy_bits(w2 + i) < 30))
        {
            flint_printf("FAIL\n\n");
            flint_printf("q = %wu\n", q);
            flint_printf("\nDFT inaccurate from index %wd / %wd \n", i, len);
            flint_printf("\n%s =\n", f1);
            acb_printd(w1 + i, digits);
            flint_printf("\n%s =\n", f2);
            acb_printd(w2 + i, digits);
            flint_printf("\nerrors %wd & %wd [prec = %wd]\n",
                    acb_rel_accuracy_bits(w1 + i),
                    acb_rel_accuracy_bits(w2 + i), prec);
            abort();
        }
    }
}
/* Test driver: for each size in q, convolve two fixed integer sequences
   with the naive routine and with every other routine in func, and check
   the results against the naive one with check_vec_eq_prec. */
int main()
{
    slong prec = 100, digits = 30;
    ulong q[13] = { 2, 3, 4, 5, 6, 23, 10, 15, 30, 59, 308, 335, 961};
    slong nq = 13;
    do_f func[2] = { acb_dirichlet_dft_convol_naive, acb_dirichlet_dft_convol_rad2 };
    char * name[4] = { "naive", "rad2" };
    slong nf = 2;
    flint_rand_t state;
    slong k;

    flint_printf("convol....");
    fflush(stdout);

    flint_randinit(state);

    for (k = 0; k < nq; k++)
    {
        slong len = q[k];
        slong i, f;
        acb_ptr ref, res, x, y;

        ref = _acb_vec_init(len);
        res = _acb_vec_init(len);
        x = _acb_vec_init(len);
        y = _acb_vec_init(len);

        /* deterministic inputs x[i] = len - i, y[i] = i^2; state is kept
           available for switching to random inputs (n_randint) */
        for (i = 0; i < len; i++)
        {
            acb_set_si(x + i, len - i);
            acb_set_si(y + i, i * i);
        }

        /* func[0] provides the reference result */
        func[0](ref, x, y, len, prec);

        for (f = 1; f < nf; f++)
        {
            func[f](res, x, y, len, prec);
            check_vec_eq_prec(ref, res, len, prec, digits, q[k], name[0], name[f]);
        }

        _acb_vec_clear(x, len);
        _acb_vec_clear(y, len);
        _acb_vec_clear(ref, len);
        _acb_vec_clear(res, len);
    }

    flint_randclear(state);
    flint_cleanup();
    flint_printf("PASS\n");
    return EXIT_SUCCESS;
}

View file

@ -60,9 +60,9 @@ int main()
ulong q[13] = { 2, 3, 4, 5, 6, 23, 10, 15, 30, 59, 308, 335, 961};
flint_rand_t state;
slong f, nf = 3;
do_f func[3] = { acb_dirichlet_dft_pol, acb_dirichlet_dft_cyc, acb_dirichlet_dft_crt };
char * name[3] = { "pol", "cyc", "crt" };
slong f, nf = 4;
do_f func[4] = { acb_dirichlet_dft_pol, acb_dirichlet_dft_cyc, acb_dirichlet_dft_crt , acb_dirichlet_dft_bluestein };
char * name[4] = { "pol", "cyc", "crt", "bluestein" };
flint_printf("dft....");
fflush(stdout);
@ -104,11 +104,14 @@ int main()
/* radix2 dft */
for (k = 1; k < 12; k++)
{
slong n = 1 << k;
slong n = 1 << k, j;
acb_ptr v, w1, w2;
v = w2 = _acb_vec_init(n);
w1 = _acb_vec_init(n);
for (j = 0; j < n; j++)
acb_set_si(v + k, k);
acb_dirichlet_dft_pol(w1, v, n, prec);
acb_dirichlet_dft_rad2(v, k, prec);