mirror of
https://github.com/vale981/arb
synced 2025-03-05 09:21:38 -05:00
bluestein dft, WIP
This commit is contained in:
parent
ae23aecd8f
commit
0f3eea07c2
7 changed files with 295 additions and 22 deletions
|
@ -298,8 +298,12 @@ void acb_dirichlet_dft_pol(acb_ptr w, acb_srcptr v, slong len, slong prec);
|
|||
void acb_dirichlet_dft_crt(acb_ptr w, acb_srcptr v, slong len, slong prec);
|
||||
void acb_dirichlet_dft_cyc(acb_ptr w, acb_srcptr v, slong len, slong prec);
|
||||
void acb_dirichlet_dft_rad2(acb_ptr v, int e, slong prec);
|
||||
void acb_dirichlet_dft_bluestein(acb_ptr w, acb_srcptr v, slong len, slong prec);
|
||||
void acb_dirichlet_dft_prod(acb_ptr w, acb_srcptr v, slong * cyc, slong num, slong prec);
|
||||
|
||||
void acb_dirichlet_dft_convol_naive(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, slong prec);
|
||||
void acb_dirichlet_dft_convol_rad2(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, slong prec);
|
||||
|
||||
void acb_dirichlet_dft_conrey(acb_ptr w, acb_srcptr v, const acb_dirichlet_group_t G, slong prec);
|
||||
void acb_dirichlet_dft(acb_ptr w, acb_srcptr v, const acb_dirichlet_group_t G, slong prec);
|
||||
|
||||
|
@ -347,6 +351,16 @@ acb_dirichlet_dft_rad2_struct;
|
|||
|
||||
typedef acb_dirichlet_dft_rad2_struct acb_dirichlet_dft_rad2_t[1];
|
||||
|
||||
typedef struct
|
||||
{
|
||||
slong n;
|
||||
acb_ptr z;
|
||||
acb_dirichlet_dft_rad2_t rad2;
|
||||
}
|
||||
acb_dirichlet_dft_bluestein_struct;
|
||||
|
||||
typedef acb_dirichlet_dft_bluestein_struct acb_dirichlet_dft_bluestein_t[1];
|
||||
|
||||
typedef struct
|
||||
{
|
||||
slong n;
|
||||
|
@ -438,11 +452,12 @@ acb_dirichlet_dft_cyc_precomp(acb_ptr w, acb_srcptr v, const acb_dirichlet_dft_c
|
|||
{
|
||||
acb_dirichlet_dft_step(w, v, cyc->cyc, cyc->num, prec);
|
||||
}
|
||||
void acb_dirichlet_dft_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong prec);
|
||||
void acb_dirichlet_dft_rad2_precomp(acb_ptr v, const acb_dirichlet_dft_rad2_t rad2, slong prec);
|
||||
void acb_dirichlet_dft_crt_precomp(acb_ptr w, acb_srcptr v, const acb_dirichlet_dft_crt_t crt, slong prec);
|
||||
void acb_dirichlet_dft_prod_precomp(acb_ptr w, acb_srcptr v, const acb_dirichlet_dft_prod_t prod, slong prec);
|
||||
|
||||
void acb_dirichlet_dft_inverse_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong prec);
|
||||
void acb_dirichlet_dft_inverse_rad2_precomp(acb_ptr v, const acb_dirichlet_dft_rad2_t rad2, slong prec);
|
||||
void acb_dirichlet_dft_convol_rad2_precomp(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, const acb_dirichlet_dft_rad2_t, slong prec);
|
||||
|
||||
void _acb_dirichlet_dft_precomp_init(acb_dirichlet_dft_pre_t pre, slong dv, acb_ptr z, slong dz, slong len, slong prec);
|
||||
void acb_dirichlet_dft_precomp_init(acb_dirichlet_dft_pre_t pre, slong len, slong prec);
|
||||
|
@ -502,6 +517,15 @@ acb_dirichlet_dft_rad2_clear(acb_dirichlet_dft_rad2_t t)
|
|||
_acb_vec_clear(t->z, t->nz);
|
||||
}
|
||||
|
||||
void acb_dirichlet_dft_bluestein_init(acb_dirichlet_dft_bluestein_t t, slong n, slong prec);
|
||||
|
||||
ACB_DIRICHLET_INLINE void
|
||||
acb_dirichlet_dft_bluestein_clear(acb_dirichlet_dft_bluestein_t t)
|
||||
{
|
||||
_acb_vec_clear(t->z, t->n);
|
||||
acb_dirichlet_dft_rad2_clear(t->rad2);
|
||||
}
|
||||
|
||||
void _acb_dirichlet_dft_crt_init(acb_dirichlet_dft_crt_t crt, slong dv, slong len, slong prec);
|
||||
|
||||
ACB_DIRICHLET_INLINE void
|
||||
|
@ -543,6 +567,18 @@ _acb_vec_kronecker_mul(acb_ptr z, acb_srcptr x, acb_srcptr y, slong len, slong p
|
|||
acb_mul(z + k, x + k, y + k, prec);
|
||||
}
|
||||
|
||||
/* z[k] = conj(x[k])*y[k] */
|
||||
ACB_DIRICHLET_INLINE void
|
||||
acb_vec_kronecker_mul_conj(acb_ptr z, acb_srcptr x, acb_srcptr y, slong len, slong prec)
|
||||
{
|
||||
slong k;
|
||||
for (k = 0; k < len; k++)
|
||||
{
|
||||
acb_conj(z + k, x + k);
|
||||
acb_mul(z + k, z + k, y + k, prec);
|
||||
}
|
||||
}
|
||||
|
||||
ACB_DIRICHLET_INLINE void
|
||||
acb_vec_printd(acb_srcptr vec, slong len, slong digits)
|
||||
{
|
||||
|
|
67
acb_dirichlet/dft_bluestein.c
Normal file
67
acb_dirichlet/dft_bluestein.c
Normal file
|
@ -0,0 +1,67 @@
|
|||
/*
|
||||
Copyright (C) 2016 Pascal Molin
|
||||
|
||||
This file is part of Arb.
|
||||
|
||||
Arb is free software: you can redistribute it and/or modify it under
|
||||
the terms of the GNU Lesser General Public License (LGPL) as published
|
||||
by the Free Software Foundation; either version 2.1 of the License, or
|
||||
(at your option) any later version. See <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "acb_dirichlet.h"
|
||||
|
||||
void
|
||||
acb_dirichlet_dft_bluestein_init(acb_dirichlet_dft_bluestein_t t, slong n, slong prec)
|
||||
{
|
||||
|
||||
nmod_t n2;
|
||||
slong k, k2;
|
||||
acb_ptr z2n;
|
||||
int e = n_clog(2 * n - 1, 2);
|
||||
acb_dirichlet_dft_rad2_init(t->rad2, e, prec);
|
||||
z2n = _acb_vec_init(2 * n);
|
||||
acb_dirichlet_vec_nth_roots(z2n, 2 * n, prec);
|
||||
nmod_init(&n2, 2 * n);
|
||||
t->n = n;
|
||||
t->z = _acb_vec_init(n);
|
||||
for (k = 0, k2 = 0; k < n; k++)
|
||||
{
|
||||
acb_conj(t->z + k, z2n + k2);
|
||||
k2 = nmod_add(k2, 2 * k + 1, n2);
|
||||
}
|
||||
_acb_vec_clear(z2n, 2 * n);
|
||||
}
|
||||
|
||||
void
|
||||
acb_dirichlet_dft_bluestein_precomp(acb_ptr w, acb_srcptr v, const acb_dirichlet_dft_bluestein_t t, slong prec)
|
||||
{
|
||||
slong n = t->n;
|
||||
acb_ptr vz, wz, z;
|
||||
z = t->z;
|
||||
/* TODO: allocate directly length 2^e and pad */
|
||||
flint_printf("\n\n====================\n\nv\n");
|
||||
acb_vec_printd_index(v, n, 10);
|
||||
vz = _acb_vec_init(n);
|
||||
acb_vec_kronecker_mul_conj(vz, z, v, n, prec);
|
||||
flint_printf("\nvz\n");
|
||||
acb_vec_printd_index(vz, n, 10);
|
||||
wz = _acb_vec_init(n);
|
||||
acb_dirichlet_dft_convol_rad2_precomp(wz, vz, z, n, t->rad2, prec);
|
||||
flint_printf("\nwz\n");
|
||||
acb_vec_printd_index(wz, n, 10);
|
||||
acb_vec_kronecker_mul_conj(w, z, wz, n, prec);
|
||||
flint_printf("\nw\n");
|
||||
acb_vec_printd_index(w, n, 10);
|
||||
_acb_vec_clear(wz, n);
|
||||
_acb_vec_clear(vz, n);
|
||||
}
|
||||
|
||||
void
|
||||
acb_dirichlet_dft_bluestein(acb_ptr w, acb_srcptr v, slong len, slong prec)
|
||||
{
|
||||
acb_dirichlet_dft_bluestein_t t;
|
||||
acb_dirichlet_dft_bluestein_init(t, len, prec);
|
||||
acb_dirichlet_dft_bluestein_precomp(w, v, t, prec);
|
||||
acb_dirichlet_dft_bluestein_clear(t);
|
||||
}
|
|
@ -16,14 +16,25 @@ void
|
|||
acb_dirichlet_dft_convol_pad(acb_ptr fp, acb_ptr gp, acb_srcptr f, acb_srcptr g, slong n, slong np)
|
||||
{
|
||||
slong k;
|
||||
|
||||
if (np < 2 * n - 1)
|
||||
{
|
||||
flint_printf("dft_convol_pad: overlapping padding %ld < 2*%ld-1\n", np, n);
|
||||
abort();
|
||||
}
|
||||
|
||||
for (k = 0; k < n; k++)
|
||||
acb_set(gp + k, g + k);
|
||||
for (; k < np; k++)
|
||||
acb_zero(gp + k);
|
||||
|
||||
for (k = 0; k < n; k++)
|
||||
acb_set(fp + k, f + k);
|
||||
for (k = 1; k < n; k++)
|
||||
acb_set(fp + np - k, f + n - k);
|
||||
for (k = n; k <= np - n; k++)
|
||||
acb_zero(fp + k);
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -37,22 +48,58 @@ acb_dirichlet_dft_inverse_cyc(acb_ptr w, acb_srcptr v, slong len, slong prec)
|
|||
}
|
||||
|
||||
void
|
||||
acb_dirichlet_dft_convol_fft(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, slong prec)
|
||||
acb_dirichlet_dft_convol_rad2_precomp(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, const acb_dirichlet_dft_rad2_t rad2, slong prec)
|
||||
{
|
||||
int e;
|
||||
slong k, np;
|
||||
slong np;
|
||||
acb_ptr fp, gp;
|
||||
acb_dirichlet_dft_rad2_t dft;
|
||||
e = n_clog(2 * len + 1, 2);
|
||||
acb_dirichlet_dft_rad2_init(dft, e, prec);
|
||||
np = dft->n;
|
||||
np = rad2->n;
|
||||
|
||||
flint_printf("\nf\n");
|
||||
acb_vec_printd_index(f, len, 10);
|
||||
flint_printf("\ng\n");
|
||||
acb_vec_printd_index(g, len, 10);
|
||||
|
||||
fp = _acb_vec_init(np);
|
||||
gp = _acb_vec_init(np);
|
||||
acb_dirichlet_dft_convol_pad(fp, gp, f, g, len, np);
|
||||
acb_dirichlet_dft_rad2_precomp(fp, dft, prec);
|
||||
acb_dirichlet_dft_rad2_precomp(gp, dft, prec);
|
||||
|
||||
flint_printf("\nF\n");
|
||||
acb_vec_printd_index(fp, np, 10);
|
||||
flint_printf("\nG\n");
|
||||
acb_vec_printd_index(gp, np, 10);
|
||||
|
||||
acb_dirichlet_dft_rad2_precomp(fp, rad2, prec);
|
||||
|
||||
flint_printf("\nDFT F\n");
|
||||
acb_vec_printd_index(fp, np, 10);
|
||||
|
||||
acb_dirichlet_dft_rad2_precomp(gp, rad2, prec);
|
||||
|
||||
flint_printf("\nDFT G\n");
|
||||
acb_vec_printd_index(gp, np, 10);
|
||||
|
||||
_acb_vec_kronecker_mul(gp, gp, fp, np, prec);
|
||||
acb_dirichlet_dft_inverse_rad2_precomp(gp, dft, prec);
|
||||
for (k = 0; k < len; k++)
|
||||
acb_set(w + k, gp + k);
|
||||
|
||||
flint_printf("\n(DFT F)(DFT G)=DFT(F*G)\n");
|
||||
acb_vec_printd_index(gp, np, 10);
|
||||
|
||||
acb_dirichlet_dft_inverse_rad2_precomp(gp, rad2, prec);
|
||||
|
||||
flint_printf("\nF*G\n");
|
||||
acb_vec_printd_index(gp, np, 10);
|
||||
|
||||
_acb_vec_set(w, gp, len);
|
||||
_acb_vec_clear(fp, np);
|
||||
_acb_vec_clear(gp, np);
|
||||
}
|
||||
|
||||
void
|
||||
acb_dirichlet_dft_convol_rad2(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, slong prec)
|
||||
{
|
||||
int e;
|
||||
acb_dirichlet_dft_rad2_t dft;
|
||||
e = n_clog(2 * len - 1, 2);
|
||||
acb_dirichlet_dft_rad2_init(dft, e, prec);
|
||||
acb_dirichlet_dft_convol_rad2_precomp(w, f, g, len, dft, prec);
|
||||
acb_dirichlet_dft_rad2_clear(dft);
|
||||
}
|
||||
|
|
|
@ -24,8 +24,8 @@ acb_dirichlet_dft_convol_naive(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len,
|
|||
gy = g;
|
||||
acb_zero(wx);
|
||||
for (y = 0; y <= x; y++)
|
||||
acb_addmul(wx, fx--, gy++, prec);
|
||||
acb_addmul(wx, fx - y, g + y, prec);
|
||||
for (; y < len; y++)
|
||||
acb_addmul(wx, f + x - y, g + y, prec);
|
||||
acb_addmul(wx, fx + (len - y), g + y, prec);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -38,7 +38,7 @@ acb_dirichlet_dft_rad2_reorder(acb_ptr v, slong n)
|
|||
|
||||
/* remark: can use same rad2 with smaller power of 2 */
|
||||
void
|
||||
acb_dirichlet_dft_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong prec)
|
||||
acb_dirichlet_dft_rad2_precomp(acb_ptr v, const acb_dirichlet_dft_rad2_t rad2, slong prec)
|
||||
{
|
||||
slong j, k, l;
|
||||
slong n = rad2->n, nz = rad2->nz;
|
||||
|
@ -46,6 +46,8 @@ acb_dirichlet_dft_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong p
|
|||
acb_t tmp;
|
||||
acb_init(tmp);
|
||||
|
||||
acb_dirichlet_dft_rad2_reorder(v, n);
|
||||
|
||||
for (k = 1, l = nz; k < n; k <<= 1, l >>= 1)
|
||||
for (p = v; p < vend; p += k)
|
||||
for (j = 0; j < nz; j += l, p++)
|
||||
|
@ -59,7 +61,7 @@ acb_dirichlet_dft_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong p
|
|||
}
|
||||
|
||||
void
|
||||
acb_dirichlet_dft_inverse_rad2_precomp(acb_ptr v, acb_dirichlet_dft_rad2_t rad2, slong prec)
|
||||
acb_dirichlet_dft_inverse_rad2_precomp(acb_ptr v, const acb_dirichlet_dft_rad2_t rad2, slong prec)
|
||||
{
|
||||
slong k, n = rad2->n;
|
||||
acb_dirichlet_dft_rad2_precomp(v, rad2, prec);
|
||||
|
|
118
acb_dirichlet/test/t-convol.c
Normal file
118
acb_dirichlet/test/t-convol.c
Normal file
|
@ -0,0 +1,118 @@
|
|||
/*
|
||||
Copyright (C) 2016 Pascal Molin
|
||||
|
||||
This file is part of Arb.
|
||||
|
||||
Arb is free software: you can redistribute it and/or modify it under
|
||||
the terms of the GNU Lesser General Public License (LGPL) as published
|
||||
by the Free Software Foundation; either version 2.1 of the License, or
|
||||
(at your option) any later version. See <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "acb_dirichlet.h"
|
||||
|
||||
typedef void (*do_f) (acb_ptr z, acb_srcptr x, acb_srcptr y, slong len, slong prec);
|
||||
|
||||
void
|
||||
check_vec_eq_prec(acb_srcptr w1, acb_srcptr w2, slong len, slong prec, slong digits, ulong q, char f1[], char f2[])
|
||||
{
|
||||
slong i;
|
||||
|
||||
for (i = 0; i < len; i++)
|
||||
{
|
||||
if (!acb_overlaps(w1 + i, w2 + i))
|
||||
{
|
||||
flint_printf("FAIL\n\n");
|
||||
flint_printf("q = %wu, size = %wu\n", q, len);
|
||||
flint_printf("\nDFT differ from index %ld / %ld \n", i, len);
|
||||
flint_printf("\n%s =\n", f1);
|
||||
acb_vec_printd_index(w1, len, digits);
|
||||
flint_printf("\n%s =\n", f2);
|
||||
acb_vec_printd_index(w2, len, digits);
|
||||
flint_printf("\n\n");
|
||||
abort();
|
||||
}
|
||||
else if (!acb_is_zero(w1+i) && (acb_rel_accuracy_bits(w1 + i) < 30
|
||||
|| acb_rel_accuracy_bits(w2 + i) < 30))
|
||||
{
|
||||
flint_printf("FAIL\n\n");
|
||||
flint_printf("q = %wu\n", q);
|
||||
flint_printf("\nDFT inaccurate from index %ld / %ld \n", i, len);
|
||||
flint_printf("\nnaive =\n");
|
||||
acb_printd(w1 + i, digits);
|
||||
flint_printf("\nfast =\n");
|
||||
acb_printd(w2 + i, digits);
|
||||
flint_printf("\nerrors %ld & %ld [prec = %wu]\n",
|
||||
acb_rel_accuracy_bits(w1 + i),
|
||||
acb_rel_accuracy_bits(w2 + i), prec);
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
slong k;
|
||||
slong prec = 100, digits = 30;
|
||||
slong nq = 13;
|
||||
ulong q[13] = { 2, 3, 4, 5, 6, 23, 10, 15, 30, 59, 308, 335, 961};
|
||||
flint_rand_t state;
|
||||
|
||||
slong f, nf = 2;
|
||||
do_f func[2] = { acb_dirichlet_dft_convol_naive, acb_dirichlet_dft_convol_rad2 };
|
||||
char * name[4] = { "naive", "rad2" };
|
||||
|
||||
flint_printf("convol....");
|
||||
fflush(stdout);
|
||||
|
||||
flint_randinit(state);
|
||||
|
||||
for (k = 0; k < nq; k++)
|
||||
{
|
||||
slong i;
|
||||
acb_ptr z1, z2, x, y;
|
||||
|
||||
z1 = _acb_vec_init(q[k]);
|
||||
z2 = _acb_vec_init(q[k]);
|
||||
x = _acb_vec_init(q[k]);
|
||||
y = _acb_vec_init(q[k]);
|
||||
|
||||
for (i = 0; i < q[k]; i++)
|
||||
{
|
||||
acb_set_si(x + i, q[k] - i);
|
||||
acb_set_si(y + i, i * i);
|
||||
/*
|
||||
acb_set_si(x + i, n_randint(state, q[k]));
|
||||
acb_set_si(y + i, n_randint(state, q[k]));
|
||||
*/
|
||||
}
|
||||
|
||||
for (f = 0; f < nf; f++)
|
||||
{
|
||||
|
||||
acb_ptr z = (f == 0) ? z1 : z2;
|
||||
|
||||
func[f](z, x, y, q[k], prec);
|
||||
|
||||
if (f == 0)
|
||||
continue;
|
||||
|
||||
check_vec_eq_prec(z1, z2, q[k], prec, digits, q[k], name[0], name[f]);
|
||||
|
||||
}
|
||||
|
||||
_acb_vec_clear(x, q[k]);
|
||||
_acb_vec_clear(y, q[k]);
|
||||
_acb_vec_clear(z1, q[k]);
|
||||
_acb_vec_clear(z2, q[k]);
|
||||
}
|
||||
|
||||
flint_randclear(state);
|
||||
flint_cleanup();
|
||||
flint_printf("PASS\n");
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
|
|
@ -60,9 +60,9 @@ int main()
|
|||
ulong q[13] = { 2, 3, 4, 5, 6, 23, 10, 15, 30, 59, 308, 335, 961};
|
||||
flint_rand_t state;
|
||||
|
||||
slong f, nf = 3;
|
||||
do_f func[3] = { acb_dirichlet_dft_pol, acb_dirichlet_dft_cyc, acb_dirichlet_dft_crt };
|
||||
char * name[3] = { "pol", "cyc", "crt" };
|
||||
slong f, nf = 4;
|
||||
do_f func[4] = { acb_dirichlet_dft_pol, acb_dirichlet_dft_cyc, acb_dirichlet_dft_crt , acb_dirichlet_dft_bluestein };
|
||||
char * name[4] = { "pol", "cyc", "crt", "bluestein" };
|
||||
|
||||
flint_printf("dft....");
|
||||
fflush(stdout);
|
||||
|
@ -104,11 +104,14 @@ int main()
|
|||
/* radix2 dft */
|
||||
for (k = 1; k < 12; k++)
|
||||
{
|
||||
slong n = 1 << k;
|
||||
slong n = 1 << k, j;
|
||||
acb_ptr v, w1, w2;
|
||||
v = w2 = _acb_vec_init(n);
|
||||
w1 = _acb_vec_init(n);
|
||||
|
||||
for (j = 0; j < n; j++)
|
||||
acb_set_si(v + k, k);
|
||||
|
||||
acb_dirichlet_dft_pol(w1, v, n, prec);
|
||||
acb_dirichlet_dft_rad2(v, k, prec);
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue