arb/acb_dft/bluestein.c

139 lines
3.5 KiB
C
Raw Normal View History

/*
Copyright (C) 2016 Pascal Molin
This file is part of Arb.
Arb is free software: you can redistribute it and/or modify it under
the terms of the GNU Lesser General Public License (LGPL) as published
by the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version. See <http://www.gnu.org/licenses/>.
*/
#include "acb_dft.h"
2017-10-23 12:23:17 +02:00
#include "acb_modular.h"
/*
 * Bluestein chirp factors: fill z[0..n-1] with z[k] = w^(k^2), where w is a
 * 2n-th root of unity.  Only the exponent k^2 mod 2n matters, and it is
 * tracked incrementally through (k+1)^2 = k^2 + (2k + 1).
 *
 * In the large-n branch w is the complex conjugate of acb_unit_root(2n).
 * NOTE(review): the small-n branch takes its roots from
 * _acb_vec_unit_roots with order -2n, presumably the same conjugated
 * root -- confirm against that function's sign convention.
 *
 * z    : output vector with room for n entries (already initialised)
 * n    : number of factors to produce
 * prec : working precision forwarded to the acb routines
 */
static void
_acb_vec_bluestein_factors(acb_ptr z, slong n, slong prec)
{
    nmod_t mod2n;
    slong i, e;

    /* all exponent arithmetic below is done modulo 2n */
    nmod_init(&mod2n, 2 * n);

    if (n < 30)
    {
        /* Small case: tabulate all 2n roots once and pick the squares.
         * The function is mostly used with prime-power n, where the
         * squares have index 2 only, so an addition sequence would not
         * gain much here. */
        acb_ptr roots = _acb_vec_init(2 * n);

        _acb_vec_unit_roots(roots, -2 * n, 2 * n, prec);

        e = 0;
        for (i = 0; i < n; i++)
        {
            acb_set(z + i, roots + e);
            /* next square: (i + 1)^2 = i^2 + 2i + 1 (mod 2n) */
            e = nmod_add(e, 2 * i + 1, mod2n);
        }

        _acb_vec_clear(roots, 2 * n);
        return;
    }

    /* Large case: only compute the powers w^j, 0 <= j <= n, that are needed
     * (exponents above n are obtained by conjugation), chaining them with
     * the addition sequence produced by acb_modular_fill_addseq. */
    {
        slong step;
        slong *sq = flint_malloc(n * sizeof(slong));          /* sq[k] = k^2 mod 2n */
        slong *chain = flint_malloc((n + 1) * sizeof(slong)); /* addition-sequence table */
        acb_ptr pw = _acb_vec_init(n + 1);                    /* pw[j] = w^j */

        for (i = 0; i < n; i++)
            chain[i] = 0;

        /* record each square and mark its folded exponent as required */
        e = 0;
        step = 1;
        for (i = 0; i < n; i++)
        {
            sq[i] = e;
            chain[(e < n) ? e : 2 * n - e] = -1;
            e = nmod_add(e, step, mod2n);
            step = nmod_add(step, 2, mod2n);
        }

        acb_modular_fill_addseq(chain, n);

        /* seed values: w^0 = 1, w^1 = conj(unit root), w^n = -1 */
        acb_one(pw + 0);
        acb_unit_root(pw + 1, 2 * n, prec);
        acb_conj(pw + 1, pw + 1);
        acb_set_si(pw + n, -1);

        /* a nonzero chain[j] names an earlier exponent a with
         * w^j = w^a * w^(j - a) */
        for (i = 2; i < n; i++)
        {
            if (chain[i])
                acb_mul(pw + i, pw + chain[i], pw + i - chain[i], prec);
        }

        /* exponents above n are read off as conjugates of w^(2n - s) */
        for (i = 0; i < n; i++)
        {
            if (sq[i] <= n)
                acb_set(z + i, pw + sq[i]);
            else
                acb_conj(z + i, pw + 2 * n - sq[i]);
        }

        _acb_vec_clear(pw, n + 1);
        flint_free(sq);
        flint_free(chain);
    }
}
/*
 * Prepare a Bluestein DFT scheme of length n reading its input with
 * stride dv.
 *
 * Stores n and dv in t, sets up the radix-2 sub-scheme t->rad2 with
 * exponent e = n_clog(2n - 1, 2), and allocates t->z (n entries), filling
 * it via _acb_vec_bluestein_factors.
 */
void
_acb_dft_bluestein_init(acb_dft_bluestein_t t, slong dv, slong n, slong prec)
{
    int log2_len;

    t->n = n;
    t->dv = dv;

    /* exponent handed to the radix-2 scheme used for the convolution */
    log2_len = n_clog(2 * n - 1, 2);

    if (DFT_VERB)
    {
        flint_printf("dft_bluestein: init z[2^%i]\n", log2_len);
    }

    acb_dft_rad2_init(t->rad2, log2_len, prec);

    /* chirp factors z[k], k = 0..n-1 */
    t->z = _acb_vec_init(n);
    _acb_vec_bluestein_factors(t->z, n, prec);
}
/*
 * Length-n DFT of v (read with stride t->dv) into w, using the precomputed
 * Bluestein scheme t.
 *
 * The transform is expressed as a cyclic convolution of length
 * np = t->rad2->n, evaluated with the radix-2 sub-scheme:
 *   fp = input multiplied by the chirp factors z (remaining entries stay
 *        at their initial value),
 *   gp = conjugated chirp factors, mirrored so that gp[np - k] = gp[k],
 *   w  = z * (inverse transform of the pointwise product of the two
 *        transformed vectors), first n entries only.
 *
 * w    : output, n entries
 * v    : input vector
 * t    : scheme set up by the bluestein init function
 * prec : working precision
 *
 * NOTE(review): correctness relies on np being large enough that the
 * mirrored entries of gp do not overlap gp[0..n-1]; init requests the
 * exponent n_clog(2n - 1, 2) for this -- confirm that t->rad2->n is the
 * corresponding power of two.
 */
void
acb_dft_bluestein_precomp(acb_ptr w, acb_srcptr v, const acb_dft_bluestein_t t, slong prec)
{
    slong k;
    slong n = t->n;
    slong np = t->rad2->n;
    slong dv = t->dv;
    acb_ptr z = t->z;
    acb_ptr fp, gp;

    /* chirp-modulated input, in a buffer of the convolution length */
    fp = _acb_vec_init(np);
    _acb_vec_kronecker_mul_step(fp, z, v, dv, n, prec);

    /* symmetric kernel built from the conjugated chirp factors */
    gp = _acb_vec_init(np);
    acb_one(gp + 0);
    for (k = 1; k < n; k++)
    {
        acb_conj(gp + k, z + k);
        acb_set(gp + np - k, gp + k);
    }

    /* cyclic convolution through the radix-2 transform */
    acb_dft_rad2_precomp_inplace(fp, t->rad2, prec);
    acb_dft_rad2_precomp_inplace(gp, t->rad2, prec);
    _acb_vec_kronecker_mul(gp, gp, fp, np, prec);
    acb_dft_inverse_rad2_precomp_inplace(gp, t->rad2, prec);

    /* final chirp multiplication yields the n output coefficients */
    _acb_vec_kronecker_mul(w, z, gp, n, prec);

    /* Both buffers were initialised with np entries, so all np must be
     * cleared; clearing only n leaked the entries n..np-1. */
    _acb_vec_clear(fp, np);
    _acb_vec_clear(gp, np);
}
/*
 * One-shot Bluestein DFT: sets up a temporary scheme for length len,
 * applies it to v writing the result to w, then releases the scheme.
 */
void
acb_dft_bluestein(acb_ptr w, acb_srcptr v, slong len, slong prec)
{
    acb_dft_bluestein_t scheme;

    acb_dft_bluestein_init(scheme, len, prec);
    acb_dft_bluestein_precomp(w, v, scheme, prec);
    acb_dft_bluestein_clear(scheme);
}