mirror of
https://github.com/vale981/arb
synced 2025-03-04 17:01:40 -05:00
122 lines
3 KiB
C
122 lines
3 KiB
C
/*
|
|
Copyright (C) 2016 Pascal Molin
|
|
|
|
This file is part of Arb.
|
|
|
|
Arb is free software: you can redistribute it and/or modify it under
|
|
the terms of the GNU Lesser General Public License (LGPL) as published
|
|
by the Free Software Foundation; either version 2.1 of the License, or
|
|
(at your option) any later version. See <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include "acb_dft.h"
|
|
|
|
/* swap each element with one with bit-reversed index */
|
|
void
|
|
acb_dft_rad2_reorder(acb_ptr v, slong n)
|
|
{
|
|
slong i, j, k, l;
|
|
|
|
for (i = 0, l = n>>1; i < l; i++)
|
|
{
|
|
/* j = bit reversal of i */
|
|
for (k = 1, j = 0; k < n; k <<= 1)
|
|
{
|
|
j <<= 1;
|
|
if (i & k)
|
|
j |= 1;
|
|
}
|
|
if (i < j)
|
|
acb_swap(v + i, v + j);
|
|
else if (i > j)
|
|
acb_swap(v + n - 1 - i, v + n - 1 - j);
|
|
i++, j |= l;
|
|
acb_swap(v + i, v + j);
|
|
}
|
|
|
|
}
|
|
|
|
void
|
|
_acb_dft_rad2_init(acb_dft_rad2_t t, slong dv, int e, slong prec)
|
|
{
|
|
if (e < 0)
|
|
{
|
|
flint_printf("acb_dft_rad2_init: need e >= 0");
|
|
abort();
|
|
}
|
|
t->e = e;
|
|
t->n = WORD(1) << e;
|
|
t->dv = dv;
|
|
t->nz = t->n >> 1;
|
|
t->z = _acb_vec_init(t->nz);
|
|
/* set n/2 roots of order n */
|
|
_acb_vec_unit_roots(t->z, -t->n, t->nz, prec);
|
|
}
|
|
|
|
/* remark: can use same rad2 with smaller power of 2 */
|
|
void
|
|
acb_dft_rad2_precomp_inplace(acb_ptr v, const acb_dft_rad2_t rad2, slong prec)
|
|
{
|
|
if (flint_get_num_threads() > 1 && rad2-> e > 9)
|
|
{
|
|
acb_dft_rad2_precomp_inplace_threaded(v, rad2, prec);
|
|
}
|
|
else
|
|
{
|
|
slong j, k, l;
|
|
slong n = rad2->n, nz = rad2->nz;
|
|
acb_ptr p, vend = v + n, w = rad2->z;
|
|
acb_t tmp;
|
|
acb_init(tmp);
|
|
|
|
acb_dft_rad2_reorder(v, n);
|
|
|
|
for (k = 1, l = nz; k < n; k <<= 1, l >>= 1)
|
|
for (p = v; p < vend; p += k)
|
|
for (j = 0; j < nz; j += l, p++)
|
|
{
|
|
acb_mul(tmp, p + k, w + j, prec);
|
|
acb_sub(p + k, p + 0, tmp, prec);
|
|
acb_add(p + 0, p + 0, tmp, prec);
|
|
}
|
|
|
|
acb_clear(tmp);
|
|
}
|
|
}
|
|
|
|
void
|
|
acb_dft_inverse_rad2_precomp_inplace(acb_ptr v, const acb_dft_rad2_t rad2, slong prec)
|
|
{
|
|
slong k, n = rad2->n;
|
|
acb_dft_rad2_precomp_inplace(v, rad2, prec);
|
|
_acb_vec_scalar_mul_2exp_si(v, v, n, - rad2->e);
|
|
for (k = 1; k < n / 2; k++)
|
|
acb_swap(v + k, v + n - k);
|
|
}
|
|
|
|
void
|
|
acb_dft_rad2_inplace(acb_ptr v, int e, slong prec)
|
|
{
|
|
acb_dft_rad2_t rad2;
|
|
acb_dft_rad2_init(rad2, e, prec);
|
|
acb_dft_rad2_precomp_inplace(v, rad2, prec);
|
|
acb_dft_rad2_clear(rad2);
|
|
}
|
|
|
|
void
|
|
acb_dft_rad2_precomp(acb_ptr w, acb_srcptr v, const acb_dft_rad2_t rad2, slong prec)
|
|
{
|
|
slong k;
|
|
for (k = 0; k < rad2->n; k++, v += rad2->dv)
|
|
acb_set(w + k, v + 0);
|
|
acb_dft_rad2_precomp_inplace(w, rad2, prec);
|
|
}
|
|
|
|
void
|
|
acb_dft_rad2(acb_ptr w, acb_srcptr v, int e, slong prec)
|
|
{
|
|
acb_dft_rad2_t rad2;
|
|
acb_dft_rad2_init(rad2, e, prec);
|
|
acb_dft_rad2_precomp(w, v, rad2, prec);
|
|
acb_dft_rad2_clear(rad2);
|
|
}
|