faster dft test

This commit is contained in:
Pascal 2017-10-29 14:31:43 +01:00
parent a08e24d04e
commit 6eb9c87335
3 changed files with 35 additions and 20 deletions

View file

@ -44,6 +44,9 @@ acb_dft_convol_rad2_precomp(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, co
acb_ptr fp, gp; acb_ptr fp, gp;
np = rad2->n; np = rad2->n;
if (len <= 0)
return;
fp = _acb_vec_init(np); fp = _acb_vec_init(np);
gp = _acb_vec_init(np); gp = _acb_vec_init(np);
@ -73,7 +76,9 @@ acb_dft_convol_rad2(acb_ptr w, acb_srcptr f, acb_srcptr g, slong len, slong prec
int e; int e;
acb_dft_rad2_t dft; acb_dft_rad2_t dft;
/* catch power of 2 */ /* catch power of 2 */
if ((len & (len - 1)) == 0) if (len <= 0)
return;
else if ((len & (len - 1)) == 0)
e = n_clog(len, 2); e = n_clog(len, 2);
else else
e = n_clog(2 * len - 1, 2); e = n_clog(2 * len - 1, 2);

View file

@ -39,6 +39,11 @@ acb_dft_rad2_reorder(acb_ptr v, slong n)
void void
_acb_dft_rad2_init(acb_dft_rad2_t t, slong dv, int e, slong prec) _acb_dft_rad2_init(acb_dft_rad2_t t, slong dv, int e, slong prec)
{ {
if (e < 0)
{
flint_printf("acb_dft_rad2_init: need e >= 0");
abort();
}
t->e = e; t->e = e;
t->n = 1 << e; t->n = 1 << e;
t->dv = dv; t->dv = dv;

View file

@ -56,7 +56,7 @@ int main()
slong prec = 100, digits = 30; slong prec = 100, digits = 30;
slong nq = 19; slong nq = 19;
ulong q[19] = { 0, 1, 2, 3, 4, 5, 6, 23, 10, 15, 16, 30, 59, 125, 308, 335, 525, 961, 1225}; ulong q[19] = { 0, 1, 2, 3, 4, 5, 6, 23, 10, 15, 16, 30, 59, 125, 308, 335, 525, 961, 1225};
slong nr = 10; slong nr = 5;
flint_rand_t state; flint_rand_t state;
slong f, nf = 5; slong f, nf = 5;
@ -71,13 +71,13 @@ int main()
/* cyclic dft */ /* cyclic dft */
for (k = 0; k < nq + nr; k++) for (k = 0; k < nq + nr; k++)
{ {
slong i, len; slong i, len, f0;
acb_ptr v, w1, w2, w3; acb_ptr v, w1, w2, w3;
if (k < nq) if (k < nq)
len = q[k]; len = q[k];
else else
len = n_randint(state, 2000); len = n_randint(state, 1000);
v = _acb_vec_init(len); v = _acb_vec_init(len);
w1 = _acb_vec_init(len); w1 = _acb_vec_init(len);
@ -87,10 +87,13 @@ int main()
for (i = 0; i < len; i++) for (i = 0; i < len; i++)
acb_set_si_si(v + i, i, 3 - i); acb_set_si_si(v + i, i, 3 - i);
for (f = 0; f < nf; f++) /* avoid naive for long transforms */
f0 = (len > 50);
for (f = f0; f < nf; f++)
{ {
acb_ptr w = (f == 0) ? w1 : w2; acb_ptr w = (f == f0) ? w1 : w2;
if (DFT_VERB) if (DFT_VERB)
flint_printf("\n%s %wu\n", name[f], len); flint_printf("\n%s %wu\n", name[f], len);
@ -98,24 +101,26 @@ int main()
/* compute DFT */ /* compute DFT */
func[f](w, v, len, prec); func[f](w, v, len, prec);
/* check aliasing */ if (len < 500)
_acb_vec_set(w3, v, len); {
func[f](w3, w3, len, prec); /* check aliasing */
_acb_vec_set(w3, v, len);
func[f](w3, w3, len, prec);
check_vec_eq_prec(w1, w3, len, prec, digits, len, "alias", name[0], name[f]); check_vec_eq_prec(w1, w3, len, prec, digits, len, "alias", name[0], name[f]);
}
if (f > f0)
if (f == 0) {
/* check non aliased */
check_vec_eq_prec(w1, w2, len, prec, digits, len, "no alias", name[0], name[f]);
}
else
{ {
/* check inverse */ /* check inverse */
acb_dft_inverse(w2, w1, len, prec); acb_dft_inverse(w2, w1, len, prec);
check_vec_eq_prec(v, w2, len, prec, digits, len, "inverse", "original", "inverse"); check_vec_eq_prec(v, w2, len, prec, digits, len, "inverse", "original", "inverse");
} }
else
{
/* check non aliased */
check_vec_eq_prec(w1, w2, len, prec, digits, len, "no alias", name[0], name[f]);
}
} }
_acb_vec_clear(v, len); _acb_vec_clear(v, len);
@ -125,7 +130,7 @@ int main()
} }
/* radix2 dft */ /* radix2 dft */
for (k = 0; k < 12; k++) for (k = 0; k < 11; k++)
{ {
slong n = 1 << k, j; slong n = 1 << k, j;
acb_ptr v, w1, w2; acb_ptr v, w1, w2;
@ -135,10 +140,10 @@ int main()
for (j = 0; j < n; j++) for (j = 0; j < n; j++)
acb_set_si_si(v + j, j, j + 2); acb_set_si_si(v + j, j, j + 2);
acb_dft_naive(w1, v, n, prec); acb_dft_cyc(w1, v, n, prec);
acb_dft_rad2_inplace(w2, k, prec); acb_dft_rad2_inplace(w2, k, prec);
check_vec_eq_prec(w1, w2, n, prec, digits, n, "rad2", "pol", "rad2"); check_vec_eq_prec(w1, w2, n, prec, digits, n, "rad2", "cyc", "rad2");
_acb_vec_clear(v, n); _acb_vec_clear(v, n);
_acb_vec_clear(w1, n); _acb_vec_clear(w1, n);