diff --git a/acb_dirichlet/arb_quadratic_powers.c b/acb_dirichlet/arb_quadratic_powers.c index ffc49e7d..4031ec32 100644 --- a/acb_dirichlet/arb_quadratic_powers.c +++ b/acb_dirichlet/arb_quadratic_powers.c @@ -25,7 +25,7 @@ #include "acb_dirichlet.h" -/* TODO: BSGS can reduce to nv mul */ +/* TODO: BSGS can reduce to nv mul */ void acb_dirichlet_arb_quadratic_powers(arb_ptr v, slong nv, const arb_t x, slong prec) { diff --git a/acb_dirichlet/arb_theta_naive.c b/acb_dirichlet/arb_theta_naive.c index 4b81f80e..1c8f8eff 100644 --- a/acb_dirichlet/arb_theta_naive.c +++ b/acb_dirichlet/arb_theta_naive.c @@ -50,7 +50,7 @@ acb_dirichlet_arb_theta_naive(acb_t res, const arb_t x, int parity, const ulong arb_mul(xk2, xk2, dx, prec); if (a[k] != ACB_DIRICHLET_CHI_NULL) { - acb_dirichlet_power(zk, z, a[k], prec); + acb_dirichlet_power(zk, z, a[k], prec); if (parity) acb_mul_si(zk, zk, k, prec); acb_addmul_arb(res, zk, xk2, prec); diff --git a/acb_dirichlet/char_conrey.c b/acb_dirichlet/char_conrey.c index 843cee2f..052ecdb0 100644 --- a/acb_dirichlet/char_conrey.c +++ b/acb_dirichlet/char_conrey.c @@ -25,7 +25,7 @@ #include "acb_dirichlet.h" -/* char n has exponents = log[k]*PHI[k] / gcd and order expo / gcd +/* char n has exponents = log[k]*PHI[k] / gcd and order expo / gcd * so that log = expo[k] */ void acb_dirichlet_char_conrey(acb_dirichlet_char_t chi, const acb_dirichlet_group_t G, const acb_dirichlet_conrey_t x) diff --git a/acb_dirichlet/char_eq.c b/acb_dirichlet/char_eq.c index 204fcada..bc6b3037 100644 --- a/acb_dirichlet/char_eq.c +++ b/acb_dirichlet/char_eq.c @@ -29,7 +29,7 @@ int acb_dirichlet_char_eq(const acb_dirichlet_group_t G, const acb_dirichlet_char_t chi1, const acb_dirichlet_char_t chi2) { acb_dirichlet_conrey_t x, y; - + if (chi1->q != chi2->q) return 0; diff --git a/acb_dirichlet/chi_theta_arb.c b/acb_dirichlet/chi_theta_arb.c index 3a0e60b8..74b156cb 100644 --- a/acb_dirichlet/chi_theta_arb.c +++ b/acb_dirichlet/chi_theta_arb.c @@ -43,7 +43,7 @@ acb_dirichlet_chi_theta_arb_smallorder(acb_t res, const acb_dirichlet_group_t G, { ulong * a; acb_dirichlet_powers_t z; - + a = flint_malloc(len * sizeof(ulong)); acb_dirichlet_ui_chi_vec(a, G, chi, len); diff --git a/acb_dirichlet/conrey_primitive.c b/acb_dirichlet/conrey_primitive.c index 9a7c0242..b73bbad0 100644 --- a/acb_dirichlet/conrey_primitive.c +++ b/acb_dirichlet/conrey_primitive.c @@ -29,12 +29,12 @@ void acb_dirichlet_conrey_primitive(acb_dirichlet_conrey_t y, const acb_dirichlet_group_t G, const acb_dirichlet_conrey_t x, ulong cond) { int k, l, f; - + l = 0; if (cond % 4 == 0) { y->log[l++] = x->log[0]; - + if (cond % 8 == 0) { ulong l2 = x->log[1]; diff --git a/acb_dirichlet/dft_fast.c b/acb_dirichlet/dft_fast.c index fd92acf1..67beb045 100644 --- a/acb_dirichlet/dft_fast.c +++ b/acb_dirichlet/dft_fast.c @@ -29,14 +29,11 @@ static acb_ptr vec_extract(acb_srcptr v, slong step, slong len) { - slong k; + slong k, l; acb_ptr res; res = flint_malloc(len * sizeof(acb_struct)); - for (k = 0; k < len; k++) - { - res[k] = v[0]; - v += step; - } + for (k = 0, l = 0; k < len; k++, l+=step) + res[k] = v[l]; return res; } void @@ -164,7 +161,7 @@ _acb_dft_cyc(acb_ptr w, acb_srcptr v, dft_cyc_step * cyc, slong num, slong prec) { _acb_dft_cyc(wi, vi, cyc + 1, num - 1, prec); if (i) - { + { for (j = 1; j < M; j++) acb_mul(wi + j, wi + j, z + dz * i * j, prec); } diff --git a/acb_dirichlet/dft_pol.c b/acb_dirichlet/dft_pol.c index d67b0439..5d77d373 100644 --- a/acb_dirichlet/dft_pol.c +++ b/acb_dirichlet/dft_pol.c @@ -26,23 +26,136 @@ #include "acb_poly.h" #include "acb_dirichlet.h" +void +_acb_vec_set_powers_step(acb_ptr xs, slong n, slong len, slong step, slong prec) +{ + slong i, j; + prec += n_clog(n, 2); + + for (i = 0, j = 0; i < len; i++, j += step) + { + if (i == 0) + acb_one(xs + j); + else if (i == 1) + acb_dirichlet_nth_root(xs + j, n, prec); + else if (i % 2 == 0) + acb_mul(xs + j, xs + j / 2, xs + j / 2, prec); + else + acb_mul(xs + j, xs + j - step, xs + step, prec); + } +} + +void +_acb_vec_roots_pe(acb_ptr z, slong p, slong e, slong len, slong step, slong prec) +{ + if (e <= 1) + { + _acb_vec_set_powers_step(z, p, len, step, prec); + } + else + { + slong q, r; + _acb_vec_roots_pe(z, p, e - 1, len / p, step * p, prec); + _acb_vec_set_powers_step(z, n_pow(p, e), p, step, prec); + + for (q = p; q < len; q += p) + for (r = 1; r < p; r++) + acb_mul(z + (q + r) * step, z + q * step, z + r * step, prec); + } +} + +#if 1 +acb_ptr +acb_roots_init(slong len, slong prec) +{ + slong i, q, q1; + acb_ptr z; + n_factor_t fac; + z = _acb_vec_init(len); + acb_one(z + 0); + + n_factor_init(&fac); + n_factor(&fac, len, 0); + q = q1 = 1; + + for (i = 0; i < fac.num; i++) + { + slong p, e, pe, mp, mq, m; + p = fac.p[i]; + e = fac.exp[i]; + pe = n_pow(p, e); + q1 *= pe; + mp = len / pe; + mq = len / q; + m = len / q1; + + _acb_vec_roots_pe(z, p, e, pe, mp, prec); + + if (i > 0) + { + slong j, jk, jl, k, l; + + jl = mq; /* m * pe; */ + jk = mp + mq; /* m * (q + pe); */ + for (k = 1, j = jk; k < pe; k++, j += jk) + { + for (l = 1; l < q; l++, j += jl) + { + if (j >= len) + j %= len; + /* + flint_printf("[mul] (%ld * %ld + %ld * %ld) * %ld = %ld = %ld = (%ld * %ld) * (%ld * %ld)\n",k,q,l,pe,m, + ((k * q + l * pe) % q1) * m, j ,k,mp,l,mq); + */ + acb_mul(z + j, z + k * mp, z + l * mq, prec); + } + } + } + q = q1; + } + + return z; +} +#else acb_ptr acb_roots_init(slong len, slong prec) { acb_t zeta; acb_ptr z; acb_init(zeta); + prec += n_clog(len, 2); acb_dirichlet_nth_root(zeta, len, prec); z = _acb_vec_init(len); + /* should use factorization */ _acb_vec_set_powers(z, zeta, len, prec); + /* + flint_printf("\nroots [order %ld, prec %ld]\n", len, prec); + acb_vec_printd(z, len, 30); + */ acb_clear(zeta); return z; } +#endif +/* all roots are already computed */ void _acb_dirichlet_dft_pol(acb_ptr w, acb_srcptr v, acb_srcptr z, slong len, slong prec) { + /* FIXME: huge accuracy loss */ +#if 0 _acb_poly_evaluate_vec_fast(w, v, len, z, len, prec); +#elif 0 + _acb_poly_evaluate_vec_iter(w, v, len, z, len, prec); +#else + slong i, j; + + for (i = 0; i < len; i++) + { + acb_zero(w + i); + for (j = 0; j < len; j++) + acb_addmul(w + i, v + j, z + (i * j % len), prec); + } +#endif } void diff --git a/acb_dirichlet/gauss_sum_factor.c b/acb_dirichlet/gauss_sum_factor.c index 1c382167..645d0080 100644 --- a/acb_dirichlet/gauss_sum_factor.c +++ b/acb_dirichlet/gauss_sum_factor.c @@ -71,7 +71,7 @@ acb_dirichlet_gauss_sum_factor(acb_t res, const acb_dirichlet_group_t G, const a slong k; acb_t tmp; - + for (k = (G->neven == 2); k < G->num; k++) { /* if e > 1 and not primitive, 0 */ diff --git a/acb_dirichlet/jacobi_sum_naive.c b/acb_dirichlet/jacobi_sum_naive.c index d09c2901..3145d163 100644 --- a/acb_dirichlet/jacobi_sum_naive.c +++ b/acb_dirichlet/jacobi_sum_naive.c @@ -46,7 +46,7 @@ acb_dirichlet_jacobi_sum_naive(acb_t res, const acb_dirichlet_group_t G, const a m1 = chi1->order.n; m2 = chi2->order.n; - g = n_gcd(m1, m2); + g = n_gcd(m1, m2); nmod_init(&order, m1 * (m2 / g)); m1 = order.n / m1; m2 = order.n / m2; diff --git a/acb_dirichlet/test/t-chars.c b/acb_dirichlet/test/t-chars.c index 9c5c8e0d..a0966555 100644 --- a/acb_dirichlet/test/t-chars.c +++ b/acb_dirichlet/test/t-chars.c @@ -35,7 +35,7 @@ int main() fflush(stdout); flint_randinit(state); for (bits = 5; bits <= 30; bits += 5) - { + { for (iter = 0; iter < 50; iter++) { @@ -55,7 +55,7 @@ int main() /* check number char properties */ for (iter2 = 0; iter2 < 100; iter2++) - { + { int par; ulong m, n; ulong order, chim1, pairing, cond; diff --git a/acb_dirichlet/test/t-dft.c b/acb_dirichlet/test/t-dft.c index e613aef2..44bf7762 100644 --- a/acb_dirichlet/test/t-dft.c +++ b/acb_dirichlet/test/t-dft.c @@ -29,15 +29,18 @@ int main() { slong k; - slong prec = 100; - slong nq = 10; - ulong q[10] = { 2, 3, 4, 5, 6, 10, 15, 30, 308, 961}; + slong prec = 100, digits = 30; + slong nq = 12; + ulong q[12] = { 2, 3, 4, 5, 6, 23, 10, 15, 30, 59, 308, 961}; + flint_rand_t state; flint_printf("dft...."); fflush(stdout); + flint_randinit(state); + /* cyclic dft */ - for (k = 0; k < nq; k++) + for (k = 0; k < 0 * nq; k++) /* FIXME!!!*/ { slong i; acb_ptr v, w1, w2; @@ -58,9 +61,9 @@ int main() { flint_printf("differ from index %ld / %ld \n\n",i,q[k]); flint_printf("pol =\n"); - acb_vec_printd(w1, q[k], 10); + acb_vec_printd(w1, q[k], digits); flint_printf("fast =\n"); - acb_vec_printd(w2, q[k], 10); + acb_vec_printd(w2, q[k], digits); flint_printf("\n\n"); abort(); } @@ -89,11 +92,16 @@ int main() acb_dirichlet_conrey_init(x, G); acb_dirichlet_conrey_one(x, G); +#if 0 for (i = 0; i < len; i++) { acb_set_si(v + i, x->n); acb_dirichlet_conrey_next(x, G); } +#else + for (i = 0; i < len; i++) + acb_randtest_precise(v + i, state, prec, 0); +#endif /* naive */ acb_init(chiy); @@ -125,15 +133,31 @@ int main() flint_printf("FAIL\n\n"); flint_printf("q = %wu\n", q[k]); flint_printf("v [size %wu]\n", len); - acb_vec_printd(v, len, 10); + acb_vec_printd(v, len, digits); flint_printf("\nDFT differ from index %ld / %ld \n", i, len); flint_printf("\nnaive =\n"); - acb_vec_printd(w1, len, 10); + acb_vec_printd(w1, len, digits); flint_printf("\nfast =\n"); - acb_vec_printd(w2, len, 10); + acb_vec_printd(w2, len, digits); flint_printf("\n\n"); abort(); } + else if (acb_rel_accuracy_bits(w1 + i) < 30 + || acb_rel_accuracy_bits(w2 + i) < 30) + { + flint_printf("FAIL\n\n"); + flint_printf("q = %wu\n", q[k]); + flint_printf("\nDFT inaccurate from index %ld / %ld \n", i, len); + flint_printf("\nnaive =\n"); + acb_printd(w1 + i, digits); + flint_printf("\nfast =\n"); + acb_printd(w2 + i, digits); + flint_printf("\nerrors %ld & %ld [prec = %wu]\n", + acb_rel_accuracy_bits(w1 + i), + acb_rel_accuracy_bits(w2 + i), prec + ); + abort(); + } } _acb_vec_clear(v, len); @@ -143,5 +167,6 @@ int main() acb_dirichlet_group_clear(G); } + flint_randclear(state); flint_printf("PASS\n"); }