From 05ecc82d87d0154a85e400e7f6e92a0997176cf7 Mon Sep 17 00:00:00 2001 From: Pascal Date: Wed, 16 Mar 2016 15:20:39 +0100 Subject: [PATCH] dlog vec performs as it should but still mathematically not debugged --- dlog/profile/p-vec.c | 27 +++-- dlog/vec_crt.c | 24 ++-- dlog/vec_eratos.c | 58 +++++++++ dlog/vec_loop.c | 2 +- dlog/vec_sieve.c | 271 +++++++++++++++++++++++++++++++++++-------- 5 files changed, 313 insertions(+), 69 deletions(-) create mode 100644 dlog/vec_eratos.c diff --git a/dlog/profile/p-vec.c b/dlog/profile/p-vec.c index 91aee7e8..bfedf73a 100644 --- a/dlog/profile/p-vec.c +++ b/dlog/profile/p-vec.c @@ -25,6 +25,9 @@ #include "dlog.h" #include "profiler.h" + +#define NPRIMES 640 + typedef void (*vec_f) (ulong *v, ulong nv, ulong a, ulong va, const nmod_t mod, ulong na, const nmod_t order); void @@ -35,20 +38,20 @@ f_empty(ulong *v, ulong nv, ulong a, ulong va, const nmod_t mod, ulong na, const int main() { - int i, ni = 3; - int bits[5] = { 10, 20, 30, 40, 50 }; + int i, ni = 8; + int bits[9] = { 10, 15, 20, 25, 30, 35, 40, 45, 50 }; - int j, nj = 5; + int j, nj = 6; ulong * v; - ulong nv[5] = { 50, 200, 1000, 2000, 10000 }; + ulong nv[6] = { 50, 200, 1000, 2000, 10000, 30000 }; - int k, np = 1000; + int k, np = NPRIMES; nmod_t * p; ulong * a; - int l, nf = 3; - vec_f func[4] = { f_empty, dlog_vec_loop, dlog_vec_sieve, dlog_vec_crt }; - char * n[4] = { "empty", "loop", "sieve", "crt" }; + int l, nf = 5; + vec_f func[5] = { f_empty, dlog_vec_loop, dlog_vec_eratos, dlog_vec_sieve, dlog_vec_crt }; + char * n[5] = { "empty", "loop", "eratos", "sieve", "crt" }; flint_rand_t state; nmod_t order; @@ -57,6 +60,9 @@ int main() p = flint_malloc(np * sizeof(nmod_t)); a = flint_malloc(np * sizeof(ulong)); + + flint_randinit(state); + for (i = 0; i < ni; i++) { for (k = 0; k < np; k++) @@ -75,7 +81,9 @@ int main() for (l = 0; l < nf; l++) { - if (l == 1 && (i >= 2 || j > 0)) + if (l == 1 && i > 2) + continue; + if (l == 2 && i > 5) continue; flint_printf("%-20s... ",n[l]); @@ -92,6 +100,7 @@ int main() } flint_free(v); } + np /= 2; } flint_free(p); diff --git a/dlog/vec_crt.c b/dlog/vec_crt.c index cb8b8076..09bb4928 100644 --- a/dlog/vec_crt.c +++ b/dlog/vec_crt.c @@ -25,9 +25,12 @@ #include "dlog.h" +#define SIEVE_START 100 + /* group components up to bound and return cofactor */ -#define LOOP 0 -#define SIEVE 1 +#define G_SMALL 0 +#define G_BIG 1 + static void n_factor_group(n_factor_t * fac, ulong bound) { @@ -53,19 +56,19 @@ n_factor_group(n_factor_t * fac, ulong bound) for (j = 0; j < i; j++) { fac->p[j] = m[j]; - fac->exp[j] = LOOP; + fac->exp[j] = G_SMALL; } if (c > 1) { fac->p[i] = c; - fac->exp[i] = SIEVE; + fac->exp[i] = G_BIG; i++; } fac->num = i; } /* assume v[k] = -1 for bad primes? */ -/* loop on small components and keep one subgroup for DLOG + sieve */ +/* loop on small components and if needed keep one subgroup for DLOG + sieve */ void dlog_vec_crt(ulong *v, ulong nv, ulong a, ulong va, nmod_t mod, ulong na, nmod_t order) { @@ -78,16 +81,21 @@ dlog_vec_crt(ulong *v, ulong nv, ulong a, ulong va, nmod_t mod, ulong na, nmod_t n_factor(&fac, na, 1); n_factor_group(&fac, maxloop); for (k = 0; k < fac.num; k++) - { + { ulong m, M, aM, uM, vaM; m = fac.p[k]; M = na / m; aM = nmod_pow_ui(a, M, mod); uM = M * n_invmod(M % m, m); /* uM < n */ vaM = nmod_mul(va, uM % order.n, order); - if (fac.exp[k] == LOOP) + if (fac.exp[k] == G_SMALL) dlog_vec_loop(v, nv, aM, vaM, mod, m, order); else - dlog_vec_sieve(v, nv, aM, vaM, mod, m, order); + { + if (nv <= SIEVE_START) + dlog_vec_eratos_ph(v, nv, aM, vaM, M, mod, m, order); + else + dlog_vec_sieve_ph(v, nv, aM, vaM, M, mod, m, order); + } } } diff --git a/dlog/vec_eratos.c b/dlog/vec_eratos.c new file mode 100644 index 00000000..fd33374e --- /dev/null +++ b/dlog/vec_eratos.c @@ -0,0 +1,58 @@ +/*============================================================================= + + This file is part of ARB. + + ARB is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + ARB is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with ARB; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +=============================================================================*/ +/****************************************************************************** + + Copyright (C) 2016 Pascal Molin + +******************************************************************************/ + +#include "dlog.h" + +void +dlog_vec_eratos_ph(ulong *v, ulong nv, ulong a, ulong va, ulong M, nmod_t mod, ulong na, nmod_t order) +{ + ulong p, pmax; + dlog_precomp_t pre; + n_primes_t iter; + + /* discrete log on primes */ + pmax = (nv < mod.n) ? nv : mod.n; + dlog_precomp_n_init(pre, a, mod.n, na, n_prime_pi(nv)); + + n_primes_init(iter); + while ((p = n_primes_next(iter)) < pmax) + { + ulong k, pM, wp; + if (mod.n % p == 0) + continue; /* won't be attained another time */ + pM = (M) ? nmod_pow_ui(p, M, mod) : p; + wp = nmod_mul(dlog_precomp(pre, pM), va, order); + for (pM = p; pM < nv; pM *= p) + for (k = pM; k < nv; k += pM) + v[k] = nmod_add(v[k], wp, order); + } + n_primes_clear(iter); +} + +void +dlog_vec_eratos(ulong *v, ulong nv, ulong a, ulong va, nmod_t mod, ulong na, nmod_t order) +{ + dlog_vec_eratos_ph(v, nv, a, va, 1, mod, na, order); +} diff --git a/dlog/vec_loop.c b/dlog/vec_loop.c index 4bd8e792..66640ea0 100644 --- a/dlog/vec_loop.c +++ b/dlog/vec_loop.c @@ -31,7 +31,7 @@ dlog_vec_loop(ulong * v, ulong nv, ulong a, ulong va, nmod_t mod, ulong na, nmod { ulong x, xp; ulong vx = 0; - for(x = a; x != 1; x = nmod_mul(x, a, mod)) + for (x = a; x != 1; x = nmod_mul(x, a, mod)) { vx = nmod_add(vx, va, order); for(xp = x; xp < nv; xp+=mod.n) diff --git a/dlog/vec_sieve.c b/dlog/vec_sieve.c index 9981db93..d2263c39 100644 --- a/dlog/vec_sieve.c +++ b/dlog/vec_sieve.c @@ -24,51 +24,41 @@ ******************************************************************************/ #include "dlog.h" +#include + +#define vbs 0 +#define FACTOR_RATIO 4 static ulong -logp_sieve(log_pair_t * v, ulong nv, ulong p, ulong mod, ulong logm1, nmod_t order, int maxtry) +logp_sieve_gcd(log_pair_t * v, ulong nv, ulong p, nmod_t mod, ulong a, ulong na, ulong loga, ulong logm1, nmod_t order, int maxtry) { - int i, j, nr = 0, nm = 0, ng = 0; - ulong l, pm, logm; + int i, j, nm = 0, ng = 0; + ulong pm, logm; ulong u[2], r[2], t; -#if rnd - flint_rand_t state; - flint_randinit(state); -#endif -#if vbs +#if vbs > 1 flint_printf("\nEnter logp_sieve p=%wu mod %wu...\n", p, mod); #endif - while (1) { - /* find multiplier m */ - logm = 0; - pm = l = p; - do { - nm++; - /* random ? pb when p lies in a small subgroup */ - do { - nr++; -#if rnd - l = 1 + n_randint(state, p - 1); -#else - l = (l > 1) ? l - 1 : p - 1; -#endif - } while (v[l].m != l); - pm *= l; - logm += v[l].logm; - } while (pm < mod); - pm = pm % mod; -#if vbs - flint_printf("[pm=%wu, v[pm]=%ld]", pm, v[pm]); -#endif + pm = p; + logm = 0; + while (nm++ < maxtry) + { + pm = nmod_mul(pm, a, mod); + logm = nmod_add(logm, loga, order); + if (2 * pm > mod.n) + { + pm = nmod_neg(pm, mod); + logm = nmod_add(logm, logm1, order); + } /* half gcd u * pm + v * mod = r, ignore v */ - u[0] = 0; r[0] = mod; + u[0] = 0; r[0] = mod.n; u[1] = 1; r[1] = pm; i = 1; j = 0; /* flip flap */ do { ng++; -#if vbs - flint_printf("[r=%d, v[r]=%d, u=%d, v[u]=%d]\n", - r[i],v[r[i]], u[i], v[u[i]]); +#if vbs > 1 + if (r[i] < nv && u[i] < nv) + flint_printf("[r=%wu, v[r]=%wu, u=%wu, v[u]=%wu -- nv=%wu]\n", + r[i], v[r[i]].m, u[i], v[u[i]].m, nv); #endif if (r[i] < nv && v[r[i]].m == r[i] && u[i] < nv && v[u[i]].m == u[i]) { @@ -77,9 +67,6 @@ logp_sieve(log_pair_t * v, ulong nv, ulong p, ulong mod, ulong logm1, nmod_t ord x = nmod_sub(v[r[i]].logm, nmod_add(v[u[i]].logm, logm, order), order); if (j) x = nmod_add(x, logm1, order); -#if rnd - flint_randclear(state); -#endif return x; } @@ -88,55 +75,237 @@ logp_sieve(log_pair_t * v, ulong nv, ulong p, ulong mod, ulong logm1, nmod_t ord r[i] = r[i] % r[j]; u[i] = u[i] + t * u[j]; /* (-1)^j */ - } while (r[i] > 0 && u[i] < p); - if (nm > maxtry) - return NOT_FOUND; + } while (r[i] > 0 && u[i] < nv); } + return NOT_FOUND; +} + +static ulong +logp_sieve_factor(log_pair_t * v, ulong nv, ulong p, nmod_t mod, ulong a, ulong na, ulong loga, ulong logm1, nmod_t order, int maxtry) +{ + int nm = 0; + ulong pm, logm; + + const ulong * prime; + prime = n_primes_arr_readonly(p); + + pm = p; + logm = 0; + while (pm < mod.n) + { + /*nm++;*/ /* init ignored */ + pm *= a; + logm = nmod_add(logm, loga, order); + } + pm = pm % mod.n; + do { + int i, j, ind[15], exp[15]; + /* find multiplier m */ + if (2 * pm > mod.n) + { + pm = nmod_neg(pm, mod); + logm = nmod_add(logm, logm1, order); + } + + for (i = 0, j = 0; j < p && pm >= nv && 4 * prime[j] < p; j++) + { + int e = n_remove(&pm, prime[j]); + if (e) + { + ind[i] = j; + exp[i] = e; + i++; + } + } + if (pm < nv && v[pm].m == pm) + { + /* goal! */ + ulong x = v[pm].logm; + /* chi(m)*chi(p)=chi(pm)*prod */ + for(j = 0; j < i; j++) + x = nmod_add(x, nmod_mul(exp[j], v[prime[ind[j]]].logm, order), order); + x = nmod_sub(logm, x, order); + /*flint_printf("managed %d mults / %d for p=%wu, pm=%wu\n",nm,maxtry,p,pm);*/ + return x; + } + nm++; + pm = nmod_mul(pm, a, mod); + logm = nmod_add(logm, loga, order); + } while (nm < maxtry); + return NOT_FOUND; +} + +static int +factor_until(ulong * n, ulong nlim, const ulong * p, ulong pmax, ulong * fp, int * fe) +{ + int i, j; + for (i = 0, j = 0; *n >= nlim && p[j] < pmax; j++) + { + int e = n_remove(n, p[j]); + if (e) + { + fp[i] = p[j]; + fe[i] = e; + i++; + } + } + return i; +} + +static ulong +logp_sieve_factorgcd(log_pair_t * v, ulong nv, ulong p, nmod_t mod, ulong a, ulong na, ulong loga, ulong logm1, nmod_t order, int maxtry) +{ + int nm = 0, ng = 0; + ulong pm, logm, pmax; + ulong u[2], r[2], t; + ulong up[15], rp[15]; + int ue[15], re[15]; + const ulong * prime; + prime = n_primes_arr_readonly(p); + pmax = p / FACTOR_RATIO; + pm = p; + logm = 0; + while (nm++ < maxtry) + { + int i, j, iu, ir; + ulong logr; + pm = nmod_mul(pm, a, mod); + logm = nmod_add(logm, loga, order); + /* + if (2 * pm > mod.n) + { + pm = nmod_neg(pm, mod); + logm = nmod_add(logm, logm1, order); + } + */ + /* half gcd u * pm + v * mod = r, ignore v */ + u[0] = 0; r[0] = mod.n; + u[1] = 1; r[1] = pm; + i = 1; j = 0; /* flip flap */ + while (r[i] > u[i]) + { + ng++; + if (r[i] < nv && v[r[i]].m == r[i] && u[i] < nv && v[u[i]].m == u[i]) + { + /* early smooth detection: occurs for primes < 30 bits */ + ulong x; + /* chi(-1)^j*chi(u)*chi(p)*chi(m)=chi(r) */ + x = nmod_sub(v[r[i]].logm, nmod_add(v[u[i]].logm, logm, order), order); + if (j) + x = nmod_add(x, logm1, order); + return x; + } + j = i; i = 1 - i; /* switch */ + t = r[i] / r[j]; + r[i] = r[i] % r[j]; + u[i] = u[i] + t * u[j]; /* (-1)^j */ + + }; + /* try to factor both r[i] and u[i] */ + iu = factor_until(&u[i], nv, prime, pmax, up, ue); + if (u[i] >= nv || v[u[i]].m < u[i]) + continue; + ir = factor_until(&r[i], nv, prime, pmax, rp, re); + if (r[i] >= nv || v[r[i]].m < r[i]) + continue; + /* log(u)+log(p)+log(m)=log(r) */ + logr = 0; + for (i=0; i < ir; i++) + logr = nmod_add(logr, (re[i] * v[rp[i]].logm) % order.n, order); + for (i=0; i < iu; i++) + logm = nmod_add(logm, (ue[i] * v[up[i]].logm) % order.n, order); + return nmod_sub(logr, logm, order); + } + return NOT_FOUND; } void -dlog_vec_sieve(ulong *v, ulong nv, ulong a, ulong va, nmod_t mod, ulong na, nmod_t order) +dlog_vec_sieve_ph(ulong *v, ulong nv, ulong a, ulong va, ulong M, nmod_t mod, ulong na, nmod_t order) { - int maxtry; + ulong smooth = 0, sievecount = 0, logcount = 0, missed = 0; + ulong logcost, limcount; ulong k, p, p1, pmax, logm1; log_pair_t * w; dlog_precomp_t pre; n_primes_t iter; + ulong X, aX, vaX; /* store size */ w = flint_malloc( nv * sizeof(log_pair_t)); for (k = 0; k < nv; k++) { w[k].m = 1; - w[k].logm = 1; - } - w[1].logm = 0; + w[k].logm = 0; /* could be v[k]... */ + } /* discrete log on first primes, then sieve */ pmax = (nv < mod.n) ? nv : mod.n; - p1 = maxtry = 50; /* FIXME: tune this limit! */ + p1 = 50; /* FIXME: tune this limit! */ dlog_precomp_n_init(pre, a, mod.n, na, p1); + /*flint_printf("## single log cost: %wu\n", pre->cost);*/ + logcost = pre->cost; + + if (logcost < 15) + { + /* p1 = pmax; */ + limcount = mod.n; + } + else + { + limcount = ceil(pow((double)mod.n,1./2.3) * 40 / logcost); + logm1 = (mod.n % 2) ? 0 : dlog_precomp(pre, mod.n - 1); + } - logm1 = (mod.n % 2) ? 0 : dlog_precomp(pre, mod.n - 1); + /* find big power of gen */ + X = n_nextprime(na / 2 + 10, 0); + X = (na % 257) ? 257 % na : 1031 % na ; /* FIXME! */ + aX = nmod_pow_ui(a, X, mod); + vaX = nmod_mul(va, X % order.n, order); n_primes_init(iter); while ((p = n_primes_next(iter)) < pmax) { + double cost = log(mod.n)/log(p); ulong m, wp; if (mod.n % p == 0) /* FIXME: those primes could be known... */ continue; /* won't be attained another time */ - if (p < p1 || (wp = logp_sieve(w, nv, p, mod.n, logm1, order, maxtry)) != NOT_FOUND) - wp = nmod_mul(dlog_precomp(pre, p), va, order); + cost = log(mod.n)/log(p); + cost = pow(cost,cost); + sievecount++; + /* if (p < p1 || (wp = logp_sieve(w, nv, p, mod.n, logm1, order, logcost)) == NOT_FOUND) */ + /*if (smooth < limcount || (wp = logp_sieve_factor(w, nv, p, mod.n, a, na, va, logm1, order, logcost)) == NOT_FOUND)*/ + if (logcost < cost || (wp = logp_sieve_factorgcd(w, nv, p, mod, aX, na, vaX, logm1, order, cost)) == NOT_FOUND) + { + if (logcost < cost) + sievecount--; + else + missed++; + logcount++; + wp = nmod_mul(dlog_precomp(pre, nmod_pow_ui(p, M, mod)), va, order); + } for (k = p, m = 1; k < nv; k += p, m++) { - w[k].m *= p; - w[k].logm = nmod_add(w[k].logm, wp, order); + w[k].m = w[m].m * p; + w[k].logm = nmod_add(w[m].logm, wp, order); + if (w[k].m == k) + smooth++; } } /* write in v */ for (k = 0; k < nv; k++) if (v[k] != NOT_FOUND) v[k] = nmod_add(v[k], w[k].logm, order); +#if vbs + if (missed) + flint_printf("[sieve: got %wu / %wu, n = %wu, cost %wu, logs %wu, sieve %wu missed %wu]\n", + smooth, limcount, mod.n, logcost, logcount, sievecount, missed); +#endif n_primes_clear(iter); flint_free(w); } + +void +dlog_vec_sieve(ulong *v, ulong nv, ulong a, ulong va, nmod_t mod, ulong na, nmod_t order) +{ + dlog_vec_sieve_ph(v, nv, a, va, 1, mod, na, order); +}