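Bugfix (remove the premature TMP_END before the dot products in arb_dot_fmpz and acb_dot_fmpz; take the absolute value of the coefficient bit size in _arb_fmpz_poly_evaluate_arb); improve arb_fmpz_poly_evaluate_arb and arb_fmpz_poly_evaluate_acb using dot products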

This commit is contained in:
fredrik 2021-07-02 15:10:11 +02:00
parent d49264f251
commit d144c31760
5 changed files with 25 additions and 24 deletions

View file

@ -138,8 +138,6 @@ acb_dot_fmpz(acb_t res, const acb_t initial, int subtract, acb_srcptr x, slong x
} }
} }
TMP_END;
arb_dot(((arb_ptr) res) + 0, (initial == NULL) ? NULL : ((arb_srcptr) initial) + 0, subtract, ((arb_srcptr) x) + 0, 2 * xstep, t, 1, len, prec); arb_dot(((arb_ptr) res) + 0, (initial == NULL) ? NULL : ((arb_srcptr) initial) + 0, subtract, ((arb_srcptr) x) + 0, 2 * xstep, t, 1, len, prec);
arb_dot(((arb_ptr) res) + 1, (initial == NULL) ? NULL : ((arb_srcptr) initial) + 1, subtract, ((arb_srcptr) x) + 1, 2 * xstep, t, 1, len, prec); arb_dot(((arb_ptr) res) + 1, (initial == NULL) ? NULL : ((arb_srcptr) initial) + 1, subtract, ((arb_srcptr) x) + 1, 2 * xstep, t, 1, len, prec);

View file

@ -138,8 +138,6 @@ arb_dot_fmpz(arb_t res, const arb_t initial, int subtract, arb_srcptr x, slong x
} }
} }
TMP_END;
arb_dot(res, initial, subtract, x, xstep, t, 1, len, prec); arb_dot(res, initial, subtract, x, xstep, t, 1, len, prec);
TMP_END; TMP_END;

View file

@ -15,7 +15,7 @@ void
_arb_fmpz_poly_evaluate_acb_rectangular(acb_t y, const fmpz * poly, _arb_fmpz_poly_evaluate_acb_rectangular(acb_t y, const fmpz * poly,
slong len, const acb_t x, slong prec) slong len, const acb_t x, slong prec)
{ {
slong i, j, m, r; slong i, m, r;
acb_ptr xs; acb_ptr xs;
acb_t s, t, c; acb_t s, t, c;
@ -36,15 +36,14 @@ _arb_fmpz_poly_evaluate_acb_rectangular(acb_t y, const fmpz * poly,
_acb_vec_set_powers(xs, x, m + 1, prec); _acb_vec_set_powers(xs, x, m + 1, prec);
acb_set_fmpz(y, poly + (r - 1) * m); acb_set_fmpz(y, poly + (r - 1) * m);
for (j = 1; (r - 1) * m + j < len; j++) acb_dot_fmpz(y, y, 0, xs + 1, 1,
acb_addmul_fmpz(y, xs + j, poly + (r - 1) * m + j, prec); poly + (r - 1) * m + 1, 1, len - (r - 1) * m - 1, prec);
for (i = r - 2; i >= 0; i--) for (i = r - 2; i >= 0; i--)
{ {
acb_set_fmpz(s, poly + i * m); acb_set_fmpz(s, poly + i * m);
for (j = 1; j < m; j++) acb_dot_fmpz(s, s, 0, xs + 1, 1,
acb_addmul_fmpz(s, xs + j, poly + i * m + j, prec); poly + i * m + 1, 1, m - 1, prec);
acb_mul(y, y, xs + m, prec); acb_mul(y, y, xs + m, prec);
acb_add(y, y, s, prec); acb_add(y, y, s, prec);
} }

View file

@ -15,17 +15,24 @@ void
_arb_fmpz_poly_evaluate_arb(arb_t res, const fmpz * f, slong len, _arb_fmpz_poly_evaluate_arb(arb_t res, const fmpz * f, slong len,
const arb_t x, slong prec) const arb_t x, slong prec)
{ {
if ((prec >= 1024) && (len >= 5 + 20000 / prec)) if (len >= 6 && len >= 5 + 2500 / (FLINT_MAX(prec, 64) + 64))
{ {
slong fbits; /* todo: improve this tuning? */
if (prec > 1024)
fbits = _fmpz_vec_max_bits(f, len);
if (fbits <= prec / 2)
{ {
_arb_fmpz_poly_evaluate_arb_rectangular(res, f, len, x, prec); slong fbits;
return; fbits = _fmpz_vec_max_bits(f, len);
fbits = FLINT_ABS(fbits);
if (fbits > prec / 2)
{
_arb_fmpz_poly_evaluate_arb_horner(res, f, len, x, prec);
return;
}
} }
_arb_fmpz_poly_evaluate_arb_rectangular(res, f, len, x, prec);
return;
} }
_arb_fmpz_poly_evaluate_arb_horner(res, f, len, x, prec); _arb_fmpz_poly_evaluate_arb_horner(res, f, len, x, prec);

View file

@ -15,7 +15,7 @@ void
_arb_fmpz_poly_evaluate_arb_rectangular(arb_t y, const fmpz * poly, _arb_fmpz_poly_evaluate_arb_rectangular(arb_t y, const fmpz * poly,
slong len, const arb_t x, slong prec) slong len, const arb_t x, slong prec)
{ {
slong i, j, m, r; slong i, m, r;
arb_ptr xs; arb_ptr xs;
arb_t s, t, c; arb_t s, t, c;
@ -36,15 +36,14 @@ _arb_fmpz_poly_evaluate_arb_rectangular(arb_t y, const fmpz * poly,
_arb_vec_set_powers(xs, x, m + 1, prec); _arb_vec_set_powers(xs, x, m + 1, prec);
arb_set_fmpz(y, poly + (r - 1) * m); arb_set_fmpz(y, poly + (r - 1) * m);
for (j = 1; (r - 1) * m + j < len; j++) arb_dot_fmpz(y, y, 0, xs + 1, 1,
arb_addmul_fmpz(y, xs + j, poly + (r - 1) * m + j, prec); poly + (r - 1) * m + 1, 1, len - (r - 1) * m - 1, prec);
for (i = r - 2; i >= 0; i--) for (i = r - 2; i >= 0; i--)
{ {
arb_set_fmpz(s, poly + i * m); arb_set_fmpz(s, poly + i * m);
for (j = 1; j < m; j++) arb_dot_fmpz(s, s, 0, xs + 1, 1,
arb_addmul_fmpz(s, xs + j, poly + i * m + j, prec); poly + i * m + 1, 1, m - 1, prec);
arb_mul(y, y, xs + m, prec); arb_mul(y, y, xs + m, prec);
arb_add(y, y, s, prec); arb_add(y, y, s, prec);
} }