From 5ceaf24774514b6cd0dfa6ad24590e677c7944b9 Mon Sep 17 00:00:00 2001
From: Fredrik Johansson <fredrik.johansson@gmail.com>
Date: Sun, 9 Jun 2013 16:20:44 +0200
Subject: [PATCH] add reciprocal square root functions (fmpr_rsqrt,
 fmprb_rsqrt, fmprb_rsqrt)

---
 doc/source/fmpr.rst  |   6 +++
 doc/source/fmprb.rst |   7 +++
 fmpr.h               |   2 +
 fmpr/rsqrt.c         |  83 ++++++++++++++++++++++++++++++++++
 fmpr/test/t-rsqrt.c  | 103 +++++++++++++++++++++++++++++++++++++++++++
 fmprb.h              |   3 ++
 fmprb/root.c         |   6 +++
 fmprb/rsqrt.c        |  81 ++++++++++++++++++++++++++++++++++
 fmprb/sqrt.c         |   4 +-
 fmprb/test/t-rsqrt.c |  80 +++++++++++++++++++++++++++++++++
 10 files changed, 373 insertions(+), 2 deletions(-)
 create mode 100644 fmpr/rsqrt.c
 create mode 100644 fmpr/test/t-rsqrt.c
 create mode 100644 fmprb/rsqrt.c
 create mode 100644 fmprb/test/t-rsqrt.c

diff --git a/doc/source/fmpr.rst b/doc/source/fmpr.rst
index 70a09f53..fe96fc3f 100644
--- a/doc/source/fmpr.rst
+++ b/doc/source/fmpr.rst
@@ -488,6 +488,12 @@ Arithmetic
     Sets *z* to the square root of *x*, rounded according to *prec* and *rnd*.
     The result is NaN if *x* is negative.
 
+.. function:: long fmpr_rsqrt(fmpr_t z, const fmpr_t x, long prec, fmpr_rnd_t rnd)
+
+    Sets *z* to the reciprocal square root of *x*, rounded according to
+    *prec* and *rnd*. The result is NaN if *x* is negative.
+    At high precision, this is faster than computing a square root.
+
 .. function:: long fmpr_root(fmpr_t z, const fmpr_t x, ulong k, long prec)
 
     Sets *z* to the *k*-th root of *x*, rounded to *prec* bits.
diff --git a/doc/source/fmprb.rst b/doc/source/fmprb.rst
index 73d57f0e..5673a98f 100644
--- a/doc/source/fmprb.rst
+++ b/doc/source/fmprb.rst
@@ -442,6 +442,13 @@ Powers and roots
 
     Sets *z* to `\sqrt{x^2 + y^2}`.
 
+.. function:: void fmprb_rsqrt(fmprb_t z, const fmprb_t x, long prec)
+
+.. function:: void fmprb_rsqrt_ui(fmprb_t z, ulong x, long prec)
+
+    Sets *z* to the reciprocal square root of *x*, rounded to *prec* bits.
+    At high precision, this is faster than computing a square root.
+
 .. function:: void fmprb_root(fmprb_t z, const fmprb_t x, ulong k, long prec)
 
     Sets *z* to the *k*-th root of *x*, rounded to *prec* bits.
diff --git a/fmpr.h b/fmpr.h
index d65c5ba3..f5b74fed 100644
--- a/fmpr.h
+++ b/fmpr.h
@@ -425,6 +425,8 @@ long fmpr_sqrt(fmpr_t y, const fmpr_t x, long prec, fmpr_rnd_t rnd);
 long fmpr_sqrt_ui(fmpr_t z, ulong x, long prec, fmpr_rnd_t rnd);
 long fmpr_sqrt_fmpz(fmpr_t z, const fmpz_t x, long prec, fmpr_rnd_t rnd);
 
+long fmpr_rsqrt(fmpr_t y, const fmpr_t x, long prec, fmpr_rnd_t rnd);
+
 long fmpr_root(fmpr_t y, const fmpr_t x, ulong k, long prec, fmpr_rnd_t rnd);
 
 long fmpr_log(fmpr_t y, const fmpr_t x, long prec, fmpr_rnd_t rnd);
diff --git a/fmpr/rsqrt.c b/fmpr/rsqrt.c
new file mode 100644
index 00000000..53b92ccb
--- /dev/null
+++ b/fmpr/rsqrt.c
@@ -0,0 +1,83 @@
+/*=============================================================================
+
+    This file is part of ARB.
+
+    ARB is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    ARB is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with ARB; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+=============================================================================*/
+/******************************************************************************
+
+    Copyright (C) 2012 Fredrik Johansson
+
+******************************************************************************/
+
+#include "fmpr.h"
+
+long
+fmpr_rsqrt(fmpr_t y, const fmpr_t x, long prec, fmpr_rnd_t rnd)
+{
+    long r;
+
+    if (fmpr_is_special(x))
+    {
+        if (fmpr_is_zero(x))
+            fmpr_pos_inf(y);
+        else if (fmpr_is_pos_inf(x))
+            fmpr_zero(y);
+        else
+            fmpr_nan(y);
+
+        return FMPR_RESULT_EXACT;
+    }
+
+    if (fmpr_sgn(x) < 0)
+    {
+        fmpr_nan(y);
+        return FMPR_RESULT_EXACT;
+    }
+
+    /* special case: 4^n */
+    if (fmpz_is_one(fmpr_manref(x)) && fmpz_is_even(fmpr_expref(x)))
+    {
+        r = fmpr_set_round(y, x, prec, rnd);
+        fmpz_tdiv_q_2exp(fmpr_expref(y), fmpr_expref(y), 1);
+        fmpz_neg(fmpr_expref(y), fmpr_expref(y));
+        return r;
+    }
+
+    {
+        fmpr_t t;
+        fmpz_t e;
+
+        fmpr_init(t);
+        fmpz_init(e);
+
+        fmpz_neg(e, fmpr_expref(x));
+        if (fmpz_is_odd(e))
+            fmpz_add_ui(e, e, 1);
+        fmpr_mul_2exp_fmpz(t, x, e);
+
+        CALL_MPFR_FUNC(r, mpfr_rec_sqrt, y, t, prec, rnd);
+
+        fmpz_tdiv_q_2exp(e, e, 1);
+        fmpr_mul_2exp_fmpz(y, y, e);
+
+        fmpr_clear(t);
+        fmpz_clear(e);
+
+        return r;
+    }
+}
+
diff --git a/fmpr/test/t-rsqrt.c b/fmpr/test/t-rsqrt.c
new file mode 100644
index 00000000..5edf4db6
--- /dev/null
+++ b/fmpr/test/t-rsqrt.c
@@ -0,0 +1,103 @@
+/*=============================================================================
+
+    This file is part of ARB.
+
+    ARB is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    ARB is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with ARB; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+=============================================================================*/
+/******************************************************************************
+
+    Copyright (C) 2012 Fredrik Johansson
+
+******************************************************************************/
+
+#include "fmpr.h"
+
+
+int main()
+{
+    long iter;
+    flint_rand_t state;
+
+    printf("rsqrt....");
+    fflush(stdout);
+
+    flint_randinit(state);
+
+    for (iter = 0; iter < 100000; iter++)
+    {
+        long bits;
+        fmpr_t x, z, w;
+        mpfr_t X, Z;
+
+        bits = 2 + n_randint(state, 200);
+
+        fmpr_init(x);
+        fmpr_init(z);
+        fmpr_init(w);
+
+        mpfr_init2(X, 2 * (bits + 100));
+        mpfr_init2(Z, bits);
+
+        fmpr_randtest_special(x, state, bits + n_randint(state, 100), 10);
+        fmpr_randtest_special(z, state, bits + n_randint(state, 100), 10);
+
+        fmpr_get_mpfr(X, x, MPFR_RNDN);
+
+        switch (n_randint(state, 4))
+        {
+            case 0:
+                mpfr_rec_sqrt(Z, X, MPFR_RNDZ);
+                fmpr_rsqrt(z, x, bits, FMPR_RND_DOWN);
+                break;
+            case 1:
+                mpfr_rec_sqrt(Z, X, MPFR_RNDA);
+                fmpr_rsqrt(z, x, bits, FMPR_RND_UP);
+                break;
+            case 2:
+                mpfr_rec_sqrt(Z, X, MPFR_RNDD);
+                fmpr_rsqrt(z, x, bits, FMPR_RND_FLOOR);
+                break;
+            case 3:
+                mpfr_rec_sqrt(Z, X, MPFR_RNDU);
+                fmpr_rsqrt(z, x, bits, FMPR_RND_CEIL);
+                break;
+        }
+
+        fmpr_set_mpfr(w, Z);
+
+        if (!fmpr_equal(z, w))
+        {
+            printf("FAIL\n\n");
+            printf("bits = %ld\n", bits);
+            printf("x = "); fmpr_print(x); printf("\n\n");
+            printf("z = "); fmpr_print(z); printf("\n\n");
+            printf("w = "); fmpr_print(w); printf("\n\n");
+            abort();
+        }
+
+        fmpr_clear(x);
+        fmpr_clear(z);
+        fmpr_clear(w);
+
+        mpfr_clear(X);
+        mpfr_clear(Z);
+    }
+
+    flint_randclear(state);
+    _fmpz_cleanup();
+    printf("PASS\n");
+    return EXIT_SUCCESS;
+}
diff --git a/fmprb.h b/fmprb.h
index d4ddddc8..803bc58d 100644
--- a/fmprb.h
+++ b/fmprb.h
@@ -250,6 +250,9 @@ void fmprb_sqrt(fmprb_t z, const fmprb_t x, long prec);
 void fmprb_sqrt_ui(fmprb_t z, ulong x, long prec);
 void fmprb_sqrt_fmpz(fmprb_t z, const fmpz_t x, long prec);
 
+void fmprb_rsqrt(fmprb_t z, const fmprb_t x, long prec);
+void fmprb_rsqrt_ui(fmprb_t z, ulong x, long prec);
+
 void fmprb_sqrtpos(fmprb_t z, const fmprb_t x, long prec);
 
 void fmprb_hypot(fmprb_t z, const fmprb_t x, const fmprb_t y, long prec);
diff --git a/fmprb/root.c b/fmprb/root.c
index 05217b47..82d86c01 100644
--- a/fmprb/root.c
+++ b/fmprb/root.c
@@ -42,6 +42,12 @@ fmprb_root(fmprb_t z, const fmprb_t x, ulong k, long prec)
         return;
     }
 
+    if (k == -2)
+    {
+        fmprb_rsqrt(z, x, prec);
+        return;
+    }
+
     if (fmprb_contains_negative(x))
     {
         fmpr_nan(fmprb_midref(z));
diff --git a/fmprb/rsqrt.c b/fmprb/rsqrt.c
new file mode 100644
index 00000000..0717304c
--- /dev/null
+++ b/fmprb/rsqrt.c
@@ -0,0 +1,81 @@
+/*=============================================================================
+
+    This file is part of ARB.
+
+    ARB is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    ARB is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with ARB; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+=============================================================================*/
+/******************************************************************************
+
+    Copyright (C) 2012 Fredrik Johansson
+
+******************************************************************************/
+
+#include "fmprb.h"
+
+void
+fmprb_rsqrt(fmprb_t z, const fmprb_t x, long prec)
+{
+    long r;
+
+    if (fmprb_contains_negative(x))
+    {
+        fmpr_nan(fmprb_midref(z));
+        fmpr_pos_inf(fmprb_radref(z));
+        return;
+    }
+
+    if (fmprb_is_exact(x))
+    {
+        r = fmpr_rsqrt(fmprb_midref(z), fmprb_midref(x), prec, FMPR_RND_DOWN);
+        fmpr_set_error_result(fmprb_radref(z), fmprb_midref(z), r);
+    }
+    else
+    {
+        fmpr_t t, err;
+
+        fmpr_init(t);
+        fmpr_init(err);
+
+        /* lower point */
+        fmpr_sub(t, fmprb_midref(x), fmprb_radref(x), FMPRB_RAD_PREC, FMPR_RND_DOWN);
+
+        /* error bound: (1/2) t^(-3/2) * rad */
+        fmpr_rsqrt(err, t, FMPRB_RAD_PREC, FMPR_RND_UP);
+        fmpr_div(err, err, t, FMPRB_RAD_PREC, FMPR_RND_UP);
+        fmpr_mul(err, err, fmprb_radref(x), FMPRB_RAD_PREC, FMPR_RND_UP);
+        fmpr_mul_2exp_si(err, err, -1);
+
+        r = fmpr_rsqrt(fmprb_midref(z), fmprb_midref(x), prec, FMPR_RND_DOWN);
+        fmpr_add_error_result(fmprb_radref(z), err, fmprb_midref(z), r,
+            FMPRB_RAD_PREC, FMPR_RND_UP);
+
+        fmpr_clear(t);
+        fmpr_clear(err);
+    }
+
+    fmprb_adjust(z);
+}
+
+void
+fmprb_rsqrt_ui(fmprb_t z, ulong x, long prec)
+{
+    fmprb_t t;
+    fmprb_init(t);
+    fmprb_set_ui(t, x);
+    fmprb_rsqrt(z, t, prec);
+    fmprb_clear(t);
+}
+
diff --git a/fmprb/sqrt.c b/fmprb/sqrt.c
index 10b3b193..946bb5dd 100644
--- a/fmprb/sqrt.c
+++ b/fmprb/sqrt.c
@@ -47,8 +47,8 @@ fmprb_sqrt(fmprb_t z, const fmprb_t x, long prec)
         fmpr_t err;
         fmpr_init(err);
         fmpr_sub(err, fmprb_midref(x), fmprb_radref(x), FMPRB_RAD_PREC, FMPR_RND_DOWN);
-        fmpr_sqrt(err, err, FMPRB_RAD_PREC, FMPR_RND_DOWN);
-        fmpr_div(err, fmprb_radref(x), err, FMPRB_RAD_PREC, FMPR_RND_UP);
+        fmpr_rsqrt(err, err, FMPRB_RAD_PREC, FMPR_RND_UP);
+        fmpr_mul(err, fmprb_radref(x), err, FMPRB_RAD_PREC, FMPR_RND_UP);
         fmpr_mul_2exp_si(err, err, -1);
 
         r = fmpr_sqrt(fmprb_midref(z), fmprb_midref(x), prec, FMPR_RND_DOWN);
diff --git a/fmprb/test/t-rsqrt.c b/fmprb/test/t-rsqrt.c
new file mode 100644
index 00000000..9be917ea
--- /dev/null
+++ b/fmprb/test/t-rsqrt.c
@@ -0,0 +1,80 @@
+/*=============================================================================
+
+    This file is part of ARB.
+
+    ARB is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    ARB is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with ARB; if not, write to the Free Software
+    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+=============================================================================*/
+/******************************************************************************
+
+    Copyright (C) 2012 Fredrik Johansson
+
+******************************************************************************/
+
+#include "fmprb.h"
+
+int main()
+{
+    long iter;
+    flint_rand_t state;
+
+    printf("rsqrt....");
+    fflush(stdout);
+
+    flint_randinit(state);
+
+    for (iter = 0; iter < 100000; iter++)
+    {
+        fmprb_t a, b, c;
+        long prec = 2 + n_randint(state, 200);
+
+        fmprb_init(a);
+        fmprb_init(b);
+        fmprb_init(c);
+
+        fmprb_randtest(a, state, 1 + n_randint(state, 200), 10);
+
+        fmprb_rsqrt(b, a, prec);
+
+        fmprb_ui_div(c, 1, b, prec);
+        fmprb_mul(c, c, c, prec);
+
+        if (!fmprb_contains(c, a))
+        {
+            printf("FAIL: containment\n\n");
+            printf("a = "); fmprb_print(a); printf("\n\n");
+            printf("b = "); fmprb_print(b); printf("\n\n");
+            printf("c = "); fmprb_print(c); printf("\n\n");
+            abort();
+        }
+
+        fmprb_rsqrt(a, a, prec);
+
+        if (!fmprb_equal(a, b))
+        {
+            printf("FAIL: aliasing\n\n");
+            abort();
+        }
+
+        fmprb_clear(a);
+        fmprb_clear(b);
+        fmprb_clear(c);
+    }
+
+    flint_randclear(state);
+    _fmpz_cleanup();
+    printf("PASS\n");
+    return EXIT_SUCCESS;
+}