arb_mat: window matrices, block recursive LU factorization and triangular solving

2025-03-05 17:31:38 -05:00 · 2018-06-26 21:42:55 +02:00 · 2018-06-26 21:42:55 +02:00 · 33171f012b
commit 33171f012b
parent ee6883756b
14 changed files with 918 additions and 66 deletions
--- a/arb_mat.h
+++ b/arb_mat.h
@ -64,6 +64,16 @@ arb_mat_swap(arb_mat_t mat1, arb_mat_t mat2)
    *mat2 = t;
 }
 /* Window matrices */
 void arb_mat_window_init(arb_mat_t window, const arb_mat_t mat, slong r1, slong c1, slong r2, slong c2);
 ARB_MAT_INLINE void
 arb_mat_window_clear(arb_mat_t window)
 {
    flint_free(window->rows);
 }
 /* Conversions */
 void arb_mat_set(arb_mat_t dest, const arb_mat_t src);
@ -318,6 +328,16 @@ arb_mat_swap_rows(arb_mat_t mat, slong * perm, slong r, slong s)
 slong arb_mat_find_pivot_partial(const arb_mat_t mat,
                                    slong start_row, slong end_row, slong c);
 void arb_mat_solve_tril_classical(arb_mat_t X, const arb_mat_t L, const arb_mat_t B, int unit, slong prec);
 void arb_mat_solve_tril_recursive(arb_mat_t X, const arb_mat_t L, const arb_mat_t B, int unit, slong prec);
 void arb_mat_solve_tril(arb_mat_t X, const arb_mat_t L, const arb_mat_t B, int unit, slong prec);
 void arb_mat_solve_triu_classical(arb_mat_t X, const arb_mat_t U, const arb_mat_t B, int unit, slong prec);
 void arb_mat_solve_triu_recursive(arb_mat_t X, const arb_mat_t U, const arb_mat_t B, int unit, slong prec);
 void arb_mat_solve_triu(arb_mat_t X, const arb_mat_t U, const arb_mat_t B, int unit, slong prec);
 int arb_mat_lu_classical(slong * P, arb_mat_t LU, const arb_mat_t A, slong prec);
 int arb_mat_lu_recursive(slong * P, arb_mat_t LU, const arb_mat_t A, slong prec);
 int arb_mat_lu(slong * P, arb_mat_t LU, const arb_mat_t A, slong prec);
 void arb_mat_solve_lu_precomp(arb_mat_t X, const slong * perm,
--- a/arb_mat/lu.c
+++ b/arb_mat/lu.c
@ -14,60 +14,9 @@
 int
 arb_mat_lu(slong * P, arb_mat_t LU, const arb_mat_t A, slong prec)
 {
-    arb_t d, e;
+    if (arb_mat_nrows(A) < 8 || arb_mat_ncols(A) < 8)
-    arb_ptr * a;
+        return arb_mat_lu_classical(P, LU, A, prec);
-    slong i, j, m, n, r, row, col;
+    else
-    int result;
+        return arb_mat_lu_recursive(P, LU, A, prec);
    if (arb_mat_is_empty(A))
        return 1;
    m = arb_mat_nrows(A);
    n = arb_mat_ncols(A);
    arb_mat_set(LU, A);
    a = LU->rows;
    row = col = 0;
    for (i = 0; i < m; i++)
        P[i] = i;
    arb_init(d);
    arb_init(e);
    result = 1;
    while (row < m && col < n)
    {
        r = arb_mat_find_pivot_partial(LU, row, m, col);
        if (r == -1)
        {
            result = 0;
            break;
        }
        else if (r != row)
            arb_mat_swap_rows(LU, P, row, r);
        arb_set(d, a[row] + col);
        for (j = row + 1; j < m; j++)
        {
            arb_div(e, a[j] + col, d, prec);
            arb_neg(e, e);
            _arb_vec_scalar_addmul(a[j] + col,
                a[row] + col, n - col, e, prec);
            arb_zero(a[j] + col);
            arb_neg(a[j] + row, e);
 }
        row++;
        col++;
    }
    arb_clear(d);
    arb_clear(e);
    return result;
 }
--- a/arb_mat/lu_classical.c
+++ b/arb_mat/lu_classical.c
@ -0,0 +1,74 @@
 /*
    Copyright (C) 2012 Fredrik Johansson
    This file is part of Arb.
    Arb is free software: you can redistribute it and/or modify it under
    the terms of the GNU Lesser General Public License (LGPL) as published
    by the Free Software Foundation; either version 2.1 of the License, or
    (at your option) any later version.  See <http://www.gnu.org/licenses/>.
 */
 #include "arb_mat.h"
 int
 arb_mat_lu_classical(slong * P, arb_mat_t LU, const arb_mat_t A, slong prec)
 {
    arb_t d, e;
    arb_ptr * a;
    slong i, j, m, n, r, row, col;
    int result;
    if (arb_mat_is_empty(A))
        return 1;
    m = arb_mat_nrows(A);
    n = arb_mat_ncols(A);
    arb_mat_set(LU, A);
    a = LU->rows;
    row = col = 0;
    for (i = 0; i < m; i++)
        P[i] = i;
    arb_init(d);
    arb_init(e);
    result = 1;
    while (row < m && col < n)
    {
        r = arb_mat_find_pivot_partial(LU, row, m, col);
        if (r == -1)
        {
            result = 0;
            break;
        }
        else if (r != row)
            arb_mat_swap_rows(LU, P, row, r);
        arb_set(d, a[row] + col);
        for (j = row + 1; j < m; j++)
        {
            arb_div(e, a[j] + col, d, prec);
            arb_neg(e, e);
            _arb_vec_scalar_addmul(a[j] + col,
                a[row] + col, n - col, e, prec);
            arb_zero(a[j] + col);
            arb_neg(a[j] + row, e);
        }
        row++;
        col++;
    }
    arb_clear(d);
    arb_clear(e);
    return result;
 }
--- a/arb_mat/lu_recursive.c
+++ b/arb_mat/lu_recursive.c
@ -0,0 +1,113 @@
 /*
    Copyright (C) 2018 Fredrik Johansson
    This file is part of Arb.
    Arb is free software: you can redistribute it and/or modify it under
    the terms of the GNU Lesser General Public License (LGPL) as published
    by the Free Software Foundation; either version 2.1 of the License, or
    (at your option) any later version.  See <http://www.gnu.org/licenses/>.
 */
 #include "arb_mat.h"
 static void
 _apply_permutation(slong * AP, arb_mat_t A, slong * P,
    slong n, slong offset)
 {
    if (n != 0)
    {
        arb_ptr * Atmp;
        slong * APtmp;
        slong i;
        Atmp = flint_malloc(sizeof(arb_ptr) * n);
        APtmp = flint_malloc(sizeof(slong) * n);
        for (i = 0; i < n; i++) Atmp[i] = A->rows[P[i] + offset];
        for (i = 0; i < n; i++) A->rows[i + offset] = Atmp[i];
        for (i = 0; i < n; i++) APtmp[i] = AP[P[i] + offset];
        for (i = 0; i < n; i++) AP[i + offset] = APtmp[i];
        flint_free(Atmp);
        flint_free(APtmp);
    }
 }
 int
 arb_mat_lu_recursive(slong * P, arb_mat_t LU, const arb_mat_t A, slong prec)
 {
    slong i, m, n, r1, r2, n1;
    arb_mat_t A0, A1, A00, A01, A10, A11;
    slong * P1;
    m = A->r;
    n = A->c;
    if (m <= 1 || n <= 1)
    {
        return arb_mat_lu_classical(P, LU, A, prec);
    }
    if (LU != A)
        arb_mat_set(LU, A);
    n1 = n / 2;
    for (i = 0; i < m; i++)
        P[i] = i;
    P1 = flint_malloc(sizeof(slong) * m);
    arb_mat_window_init(A0, LU, 0, 0, m, n1);
    arb_mat_window_init(A1, LU, 0, n1, m, n);
    r1 = arb_mat_lu(P1, A0, A0, prec);
    if (!r1)
    {
        flint_free(P1);
        arb_mat_window_clear(A0);
        arb_mat_window_clear(A1);
        return 0;
    }
    /* r1 = rank of A0 */
    r1 = FLINT_MIN(m, n1);
    _apply_permutation(P, LU, P1, m, 0);
    arb_mat_window_init(A00, LU, 0, 0, r1, r1);
    arb_mat_window_init(A10, LU, r1, 0, m, r1);
    arb_mat_window_init(A01, LU, 0, n1, r1, n);
    arb_mat_window_init(A11, LU, r1, n1, m, n);
    arb_mat_solve_tril(A01, A00, A01, 1, prec);
    {
        /* arb_mat_submul(A11, A11, A10, A01, prec); */
        arb_mat_t T;
        arb_mat_init(T, A10->r, A01->c);
        arb_mat_mul(T, A10, A01, prec);
        arb_mat_sub(A11, A11, T, prec);
        arb_mat_clear(T);
    }
    r2 = arb_mat_lu(P1, A11, A11, prec);
    if (!r2)
        r1 = r2 = 0;
    else
        _apply_permutation(P, LU, P1, m - r1, r1);
    flint_free(P1);
    arb_mat_window_clear(A00);
    arb_mat_window_clear(A01);
    arb_mat_window_clear(A10);
    arb_mat_window_clear(A11);
    arb_mat_window_clear(A0);
    arb_mat_window_clear(A1);
    return r1 && r2;
 }
--- a/arb_mat/solve_lu_precomp.c
+++ b/arb_mat/solve_lu_precomp.c
@ -1,5 +1,5 @@
 /*
-    Copyright (C) 2012 Fredrik Johansson
+    Copyright (C) 2012,2018 Fredrik Johansson
    This file is part of Arb.
@ -46,6 +46,15 @@ arb_mat_solve_lu_precomp(arb_mat_t X, const slong * perm,
        }
    }
    /* todo: solve_tril and solve_triu have some overhead; should be
       able to eliminate the basecase code below */
    if (n >= 8 && m >= 8)
    {
        arb_mat_solve_tril(X, A, X, 1, prec);
        arb_mat_solve_triu(X, A, X, 0, prec);
        return;
    }
    for (c = 0; c < m; c++)
    {
        /* solve Ly = b */
--- a/arb_mat/solve_tril.c
+++ b/arb_mat/solve_tril.c
@ -0,0 +1,108 @@
 /*
    Copyright (C) 2018 Fredrik Johansson
    This file is part of Arb.
    Arb is free software: you can redistribute it and/or modify it under
    the terms of the GNU Lesser General Public License (LGPL) as published
    by the Free Software Foundation; either version 2.1 of the License, or
    (at your option) any later version.  See <http://www.gnu.org/licenses/>.
 */
 #include "arb_mat.h"
 void
 arb_mat_solve_tril_classical(arb_mat_t X,
        const arb_mat_t L, const arb_mat_t B, int unit, slong prec)
 {
    slong i, j, k, n, m;
    arb_ptr tmp;
    arb_t s;
    n = L->r;
    m = B->c;
    arb_init(s);
    tmp = _arb_vec_init(n);
    for (i = 0; i < m; i++)
    {
        for (j = 0; j < n; j++)
            arb_set(tmp + j, arb_mat_entry(X, j, i));
        for (j = 0; j < n; j++)
        {
            arb_zero(s);
            for (k = 0; k < j; k++)
                arb_addmul(s, L->rows[j] + k, tmp + k, prec);
            arb_sub(s, arb_mat_entry(B, j, i), s, prec);
            if (!unit)
                arb_div(s, s, arb_mat_entry(L, j, j), prec);
            arb_set(tmp + j, s);
        }
        for (j = 0; j < n; j++)
            arb_set(arb_mat_entry(X, j, i), tmp + j);
    }
    _arb_vec_clear(tmp, n);
    arb_clear(s);
 }
 void
 arb_mat_solve_tril_recursive(arb_mat_t X,
        const arb_mat_t L, const arb_mat_t B, int unit, slong prec)
 {
    arb_mat_t LA, LC, LD, XX, XY, BX, BY, T;
    slong r, n, m;
    n = L->r;
    m = B->c;
    r = n / 2;
    if (n == 0 || m == 0)
        return;
    /*
    Denoting inv(M) by M^, we have:
    [A 0]^ [X]  ==  [A^          0 ] [X]  ==  [A^ X]
    [C D]  [Y]  ==  [-D^ C A^    D^] [Y]  ==  [D^ (Y - C A^ X)]
    */
    arb_mat_window_init(LA, L, 0, 0, r, r);
    arb_mat_window_init(LC, L, r, 0, n, r);
    arb_mat_window_init(LD, L, r, r, n, n);
    arb_mat_window_init(BX, B, 0, 0, r, m);
    arb_mat_window_init(BY, B, r, 0, n, m);
    arb_mat_window_init(XX, X, 0, 0, r, m);
    arb_mat_window_init(XY, X, r, 0, n, m);
    arb_mat_solve_tril(XX, LA, BX, unit, prec);
    /* arb_mat_submul(XY, BY, LC, XX); */
    arb_mat_init(T, LC->r, BX->c);
    arb_mat_mul(T, LC, XX, prec);
    arb_mat_sub(XY, BY, T, prec);
    arb_mat_clear(T);
    arb_mat_solve_tril(XY, LD, XY, unit, prec);
    arb_mat_window_clear(LA);
    arb_mat_window_clear(LC);
    arb_mat_window_clear(LD);
    arb_mat_window_clear(BX);
    arb_mat_window_clear(BY);
    arb_mat_window_clear(XX);
    arb_mat_window_clear(XY);
 }
 void
 arb_mat_solve_tril(arb_mat_t X, const arb_mat_t L,
                                    const arb_mat_t B, int unit, slong prec)
 {
    if (B->r < 8 || B->c < 8)
        arb_mat_solve_tril_classical(X, L, B, unit, prec);
    else
        arb_mat_solve_tril_recursive(X, L, B, unit, prec);
 }
--- a/arb_mat/solve_triu.c
+++ b/arb_mat/solve_triu.c
@ -0,0 +1,110 @@
 /*
    Copyright (C) 2018 Fredrik Johansson
    This file is part of Arb.
    Arb is free software: you can redistribute it and/or modify it under
    the terms of the GNU Lesser General Public License (LGPL) as published
    by the Free Software Foundation; either version 2.1 of the License, or
    (at your option) any later version.  See <http://www.gnu.org/licenses/>.
 */
 #include "arb_mat.h"
 void
 arb_mat_solve_triu_classical(arb_mat_t X, const arb_mat_t U,
    const arb_mat_t B, int unit, slong prec)
 {
    slong i, j, k, n, m;
    arb_ptr tmp;
    arb_t s;
    n = U->r;
    m = B->c;
    tmp = _arb_vec_init(n);
    arb_init(s);
    for (i = 0; i < m; i++)
    {
        for (j = 0; j < n; j++)
            arb_set(tmp + j, arb_mat_entry(X, j, i));
        for (j = n - 1; j >= 0; j--)
        {
            arb_zero(s);
            for (k = 0; k < n - j - 1; k++)
                arb_addmul(s, U->rows[j] + j + 1 + k, tmp + j + 1 + k, prec);
            arb_sub(s, arb_mat_entry(B, j, i), s, prec);
            if (!unit)
                arb_div(s, s, arb_mat_entry(U, j, j), prec);
            arb_set(tmp + j, s);
        }
        for (j = 0; j < n; j++)
            arb_set(arb_mat_entry(X, j, i), tmp + j);
    }
    _arb_vec_clear(tmp, n);
    arb_clear(s);
 }
 void
 arb_mat_solve_triu_recursive(arb_mat_t X,
        const arb_mat_t U, const arb_mat_t B, int unit, slong prec)
 {
    arb_mat_t UA, UB, UD, XX, XY, BX, BY, T;
    slong r, n, m;
    n = U->r;
    m = B->c;
    r = n / 2;
    if (n == 0 || m == 0)
        return;
    /*
    Denoting inv(M) by M^, we have:
    [A B]^ [X]  ==  [A^ (X - B D^ Y)]
    [0 D]  [Y]  ==  [    D^ Y       ]
    */
    arb_mat_window_init(UA, U, 0, 0, r, r);
    arb_mat_window_init(UB, U, 0, r, r, n);
    arb_mat_window_init(UD, U, r, r, n, n);
    arb_mat_window_init(BX, B, 0, 0, r, m);
    arb_mat_window_init(BY, B, r, 0, n, m);
    arb_mat_window_init(XX, X, 0, 0, r, m);
    arb_mat_window_init(XY, X, r, 0, n, m);
    arb_mat_solve_triu(XY, UD, BY, unit, prec);
    /* arb_mat_submul(XX, BX, UB, XY); */
    arb_mat_init(T, UB->r, XY->c);
    arb_mat_mul(T, UB, XY, prec);
    arb_mat_sub(XX, BX, T, prec);
    arb_mat_clear(T);
    arb_mat_solve_triu(XX, UA, XX, unit, prec);
    arb_mat_window_clear(UA);
    arb_mat_window_clear(UB);
    arb_mat_window_clear(UD);
    arb_mat_window_clear(BX);
    arb_mat_window_clear(BY);
    arb_mat_window_clear(XX);
    arb_mat_window_clear(XY);
 }
 void
 arb_mat_solve_triu(arb_mat_t X, const arb_mat_t U,
                                    const arb_mat_t B, int unit, slong prec)
 {
    if (B->r < 8 || B->c < 8)
        arb_mat_solve_triu_classical(X, U, B, unit, prec);
    else
        arb_mat_solve_triu_recursive(X, U, B, unit, prec);
 }
--- a/arb_mat/test/t-lu.c
+++ b/arb_mat/test/t-lu.c
@ -32,14 +32,14 @@ int main()
    flint_randinit(state);
-    for (iter = 0; iter < 100000 * arb_test_multiplier(); iter++)
+    for (iter = 0; iter < 10000 * arb_test_multiplier(); iter++)
    {
        fmpq_mat_t Q;
        arb_mat_t A, LU, P, L, U, T;
        slong i, j, n, qbits, prec, *perm;
        int q_invertible, r_invertible;
-        n = n_randint(state, 8);
+        n = n_randint(state, 10);
        qbits = 1 + n_randint(state, 100);
        prec = 2 + n_randint(state, 202);
--- a/arb_mat/test/t-lu_recursive.c
+++ b/arb_mat/test/t-lu_recursive.c
@ -0,0 +1,177 @@
 /*
    Copyright (C) 2012 Fredrik Johansson
    This file is part of Arb.
    Arb is free software: you can redistribute it and/or modify it under
    the terms of the GNU Lesser General Public License (LGPL) as published
    by the Free Software Foundation; either version 2.1 of the License, or
    (at your option) any later version.  See <http://www.gnu.org/licenses/>.
 */
 #include "arb_mat.h"
 int fmpq_mat_is_invertible(const fmpq_mat_t A)
 {
    int r;
    fmpq_t t;
    fmpq_init(t);
    fmpq_mat_det(t, A);
    r = !fmpq_is_zero(t);
    fmpq_clear(t);
    return r;
 }
 int main()
 {
    slong iter;
    flint_rand_t state;
    flint_printf("lu_recursive....");
    fflush(stdout);
    flint_randinit(state);
    /* Dummy test with rectangular matrices. Rectangular matrices are
       not actually supported (the output may be bogus), but the algorithm
       should at least not crash. */
    for (iter = 0; iter < 1000 * arb_test_multiplier(); iter++)
    {
        slong m, n, prec;
        slong *perm;
        arb_mat_t A, LU;
        n = n_randint(state, 20);
        m = n_randint(state, 20);
        prec = 2 + n_randint(state, 200);
        arb_mat_init(A, n, m);
        arb_mat_init(LU, n, m);
        perm = _perm_init(n);
        arb_mat_randtest(A, state, prec, 10);
        if (n_randint(state, 2))
        {
            arb_mat_lu_recursive(perm, LU, A, prec);
        }
        else
        {
            arb_mat_set(LU, A);
            arb_mat_lu_recursive(perm, LU, LU, prec);
        }
        arb_mat_clear(A);
        arb_mat_clear(LU);
        _perm_clear(perm);
    }
    for (iter = 0; iter < 2000 * arb_test_multiplier(); iter++)
    {
        fmpq_mat_t Q;
        arb_mat_t A, LU, P, L, U, T;
        slong i, j, n, qbits, prec, *perm;
        int q_invertible, r_invertible;
        n = n_randint(state, 20);
        qbits = 1 + n_randint(state, 100);
        prec = 2 + n_randint(state, 202);
        fmpq_mat_init(Q, n, n);
        arb_mat_init(A, n, n);
        arb_mat_init(LU, n, n);
        arb_mat_init(P, n, n);
        arb_mat_init(L, n, n);
        arb_mat_init(U, n, n);
        arb_mat_init(T, n, n);
        perm = _perm_init(n);
        fmpq_mat_randtest(Q, state, qbits);
        q_invertible = fmpq_mat_is_invertible(Q);
        if (!q_invertible)
        {
            arb_mat_set_fmpq_mat(A, Q, prec);
            r_invertible = arb_mat_lu_recursive(perm, LU, A, prec);
            if (r_invertible)
            {
                flint_printf("FAIL: matrix is singular over Q but not over R\n");
                flint_printf("n = %wd, prec = %wd\n", n, prec);
                flint_printf("\n");
                flint_printf("Q = \n"); fmpq_mat_print(Q); flint_printf("\n\n");
                flint_printf("A = \n"); arb_mat_printd(A, 15); flint_printf("\n\n");
                flint_printf("LU = \n"); arb_mat_printd(LU, 15); flint_printf("\n\n");
            }
        }
        else
        {
            /* now this must converge */
            while (1)
            {
                arb_mat_set_fmpq_mat(A, Q, prec);
                r_invertible = arb_mat_lu_recursive(perm, LU, A, prec);
                if (r_invertible)
                {
                    break;
                }
                else
                {
                    if (prec > 10000)
                    {
                        flint_printf("FAIL: failed to converge at 10000 bits\n");
                        flint_abort();
                    }
                    prec *= 2;
                }
            }
            arb_mat_one(L);
            for (i = 0; i < n; i++)
                for (j = 0; j < i; j++)
                    arb_set(arb_mat_entry(L, i, j),
                        arb_mat_entry(LU, i, j));
            for (i = 0; i < n; i++)
                for (j = i; j < n; j++)
                    arb_set(arb_mat_entry(U, i, j),
                        arb_mat_entry(LU, i, j));
            for (i = 0; i < n; i++)
                arb_one(arb_mat_entry(P, perm[i], i));
            arb_mat_mul(T, P, L, prec);
            arb_mat_mul(T, T, U, prec);
            if (!arb_mat_contains_fmpq_mat(T, Q))
            {
                flint_printf("FAIL (containment, iter = %wd)\n", iter);
                flint_printf("n = %wd, prec = %wd\n", n, prec);
                flint_printf("\n");
                flint_printf("Q = \n"); fmpq_mat_print(Q); flint_printf("\n\n");
                flint_printf("A = \n"); arb_mat_printd(A, 15); flint_printf("\n\n");
                flint_printf("LU = \n"); arb_mat_printd(LU, 15); flint_printf("\n\n");
                flint_printf("L = \n"); arb_mat_printd(L, 15); flint_printf("\n\n");
                flint_printf("U = \n"); arb_mat_printd(U, 15); flint_printf("\n\n");
                flint_printf("P*L*U = \n"); arb_mat_printd(T, 15); flint_printf("\n\n");
                flint_abort();
            }
        }
        fmpq_mat_clear(Q);
        arb_mat_clear(A);
        arb_mat_clear(LU);
        arb_mat_clear(P);
        arb_mat_clear(L);
        arb_mat_clear(U);
        arb_mat_clear(T);
        _perm_clear(perm);
    }
    flint_randclear(state);
    flint_cleanup();
    flint_printf("PASS\n");
    return EXIT_SUCCESS;
 }
--- a/arb_mat/test/t-solve_lu.c
+++ b/arb_mat/test/t-solve_lu.c
@ -28,8 +28,16 @@ int main()
        slong n, m, qbits, prec;
        int q_invertible, r_invertible, r_invertible2;
        if (n_randint(state, 50) == 0)
        {
            n = n_randint(state, 20);
            m = n_randint(state, 20);
        }
        else
        {
            n = n_randint(state, 8);
            m = n_randint(state, 8);
        }
        qbits = 1 + n_randint(state, 30);
        prec = 2 + n_randint(state, 200);
--- a/arb_mat/test/t-solve_tril.c
+++ b/arb_mat/test/t-solve_tril.c
@ -0,0 +1,99 @@
 /*
    Copyright (C) 2018 Fredrik Johansson
    This file is part of Arb.
    Arb is free software: you can redistribute it and/or modify it under
    the terms of the GNU Lesser General Public License (LGPL) as published
    by the Free Software Foundation; either version 2.1 of the License, or
    (at your option) any later version.  See <http://www.gnu.org/licenses/>.
 */
 #include "arb_mat.h"
 int main()
 {
    slong iter;
    flint_rand_t state;
    flint_printf("solve_tril....");
    fflush(stdout);
    flint_randinit(state);
    for (iter = 0; iter < 2000 * arb_test_multiplier(); iter++)
    {
        arb_mat_t A, X, B, Y;
        slong rows, cols, prec, i, j;
        int unit;
        prec = 2 + n_randint(state, 200);
        rows = n_randint(state, 30);
        cols = n_randint(state, 30);
        unit = n_randint(state, 2);
        arb_mat_init(A, rows, rows);
        arb_mat_init(B, rows, cols);
        arb_mat_init(X, rows, cols);
        arb_mat_init(Y, rows, cols);
        arb_mat_randtest(A, state, prec, 10);
        arb_mat_randtest(X, state, prec, 10);
        arb_mat_randtest(Y, state, prec, 10);
        for (i = 0; i < rows; i++)
        {
            if (unit)
                arb_one(arb_mat_entry(A, i, i));
            else
                arb_set_ui(arb_mat_entry(A, i, i), 1 + n_randint(state, 100));
            for (j = i + 1; j < rows; j++)
                arb_zero(arb_mat_entry(A, i, j));
        }
        arb_mat_mul(B, A, X, prec);
        if (unit)  /* check that diagonal entries are ignored */
        {
            for (i = 0; i < rows; i++)
                arb_set_ui(arb_mat_entry(A, i, i), 1 + n_randint(state, 100));
        }
        /* Check Y = A^(-1) * (A * X) = X */
        arb_mat_solve_tril(Y, A, B, unit, prec);
        if (!arb_mat_overlaps(Y, X))
        {
            flint_printf("FAIL\n");
            flint_printf("A = \n"); arb_mat_printd(A, 10); flint_printf("\n\n");
            flint_printf("B = \n"); arb_mat_printd(B, 10); flint_printf("\n\n");
            flint_printf("X = \n"); arb_mat_printd(X, 10); flint_printf("\n\n");
            flint_printf("Y = \n"); arb_mat_printd(Y, 10); flint_printf("\n\n");
            flint_abort();
        }
        /* Check aliasing */
        arb_mat_solve_tril(B, A, B, unit, prec);
        if (!arb_mat_equal(B, Y))
        {
            flint_printf("FAIL (aliasing)\n");
            flint_printf("A = \n"); arb_mat_printd(A, 10); flint_printf("\n\n");
            flint_printf("B = \n"); arb_mat_printd(B, 10); flint_printf("\n\n");
            flint_printf("X = \n"); arb_mat_printd(X, 10); flint_printf("\n\n");
            flint_printf("Y = \n"); arb_mat_printd(Y, 10); flint_printf("\n\n");
            flint_abort();
        }
        arb_mat_clear(A);
        arb_mat_clear(B);
        arb_mat_clear(X);
        arb_mat_clear(Y);
    }
    flint_randclear(state);
    flint_cleanup();
    flint_printf("PASS\n");
    return EXIT_SUCCESS;
 }
--- a/arb_mat/test/t-solve_triu.c
+++ b/arb_mat/test/t-solve_triu.c
@ -0,0 +1,99 @@
 /*
    Copyright (C) 2018 Fredrik Johansson
    This file is part of Arb.
    Arb is free software: you can redistribute it and/or modify it under
    the terms of the GNU Lesser General Public License (LGPL) as published
    by the Free Software Foundation; either version 2.1 of the License, or
    (at your option) any later version.  See <http://www.gnu.org/licenses/>.
 */
 #include "arb_mat.h"
 int main()
 {
    slong iter;
    flint_rand_t state;
    flint_printf("solve_triu....");
    fflush(stdout);
    flint_randinit(state);
    for (iter = 0; iter < 2000 * arb_test_multiplier(); iter++)
    {
        arb_mat_t A, X, B, Y;
        slong rows, cols, prec, i, j;
        int unit;
        prec = 2 + n_randint(state, 200);
        rows = n_randint(state, 30);
        cols = n_randint(state, 30);
        unit = n_randint(state, 2);
        arb_mat_init(A, rows, rows);
        arb_mat_init(B, rows, cols);
        arb_mat_init(X, rows, cols);
        arb_mat_init(Y, rows, cols);
        arb_mat_randtest(A, state, prec, 10);
        arb_mat_randtest(X, state, prec, 10);
        arb_mat_randtest(Y, state, prec, 10);
        for (i = 0; i < rows; i++)
        {
            if (unit)
                arb_one(arb_mat_entry(A, i, i));
            else
                arb_set_ui(arb_mat_entry(A, i, i), 1 + n_randint(state, 100));
            for (j = 0; j < i; j++)
                arb_zero(arb_mat_entry(A, i, j));
        }
        arb_mat_mul(B, A, X, prec);
        if (unit)  /* check that diagonal entries are ignored */
        {
            for (i = 0; i < rows; i++)
                arb_set_ui(arb_mat_entry(A, i, i), 1 + n_randint(state, 100));
        }
        /* Check Y = A^(-1) * (A * X) = X */
        arb_mat_solve_triu(Y, A, B, unit, prec);
        if (!arb_mat_overlaps(Y, X))
        {
            flint_printf("FAIL\n");
            flint_printf("A = \n"); arb_mat_printd(A, 10); flint_printf("\n\n");
            flint_printf("B = \n"); arb_mat_printd(B, 10); flint_printf("\n\n");
            flint_printf("X = \n"); arb_mat_printd(X, 10); flint_printf("\n\n");
            flint_printf("Y = \n"); arb_mat_printd(Y, 10); flint_printf("\n\n");
            flint_abort();
        }
        /* Check aliasing */
        arb_mat_solve_triu(B, A, B, unit, prec);
        if (!arb_mat_equal(B, Y))
        {
            flint_printf("FAIL (aliasing)\n");
            flint_printf("A = \n"); arb_mat_printd(A, 10); flint_printf("\n\n");
            flint_printf("B = \n"); arb_mat_printd(B, 10); flint_printf("\n\n");
            flint_printf("X = \n"); arb_mat_printd(X, 10); flint_printf("\n\n");
            flint_printf("Y = \n"); arb_mat_printd(Y, 10); flint_printf("\n\n");
            flint_abort();
        }
        arb_mat_clear(A);
        arb_mat_clear(B);
        arb_mat_clear(X);
        arb_mat_clear(Y);
    }
    flint_randclear(state);
    flint_cleanup();
    flint_printf("PASS\n");
    return EXIT_SUCCESS;
 }
--- a/arb_mat/window_init.c
+++ b/arb_mat/window_init.c
@ -0,0 +1,29 @@
 /*
    Copyright (C) 2018 Fredrik Johansson
    This file is part of Arb.
    Arb is free software: you can redistribute it and/or modify it under
    the terms of the GNU Lesser General Public License (LGPL) as published
    by the Free Software Foundation; either version 2.1 of the License, or
    (at your option) any later version.  See <http://www.gnu.org/licenses/>.
 */
 #include "arb_mat.h"
 void
 arb_mat_window_init(arb_mat_t window, const arb_mat_t mat,
    slong r1, slong c1, slong r2, slong c2)
 {
    slong i;
    window->entries = NULL;
    window->rows = flint_malloc((r2 - r1) * sizeof(arb_ptr));
    for (i = 0; i < r2 - r1; i++)
        window->rows[i] = mat->rows[r1 + i] + c1;
    window->r = r2 - r1;
    window->c = c2 - c1;
 }
--- a/doc/source/arb_mat.rst
+++ b/doc/source/arb_mat.rst
@ -58,6 +58,16 @@ Memory management
    The count excludes the size of the structure itself. Add
    ``sizeof(arb_mat_struct)`` to get the size of the object as a whole.
 .. function:: void arb_mat_window_init(arb_mat_t window, const arb_mat_t mat, slong r1, slong c1, slong r2, slong c2)
    Initializes *window* to a window matrix into the submatrix of *mat*
    starting at the corner at row *r1* and column *c1* (inclusive) and ending
    at row *r2* and column *c2* (exclusive).
 .. function:: void arb_mat_window_clear(arb_mat_t window)
    Frees the submatrix.
 Conversions
 -------------------------------------------------------------------------------
@ -239,16 +249,24 @@ Arithmetic
 .. function:: void arb_mat_mul_threaded(arb_mat_t C, const arb_mat_t A, const arb_mat_t B, slong prec)
 .. function:: void arb_mat_mul_block(arb_mat_t C, const arb_mat_t A, const arb_mat_t B, slong prec)
 .. function:: void arb_mat_mul(arb_mat_t res, const arb_mat_t mat1, const arb_mat_t mat2, slong prec)
    Sets *res* to the matrix product of *mat1* and *mat2*. The operands must have
    compatible dimensions for matrix multiplication.
-    The *threaded* version splits the computation
+    The *classical* version performs matrix multiplication in the trivial way.
-    over the number of threads returned by *flint_get_num_threads()*.
+
-    The default version automatically calls the *threaded* version
+    The *block* version decomposes the input matrices into one or several
-    if the matrices are sufficiently large and more than one thread
+    blocks of uniformly scaled matrices and multiplies 
-    can be used.
+    large blocks via *fmpz_mat_mul*. It also invokes
    :func:`_arb_mat_addmul_rad_mag_fast` for the radius matrix multiplications.
    The *threaded* version performs classical multiplication but splits the
    computation over the number of threads returned by *flint_get_num_threads()*.
    The default version chooses an algorithm automatically.
 .. function:: void arb_mat_mul_entrywise(arb_mat_t C, const arb_mat_t A, const arb_mat_t B, slong prec)
@ -267,6 +285,14 @@ Arithmetic
    Sets *res* to *mat* raised to the power *exp*. Requires that *mat*
    is a square matrix.
 .. function:: void _arb_mat_addmul_rad_mag_fast(arb_mat_t C, mag_srcptr A, mag_srcptr B, slong ar, slong ac, slong bc)
    Helper function for matrix multiplication.
    Adds to the radii of *C* the matrix product of the matrices represented
    by *A* and *B*, where *A* is a linear array of coefficients in row-major
    order and *B* is a linear array of coefficients in column-major order. 
    This function assumes that all exponents are small and is unsafe
    for general use.
 Scalar arithmetic
 -------------------------------------------------------------------------------
@ -303,6 +329,10 @@ Scalar arithmetic
 Gaussian elimination and solving
 -------------------------------------------------------------------------------
 .. function:: int arb_mat_lu_classical(slong * perm, arb_mat_t LU, const arb_mat_t A, slong prec)
 .. function:: int arb_mat_lu_recursive(slong * perm, arb_mat_t LU, const arb_mat_t A, slong prec)
 .. function:: int arb_mat_lu(slong * perm, arb_mat_t LU, const arb_mat_t A, slong prec)
    Given an `n \times n` matrix `A`, computes an LU decomposition `PLU = A`
@ -323,6 +353,33 @@ Gaussian elimination and solving
    computed to insufficient precision, or the LU decomposition was
    attempted at insufficient precision.
    The *classical* version uses Gaussian elimination directly while
    the *recursive* version performs the computation in a block recursive
    way to benefit from fast matrix multiplication. The default version
    chooses an algorithm automatically.
 .. function:: void arb_mat_solve_tril_classical(arb_mat_t X, const arb_mat_t L, const arb_mat_t B, int unit, slong prec)
 .. function:: void arb_mat_solve_tril_recursive(arb_mat_t X, const arb_mat_t L, const arb_mat_t B, int unit, slong prec)
 .. function:: void arb_mat_solve_tril(arb_mat_t X, const arb_mat_t L, const arb_mat_t B, int unit, slong prec)
 .. function:: void arb_mat_solve_triu_classical(arb_mat_t X, const arb_mat_t U, const arb_mat_t B, int unit, slong prec)
 .. function:: void arb_mat_solve_triu_recursive(arb_mat_t X, const arb_mat_t U, const arb_mat_t B, int unit, slong prec)
 .. function:: void arb_mat_solve_triu(arb_mat_t X, const arb_mat_t U, const arb_mat_t B, int unit, slong prec)
    Solves the lower triangular system `LX = B` or the upper triangular system
    `UX = B`, respectively. If *unit* is set, the main diagonal of *L* or *U*
    is taken to consist of all ones, and in that case the actual entries on
    the diagonal are not read at all and can contain other data.
    The *classical* versions perform the computations iteratively while the
    *recursive* versions perform the computations in a block recursive
    way to benefit from fast matrix multiplication. The default versions
    choose an algorithm automatically.
 .. function:: void arb_mat_solve_lu_precomp(arb_mat_t X, const slong * perm, const arb_mat_t LU, const arb_mat_t B, slong prec)
    Solves `AX = B` given the precomputed nonsingular LU decomposition `A = PLU`.