initial use of arb_dot and acb_dot in matrix multiplication (cutoffs not updated)

This commit is contained in:
fredrik 2018-08-26 21:02:14 +02:00
parent a682aedb61
commit 41176f59db
4 changed files with 108 additions and 40 deletions

View file

@ -1,5 +1,5 @@
/*
Copyright (C) 2012 Fredrik Johansson
Copyright (C) 2012, 2018 Fredrik Johansson
This file is part of Arb.
@ -49,21 +49,47 @@ acb_mat_mul_classical(acb_mat_t C, const acb_mat_t A, const acb_mat_t B, slong p
return;
}
for (i = 0; i < ar; i++)
if (br <= 2)
{
for (j = 0; j < bc; j++)
for (i = 0; i < ar; i++)
{
acb_mul(acb_mat_entry(C, i, j),
acb_mat_entry(A, i, 0),
acb_mat_entry(B, 0, j), prec);
for (k = 1; k < br; k++)
for (j = 0; j < bc; j++)
{
acb_addmul(acb_mat_entry(C, i, j),
acb_mat_entry(A, i, k),
acb_mat_entry(B, k, j), prec);
/* todo: efficient fmma code */
acb_mul(acb_mat_entry(C, i, j),
acb_mat_entry(A, i, 0),
acb_mat_entry(B, 0, j), prec);
for (k = 1; k < br; k++)
{
acb_addmul(acb_mat_entry(C, i, j),
acb_mat_entry(A, i, k),
acb_mat_entry(B, k, j), prec);
}
}
}
}
}
else
{
acb_ptr tmp;
TMP_INIT;
TMP_START;
tmp = TMP_ALLOC(sizeof(acb_struct) * br * bc);
for (i = 0; i < br; i++)
for (j = 0; j < bc; j++)
tmp[j * br + i] = *acb_mat_entry(B, i, j);
for (i = 0; i < ar; i++)
{
for (j = 0; j < bc; j++)
{
acb_dot(acb_mat_entry(C, i, j), NULL, 0,
A->rows[i], 1, tmp + j * br, 1, br, prec);
}
}
TMP_END;
}
}

View file

@ -30,21 +30,30 @@ void *
_acb_mat_mul_thread(void * arg_ptr)
{
acb_mat_mul_arg_t arg = *((acb_mat_mul_arg_t *) arg_ptr);
slong i, j, k;
slong i, j, br, bc;
acb_ptr tmp;
TMP_INIT;
br = arg.br;
bc = arg.bc1 - arg.bc0;
TMP_START;
tmp = TMP_ALLOC(sizeof(acb_struct) * br * bc);
for (i = 0; i < br; i++)
for (j = 0; j < bc; j++)
tmp[j * br + i] = arg.B[i][arg.bc0 + j];
for (i = arg.ar0; i < arg.ar1; i++)
{
for (j = arg.bc0; j < arg.bc1; j++)
{
acb_mul(arg.C[i] + j, arg.A[i] + 0, arg.B[0] + j, arg.prec);
for (k = 1; k < arg.br; k++)
{
acb_addmul(arg.C[i] + j, arg.A[i] + k, arg.B[k] + j, arg.prec);
}
acb_dot(arg.C[i] + j, NULL, 0,
arg.A[i], 1, tmp + (j - arg.bc0) * br, 1, br, arg.prec);
}
}
TMP_END;
flint_cleanup();
return NULL;
}
@ -122,4 +131,3 @@ acb_mat_mul_threaded(acb_mat_t C, const acb_mat_t A, const acb_mat_t B, slong pr
flint_free(threads);
flint_free(args);
}

View file

@ -1,5 +1,5 @@
/*
Copyright (C) 2012 Fredrik Johansson
Copyright (C) 2012, 2018 Fredrik Johansson
This file is part of Arb.
@ -49,21 +49,47 @@ arb_mat_mul_classical(arb_mat_t C, const arb_mat_t A, const arb_mat_t B, slong p
return;
}
for (i = 0; i < ar; i++)
if (br <= 2)
{
for (j = 0; j < bc; j++)
for (i = 0; i < ar; i++)
{
arb_mul(arb_mat_entry(C, i, j),
arb_mat_entry(A, i, 0),
arb_mat_entry(B, 0, j), prec);
for (k = 1; k < br; k++)
for (j = 0; j < bc; j++)
{
arb_addmul(arb_mat_entry(C, i, j),
arb_mat_entry(A, i, k),
arb_mat_entry(B, k, j), prec);
/* todo: efficient fmma code */
arb_mul(arb_mat_entry(C, i, j),
arb_mat_entry(A, i, 0),
arb_mat_entry(B, 0, j), prec);
for (k = 1; k < br; k++)
{
arb_addmul(arb_mat_entry(C, i, j),
arb_mat_entry(A, i, k),
arb_mat_entry(B, k, j), prec);
}
}
}
}
}
else
{
arb_ptr tmp;
TMP_INIT;
TMP_START;
tmp = TMP_ALLOC(sizeof(arb_struct) * br * bc);
for (i = 0; i < br; i++)
for (j = 0; j < bc; j++)
tmp[j * br + i] = *arb_mat_entry(B, i, j);
for (i = 0; i < ar; i++)
{
for (j = 0; j < bc; j++)
{
arb_dot(arb_mat_entry(C, i, j), NULL, 0,
A->rows[i], 1, tmp + j * br, 1, br, prec);
}
}
TMP_END;
}
}

View file

@ -30,21 +30,30 @@ void *
_arb_mat_mul_thread(void * arg_ptr)
{
arb_mat_mul_arg_t arg = *((arb_mat_mul_arg_t *) arg_ptr);
slong i, j, k;
slong i, j, br, bc;
arb_ptr tmp;
TMP_INIT;
br = arg.br;
bc = arg.bc1 - arg.bc0;
TMP_START;
tmp = TMP_ALLOC(sizeof(arb_struct) * br * bc);
for (i = 0; i < br; i++)
for (j = 0; j < bc; j++)
tmp[j * br + i] = arg.B[i][arg.bc0 + j];
for (i = arg.ar0; i < arg.ar1; i++)
{
for (j = arg.bc0; j < arg.bc1; j++)
{
arb_mul(arg.C[i] + j, arg.A[i] + 0, arg.B[0] + j, arg.prec);
for (k = 1; k < arg.br; k++)
{
arb_addmul(arg.C[i] + j, arg.A[i] + k, arg.B[k] + j, arg.prec);
}
arb_dot(arg.C[i] + j, NULL, 0,
arg.A[i], 1, tmp + (j - arg.bc0) * br, 1, br, arg.prec);
}
}
TMP_END;
flint_cleanup();
return NULL;
}
@ -122,4 +131,3 @@ arb_mat_mul_threaded(arb_mat_t C, const arb_mat_t A, const arb_mat_t B, slong pr
flint_free(threads);
flint_free(args);
}