aboutsummaryrefslogtreecommitdiff
path: root/mt/ag_matmul.c
diff options
context:
space:
mode:
authorHenry Cook <hcook@eecs.berkeley.edu>2014-11-06 17:24:39 -0800
committerHenry Cook <hcook@eecs.berkeley.edu>2014-11-07 16:52:51 -0800
commitd537de7deffa6036dab573ff174b7f8c8e470437 (patch)
treeddc921eb337cda4889570f0251bdba85059a2531 /mt/ag_matmul.c
parent5afc6b9bc2e3685220cffb3da66ad9f5f1f7b14f (diff)
downloadriscv-tests-d537de7deffa6036dab573ff174b7f8c8e470437.zip
riscv-tests-d537de7deffa6036dab573ff174b7f8c8e470437.tar.gz
riscv-tests-d537de7deffa6036dab573ff174b7f8c8e470437.tar.bz2
Clean up canonical mt benchmarks and reorganize extra versions in /mt. All versions support at least 1/2/4 threads.
Diffstat (limited to 'mt/ag_matmul.c')
-rwxr-xr-xmt/ag_matmul.c79
1 files changed, 79 insertions, 0 deletions
diff --git a/mt/ag_matmul.c b/mt/ag_matmul.c
new file mode 100755
index 0000000..0b9cc6e
--- /dev/null
+++ b/mt/ag_matmul.c
@@ -0,0 +1,79 @@
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+#include "util.h"
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+ int i, j, k;
+ /*
+  * Accumulates C += A * B for lda x lda matrices indexed as [row*lda + col].
+  * This call only updates rows coreid*(lda/ncores) .. (coreid+1)*(lda/ncores)-1
+  * of C; A and B are only read.
+  *
+  * Tiling: columns i are taken two at a time, the inner dimension k four at a
+  * time, and rows j four at a time, with no remainder handling. The indices
+  * stay inside the lda x lda matrices only if lda is a multiple of 4, and the
+  * four-row step stays inside this core's band only if lda/ncores is a
+  * multiple of 4. When lda is not divisible by ncores, the rows from
+  * ncores*(lda/ncores) onward are not covered by any core's band.
+  *
+  * C is updated with +=, so its contents on entry are part of the result.
+  * NOTE(review): presumably the caller zeroes C before the first call -- confirm.
+  */
+ for ( i = 0; i < lda; i+=2 )
+ {
+ for (k = 0; k < lda; k+=4)
+ { /* Cache the 4x2 tile of B (rows k..k+3, columns i and i+1): dN holds column i, cN holds column i+1. */
+ int d0 = B[k*lda + i]; /* NOTE(review): stored in an int local; if data_t is not int this converts the value -- confirm data_t's definition */
+ int c0 = B[k*lda + i + 1];
+ int d1 = B[(k+1)*lda + i];
+ int c1 = B[(k+1)*lda + i + 1];
+ int d2 = B[(k+2)*lda + i];
+ int c2 = B[(k+2)*lda + i + 1];
+ int d3 = B[(k+3)*lda + i];
+ int c3 = B[(k+3)*lda + i + 1];
+ /* Only the row range below depends on coreid; the i and k loops run identically for every coreid. */
+ for ( j = coreid*(lda/ncores); j < (coreid+1)*(lda/ncores); j+=4)
+ {
+ /* Row j: A[j][k..k+3] is combined first with the d* column (-> C[j][i]), then with the c* column (-> C[j][i+1]). */
+ int sum = A[j*lda + k] * d0;
+ sum += A[j*lda + k + 1] * d1;
+ sum += A[j*lda + k + 2] * d2;
+ sum += A[j*lda + k + 3] * d3;
+ C[j*lda +i] += sum;
+
+ sum = A[j*lda + k] * c0;
+ sum += A[j*lda + k + 1] * c1;
+ sum += A[j*lda + k + 2] * c2;
+ sum += A[j*lda + k + 3] * c3;
+ C[j*lda + i + 1] += sum;
+ /* Row j+1: same pattern. */
+ sum = A[(j+1)*lda + k] * d0;
+ sum += A[(j+1)*lda + k + 1] * d1;
+ sum += A[(j+1)*lda + k + 2] * d2;
+ sum += A[(j+1)*lda + k + 3] * d3;
+ C[(j+1)*lda +i] += sum;
+
+ sum = A[(j+1)*lda + k] * c0;
+ sum += A[(j+1)*lda + k + 1] * c1;
+ sum += A[(j+1)*lda + k + 2] * c2;
+ sum += A[(j+1)*lda + k + 3] * c3;
+ C[(j+1)*lda + i + 1] += sum;
+ /* Row j+2: same pattern. */
+ sum = A[(j+2)*lda + k] * d0;
+ sum += A[(j+2)*lda + k + 1] * d1;
+ sum += A[(j+2)*lda + k + 2] * d2;
+ sum += A[(j+2)*lda + k + 3] * d3;
+ C[(j+2)*lda +i] += sum;
+
+ sum = A[(j+2)*lda + k] * c0;
+ sum += A[(j+2)*lda + k + 1] * c1;
+ sum += A[(j+2)*lda + k + 2] * c2;
+ sum += A[(j+2)*lda + k + 3] * c3;
+ C[(j+2)*lda + i + 1] += sum;
+ /* Row j+3: same pattern. */
+ sum = A[(j+3)*lda + k] * d0;
+ sum += A[(j+3)*lda + k + 1] * d1;
+ sum += A[(j+3)*lda + k + 2] * d2;
+ sum += A[(j+3)*lda + k + 3] * d3;
+ C[(j+3)*lda +i] += sum;
+
+ sum = A[(j+3)*lda + k] * c0;
+ sum += A[(j+3)*lda + k + 1] * c1;
+ sum += A[(j+3)*lda + k + 2] * c2;
+ sum += A[(j+3)*lda + k + 3] * c3;
+ C[(j+3)*lda + i + 1] += sum;
+
+ }
+ barrier(ncores); /* Executed once per (i, k) tile, the same number of times for every coreid. barrier() is not defined in this file. NOTE(review): presumably a rendezvous of ncores cores provided by util.h -- confirm. */
+ }
+ }
+}