aboutsummaryrefslogtreecommitdiff
path: root/mt/ad_matmul.c
diff options
context:
space:
mode:
authorHenry Cook <hcook@eecs.berkeley.edu>2014-11-06 17:24:39 -0800
committerHenry Cook <hcook@eecs.berkeley.edu>2014-11-07 16:52:51 -0800
commitd537de7deffa6036dab573ff174b7f8c8e470437 (patch)
treeddc921eb337cda4889570f0251bdba85059a2531 /mt/ad_matmul.c
parent5afc6b9bc2e3685220cffb3da66ad9f5f1f7b14f (diff)
downloadriscv-tests-d537de7deffa6036dab573ff174b7f8c8e470437.zip
riscv-tests-d537de7deffa6036dab573ff174b7f8c8e470437.tar.gz
riscv-tests-d537de7deffa6036dab573ff174b7f8c8e470437.tar.bz2
Clean up canonical mt benchmarks and reorganize extra versions in /mt. All versions support at least 1/2/4 threads.
Diffstat (limited to 'mt/ad_matmul.c')
-rwxr-xr-xmt/ad_matmul.c37
1 files changed, 37 insertions, 0 deletions
diff --git a/mt/ad_matmul.c b/mt/ad_matmul.c
new file mode 100755
index 0000000..60e6e6c
--- /dev/null
+++ b/mt/ad_matmul.c
@@ -0,0 +1,37 @@
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+/*
+ * Per-core slice of a matrix multiply: accumulates A * B into C for the rows
+ * assigned to the calling core.
+ *
+ *   coreid  index of the calling core; selects which band of rows is processed
+ *   ncores  number of cores sharing the work; must be non-zero (used as divisor)
+ *   lda     number of rows to split between the cores
+ *   A, B    input matrices, indexed row-major with 32 elements per row
+ *   C       output matrix, same layout; results are ADDED to the existing
+ *           contents (only += is used), so the caller is responsible for its
+ *           initial values
+ *
+ * Limitations visible in the code:
+ *   - The row stride is fixed at 32 regardless of lda.
+ *   - Each core handles exactly lda / ncores rows (integer division), so when
+ *     lda is not a multiple of ncores the trailing rows are never computed.
+ */
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+  /* Elements per matrix row; the k loop (step 2) and i loop (step 4) rely on
+     this being a multiple of 4. */
+  enum { MATMUL_ROW_LEN = 32 };
+
+  const int rows_per_core = lda / ncores;
+  const int jend = (coreid + 1) * rows_per_core;
+  int i, j, k;
+
+  for ( j = coreid * rows_per_core; j < jend; j++ )
+  {
+    const int row_off = j * MATMUL_ROW_LEN;
+    data_t *c_row = C + row_off;
+
+    /* Two columns of A (two rows of B) are consumed per iteration. */
+    for ( k = 0; k < MATMUL_ROW_LEN; k += 2 )
+    {
+      const data_t a0 = A[row_off + k];
+      const data_t a1 = A[row_off + k + 1];
+      /* B is const-qualified, so the row pointers must be as well. */
+      const data_t *b_row0 = B + k * MATMUL_ROW_LEN;
+      const data_t *b_row1 = b_row0 + MATMUL_ROW_LEN;
+
+      /* Unrolled by four; the two updates per element are kept as separate
+         statements so the accumulation order is unchanged. */
+      for ( i = 0; i < MATMUL_ROW_LEN; i += 4 )
+      {
+        c_row[i]     += a0 * b_row0[i];
+        c_row[i]     += a1 * b_row1[i];
+        c_row[i + 1] += a0 * b_row0[i + 1];
+        c_row[i + 1] += a1 * b_row1[i + 1];
+        c_row[i + 2] += a0 * b_row0[i + 2];
+        c_row[i + 2] += a1 * b_row1[i + 2];
+        c_row[i + 3] += a0 * b_row0[i + 3];
+        c_row[i + 3] += a1 * b_row1[i + 3];
+      }
+
+      /* NOTE(review): barrier() is declared outside this file (presumably
+         util.h) and is assumed to block until all ncores cores arrive.
+         Every core executes the same number of (j, k) iterations, so the
+         call counts match across cores; confirm whether synchronizing on
+         every k step is intentional before moving or removing it. */
+      barrier(ncores);
+    }
+  }
+}