aboutsummaryrefslogtreecommitdiff
path: root/mt/ak_matmul.c
diff options
context:
space:
mode:
authorHenry Cook <hcook@eecs.berkeley.edu>2014-11-06 17:24:39 -0800
committerHenry Cook <hcook@eecs.berkeley.edu>2014-11-07 16:52:51 -0800
commitd537de7deffa6036dab573ff174b7f8c8e470437 (patch)
treeddc921eb337cda4889570f0251bdba85059a2531 /mt/ak_matmul.c
parent5afc6b9bc2e3685220cffb3da66ad9f5f1f7b14f (diff)
downloadriscv-tests-d537de7deffa6036dab573ff174b7f8c8e470437.zip
riscv-tests-d537de7deffa6036dab573ff174b7f8c8e470437.tar.gz
riscv-tests-d537de7deffa6036dab573ff174b7f8c8e470437.tar.bz2
Clean up canonical mt benchmarks and reorganize extra versions in /mt. All versions support at least 1/2/4 threads.
Diffstat (limited to 'mt/ak_matmul.c')
-rwxr-xr-xmt/ak_matmul.c62
1 files changed, 62 insertions, 0 deletions
diff --git a/mt/ak_matmul.c b/mt/ak_matmul.c
new file mode 100755
index 0000000..e4b34e4
--- /dev/null
+++ b/mt/ak_matmul.c
@@ -0,0 +1,62 @@
+#include "stdlib.h"
+
+#include "util.h"
+
+#include "dataset.h"
+// Blocked multiply-accumulate: C[i + j*lda] += sum over k of A[j*lda + k] * B[k*lda + i].
+// Core `coreid` works on the 16-wide bands of j starting at 16*coreid, stepping by 16*ncores.
+// NOTE(review): the 8-wide i unroll and 4-wide k unroll have no remainder handling, so
+// lda is presumably always a multiple of 8 -- confirm against dataset.h / the caller.
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+ int i, j, k, ii, jj, bsize;
+ bsize = 16; // tile edge used for both the j bands and the i tiles
+ for ( jj = bsize*coreid; jj < lda; jj += bsize*ncores) {
+ for ( ii = 0; ii < lda; ii += bsize) {
+ for ( j = jj; j < lda && j < jj + bsize; j++) {
+ for ( i = ii; i < lda && i < ii + bsize; i += 8) {
+ // Load the eight current C values so the k loop accumulates in locals.
+ data_t c1 = C[i + j*lda];
+ data_t c2 = C[i + j*lda + 1];
+ data_t c3 = C[i + j*lda + 2];
+ data_t c4 = C[i + j*lda + 3];
+ data_t c5 = C[i + j*lda + 4];
+ data_t c6 = C[i + j*lda + 5];
+ data_t c7 = C[i + j*lda + 6];
+ data_t c8 = C[i + j*lda + 7];
+ for ( k = 0; k < lda; k+=4 ) {
+ for (int x = 0; x < 4; x++) {
+ // One A element is multiplied against eight consecutive B elements.
+ data_t a = A[j*lda + k+x];
+ data_t b1 = B[(k+x)*lda + i];
+ data_t b2 = B[(k+x)*lda + i + 1];
+ data_t b3 = B[(k+x)*lda + i + 2];
+ data_t b4 = B[(k+x)*lda + i + 3];
+ data_t b5 = B[(k+x)*lda + i + 4];
+ data_t b6 = B[(k+x)*lda + i + 5];
+ data_t b7 = B[(k+x)*lda + i + 6];
+ data_t b8 = B[(k+x)*lda + i + 7];
+ c1 += a * b1;
+ c2 += a * b2;
+ c3 += a * b3;
+ c4 += a * b4;
+ c5 += a * b5;
+ c6 += a * b6;
+ c7 += a * b7;
+ c8 += a * b8;
+ }
+ }
+ // Write the eight accumulated sums back to C.
+ C[i + j*lda] = c1;
+ C[i + j*lda + 1] = c2;
+ C[i + j*lda + 2] = c3;
+ C[i + j*lda + 3] = c4;
+ C[i + j*lda + 4] = c5;
+ C[i + j*lda + 5] = c6;
+ C[i + j*lda + 6] = c7;
+ C[i + j*lda + 7] = c8;
+ }
+ }
+ }
+ }
+}