diff options
author | Henry Cook <hcook@eecs.berkeley.edu> | 2014-11-06 17:24:39 -0800 |
---|---|---|
committer | Henry Cook <hcook@eecs.berkeley.edu> | 2014-11-07 16:52:51 -0800 |
commit | d537de7deffa6036dab573ff174b7f8c8e470437 (patch) | |
tree | ddc921eb337cda4889570f0251bdba85059a2531 /mt/ck_matmul.c | |
parent | 5afc6b9bc2e3685220cffb3da66ad9f5f1f7b14f (diff) | |
download | riscv-tests-d537de7deffa6036dab573ff174b7f8c8e470437.zip riscv-tests-d537de7deffa6036dab573ff174b7f8c8e470437.tar.gz riscv-tests-d537de7deffa6036dab573ff174b7f8c8e470437.tar.bz2 |
Clean up canonical mt benchmarks and reorganize extra versions in /mt. All versions support at least 1/2/4 threads.
Diffstat (limited to 'mt/ck_matmul.c')
-rwxr-xr-x | mt/ck_matmul.c | 61 |
1 files changed, 61 insertions, 0 deletions
diff --git a/mt/ck_matmul.c b/mt/ck_matmul.c new file mode 100755 index 0000000..753a36d --- /dev/null +++ b/mt/ck_matmul.c @@ -0,0 +1,61 @@ +#include "stdlib.h" + +#include "util.h" + +#include "dataset.h" +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + + // ***************************** // + // **** ADD YOUR CODE HERE ***** // + // ***************************** // + // + // feel free to make a separate function for MI and MSI versions. + int i, j, k, ii, jj, bsize, start; + bsize = 16; + start = bsize*coreid; + for ( jj = start; jj < lda; jj += bsize*ncores) { + int first = 1; + for ( ii = start; ii !=start || first; ii=(bsize+ii) % lda) { + first = 0; + for ( j = jj; j < lda && j < jj + bsize; j+=4) { + for ( i = ii; i < lda && i < ii + bsize; i+=2) { + data_t c1 = C[i + j*lda]; + data_t c2 = C[i + j*lda + 1]; + data_t c3 = C[i + (j+1)*lda]; + data_t c4 = C[i + (j+1)*lda + 1]; + data_t c5 = C[i + (j+2)*lda]; + data_t c6 = C[i + (j+2)*lda + 1]; + data_t c7 = C[i + (j+3)*lda]; + data_t c8 = C[i + (j+3)*lda + 1]; + for ( k = 0; k < lda; k+=8){ + for (int x = 0; x < 8; x++) { + data_t a = A[j*lda + k+x]; + data_t a1 = A[(j+1)*lda +k+x]; + data_t a2 = A[(j+2)*lda +k+x]; + data_t a3 = A[(j+3)*lda +k+x]; + data_t b1 = B[(k+x)*lda + i]; + data_t b2 = B[(k+x)*lda + i + 1]; + c1 += a * b1; + c2 += a * b2; + c3 += a1* b1; + c4 += a1* b2; + c5 += a2* b1; + c6 += a2* b2; + c7 += a3* b1; + c8 += a3* b2; + } + } + C[i + j*lda] = c1; + C[i + j*lda + 1] = c2; + C[i + (j+1)*lda] = c3; + C[i + (j+1)*lda + 1] = c4; + C[i + (j+2)*lda] = c5; + C[i + (j+2)*lda + 1] = c6; + C[i + (j+3)*lda] = c7; + C[i + (j+3)*lda + 1] = c8; + } + } + } + } +} |