diff options
Diffstat (limited to 'benchmarks')
-rw-r--r-- | benchmarks/Makefile | 3 | ||||
-rw-r--r-- | benchmarks/common/util.h | 11 | ||||
-rw-r--r-- | benchmarks/mt-matmul/bmark.mk | 1 | ||||
-rw-r--r-- | benchmarks/mt-matmul/dataset.h | 7 | ||||
-rw-r--r-- | benchmarks/mt-matmul/matmul.c | 20 | ||||
-rw-r--r-- | benchmarks/mt-matmul/mt-matmul.c | 82 | ||||
-rw-r--r-- | benchmarks/mt-vvadd/bmark.mk | 1 | ||||
-rw-r--r-- | benchmarks/mt-vvadd/dataset.h | 6 | ||||
-rw-r--r-- | benchmarks/mt-vvadd/mt-vvadd.c | 95 | ||||
-rw-r--r-- | benchmarks/mt-vvadd/vvadd.c | 16 |
10 files changed, 92 insertions, 150 deletions
diff --git a/benchmarks/Makefile b/benchmarks/Makefile index f8db5b9..3346c7c 100644 --- a/benchmarks/Makefile +++ b/benchmarks/Makefile @@ -26,8 +26,8 @@ bmarks = \ dhrystone \ spmv \ mt-vvadd \ + mt-matmul \ #vec-fft \ - #mt-matmul \ #vec-vvadd \ #vec-cmplxmult \ #vec-matmul \ @@ -38,7 +38,6 @@ bmarks_host = \ towers \ vvadd \ multiply \ - mm \ spmv \ vec-vvadd \ vec-cmplxmult \ diff --git a/benchmarks/common/util.h b/benchmarks/common/util.h index 6c4f963..638f024 100644 --- a/benchmarks/common/util.h +++ b/benchmarks/common/util.h @@ -113,4 +113,15 @@ static void __attribute__((noinline)) barrier(int ncores) #include "encoding.h" #endif +#define stringify_1(s) #s +#define stringify(s) stringify_1(s) +#define stats(code, iter) do { \ + unsigned long _c = -rdcycle(), _i = -rdinstret(); \ + code; \ + _c += rdcycle(), _i += rdinstret(); \ + if (cid == 0) \ + printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ + stringify(code), _c, _c/iter, 10*_c/iter%10, _c/_i, 10*_c/_i%10); \ + } while(0) + #endif //__UTIL_H diff --git a/benchmarks/mt-matmul/bmark.mk b/benchmarks/mt-matmul/bmark.mk index 4b7fcb7..6a7140f 100644 --- a/benchmarks/mt-matmul/bmark.mk +++ b/benchmarks/mt-matmul/bmark.mk @@ -10,6 +10,7 @@ mt_matmul_c_src = \ mt-matmul.c \ + matmul.c \ syscalls.c \ mt_matmul_riscv_src = \ diff --git a/benchmarks/mt-matmul/dataset.h b/benchmarks/mt-matmul/dataset.h index dde3ee4..2c59a33 100644 --- a/benchmarks/mt-matmul/dataset.h +++ b/benchmarks/mt-matmul/dataset.h @@ -1,9 +1,12 @@ +#ifndef __DATASET_H +#define __DATASET_H #define ARRAY_SIZE 1024 - #define DIM_SIZE 32 +typedef double data_t; + static data_t input1_data[ARRAY_SIZE] = { 0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1, @@ -172,3 +175,5 @@ static data_t verify_data[ARRAY_SIZE] = 69, 73, 94, 89 }; + +#endif //__DATASET_H diff --git a/benchmarks/mt-matmul/matmul.c b/benchmarks/mt-matmul/matmul.c new file mode 100644 index 0000000..95fbe03 --- /dev/null +++ b/benchmarks/mt-matmul/matmul.c @@ -0,0 +1,20 @@ +#include "dataset.h" + +//-------------------------------------------------------------------------- +// single-thread, naive version +// +void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ) +{ + int i, j, k; + + for ( i = 0; i < lda; i++ ) + { + for ( j = 0; j < lda; j++ ) + { + for ( k = coreid; k < lda; k+=ncores ) + { + C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; + } + } + } +} diff --git a/benchmarks/mt-matmul/mt-matmul.c b/benchmarks/mt-matmul/mt-matmul.c index 1584a5d..2353962 100644 --- a/benchmarks/mt-matmul/mt-matmul.c +++ b/benchmarks/mt-matmul/mt-matmul.c @@ -25,62 +25,20 @@ //-------------------------------------------------------------------------- // Input/Reference Data -typedef double data_t; #include "dataset.h" - + //-------------------------------------------------------------------------- // Basic Utilities and Multi-thread Support -__thread unsigned long coreid; - #include "util.h" + -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) - //-------------------------------------------------------------------------- // matmul function -// single-thread, naive version -void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - int i, j, k; - - if (coreid > 0) - return; - - for ( i = 0; i < lda; i++ ) - for ( j = 0; j < lda; j++ ) - { - for ( k = 0; k < lda; k++ ) - { - C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; - } - } - -} - - + extern void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] ); -void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] ) -{ - - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // - // - // feel free to make a separate function for MI and MSI versions. - -} //-------------------------------------------------------------------------- // Main @@ -90,46 +48,16 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da void thread_entry(int cid, int nc) { - coreid = cid; - - // static allocates data in the binary, which is visible to both threads static data_t results_data[ARRAY_SIZE]; - - // Execute the provided, naive matmul - barrier(nc); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); + stats(matmul(cid, nc, DIM_SIZE, input1_data, input2_data, results_data); barrier(nc), DIM_SIZE/DIM_SIZE/DIM_SIZE); - - // verify int res = verifyDouble(ARRAY_SIZE, results_data, verify_data); - if (res) - exit(res); - -#if 0 - // clear results from the first trial - size_t i; - if (coreid == 0) - for (i=0; i < ARRAY_SIZE; i++) - results_data[i] = 0; - barrier(nc); - - // Execute your faster matmul - barrier(nc); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); - #ifdef DEBUG printArray("results:", ARRAY_SIZE, results_data); printArray("verify :", ARRAY_SIZE, verify_data); #endif - - // verify - res = verify(ARRAY_SIZE, results_data, verify_data); - if (res) - exit(res); - barrier(nc); -#endif - exit(0); + exit(res); } diff --git a/benchmarks/mt-vvadd/bmark.mk b/benchmarks/mt-vvadd/bmark.mk index 72b2d34..ff969c1 100644 --- a/benchmarks/mt-vvadd/bmark.mk +++ b/benchmarks/mt-vvadd/bmark.mk @@ -10,6 +10,7 @@ mt_vvadd_c_src = \ mt-vvadd.c \ + vvadd.c \ syscalls.c \ mt_vvadd_riscv_src = \ diff --git a/benchmarks/mt-vvadd/dataset.h b/benchmarks/mt-vvadd/dataset.h index ce9f936..51f25df 100644 --- a/benchmarks/mt-vvadd/dataset.h +++ b/benchmarks/mt-vvadd/dataset.h @@ -1,6 +1,10 @@ +#ifndef __DATASET_H +#define __DATASET_H #define DATA_SIZE 1000 +typedef double data_t; + static data_t input1_data[DATA_SIZE] = { 0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00, @@ -163,3 +167,5 @@ static data_t verify_data[DATA_SIZE] = 32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00 }; + +#endif //__DATASET_H diff --git a/benchmarks/mt-vvadd/mt-vvadd.c b/benchmarks/mt-vvadd/mt-vvadd.c index 2116115..48eae6a 100644 --- a/benchmarks/mt-vvadd/mt-vvadd.c +++ b/benchmarks/mt-vvadd/mt-vvadd.c @@ -24,49 +24,20 @@ //-------------------------------------------------------------------------- // Input/Reference Data -typedef double data_t; #include "dataset.h" //-------------------------------------------------------------------------- // Basic Utilities and Multi-thread Support -__thread unsigned long coreid; - #include "util.h" -#define stringify_1(s) #s -#define stringify(s) stringify_1(s) -#define stats(code) do { \ - unsigned long _c = -rdcycle(), _i = -rdinstret(); \ - code; \ - _c += rdcycle(), _i += rdinstret(); \ - if (coreid == 0) \ - printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \ - stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \ - } while(0) //-------------------------------------------------------------------------- // vvadd function -//perform in-place vvadd -void __attribute__((noinline)) vvadd(int ncores, size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - size_t i; - - // interleave accesses - for (i = coreid; i < n; i+=ncores) - { - x[i] = x[i] + y[i]; - } -} +extern void __attribute__((noinline)) vvadd(int coreid, int ncores, size_t n, const data_t* x, const data_t* y, data_t* z); -void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y) -{ - // ***************************** // - // **** ADD YOUR CODE HERE ***** // - // ***************************** // -} //-------------------------------------------------------------------------- // Main @@ -76,57 +47,41 @@ void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const void thread_entry(int cid, int nc) { - coreid = cid; - // static allocates data in the binary, which is visible to both threads static data_t results_data[DATA_SIZE]; - // because we're going to perform an in-place vvadd (and we're going to run - // it a couple of times) let's copy the input data to a temporary results - // array - - size_t i; - if (coreid == 0) - { - for (i = 0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; - } - - - // Execute the provided, terrible vvadd + // First do out-of-place vvadd barrier(nc); - stats(vvadd(nc, DATA_SIZE, results_data, input2_data); barrier(nc)); + stats(vvadd(cid, nc, DATA_SIZE, input1_data, input2_data, results_data); barrier(nc), DATA_SIZE); - - // verify - int res = verifyDouble(DATA_SIZE, results_data, verify_data); - if (res) - exit(res); - -#if 0 - // reset results from the first trial - if (coreid == 0) - { - for (i=0; i < DATA_SIZE; i++) - results_data[i] = input1_data[i]; + if(cid == 0) { +//#ifdef DEBUG + printDoubleArray("out-of-place results: ", DATA_SIZE, results_data); + printDoubleArray("out-of-place verify : ", DATA_SIZE, verify_data); +//#endif + int res = verifyDouble(DATA_SIZE, results_data, verify_data); + if(res) exit(res); } - barrier(nc); - // Execute your faster vvadd + // Second do in-place vvadd + // Copying input + size_t i; + if(cid == 0) { + for (i = 0; i < DATA_SIZE; i++) + results_data[i] = input1_data[i]; + } barrier(nc); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); - + stats(vvadd(cid, nc, DATA_SIZE, results_data, input2_data, results_data); barrier(nc), DATA_SIZE); + + if(cid == 0) { #ifdef DEBUG - printDoubleArray("results: ", DATA_SIZE, results_data); - printDoubleArray("verify : ", DATA_SIZE, verify_data); + printDoubleArray("in-place results: ", DATA_SIZE, results_data); + printDoubleArray("in-place verify : ", DATA_SIZE, verify_data); #endif + int res = verifyDouble(DATA_SIZE, results_data, verify_data); + if(res) exit(res); + } - // verify - res = verifyDouble(DATA_SIZE, results_data, verify_data); - if (res) - exit(res); barrier(nc); -#endif - exit(0); } diff --git a/benchmarks/mt-vvadd/vvadd.c b/benchmarks/mt-vvadd/vvadd.c new file mode 100644 index 0000000..8f4d43f --- /dev/null +++ b/benchmarks/mt-vvadd/vvadd.c @@ -0,0 +1,16 @@ +#include "stdlib.h" +#include "dataset.h" + +//-------------------------------------------------------------------------- +// vvadd function + +void __attribute__((noinline)) vvadd(int coreid, int ncores, size_t n, const data_t* x, const data_t* y, data_t* z) +{ + size_t i; + + // interleave accesses + for (i = coreid; i < n; i+=ncores) + { + z[i] = x[i] + y[i]; + } +} |