aboutsummaryrefslogtreecommitdiff
path: root/benchmarks
diff options
context:
space:
mode:
Diffstat (limited to 'benchmarks')
-rw-r--r--benchmarks/Makefile3
-rw-r--r--benchmarks/common/util.h11
-rw-r--r--benchmarks/mt-matmul/bmark.mk1
-rw-r--r--benchmarks/mt-matmul/dataset.h7
-rw-r--r--benchmarks/mt-matmul/matmul.c20
-rw-r--r--benchmarks/mt-matmul/mt-matmul.c82
-rw-r--r--benchmarks/mt-vvadd/bmark.mk1
-rw-r--r--benchmarks/mt-vvadd/dataset.h6
-rw-r--r--benchmarks/mt-vvadd/mt-vvadd.c95
-rw-r--r--benchmarks/mt-vvadd/vvadd.c16
10 files changed, 92 insertions, 150 deletions
diff --git a/benchmarks/Makefile b/benchmarks/Makefile
index f8db5b9..3346c7c 100644
--- a/benchmarks/Makefile
+++ b/benchmarks/Makefile
@@ -26,8 +26,8 @@ bmarks = \
dhrystone \
spmv \
mt-vvadd \
+ mt-matmul \
#vec-fft \
- #mt-matmul \
#vec-vvadd \
#vec-cmplxmult \
#vec-matmul \
@@ -38,7 +38,6 @@ bmarks_host = \
towers \
vvadd \
multiply \
- mm \
spmv \
vec-vvadd \
vec-cmplxmult \
diff --git a/benchmarks/common/util.h b/benchmarks/common/util.h
index 6c4f963..638f024 100644
--- a/benchmarks/common/util.h
+++ b/benchmarks/common/util.h
@@ -113,4 +113,15 @@ static void __attribute__((noinline)) barrier(int ncores)
#include "encoding.h"
#endif
+// stringify(s): string literal of s; the extra stringify_1 level lets s be
+// macro-expanded before '#' is applied.
+#define stringify_1(s) #s
+#define stringify(s) stringify_1(s)
+// stats(code, iter): executes `code` once and reports its cost.
+//  - _c and _i start as the negated rdcycle()/rdinstret() readings, so adding
+//    the readings taken after `code` leaves the elapsed cycles/instructions.
+//  - only an expansion where `cid == 0` prints; `cid` is not a macro
+//    parameter and must be visible wherever stats() is used.
+//  - output: the text of `code`, total cycles, then cycles per iteration
+//    (_c/iter) and CPI (_c/_i), the latter two with one decimal digit.
+//  - `iter` is substituted without parentheses (`_c/iter`), so a compound
+//    expression passed as iter combines with the leading "_c/" by ordinary
+//    operator precedence.
+// NOTE(review): _c and _i are unsigned long but are printed with %ld;
+// confirm whether %lu is intended and supported by the printf in use.
+#define stats(code, iter) do { \
+ unsigned long _c = -rdcycle(), _i = -rdinstret(); \
+ code; \
+ _c += rdcycle(), _i += rdinstret(); \
+ if (cid == 0) \
+ printf("\n%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
+ stringify(code), _c, _c/iter, 10*_c/iter%10, _c/_i, 10*_c/_i%10); \
+ } while(0)
+
#endif //__UTIL_H
diff --git a/benchmarks/mt-matmul/bmark.mk b/benchmarks/mt-matmul/bmark.mk
index 4b7fcb7..6a7140f 100644
--- a/benchmarks/mt-matmul/bmark.mk
+++ b/benchmarks/mt-matmul/bmark.mk
@@ -10,6 +10,7 @@
mt_matmul_c_src = \
mt-matmul.c \
+ matmul.c \
syscalls.c \
mt_matmul_riscv_src = \
diff --git a/benchmarks/mt-matmul/dataset.h b/benchmarks/mt-matmul/dataset.h
index dde3ee4..2c59a33 100644
--- a/benchmarks/mt-matmul/dataset.h
+++ b/benchmarks/mt-matmul/dataset.h
@@ -1,9 +1,12 @@
+#ifndef __DATASET_H
+#define __DATASET_H
#define ARRAY_SIZE 1024
-
#define DIM_SIZE 32
+typedef double data_t;
+
static data_t input1_data[ARRAY_SIZE] =
{
0, 3, 2, 0, 3, 1, 0, 3, 2, 3, 2, 0, 3, 3, 1, 2, 3, 0, 0, 1,
@@ -172,3 +175,5 @@ static data_t verify_data[ARRAY_SIZE] =
69, 73, 94, 89
};
+
+#endif //__DATASET_H
diff --git a/benchmarks/mt-matmul/matmul.c b/benchmarks/mt-matmul/matmul.c
new file mode 100644
index 0000000..95fbe03
--- /dev/null
+++ b/benchmarks/mt-matmul/matmul.c
@@ -0,0 +1,20 @@
+#include "dataset.h"
+
+//--------------------------------------------------------------------------
+// matmul: adds the lda x lda product terms A[j*lda+k]*B[k*lda+i] into C[i+j*lda].
+// Core `coreid` of `ncores` handles j = coreid, coreid+ncores, ...; C is not cleared.
+void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
+{
+  int i, j, k;
+  for ( i = 0; i < lda; i++ )
+  {
+    // j (not k) is strided across cores so each C entry has exactly one writer
+    for ( j = coreid; j < lda; j += ncores )
+    {
+      for ( k = 0; k < lda; k++ )
+      {
+        C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
+      }
+    }
+  }
+}
diff --git a/benchmarks/mt-matmul/mt-matmul.c b/benchmarks/mt-matmul/mt-matmul.c
index 1584a5d..2353962 100644
--- a/benchmarks/mt-matmul/mt-matmul.c
+++ b/benchmarks/mt-matmul/mt-matmul.c
@@ -25,62 +25,20 @@
//--------------------------------------------------------------------------
// Input/Reference Data
-typedef double data_t;
#include "dataset.h"
-
+
//--------------------------------------------------------------------------
// Basic Utilities and Multi-thread Support
-__thread unsigned long coreid;
-
#include "util.h"
+
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
-
//--------------------------------------------------------------------------
// matmul function
-// single-thread, naive version
-void __attribute__((noinline)) matmul_naive(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
- int i, j, k;
-
- if (coreid > 0)
- return;
-
- for ( i = 0; i < lda; i++ )
- for ( j = 0; j < lda; j++ )
- {
- for ( k = 0; k < lda; k++ )
- {
- C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
- }
- }
-
-}
-
-
+ // Defined in matmul.c; thread_entry forwards its cid and nc as coreid and ncores.
+ extern void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] );
-void __attribute__((noinline)) matmul(const int lda, const data_t A[], const data_t B[], data_t C[] )
-{
-
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
- //
- // feel free to make a separate function for MI and MSI versions.
-
-}
//--------------------------------------------------------------------------
// Main
@@ -90,46 +48,16 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
void thread_entry(int cid, int nc)
{
- coreid = cid;
-
- // static allocates data in the binary, which is visible to both threads
static data_t results_data[ARRAY_SIZE];
-
- // Execute the provided, naive matmul
- barrier(nc);
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
+ // stats() pastes its iter argument unparenthesized after "_c/", so this expands to
+ // _c/DIM_SIZE/DIM_SIZE/DIM_SIZE; taken on its own the expression would be 0 in integer arithmetic.
+ stats(matmul(cid, nc, DIM_SIZE, input1_data, input2_data, results_data); barrier(nc), DIM_SIZE/DIM_SIZE/DIM_SIZE);
-
- // verify
int res = verifyDouble(ARRAY_SIZE, results_data, verify_data);
- if (res)
- exit(res);
-
-#if 0
- // clear results from the first trial
- size_t i;
- if (coreid == 0)
- for (i=0; i < ARRAY_SIZE; i++)
- results_data[i] = 0;
- barrier(nc);
-
- // Execute your faster matmul
- barrier(nc);
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
-
#ifdef DEBUG
printArray("results:", ARRAY_SIZE, results_data);
printArray("verify :", ARRAY_SIZE, verify_data);
#endif
-
- // verify
- res = verify(ARRAY_SIZE, results_data, verify_data);
- if (res)
- exit(res);
- barrier(nc);
-#endif
- exit(0);
+ exit(res);
}
diff --git a/benchmarks/mt-vvadd/bmark.mk b/benchmarks/mt-vvadd/bmark.mk
index 72b2d34..ff969c1 100644
--- a/benchmarks/mt-vvadd/bmark.mk
+++ b/benchmarks/mt-vvadd/bmark.mk
@@ -10,6 +10,7 @@
mt_vvadd_c_src = \
mt-vvadd.c \
+ vvadd.c \
syscalls.c \
mt_vvadd_riscv_src = \
diff --git a/benchmarks/mt-vvadd/dataset.h b/benchmarks/mt-vvadd/dataset.h
index ce9f936..51f25df 100644
--- a/benchmarks/mt-vvadd/dataset.h
+++ b/benchmarks/mt-vvadd/dataset.h
@@ -1,6 +1,10 @@
+#ifndef __DATASET_H
+#define __DATASET_H
#define DATA_SIZE 1000
+typedef double data_t;
+
static data_t input1_data[DATA_SIZE] =
{
0.00, 15.00, 10.00, 3.00, 14.00, 6.00, 2.00, 18.00, 11.00, 15.00, 11.00, 0.00, 17.00, 16.00, 7.00, 13.00, 18.00, 2.00, 2.00, 5.00,
@@ -163,3 +167,5 @@ static data_t verify_data[DATA_SIZE] =
32.00, 21.00, 26.00, 13.00, 6.00, 24.00, 22.00, 27.00, 17.00, 26.00, 19.00, 21.00, 19.00, 9.00, 21.00, 25.00, 17.00, 27.00, 15.00, 18.00
};
+
+#endif //__DATASET_H
diff --git a/benchmarks/mt-vvadd/mt-vvadd.c b/benchmarks/mt-vvadd/mt-vvadd.c
index 2116115..48eae6a 100644
--- a/benchmarks/mt-vvadd/mt-vvadd.c
+++ b/benchmarks/mt-vvadd/mt-vvadd.c
@@ -24,49 +24,20 @@
//--------------------------------------------------------------------------
// Input/Reference Data
-typedef double data_t;
#include "dataset.h"
//--------------------------------------------------------------------------
// Basic Utilities and Multi-thread Support
-__thread unsigned long coreid;
-
#include "util.h"
-#define stringify_1(s) #s
-#define stringify(s) stringify_1(s)
-#define stats(code) do { \
- unsigned long _c = -rdcycle(), _i = -rdinstret(); \
- code; \
- _c += rdcycle(), _i += rdinstret(); \
- if (coreid == 0) \
- printf("%s: %ld cycles, %ld.%ld cycles/iter, %ld.%ld CPI\n", \
- stringify(code), _c, _c/DATA_SIZE, 10*_c/DATA_SIZE%10, _c/_i, 10*_c/_i%10); \
- } while(0)
//--------------------------------------------------------------------------
// vvadd function
-//perform in-place vvadd
-void __attribute__((noinline)) vvadd(int ncores, size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- size_t i;
-
- // interleave accesses
- for (i = coreid; i < n; i+=ncores)
- {
- x[i] = x[i] + y[i];
- }
-}
+// Defined in vvadd.c: z[i] = x[i] + y[i] for i = coreid, coreid+ncores, ... below n.
+// thread_entry's in-place pass hands the same array in as both x and z.
+extern void __attribute__((noinline)) vvadd(int coreid, int ncores, size_t n, const data_t* x, const data_t* y, data_t* z);
-void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const data_t* __restrict__ y)
-{
- // ***************************** //
- // **** ADD YOUR CODE HERE ***** //
- // ***************************** //
-}
//--------------------------------------------------------------------------
// Main
@@ -76,57 +47,41 @@ void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const
void thread_entry(int cid, int nc)
{
- coreid = cid;
-
// static allocates data in the binary, which is visible to both threads
static data_t results_data[DATA_SIZE];
- // because we're going to perform an in-place vvadd (and we're going to run
- // it a couple of times) let's copy the input data to a temporary results
- // array
-
- size_t i;
- if (coreid == 0)
- {
- for (i = 0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
- }
-
-
- // Execute the provided, terrible vvadd
+ // First do out-of-place vvadd
barrier(nc);
- stats(vvadd(nc, DATA_SIZE, results_data, input2_data); barrier(nc));
+ stats(vvadd(cid, nc, DATA_SIZE, input1_data, input2_data, results_data); barrier(nc), DATA_SIZE);
-
- // verify
- int res = verifyDouble(DATA_SIZE, results_data, verify_data);
- if (res)
- exit(res);
-
-#if 0
- // reset results from the first trial
- if (coreid == 0)
- {
- for (i=0; i < DATA_SIZE; i++)
- results_data[i] = input1_data[i];
+ if(cid == 0) { // only core 0 checks the out-of-place result
+#ifdef DEBUG // array dumps are debug-only, as in the in-place check below
+ printDoubleArray("out-of-place results: ", DATA_SIZE, results_data);
+ printDoubleArray("out-of-place verify : ", DATA_SIZE, verify_data);
+#endif
+ int res = verifyDouble(DATA_SIZE, results_data, verify_data);
+ if(res) exit(res); // a non-zero verifyDouble result is passed to exit()
}
- barrier(nc);
- // Execute your faster vvadd
+ // Second do in-place vvadd
+ // Copying input
+ size_t i;
+ if(cid == 0) {
+ for (i = 0; i < DATA_SIZE; i++)
+ results_data[i] = input1_data[i];
+ }
barrier(nc);
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
-
+ stats(vvadd(cid, nc, DATA_SIZE, results_data, input2_data, results_data); barrier(nc), DATA_SIZE);
+
+ if(cid == 0) {
#ifdef DEBUG
- printDoubleArray("results: ", DATA_SIZE, results_data);
- printDoubleArray("verify : ", DATA_SIZE, verify_data);
+ printDoubleArray("in-place results: ", DATA_SIZE, results_data);
+ printDoubleArray("in-place verify : ", DATA_SIZE, verify_data);
#endif
+ int res = verifyDouble(DATA_SIZE, results_data, verify_data);
+ if(res) exit(res);
+ }
- // verify
- res = verifyDouble(DATA_SIZE, results_data, verify_data);
- if (res)
- exit(res);
barrier(nc);
-#endif
-
exit(0);
}
diff --git a/benchmarks/mt-vvadd/vvadd.c b/benchmarks/mt-vvadd/vvadd.c
new file mode 100644
index 0000000..8f4d43f
--- /dev/null
+++ b/benchmarks/mt-vvadd/vvadd.c
@@ -0,0 +1,16 @@
+#include "stdlib.h"
+#include "dataset.h"
+
+//--------------------------------------------------------------------------
+// vvadd: z[i] = x[i] + y[i] for i = coreid, coreid+ncores, ... while i < n.
+// Indices outside that stride are not touched by this call.
+void __attribute__((noinline)) vvadd(int coreid, int ncores, size_t n, const data_t* x, const data_t* y, data_t* z)
+{
+  size_t idx = coreid;
+  // step by the core count so concurrent callers interleave their accesses
+  while (idx < n)
+  {
+    z[idx] = x[idx] + y[idx];
+    idx += ncores;
+  }
+}