about | summary | refs | log | tree | commit | diff
path: root/mt
diff options
context:
space:
mode:
author Henry Cook <hcook@eecs.berkeley.edu> 2014-09-24 18:11:42 -0700
committer Henry Cook <hcook@eecs.berkeley.edu> 2014-09-24 18:34:50 -0700
commit 9f7ed92e1aeb91593ceb8baf26ff4fe0a1a9efa6 (patch)
tree 81654ff43e33b08a117cb9b199e60dbce8b32fe5 /mt
parent dbde501592ce20c536cbc97e99d03f54f3e30294 (diff)
download riscv-tests-9f7ed92e1aeb91593ceb8baf26ff4fe0a1a9efa6.zip
riscv-tests-9f7ed92e1aeb91593ceb8baf26ff4fe0a1a9efa6.tar.gz
riscv-tests-9f7ed92e1aeb91593ceb8baf26ff4fe0a1a9efa6.tar.bz2
Updated mt tests
Diffstat (limited to 'mt')
-rwxr-xr-x mt/Makefile 14
-rwxr-xr-x mt/ab_matmul/ab_matmul.c 24
-rwxr-xr-x mt/ab_matmul/matmul_mi.c 24
-rwxr-xr-x mt/ab_vvadd/ab_vvadd.c 24
-rwxr-xr-x mt/ad_matmul/ad_matmul.c 24
-rwxr-xr-x mt/ad_matmul/matmul_mi.c 24
-rwxr-xr-x mt/ad_vvadd/ad_vvadd.c 24
-rwxr-xr-x mt/ae_matmul/ae_matmul.c 26
-rwxr-xr-x mt/ae_matmul/matmul_mi.c 26
-rwxr-xr-x mt/ae_vvadd/ae_vvadd.c 24
-rw-r--r-- mt/af_matmul/Ronald.c 24
-rwxr-xr-x mt/af_matmul/af_matmul.c 24
-rw-r--r-- mt/af_matmul/bestattemptthusfar.c 24
-rw-r--r-- mt/af_matmul/bestattemptthusfar2.c 24
-rw-r--r-- mt/af_matmul/failedattempt.c 24
-rw-r--r-- mt/af_matmul/failedattempt2.c 24
-rw-r--r-- mt/af_matmul/keeptrying.c 24
-rw-r--r-- mt/af_matmul/keeptrying2.c 24
-rw-r--r-- mt/af_matmul/keeptrying3.c 24
-rw-r--r-- mt/af_matmul/matmul_mi.c 24
-rwxr-xr-x mt/af_vvadd/af_vvadd.c 24
-rwxr-xr-x mt/ag_matmul/ag_matmul.c 26
-rwxr-xr-x mt/ag_matmul/matmul_mi.c 26
-rwxr-xr-x mt/ag_vvadd/ag_vvadd.c 24
-rwxr-xr-x mt/ai_matmul/ai_matmul.c 28
-rwxr-xr-x mt/ai_matmul/matmul_mi.c 28
-rwxr-xr-x mt/ai_vvadd/ai_vvadd.c 24
-rwxr-xr-x mt/aj_matmul/aj_matmul.c 24
-rw-r--r-- mt/aj_matmul/matmul_mi.c 24
-rwxr-xr-x mt/aj_vvadd/aj_vvadd.c 24
-rwxr-xr-x mt/ak_matmul/ak_matmul.c 24
-rwxr-xr-x mt/ak_matmul/matmulMI.c 24
-rwxr-xr-x mt/ak_matmul/matmul_mi.c 24
-rwxr-xr-x mt/ak_vvadd/ak_vvadd.c 24
-rw-r--r-- mt/al_matmul/al_matmul.c 24
-rw-r--r-- mt/al_matmul/matmul_mi.c 24
-rwxr-xr-x mt/al_vvadd/al_vvadd.c 24
-rwxr-xr-x mt/am_matmul/am_matmul.c 24
-rwxr-xr-x mt/am_matmul/matmul3.c 24
-rwxr-xr-x mt/am_matmul/matmul4.c 24
-rwxr-xr-x mt/am_matmul/matmul_mi.c 24
-rwxr-xr-x mt/am_matmul/matmul_msi.c 24
-rwxr-xr-x mt/am_vvadd/am_vvadd.c 24
-rwxr-xr-x mt/an_matmul/an_matmul.c 24
-rw-r--r-- mt/an_matmul/matmul_mi.c 24
-rwxr-xr-x mt/an_vvadd/an_vvadd.c 24
-rwxr-xr-x mt/ap_matmul/ap_matmul.c 24
-rwxr-xr-x mt/ap_matmul/matmul_mi.c 24
-rwxr-xr-x mt/ap_vvadd/ap_vvadd.c 24
-rw-r--r-- mt/aq_matmul/aq_matmul.c 24
-rwxr-xr-x mt/aq_matmul/matmul_mi.c 24
-rwxr-xr-x mt/aq_vvadd/aq_vvadd.c 24
-rwxr-xr-x mt/ar_matmul/ar_matmul.c 24
l--------- mt/ar_matmul/matmul_mi.c 1
-rwxr-xr-x mt/ar_vvadd/ar_vvadd.c 24
-rwxr-xr-x mt/as_matmul/as_matmul.c 78
-rw-r--r-- mt/as_matmul/matmul_mi.c 34
-rwxr-xr-x mt/as_vvadd/as_vvadd.c 28
-rwxr-xr-x mt/at_matmul/at_matmul.c 24
-rw-r--r-- mt/at_matmul/matmul_mi.c 24
-rwxr-xr-x mt/at_vvadd/at_vvadd.c 24
-rw-r--r-- mt/av_matmul/av_matmul.c 24
-rw-r--r-- mt/av_matmul/matmul_mi.c 24
-rw-r--r-- mt/av_vvadd/av_vvadd.c 24
-rw-r--r-- mt/ay_matmul/ay_matmul.c 24
-rw-r--r-- mt/ay_matmul/matmul_mi.c 24
-rwxr-xr-x mt/ay_vvadd/ay_vvadd.c 24
-rwxr-xr-x mt/az_matmul/az_matmul.c 24
-rwxr-xr-x mt/az_matmul/matmul_mi.c 24
-rwxr-xr-x mt/az_vvadd/az_vvadd.c 24
-rwxr-xr-x mt/ba_matmul/ba_matmul.c 24
-rwxr-xr-x mt/ba_matmul/matmul_mi.c 24
-rwxr-xr-x mt/ba_vvadd/ba_vvadd.c 24
-rwxr-xr-x mt/bb_matmul/bb_matmul.c 26
-rwxr-xr-x mt/bb_matmul/matmul_mi.c 26
-rwxr-xr-x mt/bb_vvadd/bb_vvadd.c 24
-rwxr-xr-x mt/bc_matmul/bc_matmul.c 24
-rwxr-xr-x mt/bc_matmul/matmul_mi.c 28
-rwxr-xr-x mt/bc_vvadd/bc_vvadd.c 24
-rwxr-xr-x mt/be_matmul/be_matmul.c 36
-rwxr-xr-x mt/be_matmul/matmul_mi.c 36
-rwxr-xr-x mt/be_vvadd/be_vvadd.c 24
-rw-r--r-- mt/bf_matmul/bf_matmul.c 24
-rwxr-xr-x mt/bf_matmul/matmul_mi.c 24
-rwxr-xr-x mt/bf_vvadd/bf_vvadd.c 24
-rwxr-xr-x mt/bh_matmul/bh_matmul.c 24
-rwxr-xr-x mt/bh_matmul/matmul_mi.c 24
-rwxr-xr-x mt/bh_vvadd/bh_vvadd.c 24
-rw-r--r-- mt/bj_matmul/bj_matmul.c 24
-rw-r--r-- mt/bj_matmul/matmul_mi.c 24
-rwxr-xr-x mt/bj_vvadd/bj_vvadd.c 24
-rwxr-xr-x mt/bk_matmul/bk_matmul.c 24
-rwxr-xr-x mt/bk_matmul/matmul_mi.c 24
-rwxr-xr-x mt/bk_matmul/matmul_msi.c 24
-rwxr-xr-x mt/bk_vvadd/bk_vvadd.c 24
-rw-r--r-- mt/bm_matmul/bm_matmul.c 24
-rw-r--r-- mt/bm_matmul/matmul_mi.c 26
-rwxr-xr-x mt/bm_vvadd/bm_vvadd.c 24
-rwxr-xr-x mt/bn_matmul/bn_matmul.c 24
-rw-r--r-- mt/bn_matmul/matmul_mi.c 24
-rwxr-xr-x mt/bn_vvadd/bn_vvadd.c 24
-rw-r--r-- mt/bo_matmul/bo_matmul.c 24
-rw-r--r-- mt/bo_matmul/matmul_mi.c 24
-rwxr-xr-x mt/bo_vvadd/bo_vvadd.c 24
-rwxr-xr-x mt/bp_matmul/bp_matmul.c 24
-rwxr-xr-x mt/bp_matmul/matmul_mi.c 24
-rwxr-xr-x mt/bp_vvadd/bp_vvadd.c 24
-rwxr-xr-x mt/br_matmul/br_matmul.c 26
-rwxr-xr-x mt/br_matmul/matmul_mi.c 26
-rwxr-xr-x mt/br_vvadd/br_vvadd.c 24
-rwxr-xr-x mt/bs_matmul/bs_matmul.c 24
-rw-r--r-- mt/bs_matmul/matmul_mi.c 24
-rwxr-xr-x mt/bs_vvadd/bs_vvadd.c 24
-rwxr-xr-x mt/bt_matmul/bt_matmul.c 36
-rwxr-xr-x mt/bt_matmul/matmul_mi.c 36
-rwxr-xr-x mt/bt_vvadd/bt_vvadd.c 24
-rwxr-xr-x mt/matmul/matmul.c 24
-rw-r--r-- mt/mt-matmul/mt-matmul.c 24
-rw-r--r-- mt/mt-vvadd/mt-vvadd.c 24
119 files changed, 1483 insertions, 1486 deletions
diff --git a/mt/Makefile b/mt/Makefile
index 455f066..1d85ed3 100755
--- a/mt/Makefile
+++ b/mt/Makefile
@@ -89,11 +89,9 @@ bt_vvadd\
#--------------------------------------------------------------------
RISCV_GCC = riscv-gcc
-RISCV_GCC_OPTS = -std=gnu99 -T common/test.ld -O3 -nostdlib -nostartfiles -funroll-all-loops
-RISCV_LINK = riscv-gcc -T $(common)/test.ld
-RISCV_LINK_MT = riscv-gcc -T $(common)/test-mt.ld
-RISCV_LINK_OPTS = -lc
-RISCV_LINK_SYSCALL = -I$(bmarkdir)/../env $(common)/syscalls.c -lc
+RISCV_GCC_OPTS = -Wa,-march=RVIMAFDXhwacha -std=gnu99 -O2 -ffast-math
+RISCV_LINK = riscv-gcc -T $(common)/test.ld $(incs)
+RISCV_LINK_OPTS = -nostdlib -nostartfiles -ffast-math -lc
RISCV_OBJDUMP = riscv-objdump --disassemble-all --disassemble-zeroes --section=.text --section=.text.startup --section=.data
RISCV_SIM = spike -p2
@@ -121,8 +119,8 @@ bmarks_cycles = 80000
%.hex: %
elf2hex 16 32768 $< > $@
-$(bmarks_riscv_bin): %.riscv: %.o crt-mt.o
- $(RISCV_LINK_MT) crt-mt.o $< $(RISCV_LINK_SYSCALL) -o $@
+$(bmarks_riscv_bin): %.riscv: %.o syscalls.o crt.o
+ $(RISCV_LINK) $< syscalls.o crt.o $(RISCV_LINK_OPTS) -o $@
$(bmarks_riscv_dump): %.riscv.dump: %.riscv
$(RISCV_OBJDUMP) $< > $@
@@ -131,7 +129,7 @@ $(bmarks_riscv_out): %.riscv.out: %.riscv
$(RISCV_SIM) $< > $@
%.o: %.c
- $(RISCV_GCC) $(RISCV_GCC_OPTS) $(bmarks_defs) \
+ $(RISCV_GCC) $(RISCV_GCC_OPTS) $(bmarks_defs) -D__ASSEMBLY__=1 \
-c $(incs) $< -o $@
%.o: %.S
diff --git a/mt/ab_matmul/ab_matmul.c b/mt/ab_matmul/ab_matmul.c
index 0cd1bf5..6530a5d 100755
--- a/mt/ab_matmul/ab_matmul.c
+++ b/mt/ab_matmul/ab_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -213,33 +213,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ab_matmul/matmul_mi.c b/mt/ab_matmul/matmul_mi.c
index 0cd1bf5..6530a5d 100755
--- a/mt/ab_matmul/matmul_mi.c
+++ b/mt/ab_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -213,33 +213,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ab_vvadd/ab_vvadd.c b/mt/ab_vvadd/ab_vvadd.c
index 47f5e18..f2c8a65 100755
--- a/mt/ab_vvadd/ab_vvadd.c
+++ b/mt/ab_vvadd/ab_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -138,12 +138,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -151,21 +151,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ad_matmul/ad_matmul.c b/mt/ad_matmul/ad_matmul.c
index 04dd7ef..da9aaec 100755
--- a/mt/ad_matmul/ad_matmul.c
+++ b/mt/ad_matmul/ad_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -163,33 +163,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ad_matmul/matmul_mi.c b/mt/ad_matmul/matmul_mi.c
index 04dd7ef..da9aaec 100755
--- a/mt/ad_matmul/matmul_mi.c
+++ b/mt/ad_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -163,33 +163,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ad_vvadd/ad_vvadd.c b/mt/ad_vvadd/ad_vvadd.c
index 2dfd2bd..4b77dc5 100755
--- a/mt/ad_vvadd/ad_vvadd.c
+++ b/mt/ad_vvadd/ad_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -142,12 +142,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -155,21 +155,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ae_matmul/ae_matmul.c b/mt/ae_matmul/ae_matmul.c
index 7d4ad80..7a2e79d 100755
--- a/mt/ae_matmul/ae_matmul.c
+++ b/mt/ae_matmul/ae_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -146,7 +146,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
}
}
}
- barrier();
+ barrier(ncores);
for ( i = 0; i < lda; i+=4 ) {
for ( j = coreid*(lda/ncores); j < (coreid+1)*(lda/ncores); j++ ) {
@@ -229,34 +229,34 @@ void thread_entry(int cid, int nc)
/*
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
*/
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ae_matmul/matmul_mi.c b/mt/ae_matmul/matmul_mi.c
index 5062141..cf464f4 100755
--- a/mt/ae_matmul/matmul_mi.c
+++ b/mt/ae_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -146,7 +146,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
}
}
}
- barrier();
+ barrier(nc);
for ( int x = 0; x < ncores; x++) {
//split the i values into two chunks so the threads don't interfere on the B loads
@@ -277,34 +277,34 @@ void thread_entry(int cid, int nc)
/*
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
*/
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ae_vvadd/ae_vvadd.c b/mt/ae_vvadd/ae_vvadd.c
index 0e6541b..b1d336b 100755
--- a/mt/ae_vvadd/ae_vvadd.c
+++ b/mt/ae_vvadd/ae_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -144,12 +144,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -157,21 +157,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/af_matmul/Ronald.c b/mt/af_matmul/Ronald.c
index 31ea15d..796dbeb 100644
--- a/mt/af_matmul/Ronald.c
+++ b/mt/af_matmul/Ronald.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -214,33 +214,33 @@ void thread_entry(int cid, int nc)
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/af_matmul/af_matmul.c b/mt/af_matmul/af_matmul.c
index c2d72ab..4de06dd 100755
--- a/mt/af_matmul/af_matmul.c
+++ b/mt/af_matmul/af_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -205,33 +205,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/af_matmul/bestattemptthusfar.c b/mt/af_matmul/bestattemptthusfar.c
index ab8e7c1..8ca604d 100644
--- a/mt/af_matmul/bestattemptthusfar.c
+++ b/mt/af_matmul/bestattemptthusfar.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -180,33 +180,33 @@ void thread_entry(int cid, int nc)
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/af_matmul/bestattemptthusfar2.c b/mt/af_matmul/bestattemptthusfar2.c
index a35d302..3264360 100644
--- a/mt/af_matmul/bestattemptthusfar2.c
+++ b/mt/af_matmul/bestattemptthusfar2.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -206,33 +206,33 @@ void thread_entry(int cid, int nc)
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/af_matmul/failedattempt.c b/mt/af_matmul/failedattempt.c
index acd4a12..f56168e 100644
--- a/mt/af_matmul/failedattempt.c
+++ b/mt/af_matmul/failedattempt.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -266,33 +266,33 @@ void thread_entry(int cid, int nc)
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/af_matmul/failedattempt2.c b/mt/af_matmul/failedattempt2.c
index 0493998..657c23d 100644
--- a/mt/af_matmul/failedattempt2.c
+++ b/mt/af_matmul/failedattempt2.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -197,33 +197,33 @@ void thread_entry(int cid, int nc)
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/af_matmul/keeptrying.c b/mt/af_matmul/keeptrying.c
index ebfce6c..501e7fc 100644
--- a/mt/af_matmul/keeptrying.c
+++ b/mt/af_matmul/keeptrying.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -219,33 +219,33 @@ void thread_entry(int cid, int nc)
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/af_matmul/keeptrying2.c b/mt/af_matmul/keeptrying2.c
index ad2ff41..5251cf0 100644
--- a/mt/af_matmul/keeptrying2.c
+++ b/mt/af_matmul/keeptrying2.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -222,33 +222,33 @@ void thread_entry(int cid, int nc)
/*
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
*/
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/af_matmul/keeptrying3.c b/mt/af_matmul/keeptrying3.c
index 9c28faa..627a2d6 100644
--- a/mt/af_matmul/keeptrying3.c
+++ b/mt/af_matmul/keeptrying3.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -221,33 +221,33 @@ void thread_entry(int cid, int nc)
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/af_matmul/matmul_mi.c b/mt/af_matmul/matmul_mi.c
index 74a43f3..3190c8e 100644
--- a/mt/af_matmul/matmul_mi.c
+++ b/mt/af_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -218,33 +218,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/af_vvadd/af_vvadd.c b/mt/af_vvadd/af_vvadd.c
index 7f7bc7a..5c6b1e9 100755
--- a/mt/af_vvadd/af_vvadd.c
+++ b/mt/af_vvadd/af_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -144,12 +144,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -157,21 +157,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ag_matmul/ag_matmul.c b/mt/ag_matmul/ag_matmul.c
index 9782d78..bd470eb 100755
--- a/mt/ag_matmul/ag_matmul.c
+++ b/mt/ag_matmul/ag_matmul.c
@@ -52,7 +52,7 @@ stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -176,7 +176,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
C[(j+3)*lda + i + 1] += sum;
}
- barrier();
+ barrier(ncores);
}
}
}
@@ -197,33 +197,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ag_matmul/matmul_mi.c b/mt/ag_matmul/matmul_mi.c
index 9782d78..3352c56 100755
--- a/mt/ag_matmul/matmul_mi.c
+++ b/mt/ag_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -176,7 +176,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
C[(j+3)*lda + i + 1] += sum;
}
- barrier();
+ barrier(ncores);
}
}
}
@@ -197,33 +197,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ag_vvadd/ag_vvadd.c b/mt/ag_vvadd/ag_vvadd.c
index 8594c5f..51aa384 100755
--- a/mt/ag_vvadd/ag_vvadd.c
+++ b/mt/ag_vvadd/ag_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -137,12 +137,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -150,21 +150,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ai_matmul/ai_matmul.c b/mt/ai_matmul/ai_matmul.c
index e74a5d3..f9640f4 100755
--- a/mt/ai_matmul/ai_matmul.c
+++ b/mt/ai_matmul/ai_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -118,7 +118,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
//----------MSI--------------
///*
int i,j,k;
- barrier();
+ barrier(ncores);
for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) {
for(i = 0; i < lda; i+=4) {
data_t Cval0 = 0;
@@ -142,7 +142,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
//------------------MI-------------------
/*
int i,j,k;
- barrier();
+ barrier(ncores);
for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) {
for(i = 0; i < lda; i+=4) {
data_t Cval0 = 0;
@@ -189,33 +189,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ai_matmul/matmul_mi.c b/mt/ai_matmul/matmul_mi.c
index bacfbfc..154569c 100755
--- a/mt/ai_matmul/matmul_mi.c
+++ b/mt/ai_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -118,7 +118,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
//----------MSI--------------
/*
int i,j,k;
- barrier();
+ barrier(ncores);
for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) {
for(i = 0; i < lda; i+=4) {
data_t Cval0 = 0;
@@ -142,7 +142,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
//------------------MI-------------------
int i,j,k;
- barrier();
+ barrier(ncores);
for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) {
for(i = 0; i < lda; i+=4) {
data_t Cval0 = 0;
@@ -188,33 +188,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ai_vvadd/ai_vvadd.c b/mt/ai_vvadd/ai_vvadd.c
index 0319126..64d1774 100755
--- a/mt/ai_vvadd/ai_vvadd.c
+++ b/mt/ai_vvadd/ai_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -136,12 +136,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -149,21 +149,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/aj_matmul/aj_matmul.c b/mt/aj_matmul/aj_matmul.c
index 2280771..445f924 100755
--- a/mt/aj_matmul/aj_matmul.c
+++ b/mt/aj_matmul/aj_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -347,33 +347,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/aj_matmul/matmul_mi.c b/mt/aj_matmul/matmul_mi.c
index 2280771..445f924 100644
--- a/mt/aj_matmul/matmul_mi.c
+++ b/mt/aj_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -347,33 +347,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/aj_vvadd/aj_vvadd.c b/mt/aj_vvadd/aj_vvadd.c
index 55d1dbc..0096209 100755
--- a/mt/aj_vvadd/aj_vvadd.c
+++ b/mt/aj_vvadd/aj_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -134,12 +134,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -147,21 +147,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ak_matmul/ak_matmul.c b/mt/ak_matmul/ak_matmul.c
index 614a81f..4d803ee 100755
--- a/mt/ak_matmul/ak_matmul.c
+++ b/mt/ak_matmul/ak_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -180,33 +180,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ak_matmul/matmulMI.c b/mt/ak_matmul/matmulMI.c
index a9068f8..e731501 100755
--- a/mt/ak_matmul/matmulMI.c
+++ b/mt/ak_matmul/matmulMI.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -179,33 +179,33 @@ void thread_entry(int cid, int nc)
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ak_matmul/matmul_mi.c b/mt/ak_matmul/matmul_mi.c
index 992194d..aa41e32 100755
--- a/mt/ak_matmul/matmul_mi.c
+++ b/mt/ak_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -179,33 +179,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ak_vvadd/ak_vvadd.c b/mt/ak_vvadd/ak_vvadd.c
index a63bbe7..ee82f19 100755
--- a/mt/ak_vvadd/ak_vvadd.c
+++ b/mt/ak_vvadd/ak_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -137,12 +137,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -150,21 +150,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/al_matmul/al_matmul.c b/mt/al_matmul/al_matmul.c
index b4d2663..13bc501 100644
--- a/mt/al_matmul/al_matmul.c
+++ b/mt/al_matmul/al_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -240,33 +240,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/al_matmul/matmul_mi.c b/mt/al_matmul/matmul_mi.c
index 47b0992..c8016db 100644
--- a/mt/al_matmul/matmul_mi.c
+++ b/mt/al_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -294,33 +294,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/al_vvadd/al_vvadd.c b/mt/al_vvadd/al_vvadd.c
index 2319f5b..fd89916 100755
--- a/mt/al_vvadd/al_vvadd.c
+++ b/mt/al_vvadd/al_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -139,12 +139,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -152,21 +152,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/am_matmul/am_matmul.c b/mt/am_matmul/am_matmul.c
index 7fe737b..b00d573 100755
--- a/mt/am_matmul/am_matmul.c
+++ b/mt/am_matmul/am_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -183,33 +183,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/am_matmul/matmul3.c b/mt/am_matmul/matmul3.c
index 9a79baa..429be46 100755
--- a/mt/am_matmul/matmul3.c
+++ b/mt/am_matmul/matmul3.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -188,33 +188,33 @@ void thread_entry(int cid, int nc)
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/am_matmul/matmul4.c b/mt/am_matmul/matmul4.c
index 05a1aa4..7953d59 100755
--- a/mt/am_matmul/matmul4.c
+++ b/mt/am_matmul/matmul4.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -249,33 +249,33 @@ void thread_entry(int cid, int nc)
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/am_matmul/matmul_mi.c b/mt/am_matmul/matmul_mi.c
index 841a4b5..0a93ce2 100755
--- a/mt/am_matmul/matmul_mi.c
+++ b/mt/am_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -216,33 +216,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/am_matmul/matmul_msi.c b/mt/am_matmul/matmul_msi.c
index 0b59f8c..b4e5ad8 100755
--- a/mt/am_matmul/matmul_msi.c
+++ b/mt/am_matmul/matmul_msi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -183,33 +183,33 @@ void thread_entry(int cid, int nc)
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/am_vvadd/am_vvadd.c b/mt/am_vvadd/am_vvadd.c
index a4681d0..af10ea9 100755
--- a/mt/am_vvadd/am_vvadd.c
+++ b/mt/am_vvadd/am_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -135,12 +135,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -148,21 +148,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/an_matmul/an_matmul.c b/mt/an_matmul/an_matmul.c
index e7608fe..2150277 100755
--- a/mt/an_matmul/an_matmul.c
+++ b/mt/an_matmul/an_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -163,33 +163,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/an_matmul/matmul_mi.c b/mt/an_matmul/matmul_mi.c
index e7608fe..2150277 100644
--- a/mt/an_matmul/matmul_mi.c
+++ b/mt/an_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -163,33 +163,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/an_vvadd/an_vvadd.c b/mt/an_vvadd/an_vvadd.c
index 497b9bb..d5868e4 100755
--- a/mt/an_vvadd/an_vvadd.c
+++ b/mt/an_vvadd/an_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -131,12 +131,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -144,21 +144,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ap_matmul/ap_matmul.c b/mt/ap_matmul/ap_matmul.c
index ae1c84c..1b31d86 100755
--- a/mt/ap_matmul/ap_matmul.c
+++ b/mt/ap_matmul/ap_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -205,33 +205,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ap_matmul/matmul_mi.c b/mt/ap_matmul/matmul_mi.c
index ae1c84c..1b31d86 100755
--- a/mt/ap_matmul/matmul_mi.c
+++ b/mt/ap_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -205,33 +205,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ap_vvadd/ap_vvadd.c b/mt/ap_vvadd/ap_vvadd.c
index fe1440b..aa3b3ad 100755
--- a/mt/ap_vvadd/ap_vvadd.c
+++ b/mt/ap_vvadd/ap_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -148,12 +148,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -161,21 +161,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/aq_matmul/aq_matmul.c b/mt/aq_matmul/aq_matmul.c
index e7a3c65..a203766 100644
--- a/mt/aq_matmul/aq_matmul.c
+++ b/mt/aq_matmul/aq_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -150,33 +150,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/aq_matmul/matmul_mi.c b/mt/aq_matmul/matmul_mi.c
index 524b13d..932e6bc 100755
--- a/mt/aq_matmul/matmul_mi.c
+++ b/mt/aq_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -150,33 +150,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/aq_vvadd/aq_vvadd.c b/mt/aq_vvadd/aq_vvadd.c
index af88a0b..375569b 100755
--- a/mt/aq_vvadd/aq_vvadd.c
+++ b/mt/aq_vvadd/aq_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -157,12 +157,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -170,21 +170,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ar_matmul/ar_matmul.c b/mt/ar_matmul/ar_matmul.c
index 22ca10b..9286adc 100755
--- a/mt/ar_matmul/ar_matmul.c
+++ b/mt/ar_matmul/ar_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -160,33 +160,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ar_matmul/matmul_mi.c b/mt/ar_matmul/matmul_mi.c
deleted file mode 120000
index bd5f2b1..0000000
--- a/mt/ar_matmul/matmul_mi.c
+++ /dev/null
@@ -1 +0,0 @@
-matmul.c
\ No newline at end of file
diff --git a/mt/ar_vvadd/ar_vvadd.c b/mt/ar_vvadd/ar_vvadd.c
index eeb578c..18ad033 100755
--- a/mt/ar_vvadd/ar_vvadd.c
+++ b/mt/ar_vvadd/ar_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -136,12 +136,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -149,21 +149,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/as_matmul/as_matmul.c b/mt/as_matmul/as_matmul.c
index d98da8e..15855e0 100755
--- a/mt/as_matmul/as_matmul.c
+++ b/mt/as_matmul/as_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -119,8 +119,8 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
int i, j, k, n, m;
- //matmul_naive(32, input1_data, input2_data, results_data); barrier(): 957424 cycles, 29.2 cycles/iter, 3.6 CPI
- //matmul(32, input1_data, input2_data, results_data); barrier(): 340408 cycles, 10.3 cycles/iter, 1.8 CPI
+ //matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 957424 cycles, 29.2 cycles/iter, 3.6 CPI
+ //matmul(32, input1_data, input2_data, results_data); barrier(nc): 340408 cycles, 10.3 cycles/iter, 1.8 CPI
for (n = 0; n < lda; n += 1) {
for (m = 0; m < lda; m += 1) {
@@ -128,7 +128,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
bTranspose[lda*n + m] = B[lda*m + n];
}
}
- barrier();
+ barrier(ncores);
for ( j = coreid; j < lda; j += 2*ncores ) {
for ( i = 0; i < lda; i += 1 ){
@@ -138,21 +138,21 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
c1 += A[j * lda + k] * bTranspose[i*lda + k];
c2 += A[(j+2) * lda + k] * bTranspose[i*lda + k];
- //barrier();
+ //barrier(nc);
}
C[i + j * lda] = c1;
C[i + (j+2) * lda] = c2;
- barrier();
+ barrier(ncores);
}
- //barrier();
+ //barrier(nc);
}
- //matmul_naive(32, input1_data, input2_data, results_data); barrier(): 983609 cycles, 30.0 cycles/iter, 3.7 CPI
- //matmul(32, input1_data, input2_data, results_data); barrier(): 389942 cycles, 11.9 cycles/iter, 2.5 CPI
+ //matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 983609 cycles, 30.0 cycles/iter, 3.7 CPI
+ //matmul(32, input1_data, input2_data, results_data); barrier(nc): 389942 cycles, 11.9 cycles/iter, 2.5 CPI
/*
for ( j = coreid; j < lda; j += 2*ncores ) {
@@ -163,34 +163,34 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
c1 += A[j * lda + k] * B[k*lda + i];
c2 += A[(j+2) * lda + k] * B[k*lda + i];
- //barrier();
+ //barrier(nc);
}
C[i + j * lda] = c1;
C[i + (j+2) * lda] = c2;
- barrier();
+ barrier(nc);
}
- //barrier();
+ //barrier(nc);
}
*/
- // matmul_naive(32, input1_data, input2_data, results_data); barrier(): 973781 cycles, 29.7 cycles/iter, 3.7 CPI
- // matmul(32, input1_data, input2_data, results_data); barrier(): 461066 cycles, 14.0 cycles/iter, 3.5 CPI
+ // matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 973781 cycles, 29.7 cycles/iter, 3.7 CPI
+ // matmul(32, input1_data, input2_data, results_data); barrier(nc): 461066 cycles, 14.0 cycles/iter, 3.5 CPI
// for ( k = 0; k < lda; k += 1 ) {
// for ( j = coreid; j < lda; j += 2*ncores ) {
// for ( i = 0; i < lda; i += 1 ){
// C[i + j * lda] += A[j * lda + k] * B[k*lda + i];
// C[i + (j+2) * lda] += A[(j+2) * lda + k] * B[k*lda + i];
- // //barrier();
+ // //barrier(nc);
// }
- // barrier();
+ // barrier(nc);
// }
- // //barrier();
+ // //barrier(nc);
// }
- // matmul_naive(32, input1_data, input2_data, results_data); barrier(): 965136 cycles, 29.4 cycles/iter, 3.7 CPI
- // matmul(32, input1_data, input2_data, results_data); barrier(): 513779 cycles, 15.6 cycles/iter, 3.2 CPI
+ // matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 965136 cycles, 29.4 cycles/iter, 3.7 CPI
+ // matmul(32, input1_data, input2_data, results_data); barrier(nc): 513779 cycles, 15.6 cycles/iter, 3.2 CPI
// for ( j = coreid; j < lda; j += 2*ncores ) {
// for ( i = 0; i < lda; i += 1 ){
@@ -198,16 +198,16 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
// C[i + j * lda] += A[j * lda + k] * B[k*lda + i];
// C[i + (j+2) * lda] += A[(j+2) * lda + k] * B[k*lda + i];
- // //barrier();
+ // //barrier(nc);
// }
- // barrier();
+ // barrier(nc);
// }
- // //barrier();
+ // //barrier(nc);
//}
- // matmul_naive(32, input1_data, input2_data, results_data); barrier(): 937892 cycles, 28.6 cycles/iter, 3.6 CPI
- // matmul(32, input1_data, input2_data, results_data); barrier(): 576478 cycles, 17.5 cycles/iter, 3.5 CPI
+ // matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 937892 cycles, 28.6 cycles/iter, 3.6 CPI
+ // matmul(32, input1_data, input2_data, results_data); barrier(nc): 576478 cycles, 17.5 cycles/iter, 3.5 CPI
// for ( i = 0; i < lda; i += 1 ){
// for ( j = coreid; j < lda; j += 2*ncores ) {
@@ -215,11 +215,11 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
// C[i + j * lda] += A[j * lda + k] * B[k*lda + i];
// C[i + (j+2) * lda] += A[(j+2) * lda + k] * B[k*lda + i];
- // //barrier();
+ // //barrier(nc);
// }
- // barrier();
+ // barrier(nc);
// }
- // //barrier();
+ // //barrier(nc);
// }
//for ( i = coreid; i < lda; i += ncores ){
@@ -227,7 +227,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
// for ( k = coreid; k < lda; k += ncores ) {
// C[i + j*lda] += A[j*lda + k] * B[k*lda + i];
// }
- //barrier();
+ //barrier(nc);
// }
//}
}
@@ -248,33 +248,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/as_matmul/matmul_mi.c b/mt/as_matmul/matmul_mi.c
index 130fdb7..88534b8 100644
--- a/mt/as_matmul/matmul_mi.c
+++ b/mt/as_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -118,8 +118,8 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
int i, j, k, n, m, c1, c2;
- //matmul_naive(32, input1_data, input2_data, results_data); barrier(): 952596 cycles, 29.0 cycles/iter, 3.6 CPI
- //matmul(32, input1_data, input2_data, results_data); barrier(): 570135 cycles, 17.3 cycles/iter, 3.4 CPI
+ //matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 952596 cycles, 29.0 cycles/iter, 3.6 CPI
+ //matmul(32, input1_data, input2_data, results_data); barrier(nc): 570135 cycles, 17.3 cycles/iter, 3.4 CPI
for ( j = coreid; j < lda; j += 2*ncores ) {
for ( i = 0; i < lda; i += 1 ){
@@ -129,14 +129,14 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
c1 += A[j * lda + k] * B[k*lda + i];
c2 += A[(j+2) * lda + k] * B[k*lda + i];
- //barrier();
+ //barrier(nc);
}
C[i + j * lda] = c1;
C[i + (j+2) * lda] = c2;
- barrier();
+ barrier(nc);
}
- //barrier();
+ //barrier(nc);
}
}
@@ -157,33 +157,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/as_vvadd/as_vvadd.c b/mt/as_vvadd/as_vvadd.c
index dd1f94b..3034ae3 100755
--- a/mt/as_vvadd/as_vvadd.c
+++ b/mt/as_vvadd/as_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -107,9 +107,9 @@ void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const
for (i = coreid; i < n; i += 2*ncores) {
x[i] = x[i] + y[i];
x[i+2] = x[i+2] + y[i+2];
- //barrier();
+ //barrier(nc);
}
- barrier(); //adding a barrier so there aren't any OOB errors due to faster threads
+ barrier(ncores); //adding a barrier so there aren't any OOB errors due to faster threads
}
@@ -140,12 +140,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -153,21 +153,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/at_matmul/at_matmul.c b/mt/at_matmul/at_matmul.c
index d69f8fe..ccda17a 100755
--- a/mt/at_matmul/at_matmul.c
+++ b/mt/at_matmul/at_matmul.c
@@ -52,7 +52,7 @@ stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -284,33 +284,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/at_matmul/matmul_mi.c b/mt/at_matmul/matmul_mi.c
index 0c5115f..640c32d 100644
--- a/mt/at_matmul/matmul_mi.c
+++ b/mt/at_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -284,33 +284,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/at_vvadd/at_vvadd.c b/mt/at_vvadd/at_vvadd.c
index 55fb8de..b271d67 100755
--- a/mt/at_vvadd/at_vvadd.c
+++ b/mt/at_vvadd/at_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -145,12 +145,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -158,21 +158,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/av_matmul/av_matmul.c b/mt/av_matmul/av_matmul.c
index 8a28949..ad5ccdb 100644
--- a/mt/av_matmul/av_matmul.c
+++ b/mt/av_matmul/av_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -2869,33 +2869,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/av_matmul/matmul_mi.c b/mt/av_matmul/matmul_mi.c
index 4cdac76..b0ce08a 100644
--- a/mt/av_matmul/matmul_mi.c
+++ b/mt/av_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -2176,33 +2176,33 @@ void thread_entry(int cid, int nc)
/*
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
*/
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/av_vvadd/av_vvadd.c b/mt/av_vvadd/av_vvadd.c
index 2f213d8..11202c7 100644
--- a/mt/av_vvadd/av_vvadd.c
+++ b/mt/av_vvadd/av_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -162,12 +162,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -175,21 +175,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ay_matmul/ay_matmul.c b/mt/ay_matmul/ay_matmul.c
index 2a1e04c..857a78f 100644
--- a/mt/ay_matmul/ay_matmul.c
+++ b/mt/ay_matmul/ay_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -176,33 +176,33 @@ void thread_entry(int cid, int nc)
//// Execute the provided, naive matmul
- //barrier();
- //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ //barrier(nc);
+ //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//// verify
- //verify(ARRAY_SIZE, results_data, verify_data);
+ //verifyMT(ARRAY_SIZE, results_data, verify_data);
//
//// clear results from the first trial
//size_t i;
//if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
- //barrier();
+ //barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ay_matmul/matmul_mi.c b/mt/ay_matmul/matmul_mi.c
index d58c5b8..1a42e83 100644
--- a/mt/ay_matmul/matmul_mi.c
+++ b/mt/ay_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -224,33 +224,33 @@ void thread_entry(int cid, int nc)
//// Execute the provided, naive matmul
- //barrier();
- //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ //barrier(nc);
+ //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//// verify
- //verify(ARRAY_SIZE, results_data, verify_data);
+ //verifyMT(ARRAY_SIZE, results_data, verify_data);
//
//// clear results from the first trial
//size_t i;
//if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
- //barrier();
+ //barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ay_vvadd/ay_vvadd.c b/mt/ay_vvadd/ay_vvadd.c
index 0455a41..2bf8da9 100755
--- a/mt/ay_vvadd/ay_vvadd.c
+++ b/mt/ay_vvadd/ay_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -141,12 +141,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -154,21 +154,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/az_matmul/az_matmul.c b/mt/az_matmul/az_matmul.c
index 56f02d3..5bfd15f 100755
--- a/mt/az_matmul/az_matmul.c
+++ b/mt/az_matmul/az_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -383,33 +383,33 @@ void thread_entry(int cid, int nc)
//// Execute the provided, naive matmul
- //barrier();
- //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ //barrier(nc);
+ //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//// verify
- //verify(ARRAY_SIZE, results_data, verify_data);
+ //verifyMT(ARRAY_SIZE, results_data, verify_data);
//
//// clear results from the first trial
//size_t i;
//if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
- //barrier();
+ //barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/az_matmul/matmul_mi.c b/mt/az_matmul/matmul_mi.c
index 56f02d3..5bfd15f 100755
--- a/mt/az_matmul/matmul_mi.c
+++ b/mt/az_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -383,33 +383,33 @@ void thread_entry(int cid, int nc)
//// Execute the provided, naive matmul
- //barrier();
- //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ //barrier(nc);
+ //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//// verify
- //verify(ARRAY_SIZE, results_data, verify_data);
+ //verifyMT(ARRAY_SIZE, results_data, verify_data);
//
//// clear results from the first trial
//size_t i;
//if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
- //barrier();
+ //barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/az_vvadd/az_vvadd.c b/mt/az_vvadd/az_vvadd.c
index 7b40fb1..cf32ac7 100755
--- a/mt/az_vvadd/az_vvadd.c
+++ b/mt/az_vvadd/az_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -140,12 +140,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -153,21 +153,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ba_matmul/ba_matmul.c b/mt/ba_matmul/ba_matmul.c
index da9a764..3f712c1 100755
--- a/mt/ba_matmul/ba_matmul.c
+++ b/mt/ba_matmul/ba_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -238,33 +238,33 @@ void thread_entry(int cid, int nc)
// Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ba_matmul/matmul_mi.c b/mt/ba_matmul/matmul_mi.c
index da9a764..3f712c1 100755
--- a/mt/ba_matmul/matmul_mi.c
+++ b/mt/ba_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -238,33 +238,33 @@ void thread_entry(int cid, int nc)
// Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ba_vvadd/ba_vvadd.c b/mt/ba_vvadd/ba_vvadd.c
index 30703df..90aec9d 100755
--- a/mt/ba_vvadd/ba_vvadd.c
+++ b/mt/ba_vvadd/ba_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -134,12 +134,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -147,21 +147,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bb_matmul/bb_matmul.c b/mt/bb_matmul/bb_matmul.c
index 067e3e3..d9b2add 100755
--- a/mt/bb_matmul/bb_matmul.c
+++ b/mt/bb_matmul/bb_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -184,7 +184,7 @@ void __attribute__((noinline)) matmul_mi(const int lda, const data_t A[], const
}
}
- barrier();
+ barrier(ncores);
curhalf++;
curhalf %= ncores;
@@ -240,33 +240,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bb_matmul/matmul_mi.c b/mt/bb_matmul/matmul_mi.c
index 919e2ce..346f178 100755
--- a/mt/bb_matmul/matmul_mi.c
+++ b/mt/bb_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -184,7 +184,7 @@ void __attribute__((noinline)) matmul_mi(const int lda, const data_t A[], const
}
}
- barrier();
+ barrier(ncores); // `nc` is a thread_entry parameter and is not in scope in matmul_mi; use the global core count
curhalf++;
curhalf %= ncores;
@@ -240,33 +240,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bb_vvadd/bb_vvadd.c b/mt/bb_vvadd/bb_vvadd.c
index 327da10..664ce35 100755
--- a/mt/bb_vvadd/bb_vvadd.c
+++ b/mt/bb_vvadd/bb_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -133,12 +133,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -146,21 +146,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bc_matmul/bc_matmul.c b/mt/bc_matmul/bc_matmul.c
index 088f38f..bac98cb 100755
--- a/mt/bc_matmul/bc_matmul.c
+++ b/mt/bc_matmul/bc_matmul.c
@@ -60,7 +60,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -72,7 +72,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -254,33 +254,33 @@ void thread_entry(int cid, int nc)
// /* // Execute the provided, naive matmul */
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bc_matmul/matmul_mi.c b/mt/bc_matmul/matmul_mi.c
index 86bd562..35abdc8 100755
--- a/mt/bc_matmul/matmul_mi.c
+++ b/mt/bc_matmul/matmul_mi.c
@@ -60,7 +60,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -72,7 +72,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -172,7 +172,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
}
}
}
- /* barrier(); */
+ /* barrier(nc); */
/* kk_start= (coreid == 1 ? 0 : LDA/2); */
/* kk_end = (coreid == 1 ? LDA/2 : LDA); */
@@ -216,7 +216,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
}
- //barrier();
+ //barrier(nc);
for (jj = start; jj < end; jj += BLOCK_J) {
int kk_start= (coreid != 0 ? 0 : LDA/2), kk_end = (coreid != 0 ? LDA/2 : LDA);
for (kk = kk_start; kk < kk_end; kk += BLOCK_K) {
@@ -285,33 +285,33 @@ void thread_entry(int cid, int nc)
// /* // Execute the provided, naive matmul */
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bc_vvadd/bc_vvadd.c b/mt/bc_vvadd/bc_vvadd.c
index 50673ed..c7af6b8 100755
--- a/mt/bc_vvadd/bc_vvadd.c
+++ b/mt/bc_vvadd/bc_vvadd.c
@@ -53,7 +53,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -65,7 +65,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -138,12 +138,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -151,21 +151,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/be_matmul/be_matmul.c b/mt/be_matmul/be_matmul.c
index da4b531..e8bff41 100755
--- a/mt/be_matmul/be_matmul.c
+++ b/mt/be_matmul/be_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -227,7 +227,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
pos_B += (lda*step_k) ;
pos_A += step_k;
}
- //barrier();
+ //barrier(nc);
C[(pos_C + 0)] = temp10;
C[(pos_C + 1)] = temp11;
@@ -237,7 +237,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
C[(pos_C + 5)] = temp15;
C[(pos_C + 6)] = temp16;
C[(pos_C + 7)] = temp17;
- //barrier();
+ //barrier(nc);
pos_C = i + j*lda;
//pos_C -= lda;
@@ -249,15 +249,15 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
C[(pos_C + 5)] = temp05;
C[(pos_C + 6)] = temp06;
C[(pos_C + 7)] = temp07;
- //barrier();
+ //barrier(nc);
//pos_C += step_j * lda;
}
- //barrier();
+ //barrier(nc);
}
- //barrier();
+ //barrier(nc);
}
- //barrier();
+ //barrier(nc);
}
}
@@ -277,35 +277,35 @@ void thread_entry(int cid, int nc)
/*
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
*/
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
//printf("input1_data");
diff --git a/mt/be_matmul/matmul_mi.c b/mt/be_matmul/matmul_mi.c
index da4b531..e8bff41 100755
--- a/mt/be_matmul/matmul_mi.c
+++ b/mt/be_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -227,7 +227,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
pos_B += (lda*step_k) ;
pos_A += step_k;
}
- //barrier();
+ //barrier(nc);
C[(pos_C + 0)] = temp10;
C[(pos_C + 1)] = temp11;
@@ -237,7 +237,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
C[(pos_C + 5)] = temp15;
C[(pos_C + 6)] = temp16;
C[(pos_C + 7)] = temp17;
- //barrier();
+ //barrier(nc);
pos_C = i + j*lda;
//pos_C -= lda;
@@ -249,15 +249,15 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
C[(pos_C + 5)] = temp05;
C[(pos_C + 6)] = temp06;
C[(pos_C + 7)] = temp07;
- //barrier();
+ //barrier(nc);
//pos_C += step_j * lda;
}
- //barrier();
+ //barrier(nc);
}
- //barrier();
+ //barrier(nc);
}
- //barrier();
+ //barrier(nc);
}
}
@@ -277,35 +277,35 @@ void thread_entry(int cid, int nc)
/*
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
*/
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
//printf("input1_data");
diff --git a/mt/be_vvadd/be_vvadd.c b/mt/be_vvadd/be_vvadd.c
index 1090c5a..b1bf72b 100755
--- a/mt/be_vvadd/be_vvadd.c
+++ b/mt/be_vvadd/be_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -137,12 +137,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -150,21 +150,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bf_matmul/bf_matmul.c b/mt/bf_matmul/bf_matmul.c
index 0bab50c..24fa7e6 100644
--- a/mt/bf_matmul/bf_matmul.c
+++ b/mt/bf_matmul/bf_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -246,33 +246,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bf_matmul/matmul_mi.c b/mt/bf_matmul/matmul_mi.c
index 1eb4145..a063df0 100755
--- a/mt/bf_matmul/matmul_mi.c
+++ b/mt/bf_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -359,33 +359,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bf_vvadd/bf_vvadd.c b/mt/bf_vvadd/bf_vvadd.c
index 1c64793..f783ee1 100755
--- a/mt/bf_vvadd/bf_vvadd.c
+++ b/mt/bf_vvadd/bf_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -146,12 +146,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -159,21 +159,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bh_matmul/bh_matmul.c b/mt/bh_matmul/bh_matmul.c
index 990c935..a496bba 100755
--- a/mt/bh_matmul/bh_matmul.c
+++ b/mt/bh_matmul/bh_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -215,33 +215,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bh_matmul/matmul_mi.c b/mt/bh_matmul/matmul_mi.c
index 990c935..a496bba 100755
--- a/mt/bh_matmul/matmul_mi.c
+++ b/mt/bh_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -215,33 +215,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bh_vvadd/bh_vvadd.c b/mt/bh_vvadd/bh_vvadd.c
index 216f9ad..c4d06d3 100755
--- a/mt/bh_vvadd/bh_vvadd.c
+++ b/mt/bh_vvadd/bh_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -153,12 +153,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -166,21 +166,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bj_matmul/bj_matmul.c b/mt/bj_matmul/bj_matmul.c
index 5766e91..1642d10 100644
--- a/mt/bj_matmul/bj_matmul.c
+++ b/mt/bj_matmul/bj_matmul.c
@@ -52,7 +52,7 @@ stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -215,33 +215,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bj_matmul/matmul_mi.c b/mt/bj_matmul/matmul_mi.c
index 5766e91..1642d10 100644
--- a/mt/bj_matmul/matmul_mi.c
+++ b/mt/bj_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -215,33 +215,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bj_vvadd/bj_vvadd.c b/mt/bj_vvadd/bj_vvadd.c
index 3e2ed6e..4f69c35 100755
--- a/mt/bj_vvadd/bj_vvadd.c
+++ b/mt/bj_vvadd/bj_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -135,12 +135,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -148,21 +148,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bk_matmul/bk_matmul.c b/mt/bk_matmul/bk_matmul.c
index eddbcfb..9fa22b1 100755
--- a/mt/bk_matmul/bk_matmul.c
+++ b/mt/bk_matmul/bk_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -293,33 +293,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bk_matmul/matmul_mi.c b/mt/bk_matmul/matmul_mi.c
index b1c0a39..b45071b 100755
--- a/mt/bk_matmul/matmul_mi.c
+++ b/mt/bk_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -337,33 +337,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bk_matmul/matmul_msi.c b/mt/bk_matmul/matmul_msi.c
index 5890d2f..253ea12 100755
--- a/mt/bk_matmul/matmul_msi.c
+++ b/mt/bk_matmul/matmul_msi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -293,33 +293,33 @@ void thread_entry(int cid, int nc)
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bk_vvadd/bk_vvadd.c b/mt/bk_vvadd/bk_vvadd.c
index cf95374..20fe4af 100755
--- a/mt/bk_vvadd/bk_vvadd.c
+++ b/mt/bk_vvadd/bk_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -144,12 +144,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -157,21 +157,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bm_matmul/bm_matmul.c b/mt/bm_matmul/bm_matmul.c
index 3f267dc..9897d99 100644
--- a/mt/bm_matmul/bm_matmul.c
+++ b/mt/bm_matmul/bm_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -324,33 +324,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bm_matmul/matmul_mi.c b/mt/bm_matmul/matmul_mi.c
index 2471a4a..3bdd096 100644
--- a/mt/bm_matmul/matmul_mi.c
+++ b/mt/bm_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -152,7 +152,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
data_t temp2_3=0;
data_t temp3_3=0;
data_t tempB_3=0;
- barrier();
+ barrier(nc);
if (coreid!=ncores-1){
for (i=space*coreid;i<max/4*4;i+=4)
{
@@ -315,33 +315,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bm_vvadd/bm_vvadd.c b/mt/bm_vvadd/bm_vvadd.c
index d60f4ec..914c711 100755
--- a/mt/bm_vvadd/bm_vvadd.c
+++ b/mt/bm_vvadd/bm_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -160,12 +160,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -173,21 +173,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bn_matmul/bn_matmul.c b/mt/bn_matmul/bn_matmul.c
index eddbcfb..9fa22b1 100755
--- a/mt/bn_matmul/bn_matmul.c
+++ b/mt/bn_matmul/bn_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -293,33 +293,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bn_matmul/matmul_mi.c b/mt/bn_matmul/matmul_mi.c
index b1c0a39..b45071b 100644
--- a/mt/bn_matmul/matmul_mi.c
+++ b/mt/bn_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -337,33 +337,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bn_vvadd/bn_vvadd.c b/mt/bn_vvadd/bn_vvadd.c
index 143d437..6c1459e 100755
--- a/mt/bn_vvadd/bn_vvadd.c
+++ b/mt/bn_vvadd/bn_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -137,12 +137,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -150,21 +150,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bo_matmul/bo_matmul.c b/mt/bo_matmul/bo_matmul.c
index de964db..dd06110 100644
--- a/mt/bo_matmul/bo_matmul.c
+++ b/mt/bo_matmul/bo_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -308,33 +308,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// //verify(ARRAY_SIZE, results_data, verify_data);
+// //verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bo_matmul/matmul_mi.c b/mt/bo_matmul/matmul_mi.c
index ccd3987..3ccdb62 100644
--- a/mt/bo_matmul/matmul_mi.c
+++ b/mt/bo_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -308,33 +308,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// //verify(ARRAY_SIZE, results_data, verify_data);
+// //verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
//
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bo_vvadd/bo_vvadd.c b/mt/bo_vvadd/bo_vvadd.c
index 74b0351..efdee6c 100755
--- a/mt/bo_vvadd/bo_vvadd.c
+++ b/mt/bo_vvadd/bo_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -138,12 +138,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -151,21 +151,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bp_matmul/bp_matmul.c b/mt/bp_matmul/bp_matmul.c
index de964db..dd06110 100755
--- a/mt/bp_matmul/bp_matmul.c
+++ b/mt/bp_matmul/bp_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -308,33 +308,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// //verify(ARRAY_SIZE, results_data, verify_data);
+// //verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bp_matmul/matmul_mi.c b/mt/bp_matmul/matmul_mi.c
index de964db..dd06110 100755
--- a/mt/bp_matmul/matmul_mi.c
+++ b/mt/bp_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -308,33 +308,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// //verify(ARRAY_SIZE, results_data, verify_data);
+// //verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bp_vvadd/bp_vvadd.c b/mt/bp_vvadd/bp_vvadd.c
index 5d073cf..41d56ab 100755
--- a/mt/bp_vvadd/bp_vvadd.c
+++ b/mt/bp_vvadd/bp_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -144,12 +144,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -157,21 +157,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/br_matmul/br_matmul.c b/mt/br_matmul/br_matmul.c
index 5ca1dbe..f831ac2 100755
--- a/mt/br_matmul/br_matmul.c
+++ b/mt/br_matmul/br_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -138,7 +138,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
tB[(j + 1)*lda + i + 1] = B[(i + 1)*lda + j + 1];
}
}
- barrier();
+ barrier(ncores);
// compute C[j*n + i] += A[j*n + k] + Btranspose[i*n + k]
for ( j2 = 0; j2 < lda; j2 += jBLOCK )
@@ -250,33 +250,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/br_matmul/matmul_mi.c b/mt/br_matmul/matmul_mi.c
index 5ca1dbe..14a0705 100755
--- a/mt/br_matmul/matmul_mi.c
+++ b/mt/br_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -138,7 +138,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
tB[(j + 1)*lda + i + 1] = B[(i + 1)*lda + j + 1];
}
}
- barrier();
+ barrier(nc);
// compute C[j*n + i] += A[j*n + k] + Btranspose[i*n + k]
for ( j2 = 0; j2 < lda; j2 += jBLOCK )
@@ -250,33 +250,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/br_vvadd/br_vvadd.c b/mt/br_vvadd/br_vvadd.c
index b27ed64..ce0d6bb 100755
--- a/mt/br_vvadd/br_vvadd.c
+++ b/mt/br_vvadd/br_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -140,12 +140,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -153,21 +153,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bs_matmul/bs_matmul.c b/mt/bs_matmul/bs_matmul.c
index f382a42..336c083 100755
--- a/mt/bs_matmul/bs_matmul.c
+++ b/mt/bs_matmul/bs_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -151,33 +151,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bs_matmul/matmul_mi.c b/mt/bs_matmul/matmul_mi.c
index d1500d2..ec0d89b 100644
--- a/mt/bs_matmul/matmul_mi.c
+++ b/mt/bs_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -157,33 +157,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bs_vvadd/bs_vvadd.c b/mt/bs_vvadd/bs_vvadd.c
index 01d708b..af126f0 100755
--- a/mt/bs_vvadd/bs_vvadd.c
+++ b/mt/bs_vvadd/bs_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -145,12 +145,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -158,21 +158,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bt_matmul/bt_matmul.c b/mt/bt_matmul/bt_matmul.c
index 0215491..4cf652d 100755
--- a/mt/bt_matmul/bt_matmul.c
+++ b/mt/bt_matmul/bt_matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -209,7 +209,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
pos_B += (lda*step_k) ;
pos_A += step_k;
}
- //barrier();
+ //barrier(nc);
C[(pos_C + 0)] = temp10;
C[(pos_C + 1)] = temp11;
@@ -219,7 +219,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
C[(pos_C + 5)] = temp15;
C[(pos_C + 6)] = temp16;
C[(pos_C + 7)] = temp17;
- //barrier();
+ //barrier(nc);
pos_C = i + j*lda;
//pos_C -= lda;
@@ -231,15 +231,15 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
C[(pos_C + 5)] = temp05;
C[(pos_C + 6)] = temp06;
C[(pos_C + 7)] = temp07;
- //barrier();
+ //barrier(nc);
//pos_C += step_j * lda;
}
- //barrier();
+ //barrier(nc);
}
- //barrier();
+ //barrier(nc);
}
- //barrier();
+ //barrier(nc);
}
}
@@ -259,35 +259,35 @@ void thread_entry(int cid, int nc)
/*
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
*/
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
//printf("input1_data");
diff --git a/mt/bt_matmul/matmul_mi.c b/mt/bt_matmul/matmul_mi.c
index dc9ae1b..181314e 100755
--- a/mt/bt_matmul/matmul_mi.c
+++ b/mt/bt_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -210,7 +210,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
pos_B += (lda*step_k) ;
pos_A += step_k;
}
- //barrier();
+ //barrier(nc);
C[(pos_C + 0)] = temp10;
C[(pos_C + 1)] = temp11;
@@ -220,7 +220,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
C[(pos_C + 5)] = temp15;
C[(pos_C + 6)] = temp16;
C[(pos_C + 7)] = temp17;
- //barrier();
+ //barrier(nc);
pos_C = i + j*lda;
//pos_C -= lda;
@@ -232,15 +232,15 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
C[(pos_C + 5)] = temp05;
C[(pos_C + 6)] = temp06;
C[(pos_C + 7)] = temp07;
- //barrier();
+ //barrier(nc);
//pos_C += step_j * lda;
}
- //barrier();
+ //barrier(nc);
}
- //barrier();
+ //barrier(nc);
}
- //barrier();
+ //barrier(nc);
}
}
@@ -260,35 +260,35 @@ void thread_entry(int cid, int nc)
/*
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
*/
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
//printf("input1_data");
diff --git a/mt/bt_vvadd/bt_vvadd.c b/mt/bt_vvadd/bt_vvadd.c
index d2a01c4..7af3562 100755
--- a/mt/bt_vvadd/bt_vvadd.c
+++ b/mt/bt_vvadd/bt_vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -139,12 +139,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -152,21 +152,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/matmul/matmul.c b/mt/matmul/matmul.c
index 93f8ea9..b009d26 100755
--- a/mt/matmul/matmul.c
+++ b/mt/matmul/matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -134,33 +134,33 @@ void thread_entry(int cid, int nc)
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/mt-matmul/mt-matmul.c b/mt/mt-matmul/mt-matmul.c
index 93f8ea9..b009d26 100644
--- a/mt/mt-matmul/mt-matmul.c
+++ b/mt/mt-matmul/mt-matmul.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -134,33 +134,33 @@ void thread_entry(int cid, int nc)
// Execute the provided, naive matmul
- barrier();
- stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
// clear results from the first trial
size_t i;
if (coreid == 0)
for (i=0; i < ARRAY_SIZE; i++)
results_data[i] = 0;
- barrier();
+ barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/mt-vvadd/mt-vvadd.c b/mt/mt-vvadd/mt-vvadd.c
index 497b9bb..d5868e4 100644
--- a/mt/mt-vvadd/mt-vvadd.c
+++ b/mt/mt-vvadd/mt-vvadd.c
@@ -51,7 +51,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -131,12 +131,12 @@ void thread_entry(int cid, int nc)
// Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -144,21 +144,21 @@ void thread_entry(int cid, int nc)
for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}