diff options
author | Henry Cook <hcook@eecs.berkeley.edu> | 2014-09-24 18:11:42 -0700 |
---|---|---|
committer | Henry Cook <hcook@eecs.berkeley.edu> | 2014-09-24 18:34:50 -0700 |
commit | 9f7ed92e1aeb91593ceb8baf26ff4fe0a1a9efa6 (patch) | |
tree | 81654ff43e33b08a117cb9b199e60dbce8b32fe5 | |
parent | dbde501592ce20c536cbc97e99d03f54f3e30294 (diff) | |
download | riscv-tests-9f7ed92e1aeb91593ceb8baf26ff4fe0a1a9efa6.zip riscv-tests-9f7ed92e1aeb91593ceb8baf26ff4fe0a1a9efa6.tar.gz riscv-tests-9f7ed92e1aeb91593ceb8baf26ff4fe0a1a9efa6.tar.bz2 |
Updated mt tests
119 files changed, 1483 insertions, 1486 deletions
diff --git a/mt/Makefile b/mt/Makefile index 455f066..1d85ed3 100755 --- a/mt/Makefile +++ b/mt/Makefile @@ -89,11 +89,9 @@ bt_vvadd\ #-------------------------------------------------------------------- RISCV_GCC = riscv-gcc -RISCV_GCC_OPTS = -std=gnu99 -T common/test.ld -O3 -nostdlib -nostartfiles -funroll-all-loops -RISCV_LINK = riscv-gcc -T $(common)/test.ld -RISCV_LINK_MT = riscv-gcc -T $(common)/test-mt.ld -RISCV_LINK_OPTS = -lc -RISCV_LINK_SYSCALL = -I$(bmarkdir)/../env $(common)/syscalls.c -lc +RISCV_GCC_OPTS = -Wa,-march=RVIMAFDXhwacha -std=gnu99 -O2 -ffast-math +RISCV_LINK = riscv-gcc -T $(common)/test.ld $(incs) +RISCV_LINK_OPTS = -nostdlib -nostartfiles -ffast-math -lc RISCV_OBJDUMP = riscv-objdump --disassemble-all --disassemble-zeroes --section=.text --section=.text.startup --section=.data RISCV_SIM = spike -p2 @@ -121,8 +119,8 @@ bmarks_cycles = 80000 %.hex: % elf2hex 16 32768 $< > $@ -$(bmarks_riscv_bin): %.riscv: %.o crt-mt.o - $(RISCV_LINK_MT) crt-mt.o $< $(RISCV_LINK_SYSCALL) -o $@ +$(bmarks_riscv_bin): %.riscv: %.o syscalls.o crt.o + $(RISCV_LINK) $< syscalls.o crt.o $(RISCV_LINK_OPTS) -o $@ $(bmarks_riscv_dump): %.riscv.dump: %.riscv $(RISCV_OBJDUMP) $< > $@ @@ -131,7 +129,7 @@ $(bmarks_riscv_out): %.riscv.out: %.riscv $(RISCV_SIM) $< > $@ %.o: %.c - $(RISCV_GCC) $(RISCV_GCC_OPTS) $(bmarks_defs) \ + $(RISCV_GCC) $(RISCV_GCC_OPTS) $(bmarks_defs) -D__ASSEMBLY__=1 \ -c $(incs) $< -o $@ %.o: %.S diff --git a/mt/ab_matmul/ab_matmul.c b/mt/ab_matmul/ab_matmul.c index 0cd1bf5..6530a5d 100755 --- a/mt/ab_matmul/ab_matmul.c +++ b/mt/ab_matmul/ab_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -213,33 +213,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ab_matmul/matmul_mi.c b/mt/ab_matmul/matmul_mi.c index 0cd1bf5..6530a5d 100755 --- a/mt/ab_matmul/matmul_mi.c +++ b/mt/ab_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -213,33 +213,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ab_vvadd/ab_vvadd.c b/mt/ab_vvadd/ab_vvadd.c index 47f5e18..f2c8a65 100755 --- a/mt/ab_vvadd/ab_vvadd.c +++ b/mt/ab_vvadd/ab_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -138,12 +138,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -151,21 +151,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ad_matmul/ad_matmul.c b/mt/ad_matmul/ad_matmul.c index 04dd7ef..da9aaec 100755 --- a/mt/ad_matmul/ad_matmul.c +++ b/mt/ad_matmul/ad_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -163,33 +163,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ad_matmul/matmul_mi.c b/mt/ad_matmul/matmul_mi.c index 04dd7ef..da9aaec 100755 --- a/mt/ad_matmul/matmul_mi.c +++ b/mt/ad_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -163,33 +163,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ad_vvadd/ad_vvadd.c b/mt/ad_vvadd/ad_vvadd.c index 2dfd2bd..4b77dc5 100755 --- a/mt/ad_vvadd/ad_vvadd.c +++ b/mt/ad_vvadd/ad_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -142,12 +142,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -155,21 +155,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ae_matmul/ae_matmul.c b/mt/ae_matmul/ae_matmul.c index 7d4ad80..7a2e79d 100755 --- a/mt/ae_matmul/ae_matmul.c +++ b/mt/ae_matmul/ae_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -146,7 +146,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da } } } - barrier(); + barrier(ncores); for ( i = 0; i < lda; i+=4 ) { for ( j = coreid*(lda/ncores); j < (coreid+1)*(lda/ncores); j++ ) { @@ -229,34 +229,34 @@ void thread_entry(int cid, int nc) /* // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify - verify(ARRAY_SIZE, results_data, verify_data); + verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial size_t i; if (coreid == 0) for (i=0; i < ARRAY_SIZE; i++) results_data[i] = 0; - barrier(); + barrier(nc); */ // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ae_matmul/matmul_mi.c b/mt/ae_matmul/matmul_mi.c index 5062141..cf464f4 100755 --- a/mt/ae_matmul/matmul_mi.c +++ b/mt/ae_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -146,7 +146,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da } } } - barrier(); + barrier(nc); for ( int x = 0; x < ncores; x++) { //split the i values into two chunks so the threads don't interfere on the B loads @@ -277,34 +277,34 @@ void thread_entry(int cid, int nc) /* // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify - verify(ARRAY_SIZE, results_data, verify_data); + verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial size_t i; if (coreid == 0) for (i=0; i < ARRAY_SIZE; i++) results_data[i] = 0; - barrier(); + barrier(nc); */ // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ae_vvadd/ae_vvadd.c b/mt/ae_vvadd/ae_vvadd.c index 0e6541b..b1d336b 100755 --- a/mt/ae_vvadd/ae_vvadd.c +++ b/mt/ae_vvadd/ae_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -144,12 +144,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -157,21 +157,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/af_matmul/Ronald.c b/mt/af_matmul/Ronald.c index 31ea15d..796dbeb 100644 --- a/mt/af_matmul/Ronald.c +++ b/mt/af_matmul/Ronald.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -214,33 +214,33 @@ void thread_entry(int cid, int nc) // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify - verify(ARRAY_SIZE, results_data, verify_data); + verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial size_t i; if (coreid == 0) for (i=0; i < ARRAY_SIZE; i++) results_data[i] = 0; - barrier(); + barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/af_matmul/af_matmul.c b/mt/af_matmul/af_matmul.c index c2d72ab..4de06dd 100755 --- a/mt/af_matmul/af_matmul.c +++ b/mt/af_matmul/af_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -205,33 +205,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/af_matmul/bestattemptthusfar.c b/mt/af_matmul/bestattemptthusfar.c index ab8e7c1..8ca604d 100644 --- a/mt/af_matmul/bestattemptthusfar.c +++ b/mt/af_matmul/bestattemptthusfar.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -180,33 +180,33 @@ void thread_entry(int cid, int nc) // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify - verify(ARRAY_SIZE, results_data, verify_data); + verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial size_t i; if (coreid == 0) for (i=0; i < ARRAY_SIZE; i++) results_data[i] = 0; - barrier(); + barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/af_matmul/bestattemptthusfar2.c b/mt/af_matmul/bestattemptthusfar2.c index a35d302..3264360 100644 --- a/mt/af_matmul/bestattemptthusfar2.c +++ b/mt/af_matmul/bestattemptthusfar2.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -206,33 +206,33 @@ void thread_entry(int cid, int nc) // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify - verify(ARRAY_SIZE, results_data, verify_data); + verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial size_t i; if (coreid == 0) for (i=0; i < ARRAY_SIZE; i++) results_data[i] = 0; - barrier(); + barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/af_matmul/failedattempt.c b/mt/af_matmul/failedattempt.c index acd4a12..f56168e 100644 --- a/mt/af_matmul/failedattempt.c +++ b/mt/af_matmul/failedattempt.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -266,33 +266,33 @@ void thread_entry(int cid, int nc) // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify - verify(ARRAY_SIZE, results_data, verify_data); + verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial size_t i; if (coreid == 0) for (i=0; i < ARRAY_SIZE; i++) results_data[i] = 0; - barrier(); + barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/af_matmul/failedattempt2.c b/mt/af_matmul/failedattempt2.c index 0493998..657c23d 100644 --- a/mt/af_matmul/failedattempt2.c +++ b/mt/af_matmul/failedattempt2.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -197,33 +197,33 @@ void thread_entry(int cid, int nc) // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify - verify(ARRAY_SIZE, results_data, verify_data); + verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial size_t i; if (coreid == 0) for (i=0; i < ARRAY_SIZE; i++) results_data[i] = 0; - barrier(); + barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/af_matmul/keeptrying.c b/mt/af_matmul/keeptrying.c index ebfce6c..501e7fc 100644 --- a/mt/af_matmul/keeptrying.c +++ b/mt/af_matmul/keeptrying.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -219,33 +219,33 @@ void thread_entry(int cid, int nc) // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify - verify(ARRAY_SIZE, results_data, verify_data); + verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial size_t i; if (coreid == 0) for (i=0; i < ARRAY_SIZE; i++) results_data[i] = 0; - barrier(); + barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/af_matmul/keeptrying2.c b/mt/af_matmul/keeptrying2.c index ad2ff41..5251cf0 100644 --- a/mt/af_matmul/keeptrying2.c +++ b/mt/af_matmul/keeptrying2.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -222,33 +222,33 @@ void thread_entry(int cid, int nc) /* // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify - verify(ARRAY_SIZE, results_data, verify_data); + verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial size_t i; if (coreid == 0) for (i=0; i < ARRAY_SIZE; i++) results_data[i] = 0; - barrier(); + barrier(nc); */ // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/af_matmul/keeptrying3.c b/mt/af_matmul/keeptrying3.c index 9c28faa..627a2d6 100644 --- a/mt/af_matmul/keeptrying3.c +++ b/mt/af_matmul/keeptrying3.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -221,33 +221,33 @@ void thread_entry(int cid, int nc) // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify - verify(ARRAY_SIZE, results_data, verify_data); + verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial size_t i; if (coreid == 0) for (i=0; i < ARRAY_SIZE; i++) results_data[i] = 0; - barrier(); + barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/af_matmul/matmul_mi.c b/mt/af_matmul/matmul_mi.c index 74a43f3..3190c8e 100644 --- a/mt/af_matmul/matmul_mi.c +++ b/mt/af_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -218,33 +218,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/af_vvadd/af_vvadd.c b/mt/af_vvadd/af_vvadd.c index 7f7bc7a..5c6b1e9 100755 --- a/mt/af_vvadd/af_vvadd.c +++ b/mt/af_vvadd/af_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -144,12 +144,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -157,21 +157,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ag_matmul/ag_matmul.c b/mt/ag_matmul/ag_matmul.c index 9782d78..bd470eb 100755 --- a/mt/ag_matmul/ag_matmul.c +++ b/mt/ag_matmul/ag_matmul.c @@ -52,7 +52,7 @@ stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_ //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -176,7 +176,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da C[(j+3)*lda + i + 1] += sum; } - barrier(); + barrier(ncores); } } } @@ -197,33 +197,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ag_matmul/matmul_mi.c b/mt/ag_matmul/matmul_mi.c index 9782d78..3352c56 100755 --- a/mt/ag_matmul/matmul_mi.c +++ b/mt/ag_matmul/matmul_mi.c @@ -52,7 +52,7 @@ stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_ //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -176,7 +176,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da C[(j+3)*lda + i + 1] += sum; } - barrier(); + barrier(nc); } } } @@ -197,33 +197,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ag_vvadd/ag_vvadd.c b/mt/ag_vvadd/ag_vvadd.c index 8594c5f..51aa384 100755 --- a/mt/ag_vvadd/ag_vvadd.c +++ b/mt/ag_vvadd/ag_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -137,12 +137,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -150,21 +150,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ai_matmul/ai_matmul.c b/mt/ai_matmul/ai_matmul.c index e74a5d3..f9640f4 100755 --- a/mt/ai_matmul/ai_matmul.c +++ b/mt/ai_matmul/ai_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -118,7 +118,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da //----------MSI-------------- ///* int i,j,k; - barrier(); + barrier(ncores); for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) { for(i = 0; i < lda; i+=4) { data_t Cval0 = 0; @@ -142,7 +142,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da //------------------MI------------------- /* int i,j,k; - barrier(); + barrier(nc); for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) { for(i = 0; i < lda; i+=4) { data_t Cval0 = 0; @@ -189,33 +189,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ai_matmul/matmul_mi.c b/mt/ai_matmul/matmul_mi.c index bacfbfc..154569c 100755 --- a/mt/ai_matmul/matmul_mi.c +++ b/mt/ai_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -118,7 +118,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da //----------MSI-------------- /* int i,j,k; - barrier(); + barrier(nc); for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) { for(i = 0; i < lda; i+=4) { data_t Cval0 = 0; @@ -142,7 +142,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da //------------------MI------------------- int i,j,k; - barrier(); + barrier(nc); for(j = coreid*lda/ncores; j < coreid*lda/ncores + lda/ncores; j++) { for(i = 0; i < lda; i+=4) { data_t Cval0 = 0; @@ -188,33 +188,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ai_vvadd/ai_vvadd.c b/mt/ai_vvadd/ai_vvadd.c index 0319126..64d1774 100755 --- a/mt/ai_vvadd/ai_vvadd.c +++ b/mt/ai_vvadd/ai_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -136,12 +136,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -149,21 +149,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/aj_matmul/aj_matmul.c b/mt/aj_matmul/aj_matmul.c index 2280771..445f924 100755 --- a/mt/aj_matmul/aj_matmul.c +++ b/mt/aj_matmul/aj_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -347,33 +347,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/aj_matmul/matmul_mi.c b/mt/aj_matmul/matmul_mi.c index 2280771..445f924 100644 --- a/mt/aj_matmul/matmul_mi.c +++ b/mt/aj_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -347,33 +347,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/aj_vvadd/aj_vvadd.c b/mt/aj_vvadd/aj_vvadd.c index 55d1dbc..0096209 100755 --- a/mt/aj_vvadd/aj_vvadd.c +++ b/mt/aj_vvadd/aj_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -134,12 +134,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -147,21 +147,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ak_matmul/ak_matmul.c b/mt/ak_matmul/ak_matmul.c index 614a81f..4d803ee 100755 --- a/mt/ak_matmul/ak_matmul.c +++ b/mt/ak_matmul/ak_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -180,33 +180,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ak_matmul/matmulMI.c b/mt/ak_matmul/matmulMI.c index a9068f8..e731501 100755 --- a/mt/ak_matmul/matmulMI.c +++ b/mt/ak_matmul/matmulMI.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -179,33 +179,33 @@ void thread_entry(int cid, int nc) // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify - verify(ARRAY_SIZE, results_data, verify_data); + verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial size_t i; if (coreid == 0) for (i=0; i < ARRAY_SIZE; i++) results_data[i] = 0; - barrier(); + barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ak_matmul/matmul_mi.c b/mt/ak_matmul/matmul_mi.c index 992194d..aa41e32 100755 --- a/mt/ak_matmul/matmul_mi.c +++ b/mt/ak_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -179,33 +179,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ak_vvadd/ak_vvadd.c b/mt/ak_vvadd/ak_vvadd.c index a63bbe7..ee82f19 100755 --- a/mt/ak_vvadd/ak_vvadd.c +++ b/mt/ak_vvadd/ak_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -137,12 +137,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -150,21 +150,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/al_matmul/al_matmul.c b/mt/al_matmul/al_matmul.c index b4d2663..13bc501 100644 --- a/mt/al_matmul/al_matmul.c +++ b/mt/al_matmul/al_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -240,33 +240,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/al_matmul/matmul_mi.c b/mt/al_matmul/matmul_mi.c index 47b0992..c8016db 100644 --- a/mt/al_matmul/matmul_mi.c +++ b/mt/al_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -294,33 +294,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/al_vvadd/al_vvadd.c b/mt/al_vvadd/al_vvadd.c index 2319f5b..fd89916 100755 --- a/mt/al_vvadd/al_vvadd.c +++ b/mt/al_vvadd/al_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -139,12 +139,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -152,21 +152,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/am_matmul/am_matmul.c b/mt/am_matmul/am_matmul.c index 7fe737b..b00d573 100755 --- a/mt/am_matmul/am_matmul.c +++ b/mt/am_matmul/am_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -183,33 +183,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/am_matmul/matmul3.c b/mt/am_matmul/matmul3.c index 9a79baa..429be46 100755 --- a/mt/am_matmul/matmul3.c +++ b/mt/am_matmul/matmul3.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -188,33 +188,33 @@ void thread_entry(int cid, int nc) // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify - verify(ARRAY_SIZE, results_data, verify_data); + verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial size_t i; if (coreid == 0) for (i=0; i < ARRAY_SIZE; i++) results_data[i] = 0; - barrier(); + barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/am_matmul/matmul4.c b/mt/am_matmul/matmul4.c index 05a1aa4..7953d59 100755 --- a/mt/am_matmul/matmul4.c +++ b/mt/am_matmul/matmul4.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -249,33 +249,33 @@ void thread_entry(int cid, int nc) // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify - verify(ARRAY_SIZE, results_data, verify_data); + verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial size_t i; if (coreid == 0) for (i=0; i < ARRAY_SIZE; i++) results_data[i] = 0; - barrier(); + barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/am_matmul/matmul_mi.c b/mt/am_matmul/matmul_mi.c index 841a4b5..0a93ce2 100755 --- a/mt/am_matmul/matmul_mi.c +++ b/mt/am_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -216,33 +216,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/am_matmul/matmul_msi.c b/mt/am_matmul/matmul_msi.c index 0b59f8c..b4e5ad8 100755 --- a/mt/am_matmul/matmul_msi.c +++ b/mt/am_matmul/matmul_msi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -183,33 +183,33 @@ void thread_entry(int cid, int nc) // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify - verify(ARRAY_SIZE, results_data, verify_data); + verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial size_t i; if (coreid == 0) for (i=0; i < ARRAY_SIZE; i++) results_data[i] = 0; - barrier(); + barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/am_vvadd/am_vvadd.c b/mt/am_vvadd/am_vvadd.c index a4681d0..af10ea9 100755 --- a/mt/am_vvadd/am_vvadd.c +++ b/mt/am_vvadd/am_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -135,12 +135,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -148,21 +148,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/an_matmul/an_matmul.c b/mt/an_matmul/an_matmul.c index e7608fe..2150277 100755 --- a/mt/an_matmul/an_matmul.c +++ b/mt/an_matmul/an_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -163,33 +163,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/an_matmul/matmul_mi.c b/mt/an_matmul/matmul_mi.c index e7608fe..2150277 100644 --- a/mt/an_matmul/matmul_mi.c +++ b/mt/an_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -163,33 +163,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/an_vvadd/an_vvadd.c b/mt/an_vvadd/an_vvadd.c index 497b9bb..d5868e4 100755 --- a/mt/an_vvadd/an_vvadd.c +++ b/mt/an_vvadd/an_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -131,12 +131,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -144,21 +144,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ap_matmul/ap_matmul.c b/mt/ap_matmul/ap_matmul.c index ae1c84c..1b31d86 100755 --- a/mt/ap_matmul/ap_matmul.c +++ b/mt/ap_matmul/ap_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -205,33 +205,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ap_matmul/matmul_mi.c b/mt/ap_matmul/matmul_mi.c index ae1c84c..1b31d86 100755 --- a/mt/ap_matmul/matmul_mi.c +++ b/mt/ap_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -205,33 +205,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ap_vvadd/ap_vvadd.c b/mt/ap_vvadd/ap_vvadd.c index fe1440b..aa3b3ad 100755 --- a/mt/ap_vvadd/ap_vvadd.c +++ b/mt/ap_vvadd/ap_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -148,12 +148,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -161,21 +161,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/aq_matmul/aq_matmul.c b/mt/aq_matmul/aq_matmul.c index e7a3c65..a203766 100644 --- a/mt/aq_matmul/aq_matmul.c +++ b/mt/aq_matmul/aq_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -150,33 +150,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/aq_matmul/matmul_mi.c b/mt/aq_matmul/matmul_mi.c index 524b13d..932e6bc 100755 --- a/mt/aq_matmul/matmul_mi.c +++ b/mt/aq_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -150,33 +150,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/aq_vvadd/aq_vvadd.c b/mt/aq_vvadd/aq_vvadd.c index af88a0b..375569b 100755 --- a/mt/aq_vvadd/aq_vvadd.c +++ b/mt/aq_vvadd/aq_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -157,12 +157,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -170,21 +170,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ar_matmul/ar_matmul.c b/mt/ar_matmul/ar_matmul.c index 22ca10b..9286adc 100755 --- a/mt/ar_matmul/ar_matmul.c +++ b/mt/ar_matmul/ar_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -160,33 +160,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ar_matmul/matmul_mi.c b/mt/ar_matmul/matmul_mi.c deleted file mode 120000 index bd5f2b1..0000000 --- a/mt/ar_matmul/matmul_mi.c +++ /dev/null @@ -1 +0,0 @@ -matmul.c
\ No newline at end of file diff --git a/mt/ar_vvadd/ar_vvadd.c b/mt/ar_vvadd/ar_vvadd.c index eeb578c..18ad033 100755 --- a/mt/ar_vvadd/ar_vvadd.c +++ b/mt/ar_vvadd/ar_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -136,12 +136,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -149,21 +149,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/as_matmul/as_matmul.c b/mt/as_matmul/as_matmul.c index d98da8e..15855e0 100755 --- a/mt/as_matmul/as_matmul.c +++ b/mt/as_matmul/as_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -119,8 +119,8 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da int i, j, k, n, m; - //matmul_naive(32, input1_data, input2_data, results_data); barrier(): 957424 cycles, 29.2 cycles/iter, 3.6 CPI - //matmul(32, input1_data, input2_data, results_data); barrier(): 340408 cycles, 10.3 cycles/iter, 1.8 CPI + //matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 957424 cycles, 29.2 cycles/iter, 3.6 CPI + //matmul(32, input1_data, input2_data, results_data); barrier(nc): 340408 cycles, 10.3 cycles/iter, 1.8 CPI for (n = 0; n < lda; n += 1) { for (m = 0; m < lda; m += 1) { @@ -128,7 +128,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da bTranspose[lda*n + m] = B[lda*m + n]; } } - barrier(); + barrier(ncores); for ( j = coreid; j < lda; j += 2*ncores ) { for ( i = 0; i < lda; i += 1 ){ @@ -138,21 +138,21 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da c1 += A[j * lda + k] * bTranspose[i*lda + k]; c2 += A[(j+2) * lda + k] * bTranspose[i*lda + k]; - //barrier(); + //barrier(nc); } C[i + j * lda] = c1; C[i + (j+2) * lda] = c2; - barrier(); + barrier(ncores); } - //barrier(); + //barrier(nc); } - //matmul_naive(32, input1_data, input2_data, results_data); barrier(): 983609 cycles, 30.0 cycles/iter, 3.7 CPI - //matmul(32, input1_data, input2_data, results_data); barrier(): 389942 cycles, 11.9 cycles/iter, 2.5 CPI + //matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 983609 cycles, 30.0 cycles/iter, 3.7 CPI + //matmul(32, input1_data, input2_data, results_data); barrier(nc): 389942 cycles, 11.9 cycles/iter, 2.5 CPI /* for ( j = coreid; j < lda; j += 2*ncores ) { @@ -163,34 +163,34 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da c1 += A[j * lda + k] * B[k*lda + i]; c2 += A[(j+2) * lda + k] * B[k*lda + i]; - //barrier(); + //barrier(nc); } C[i + j * lda] = c1; C[i + (j+2) * lda] = c2; - barrier(); + barrier(nc); } - //barrier(); + //barrier(nc); } */ - // matmul_naive(32, input1_data, input2_data, results_data); barrier(): 973781 cycles, 29.7 cycles/iter, 3.7 CPI - // matmul(32, input1_data, input2_data, results_data); barrier(): 461066 cycles, 14.0 cycles/iter, 3.5 CPI + // matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 973781 cycles, 29.7 cycles/iter, 3.7 CPI + // matmul(32, input1_data, input2_data, results_data); barrier(nc): 461066 cycles, 14.0 cycles/iter, 3.5 CPI // for ( k = 0; k < lda; k += 1 ) { // for ( j = coreid; j < lda; j += 2*ncores ) { // for ( i = 0; i < lda; i += 1 ){ // C[i + j * lda] += A[j * lda + k] * B[k*lda + i]; // C[i + (j+2) * lda] += A[(j+2) * lda + k] * B[k*lda + i]; - // //barrier(); + // //barrier(nc); // } - // barrier(); + // barrier(nc); // } - // //barrier(); + // //barrier(nc); // } - // matmul_naive(32, input1_data, input2_data, results_data); barrier(): 965136 cycles, 29.4 cycles/iter, 3.7 CPI - // matmul(32, input1_data, input2_data, results_data); barrier(): 513779 cycles, 15.6 cycles/iter, 3.2 CPI + // matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 965136 cycles, 29.4 cycles/iter, 3.7 CPI + // matmul(32, input1_data, input2_data, results_data); barrier(nc): 513779 cycles, 15.6 cycles/iter, 3.2 CPI // for ( j = coreid; j < lda; j += 2*ncores ) { // for ( i = 0; i < lda; i += 1 ){ @@ -198,16 +198,16 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da // C[i + j * lda] += A[j * lda + k] * B[k*lda + i]; // C[i + (j+2) * lda] += A[(j+2) * lda + k] * B[k*lda + i]; - // //barrier(); + // //barrier(nc); // } - // barrier(); + // barrier(nc); // } - // //barrier(); + // //barrier(nc); //} - // matmul_naive(32, input1_data, input2_data, results_data); barrier(): 937892 cycles, 28.6 cycles/iter, 3.6 CPI - // matmul(32, input1_data, input2_data, results_data); barrier(): 576478 cycles, 17.5 cycles/iter, 3.5 CPI + // matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 937892 cycles, 28.6 cycles/iter, 3.6 CPI + // matmul(32, input1_data, input2_data, results_data); barrier(nc): 576478 cycles, 17.5 cycles/iter, 3.5 CPI // for ( i = 0; i < lda; i += 1 ){ // for ( j = coreid; j < lda; j += 2*ncores ) { @@ -215,11 +215,11 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da // C[i + j * lda] += A[j * lda + k] * B[k*lda + i]; // C[i + (j+2) * lda] += A[(j+2) * lda + k] * B[k*lda + i]; - // //barrier(); + // //barrier(nc); // } - // barrier(); + // barrier(nc); // } - // //barrier(); + // //barrier(nc); // } //for ( i = coreid; i < lda; i += ncores ){ @@ -227,7 +227,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da // for ( k = coreid; k < lda; k += ncores ) { // C[i + j*lda] += A[j*lda + k] * B[k*lda + i]; // } - //barrier(); + //barrier(nc); // } //} } @@ -248,33 +248,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/as_matmul/matmul_mi.c b/mt/as_matmul/matmul_mi.c index 130fdb7..88534b8 100644 --- a/mt/as_matmul/matmul_mi.c +++ b/mt/as_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -118,8 +118,8 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da int i, j, k, n, m, c1, c2; - //matmul_naive(32, input1_data, input2_data, results_data); barrier(): 952596 cycles, 29.0 cycles/iter, 3.6 CPI - //matmul(32, input1_data, input2_data, results_data); barrier(): 570135 cycles, 17.3 cycles/iter, 3.4 CPI + //matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 952596 cycles, 29.0 cycles/iter, 3.6 CPI + //matmul(32, input1_data, input2_data, results_data); barrier(nc): 570135 cycles, 17.3 cycles/iter, 3.4 CPI for ( j = coreid; j < lda; j += 2*ncores ) { for ( i = 0; i < lda; i += 1 ){ @@ -129,14 +129,14 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da c1 += A[j * lda + k] * B[k*lda + i]; c2 += A[(j+2) * lda + k] * B[k*lda + i]; - //barrier(); + //barrier(nc); } C[i + j * lda] = c1; C[i + (j+2) * lda] = c2; - barrier(); + barrier(nc); } - //barrier(); + //barrier(nc); } } @@ -157,33 +157,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/as_vvadd/as_vvadd.c b/mt/as_vvadd/as_vvadd.c index dd1f94b..3034ae3 100755 --- a/mt/as_vvadd/as_vvadd.c +++ b/mt/as_vvadd/as_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -107,9 +107,9 @@ void __attribute__((noinline)) vvadd_opt(size_t n, data_t* __restrict__ x, const for (i = coreid; i < n; i += 2*ncores) { x[i] = x[i] + y[i]; x[i+2] = x[i+2] + y[i+2]; - //barrier(); + //barrier(nc); } - barrier(); //adding a barrier so there aren't any OOB errors due to faster threads + barrier(ncores); //adding a barrier so there aren't any OOB errors due to faster threads } @@ -140,12 +140,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -153,21 +153,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/at_matmul/at_matmul.c b/mt/at_matmul/at_matmul.c index d69f8fe..ccda17a 100755 --- a/mt/at_matmul/at_matmul.c +++ b/mt/at_matmul/at_matmul.c @@ -52,7 +52,7 @@ stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_ //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -284,33 +284,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/at_matmul/matmul_mi.c b/mt/at_matmul/matmul_mi.c index 0c5115f..640c32d 100644 --- a/mt/at_matmul/matmul_mi.c +++ b/mt/at_matmul/matmul_mi.c @@ -52,7 +52,7 @@ stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_ //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -284,33 +284,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/at_vvadd/at_vvadd.c b/mt/at_vvadd/at_vvadd.c index 55fb8de..b271d67 100755 --- a/mt/at_vvadd/at_vvadd.c +++ b/mt/at_vvadd/at_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -145,12 +145,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -158,21 +158,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/av_matmul/av_matmul.c b/mt/av_matmul/av_matmul.c index 8a28949..ad5ccdb 100644 --- a/mt/av_matmul/av_matmul.c +++ b/mt/av_matmul/av_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -2869,33 +2869,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/av_matmul/matmul_mi.c b/mt/av_matmul/matmul_mi.c index 4cdac76..b0ce08a 100644 --- a/mt/av_matmul/matmul_mi.c +++ b/mt/av_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -2176,33 +2176,33 @@ void thread_entry(int cid, int nc) /* // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify - verify(ARRAY_SIZE, results_data, verify_data); + verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial size_t i; if (coreid == 0) for (i=0; i < ARRAY_SIZE; i++) results_data[i] = 0; - barrier(); + barrier(nc); */ // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/av_vvadd/av_vvadd.c b/mt/av_vvadd/av_vvadd.c index 2f213d8..11202c7 100644 --- a/mt/av_vvadd/av_vvadd.c +++ b/mt/av_vvadd/av_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -162,12 +162,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -175,21 +175,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ay_matmul/ay_matmul.c b/mt/ay_matmul/ay_matmul.c index 2a1e04c..857a78f 100644 --- a/mt/ay_matmul/ay_matmul.c +++ b/mt/ay_matmul/ay_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -176,33 +176,33 @@ void thread_entry(int cid, int nc) //// Execute the provided, naive matmul
- //barrier();
- //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ //barrier(nc);
+ //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//// verify
- //verify(ARRAY_SIZE, results_data, verify_data);
+ //verifyMT(ARRAY_SIZE, results_data, verify_data);
//
//// clear results from the first trial
//size_t i;
//if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
- //barrier();
+ //barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ay_matmul/matmul_mi.c b/mt/ay_matmul/matmul_mi.c index d58c5b8..1a42e83 100644 --- a/mt/ay_matmul/matmul_mi.c +++ b/mt/ay_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -224,33 +224,33 @@ void thread_entry(int cid, int nc) //// Execute the provided, naive matmul
- //barrier();
- //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ //barrier(nc);
+ //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//// verify
- //verify(ARRAY_SIZE, results_data, verify_data);
+ //verifyMT(ARRAY_SIZE, results_data, verify_data);
//
//// clear results from the first trial
//size_t i;
//if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
- //barrier();
+ //barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/ay_vvadd/ay_vvadd.c b/mt/ay_vvadd/ay_vvadd.c index 0455a41..2bf8da9 100755 --- a/mt/ay_vvadd/ay_vvadd.c +++ b/mt/ay_vvadd/ay_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -141,12 +141,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -154,21 +154,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/az_matmul/az_matmul.c b/mt/az_matmul/az_matmul.c index 56f02d3..5bfd15f 100755 --- a/mt/az_matmul/az_matmul.c +++ b/mt/az_matmul/az_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -383,33 +383,33 @@ void thread_entry(int cid, int nc) //// Execute the provided, naive matmul - //barrier(); - //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + //barrier(nc); + //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // //// verify - //verify(ARRAY_SIZE, results_data, verify_data); + //verifyMT(ARRAY_SIZE, results_data, verify_data); // //// clear results from the first trial //size_t i; //if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; - //barrier(); + //barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/az_matmul/matmul_mi.c b/mt/az_matmul/matmul_mi.c index 56f02d3..5bfd15f 100755 --- a/mt/az_matmul/matmul_mi.c +++ b/mt/az_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -383,33 +383,33 @@ void thread_entry(int cid, int nc) //// Execute the provided, naive matmul - //barrier(); - //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + //barrier(nc); + //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // //// verify - //verify(ARRAY_SIZE, results_data, verify_data); + //verifyMT(ARRAY_SIZE, results_data, verify_data); // //// clear results from the first trial //size_t i; //if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; - //barrier(); + //barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/az_vvadd/az_vvadd.c b/mt/az_vvadd/az_vvadd.c index 7b40fb1..cf32ac7 100755 --- a/mt/az_vvadd/az_vvadd.c +++ b/mt/az_vvadd/az_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -140,12 +140,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -153,21 +153,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ba_matmul/ba_matmul.c b/mt/ba_matmul/ba_matmul.c index da9a764..3f712c1 100755 --- a/mt/ba_matmul/ba_matmul.c +++ b/mt/ba_matmul/ba_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -238,33 +238,33 @@ void thread_entry(int cid, int nc) // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ba_matmul/matmul_mi.c b/mt/ba_matmul/matmul_mi.c index da9a764..3f712c1 100755 --- a/mt/ba_matmul/matmul_mi.c +++ b/mt/ba_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -238,33 +238,33 @@ void thread_entry(int cid, int nc) // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/ba_vvadd/ba_vvadd.c b/mt/ba_vvadd/ba_vvadd.c index 30703df..90aec9d 100755 --- a/mt/ba_vvadd/ba_vvadd.c +++ b/mt/ba_vvadd/ba_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -134,12 +134,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -147,21 +147,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bb_matmul/bb_matmul.c b/mt/bb_matmul/bb_matmul.c index 067e3e3..d9b2add 100755 --- a/mt/bb_matmul/bb_matmul.c +++ b/mt/bb_matmul/bb_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -184,7 +184,7 @@ void __attribute__((noinline)) matmul_mi(const int lda, const data_t A[], const } } - barrier(); + barrier(ncores); curhalf++; curhalf %= ncores; @@ -240,33 +240,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bb_matmul/matmul_mi.c b/mt/bb_matmul/matmul_mi.c index 919e2ce..346f178 100755 --- a/mt/bb_matmul/matmul_mi.c +++ b/mt/bb_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -184,7 +184,7 @@ void __attribute__((noinline)) matmul_mi(const int lda, const data_t A[], const } } - barrier(); + barrier(nc); curhalf++; curhalf %= ncores; @@ -240,33 +240,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bb_vvadd/bb_vvadd.c b/mt/bb_vvadd/bb_vvadd.c index 327da10..664ce35 100755 --- a/mt/bb_vvadd/bb_vvadd.c +++ b/mt/bb_vvadd/bb_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -133,12 +133,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -146,21 +146,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bc_matmul/bc_matmul.c b/mt/bc_matmul/bc_matmul.c index 088f38f..bac98cb 100755 --- a/mt/bc_matmul/bc_matmul.c +++ b/mt/bc_matmul/bc_matmul.c @@ -60,7 +60,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -72,7 +72,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -254,33 +254,33 @@ void thread_entry(int cid, int nc) // /* // Execute the provided, naive matmul */ -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bc_matmul/matmul_mi.c b/mt/bc_matmul/matmul_mi.c index 86bd562..35abdc8 100755 --- a/mt/bc_matmul/matmul_mi.c +++ b/mt/bc_matmul/matmul_mi.c @@ -60,7 +60,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -72,7 +72,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -172,7 +172,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da } } } - /* barrier(); */ + /* barrier(nc); */ /* kk_start= (coreid == 1 ? 0 : LDA/2); */ /* kk_end = (coreid == 1 ? LDA/2 : LDA); */ @@ -216,7 +216,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da } - //barrier(); + //barrier(nc); for (jj = start; jj < end; jj += BLOCK_J) { int kk_start= (coreid != 0 ? 0 : LDA/2), kk_end = (coreid != 0 ? LDA/2 : LDA); for (kk = kk_start; kk < kk_end; kk += BLOCK_K) { @@ -285,33 +285,33 @@ void thread_entry(int cid, int nc) // /* // Execute the provided, naive matmul */ -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bc_vvadd/bc_vvadd.c b/mt/bc_vvadd/bc_vvadd.c index 50673ed..c7af6b8 100755 --- a/mt/bc_vvadd/bc_vvadd.c +++ b/mt/bc_vvadd/bc_vvadd.c @@ -53,7 +53,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -65,7 +65,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -138,12 +138,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -151,21 +151,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/be_matmul/be_matmul.c b/mt/be_matmul/be_matmul.c index da4b531..e8bff41 100755 --- a/mt/be_matmul/be_matmul.c +++ b/mt/be_matmul/be_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -227,7 +227,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da pos_B += (lda*step_k) ; pos_A += step_k; } - //barrier(); + //barrier(nc); C[(pos_C + 0)] = temp10; C[(pos_C + 1)] = temp11; @@ -237,7 +237,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da C[(pos_C + 5)] = temp15; C[(pos_C + 6)] = temp16; C[(pos_C + 7)] = temp17; - //barrier(); + //barrier(nc); pos_C = i + j*lda; //pos_C -= lda; @@ -249,15 +249,15 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da C[(pos_C + 5)] = temp05; C[(pos_C + 6)] = temp06; C[(pos_C + 7)] = temp07; - //barrier(); + //barrier(nc); //pos_C += step_j * lda; } - //barrier(); + //barrier(nc); } - //barrier(); + //barrier(nc); } - //barrier(); + //barrier(nc); } } @@ -277,35 +277,35 @@ void thread_entry(int cid, int nc) /* // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify - verify(ARRAY_SIZE, results_data, verify_data); + verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial size_t i; if (coreid == 0) for (i=0; i < ARRAY_SIZE; i++) results_data[i] = 0; - barrier(); + barrier(nc); */ // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); //printf("input1_data"); diff --git a/mt/be_matmul/matmul_mi.c b/mt/be_matmul/matmul_mi.c index da4b531..e8bff41 100755 --- a/mt/be_matmul/matmul_mi.c +++ b/mt/be_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -227,7 +227,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da pos_B += (lda*step_k) ; pos_A += step_k; } - //barrier(); + //barrier(nc); C[(pos_C + 0)] = temp10; C[(pos_C + 1)] = temp11; @@ -237,7 +237,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da C[(pos_C + 5)] = temp15; C[(pos_C + 6)] = temp16; C[(pos_C + 7)] = temp17; - //barrier(); + //barrier(nc); pos_C = i + j*lda; //pos_C -= lda; @@ -249,15 +249,15 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da C[(pos_C + 5)] = temp05; C[(pos_C + 6)] = temp06; C[(pos_C + 7)] = temp07; - //barrier(); + //barrier(nc); //pos_C += step_j * lda; } - //barrier(); + //barrier(nc); } - //barrier(); + //barrier(nc); } - //barrier(); + //barrier(nc); } } @@ -277,35 +277,35 @@ void thread_entry(int cid, int nc) /* // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify - verify(ARRAY_SIZE, results_data, verify_data); + verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial size_t i; if (coreid == 0) for (i=0; i < ARRAY_SIZE; i++) results_data[i] = 0; - barrier(); + barrier(nc); */ // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); //printf("input1_data"); diff --git a/mt/be_vvadd/be_vvadd.c b/mt/be_vvadd/be_vvadd.c index 1090c5a..b1bf72b 100755 --- a/mt/be_vvadd/be_vvadd.c +++ b/mt/be_vvadd/be_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -137,12 +137,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -150,21 +150,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bf_matmul/bf_matmul.c b/mt/bf_matmul/bf_matmul.c index 0bab50c..24fa7e6 100644 --- a/mt/bf_matmul/bf_matmul.c +++ b/mt/bf_matmul/bf_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -246,33 +246,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bf_matmul/matmul_mi.c b/mt/bf_matmul/matmul_mi.c index 1eb4145..a063df0 100755 --- a/mt/bf_matmul/matmul_mi.c +++ b/mt/bf_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -359,33 +359,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bf_vvadd/bf_vvadd.c b/mt/bf_vvadd/bf_vvadd.c index 1c64793..f783ee1 100755 --- a/mt/bf_vvadd/bf_vvadd.c +++ b/mt/bf_vvadd/bf_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -146,12 +146,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -159,21 +159,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bh_matmul/bh_matmul.c b/mt/bh_matmul/bh_matmul.c index 990c935..a496bba 100755 --- a/mt/bh_matmul/bh_matmul.c +++ b/mt/bh_matmul/bh_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -215,33 +215,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bh_matmul/matmul_mi.c b/mt/bh_matmul/matmul_mi.c index 990c935..a496bba 100755 --- a/mt/bh_matmul/matmul_mi.c +++ b/mt/bh_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -215,33 +215,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bh_vvadd/bh_vvadd.c b/mt/bh_vvadd/bh_vvadd.c index 216f9ad..c4d06d3 100755 --- a/mt/bh_vvadd/bh_vvadd.c +++ b/mt/bh_vvadd/bh_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -153,12 +153,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -166,21 +166,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bj_matmul/bj_matmul.c b/mt/bj_matmul/bj_matmul.c index 5766e91..1642d10 100644 --- a/mt/bj_matmul/bj_matmul.c +++ b/mt/bj_matmul/bj_matmul.c @@ -52,7 +52,7 @@ stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_ //--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -215,33 +215,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bj_matmul/matmul_mi.c b/mt/bj_matmul/matmul_mi.c index 5766e91..1642d10 100644 --- a/mt/bj_matmul/matmul_mi.c +++ b/mt/bj_matmul/matmul_mi.c @@ -52,7 +52,7 @@ stringify(code), _c, _c/DIM_SIZE/DIM_SIZE/DIM_SIZE, 10*_c/DIM_SIZE/DIM_SIZE/DIM_ //--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -215,33 +215,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bj_vvadd/bj_vvadd.c b/mt/bj_vvadd/bj_vvadd.c index 3e2ed6e..4f69c35 100755 --- a/mt/bj_vvadd/bj_vvadd.c +++ b/mt/bj_vvadd/bj_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -135,12 +135,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -148,21 +148,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bk_matmul/bk_matmul.c b/mt/bk_matmul/bk_matmul.c index eddbcfb..9fa22b1 100755 --- a/mt/bk_matmul/bk_matmul.c +++ b/mt/bk_matmul/bk_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -293,33 +293,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bk_matmul/matmul_mi.c b/mt/bk_matmul/matmul_mi.c index b1c0a39..b45071b 100755 --- a/mt/bk_matmul/matmul_mi.c +++ b/mt/bk_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -337,33 +337,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bk_matmul/matmul_msi.c b/mt/bk_matmul/matmul_msi.c index 5890d2f..253ea12 100755 --- a/mt/bk_matmul/matmul_msi.c +++ b/mt/bk_matmul/matmul_msi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -293,33 +293,33 @@ void thread_entry(int cid, int nc) // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify - verify(ARRAY_SIZE, results_data, verify_data); + verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial size_t i; if (coreid == 0) for (i=0; i < ARRAY_SIZE; i++) results_data[i] = 0; - barrier(); + barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bk_vvadd/bk_vvadd.c b/mt/bk_vvadd/bk_vvadd.c index cf95374..20fe4af 100755 --- a/mt/bk_vvadd/bk_vvadd.c +++ b/mt/bk_vvadd/bk_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -144,12 +144,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -157,21 +157,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bm_matmul/bm_matmul.c b/mt/bm_matmul/bm_matmul.c index 3f267dc..9897d99 100644 --- a/mt/bm_matmul/bm_matmul.c +++ b/mt/bm_matmul/bm_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -324,33 +324,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bm_matmul/matmul_mi.c b/mt/bm_matmul/matmul_mi.c index 2471a4a..3bdd096 100644 --- a/mt/bm_matmul/matmul_mi.c +++ b/mt/bm_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -152,7 +152,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da data_t temp2_3=0; data_t temp3_3=0; data_t tempB_3=0; - barrier(); + barrier(nc); if (coreid!=ncores-1){ for (i=space*coreid;i<max/4*4;i+=4) { @@ -315,33 +315,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bm_vvadd/bm_vvadd.c b/mt/bm_vvadd/bm_vvadd.c index d60f4ec..914c711 100755 --- a/mt/bm_vvadd/bm_vvadd.c +++ b/mt/bm_vvadd/bm_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -160,12 +160,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -173,21 +173,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bn_matmul/bn_matmul.c b/mt/bn_matmul/bn_matmul.c index eddbcfb..9fa22b1 100755 --- a/mt/bn_matmul/bn_matmul.c +++ b/mt/bn_matmul/bn_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -293,33 +293,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bn_matmul/matmul_mi.c b/mt/bn_matmul/matmul_mi.c index b1c0a39..b45071b 100644 --- a/mt/bn_matmul/matmul_mi.c +++ b/mt/bn_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -337,33 +337,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bn_vvadd/bn_vvadd.c b/mt/bn_vvadd/bn_vvadd.c index 143d437..6c1459e 100755 --- a/mt/bn_vvadd/bn_vvadd.c +++ b/mt/bn_vvadd/bn_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -137,12 +137,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd
- barrier();
- stats(vvadd(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc));
// verify
- verify(DATA_SIZE, results_data, verify_data);
+ verifyMT(DATA_SIZE, results_data, verify_data);
// reset results from the first trial
if (coreid == 0)
@@ -150,21 +150,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++)
results_data[i] = input1_data[i];
}
- barrier();
+ barrier(nc);
// Execute your faster vvadd
- barrier();
- stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier());
+ barrier(nc);
+ stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc));
#ifdef DEBUG
- printArray("results: ", DATA_SIZE, results_data);
- printArray("verify : ", DATA_SIZE, verify_data);
+ printArrayMT("results: ", DATA_SIZE, results_data);
+ printArrayMT("verify : ", DATA_SIZE, verify_data);
#endif
// verify
- verify(DATA_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(DATA_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}
diff --git a/mt/bo_matmul/bo_matmul.c b/mt/bo_matmul/bo_matmul.c index de964db..dd06110 100644 --- a/mt/bo_matmul/bo_matmul.c +++ b/mt/bo_matmul/bo_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -308,33 +308,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// //verify(ARRAY_SIZE, results_data, verify_data); +// //verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bo_matmul/matmul_mi.c b/mt/bo_matmul/matmul_mi.c index ccd3987..3ccdb62 100644 --- a/mt/bo_matmul/matmul_mi.c +++ b/mt/bo_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -308,33 +308,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// //verify(ARRAY_SIZE, results_data, verify_data); +// //verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bo_vvadd/bo_vvadd.c b/mt/bo_vvadd/bo_vvadd.c index 74b0351..efdee6c 100755 --- a/mt/bo_vvadd/bo_vvadd.c +++ b/mt/bo_vvadd/bo_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -138,12 +138,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -151,21 +151,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bp_matmul/bp_matmul.c b/mt/bp_matmul/bp_matmul.c index de964db..dd06110 100755 --- a/mt/bp_matmul/bp_matmul.c +++ b/mt/bp_matmul/bp_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -308,33 +308,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// //verify(ARRAY_SIZE, results_data, verify_data); +// //verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bp_matmul/matmul_mi.c b/mt/bp_matmul/matmul_mi.c index de964db..dd06110 100755 --- a/mt/bp_matmul/matmul_mi.c +++ b/mt/bp_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -308,33 +308,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// //stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// //verify(ARRAY_SIZE, results_data, verify_data); +// //verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bp_vvadd/bp_vvadd.c b/mt/bp_vvadd/bp_vvadd.c index 5d073cf..41d56ab 100755 --- a/mt/bp_vvadd/bp_vvadd.c +++ b/mt/bp_vvadd/bp_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -144,12 +144,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -157,21 +157,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/br_matmul/br_matmul.c b/mt/br_matmul/br_matmul.c index 5ca1dbe..f831ac2 100755 --- a/mt/br_matmul/br_matmul.c +++ b/mt/br_matmul/br_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -138,7 +138,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da tB[(j + 1)*lda + i + 1] = B[(i + 1)*lda + j + 1]; } } - barrier(); + barrier(ncores); // compute C[j*n + i] += A[j*n + k] + Btranspose[i*n + k] for ( j2 = 0; j2 < lda; j2 += jBLOCK ) @@ -250,33 +250,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/br_matmul/matmul_mi.c b/mt/br_matmul/matmul_mi.c index 5ca1dbe..14a0705 100755 --- a/mt/br_matmul/matmul_mi.c +++ b/mt/br_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -138,7 +138,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da tB[(j + 1)*lda + i + 1] = B[(i + 1)*lda + j + 1]; } } - barrier(); + barrier(nc); // compute C[j*n + i] += A[j*n + k] + Btranspose[i*n + k] for ( j2 = 0; j2 < lda; j2 += jBLOCK ) @@ -250,33 +250,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/br_vvadd/br_vvadd.c b/mt/br_vvadd/br_vvadd.c index b27ed64..ce0d6bb 100755 --- a/mt/br_vvadd/br_vvadd.c +++ b/mt/br_vvadd/br_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -140,12 +140,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -153,21 +153,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bs_matmul/bs_matmul.c b/mt/bs_matmul/bs_matmul.c index f382a42..336c083 100755 --- a/mt/bs_matmul/bs_matmul.c +++ b/mt/bs_matmul/bs_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -151,33 +151,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bs_matmul/matmul_mi.c b/mt/bs_matmul/matmul_mi.c index d1500d2..ec0d89b 100644 --- a/mt/bs_matmul/matmul_mi.c +++ b/mt/bs_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -157,33 +157,33 @@ void thread_entry(int cid, int nc) // // Execute the provided, naive matmul -// barrier(); -// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); +// barrier(nc); +// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // // // // verify -// verify(ARRAY_SIZE, results_data, verify_data); +// verifyMT(ARRAY_SIZE, results_data, verify_data); // // // clear results from the first trial // size_t i; // if (coreid == 0) // for (i=0; i < ARRAY_SIZE; i++) // results_data[i] = 0; -// barrier(); +// barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bs_vvadd/bs_vvadd.c b/mt/bs_vvadd/bs_vvadd.c index 01d708b..af126f0 100755 --- a/mt/bs_vvadd/bs_vvadd.c +++ b/mt/bs_vvadd/bs_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -145,12 +145,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -158,21 +158,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/bt_matmul/bt_matmul.c b/mt/bt_matmul/bt_matmul.c index 0215491..4cf652d 100755 --- a/mt/bt_matmul/bt_matmul.c +++ b/mt/bt_matmul/bt_matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -209,7 +209,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da pos_B += (lda*step_k) ; pos_A += step_k; } - //barrier(); + //barrier(nc); C[(pos_C + 0)] = temp10; C[(pos_C + 1)] = temp11; @@ -219,7 +219,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da C[(pos_C + 5)] = temp15; C[(pos_C + 6)] = temp16; C[(pos_C + 7)] = temp17; - //barrier(); + //barrier(nc); pos_C = i + j*lda; //pos_C -= lda; @@ -231,15 +231,15 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da C[(pos_C + 5)] = temp05; C[(pos_C + 6)] = temp06; C[(pos_C + 7)] = temp07; - //barrier(); + //barrier(nc); //pos_C += step_j * lda; } - //barrier(); + //barrier(nc); } - //barrier(); + //barrier(nc); } - //barrier(); + //barrier(nc); } } @@ -259,35 +259,35 @@ void thread_entry(int cid, int nc) /* // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify - verify(ARRAY_SIZE, results_data, verify_data); + verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial size_t i; if (coreid == 0) for (i=0; i < ARRAY_SIZE; i++) results_data[i] = 0; - barrier(); + barrier(nc); */ // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); //printf("input1_data"); diff --git a/mt/bt_matmul/matmul_mi.c b/mt/bt_matmul/matmul_mi.c index dc9ae1b..181314e 100755 --- a/mt/bt_matmul/matmul_mi.c +++ b/mt/bt_matmul/matmul_mi.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -210,7 +210,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da pos_B += (lda*step_k) ; pos_A += step_k; } - //barrier(); + //barrier(nc); C[(pos_C + 0)] = temp10; C[(pos_C + 1)] = temp11; @@ -220,7 +220,7 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da C[(pos_C + 5)] = temp15; C[(pos_C + 6)] = temp16; C[(pos_C + 7)] = temp17; - //barrier(); + //barrier(nc); pos_C = i + j*lda; //pos_C -= lda; @@ -232,15 +232,15 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da C[(pos_C + 5)] = temp05; C[(pos_C + 6)] = temp06; C[(pos_C + 7)] = temp07; - //barrier(); + //barrier(nc); //pos_C += step_j * lda; } - //barrier(); + //barrier(nc); } - //barrier(); + //barrier(nc); } - //barrier(); + //barrier(nc); } } @@ -260,35 +260,35 @@ void thread_entry(int cid, int nc) /* // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify - verify(ARRAY_SIZE, results_data, verify_data); + verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial size_t i; if (coreid == 0) for (i=0; i < ARRAY_SIZE; i++) results_data[i] = 0; - barrier(); + barrier(nc); */ // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); //printf("input1_data"); diff --git a/mt/bt_vvadd/bt_vvadd.c b/mt/bt_vvadd/bt_vvadd.c index d2a01c4..7af3562 100755 --- a/mt/bt_vvadd/bt_vvadd.c +++ b/mt/bt_vvadd/bt_vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -139,12 +139,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -152,21 +152,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/matmul/matmul.c b/mt/matmul/matmul.c index 93f8ea9..b009d26 100755 --- a/mt/matmul/matmul.c +++ b/mt/matmul/matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -134,33 +134,33 @@ void thread_entry(int cid, int nc) // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify - verify(ARRAY_SIZE, results_data, verify_data); + verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial size_t i; if (coreid == 0) for (i=0; i < ARRAY_SIZE; i++) results_data[i] = 0; - barrier(); + barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/mt-matmul/mt-matmul.c b/mt/mt-matmul/mt-matmul.c index 93f8ea9..b009d26 100644 --- a/mt/mt-matmul/mt-matmul.c +++ b/mt/mt-matmul/mt-matmul.c @@ -52,7 +52,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -134,33 +134,33 @@ void thread_entry(int cid, int nc) // Execute the provided, naive matmul - barrier(); - stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); // verify - verify(ARRAY_SIZE, results_data, verify_data); + verifyMT(ARRAY_SIZE, results_data, verify_data); // clear results from the first trial size_t i; if (coreid == 0) for (i=0; i < ARRAY_SIZE; i++) results_data[i] = 0; - barrier(); + barrier(nc); // Execute your faster matmul - barrier(); - stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier()); + barrier(nc); + stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc)); #ifdef DEBUG - printArray("results:", ARRAY_SIZE, results_data); - printArray("verify :", ARRAY_SIZE, verify_data); + printArrayMT("results:", ARRAY_SIZE, results_data); + printArrayMT("verify :", ARRAY_SIZE, verify_data); #endif // verify - verify(ARRAY_SIZE, results_data, verify_data); - barrier(); + verifyMT(ARRAY_SIZE, results_data, verify_data); + barrier(nc); exit(0); } diff --git a/mt/mt-vvadd/mt-vvadd.c b/mt/mt-vvadd/mt-vvadd.c index 497b9bb..d5868e4 100644 --- a/mt/mt-vvadd/mt-vvadd.c +++ b/mt/mt-vvadd/mt-vvadd.c @@ -51,7 +51,7 @@ unsigned long ncores; //-------------------------------------------------------------------------- // Helper functions -void printArray( char name[], int n, data_t arr[] ) +void printArrayMT( char name[], int n, data_t arr[] ) { int i; if (coreid != 0) @@ -63,7 +63,7 @@ void printArray( char name[], int n, data_t arr[] ) printf( "\n" ); } -void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct) +void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct) { if (coreid != 0) return; @@ -131,12 +131,12 @@ void thread_entry(int cid, int nc) // Execute the provided, terrible vvadd - barrier(); - stats(vvadd(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd(DATA_SIZE, results_data, input2_data); barrier(nc)); // verify - verify(DATA_SIZE, results_data, verify_data); + verifyMT(DATA_SIZE, results_data, verify_data); // reset results from the first trial if (coreid == 0) @@ -144,21 +144,21 @@ void thread_entry(int cid, int nc) for (i=0; i < DATA_SIZE; i++) results_data[i] = input1_data[i]; } - barrier(); + barrier(nc); // Execute your faster vvadd - barrier(); - stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier()); + barrier(nc); + stats(vvadd_opt(DATA_SIZE, results_data, input2_data); barrier(nc)); #ifdef DEBUG - printArray("results: ", DATA_SIZE, results_data); - printArray("verify : ", DATA_SIZE, verify_data); + printArrayMT("results: ", DATA_SIZE, results_data); + printArrayMT("verify : ", DATA_SIZE, verify_data); #endif // verify - verify(DATA_SIZE, results_data, verify_data); - barrier(); + verifyMT(DATA_SIZE, results_data, verify_data); + barrier(nc); exit(0); } |