about | summary | refs | log | tree | commit | diff
path: root/mt/as_matmul/matmul_mi.c
diff options
context:
space:
mode:
Diffstat (limited to 'mt/as_matmul/matmul_mi.c')
-rw-r--r-- mt/as_matmul/matmul_mi.c 34
1 files changed, 17 insertions, 17 deletions
diff --git a/mt/as_matmul/matmul_mi.c b/mt/as_matmul/matmul_mi.c
index 130fdb7..88534b8 100644
--- a/mt/as_matmul/matmul_mi.c
+++ b/mt/as_matmul/matmul_mi.c
@@ -52,7 +52,7 @@ unsigned long ncores;
//--------------------------------------------------------------------------
// Helper functions
-void printArray( char name[], int n, data_t arr[] )
+void printArrayMT( char name[], int n, data_t arr[] )
{
int i;
if (coreid != 0)
@@ -64,7 +64,7 @@ void printArray( char name[], int n, data_t arr[] )
printf( "\n" );
}
-void __attribute__((noinline)) verify(size_t n, const data_t* test, const data_t* correct)
+void __attribute__((noinline)) verifyMT(size_t n, const data_t* test, const data_t* correct)
{
if (coreid != 0)
return;
@@ -118,8 +118,8 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
int i, j, k, n, m, c1, c2;
- //matmul_naive(32, input1_data, input2_data, results_data); barrier(): 952596 cycles, 29.0 cycles/iter, 3.6 CPI
- //matmul(32, input1_data, input2_data, results_data); barrier(): 570135 cycles, 17.3 cycles/iter, 3.4 CPI
+ //matmul_naive(32, input1_data, input2_data, results_data); barrier(nc): 952596 cycles, 29.0 cycles/iter, 3.6 CPI
+ //matmul(32, input1_data, input2_data, results_data); barrier(nc): 570135 cycles, 17.3 cycles/iter, 3.4 CPI
for ( j = coreid; j < lda; j += 2*ncores ) {
for ( i = 0; i < lda; i += 1 ){
@@ -129,14 +129,14 @@ void __attribute__((noinline)) matmul(const int lda, const data_t A[], const da
c1 += A[j * lda + k] * B[k*lda + i];
c2 += A[(j+2) * lda + k] * B[k*lda + i];
- //barrier();
+ //barrier(nc);
}
C[i + j * lda] = c1;
C[i + (j+2) * lda] = c2;
- barrier();
+ barrier(nc);
}
- //barrier();
+ //barrier(nc);
}
}
@@ -157,33 +157,33 @@ void thread_entry(int cid, int nc)
// // Execute the provided, naive matmul
-// barrier();
-// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+// barrier(nc);
+// stats(matmul_naive(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
//
//
// // verify
-// verify(ARRAY_SIZE, results_data, verify_data);
+// verifyMT(ARRAY_SIZE, results_data, verify_data);
//
// // clear results from the first trial
// size_t i;
// if (coreid == 0)
// for (i=0; i < ARRAY_SIZE; i++)
// results_data[i] = 0;
-// barrier();
+// barrier(nc);
// Execute your faster matmul
- barrier();
- stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier());
+ barrier(nc);
+ stats(matmul(DIM_SIZE, input1_data, input2_data, results_data); barrier(nc));
#ifdef DEBUG
- printArray("results:", ARRAY_SIZE, results_data);
- printArray("verify :", ARRAY_SIZE, verify_data);
+ printArrayMT("results:", ARRAY_SIZE, results_data);
+ printArrayMT("verify :", ARRAY_SIZE, verify_data);
#endif
// verify
- verify(ARRAY_SIZE, results_data, verify_data);
- barrier();
+ verifyMT(ARRAY_SIZE, results_data, verify_data);
+ barrier(nc);
exit(0);
}