blob: 858b05e5960321e58fce181f37c5560d95e1e099 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
#include "stdlib.h"
#include "util.h"
#include "dataset.h"
/*
 * Multi-core square matrix multiply: C = A * B.
 *
 * A, B and C are lda x lda matrices stored row-major (element (r, c) is at
 * index r * lda + c).
 *
 * Work split: each core computes whole rows of C.  On every outer iteration
 * a core handles the row pair (j, j + ncores), starting at j = coreid and
 * advancing by 2 * ncores.  Processing two rows together lets each loaded
 * element of B be reused for both dot products.
 *
 * coreid - index of the calling core; assumed 0 <= coreid < ncores
 *          (TODO confirm against caller).
 * ncores - number of cores taking part.
 * lda    - dimension of the square matrices.
 * A, B   - input matrices (read only).
 * C      - output matrix; each core writes only the rows it owns.
 *
 * Every core calls barrier(ncores) exactly once, after finishing its rows.
 * barrier() is defined outside this file; it is presumed to block until all
 * ncores cores have arrived.
 */
void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda, const data_t A[], const data_t B[], data_t C[] )
{
    int i, j, k;

    for (j = coreid; j < lda; j += 2 * ncores) {
        const int j2 = j + ncores;
        /* The paired row falls past the matrix when lda is not a multiple
         * of 2 * ncores; skip it instead of touching memory out of bounds. */
        const int has_second = (j2 < lda);

        for (i = 0; i < lda; i += 1) {
            /* Accumulate in the element type so non-integer data_t values
             * are not truncated. */
            data_t c1 = 0;
            data_t c2 = 0;

            for (k = 0; k < lda; k += 1) {
                const data_t b = B[k * lda + i];

                c1 += A[j * lda + k] * b;
                if (has_second) {
                    c2 += A[j2 * lda + k] * b;
                }
            }

            C[i + j * lda] = c1;
            if (has_second) {
                C[i + j2 * lda] = c2;
            }
        }
    }

    /* Single rendezvous after all work.  Cores may own different numbers of
     * rows, so a barrier inside the loops could be reached an unequal number
     * of times per core and hang; rows of C are disjoint per core, so no
     * intermediate synchronization is needed. */
    barrier(ncores);
}
|