aboutsummaryrefslogtreecommitdiff
path: root/mt/ai_matmul.c
blob: 9d808f339acbbcc666c28e42b0e5cf93993d4023 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
#include "stdlib.h"

#include "util.h"

#include "dataset.h"
#include "util.h"
/*
 * matmul - compute C = A * B for square lda x lda matrices, with the rows of
 * C divided between the participating cores.
 *
 * coreid: index of the calling core; selects which band of rows it computes.
 *         Assumes 0 <= coreid < ncores -- TODO confirm against the caller.
 * ncores: number of cores sharing the work; also passed to barrier().
 *         Must be non-zero (it is used as a divisor).
 * lda:    matrix dimension. A, B and C are flat arrays in which element
 *         (row, col) is stored at index row*lda + col.
 * A, B:   input matrices (only read).
 * C:      output matrix; this call writes only the rows owned by coreid.
 *
 * Work split: core `coreid` owns rows
 *     [coreid*lda/ncores, (coreid+1)*lda/ncores).
 * When lda is a multiple of ncores this is the same band the previous
 * formula (start + lda/ncores) produced; when it is not, the bands still
 * tile every row exactly once instead of leaving the trailing rows
 * uncomputed.
 *
 * Columns are processed four at a time with four independent accumulators;
 * any columns left over when lda is not a multiple of 4 are handled one at a
 * time so that no access goes past the end of a row.
 */
void __attribute__((noinline)) matmul(const int coreid, const int ncores, const int lda,  const data_t A[], const data_t B[], data_t C[] )
{
   //----------MSI--------------
   int i, j, k;

   // Half-open band of rows owned by this core (see header comment).
   const int row_begin = coreid * lda / ncores;
   const int row_end = (coreid + 1) * lda / ncores;

   // Largest multiple of 4 that does not exceed lda: the 4-wide loop stops
   // here and the scalar loop below finishes the remaining 0..3 columns.
   const int unrolled_cols = lda - lda % 4;

   // barrier() is defined outside this file; presumably it makes all ncores
   // cores rendezvous before the computation starts -- confirm in util.h.
   barrier(ncores);

   for (j = row_begin; j < row_end; j++) {
      const data_t *a_row = &A[j * lda];
      data_t *c_row = &C[j * lda];

      // Main path: four adjacent output columns per pass over k.
      for (i = 0; i < unrolled_cols; i += 4) {
         data_t Cval0 = 0;
         data_t Cval1 = 0;
         data_t Cval2 = 0;
         data_t Cval3 = 0;

         for (k = 0; k < lda; k++) {
            // One load of A[j][k] feeds all four accumulators.
            const data_t a = a_row[k];
            const data_t *b = &B[k * lda + i];

            Cval0 += a * b[0];
            Cval1 += a * b[1];
            Cval2 += a * b[2];
            Cval3 += a * b[3];
         }

         c_row[i]     = Cval0;
         c_row[i + 1] = Cval1;
         c_row[i + 2] = Cval2;
         c_row[i + 3] = Cval3;
      }

      // Tail path: leftover columns when lda is not a multiple of 4.
      // i continues from where the unrolled loop stopped.
      for (; i < lda; i++) {
         data_t Cval = 0;

         for (k = 0; k < lda; k++) {
            Cval += a_row[k] * B[k * lda + i];
         }
         c_row[i] = Cval;
      }
   }

   //------------------MI-------------------
   // Disabled alternative, kept for reference. It performs the same
   // computation, except that core 0 walks k upwards while every other core
   // walks k downwards. (The earlier disabled copy called barrier(nc); no
   // `nc` exists in this function, so it must be barrier(ncores) as above.)
   // To try it, replace the k loop of the main path with:
   /*
         if (coreid == 0) {
            for (k = 0; k < lda; k++) {
               Cval0 += A[j*lda+k]*B[k*lda+i];
               Cval1 += A[j*lda+k]*B[k*lda+i+1];
               Cval2 += A[j*lda+k]*B[k*lda+i+2];
               Cval3 += A[j*lda+k]*B[k*lda+i+3];
            }
         } else {
            for (k = lda-1; k >= 0; k--) {
               Cval0 += A[j*lda+k]*B[k*lda+i];
               Cval1 += A[j*lda+k]*B[k*lda+i+1];
               Cval2 += A[j*lda+k]*B[k*lda+i+2];
               Cval3 += A[j*lda+k]*B[k*lda+i+3];
            }
         }
   */
}