blob: 658af294f1c81ab28fb961202e165ea8813dff88 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
|
/* For every even index i in [0, N), store a[i] - b[i+1] into c[i] and
   a[i+1] + b[i] into c[i+1].  If each (even, odd) element pair is read as
   the (real, imaginary) parts of a complex number, this computes
   c = a + b*i, i.e. b rotated by 90 degrees and then added to a.

   TYPE and N are not defined in the visible part of this file; presumably
   they are supplied by whatever includes or precedes this code.  The loop
   touches index i+1, so it assumes N is even -- TODO confirm.  */
void add90 (TYPE a[restrict N], TYPE b[restrict N], TYPE c[restrict N])
{
/* When UNROLL is defined, request that GCC unroll the loop below with a
   factor of 16.  */
#if defined (UNROLL)
#pragma GCC unroll 16
#endif
for (int i=0; i < N; i+=2)
{
/* Even ("real") slot: a's even element minus b's odd element.  */
c[i] = a[i] - b[i+1];
/* Odd ("imaginary") slot: a's odd element plus b's even element.  */
c[i+1] = a[i+1] + b[i];
}
}
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "vect" } } */
/* For every even index i in [0, N), store a[i] + b[i+1] into c[i] and
   a[i+1] - b[i] into c[i+1].  If each (even, odd) element pair is read as
   the (real, imaginary) parts of a complex number, this computes
   c = a - b*i, i.e. b rotated by 270 degrees and then added to a.

   TYPE and N are not defined in the visible part of this file; presumably
   they are supplied by whatever includes or precedes this code.  The loop
   touches index i+1, so it assumes N is even -- TODO confirm.  */
void add270 (TYPE a[restrict N], TYPE b[restrict N], TYPE c[restrict N])
{
/* When UNROLL is defined, request that GCC unroll the loop below with a
   factor of 16.  */
#if defined (UNROLL)
#pragma GCC unroll 16
#endif
for (int i=0; i < N; i+=2)
{
/* Even ("real") slot: a's even element plus b's odd element.  */
c[i] = a[i] + b[i+1];
/* Odd ("imaginary") slot: a's odd element minus b's even element.  */
c[i+1] = a[i+1] - b[i];
}
}
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "vect" } } */
/* Process four elements (two adjacent pairs) per iteration, using a
   different sign pattern for each pair:
     - elements i, i+1 use the same form as add90
       (even: a - b[odd], odd: a + b[even]);
     - elements i+2, i+3 use the same form as add270
       (even: a + b[odd], odd: a - b[even]).

   TYPE and N are not defined in the visible part of this file.  The loop
   touches index i+3, so it assumes N is a multiple of 4 -- TODO confirm.  */
void addMixed (TYPE a[restrict N], TYPE b[restrict N], TYPE c[restrict N])
{
/* When UNROLL is defined, request that GCC unroll the loop below with a
   factor of 16.  */
#if defined (UNROLL)
#pragma GCC unroll 16
#endif
for (int i=0; i < N; i+=4)
{
/* First pair: subtract on the even slot, add on the odd slot.  */
c[i] = a[i] - b[i+1];
c[i+1] = a[i+1] + b[i];
/* Second pair: signs swapped relative to the first pair.  */
c[i+2] = a[i+2] + b[i+3];
c[i+3] = a[i+3] - b[i+2];
}
}
/* Apply the add90 form (even: a[i] - b[i+1], odd: a[i+1] + b[i]) to two
   adjacent pairs per iteration, written out by hand.  The statements are
   grouped by operation rather than by pair: both even-slot subtractions
   come first, followed by both odd-slot additions.

   NOTE(review): unlike the sibling functions, the loop bound here is N / 2
   rather than N, so only indices below roughly N / 2 are written to c.
   This looks deliberate for the test, but confirm before changing it.
   TYPE and N are not defined in the visible part of this file.  */
void add90HandUnrolled (TYPE a[restrict N], TYPE b[restrict N],
TYPE c[restrict N])
{
/* When UNROLL is defined, request that GCC unroll the loop below with a
   factor of 16.  */
#if defined (UNROLL)
#pragma GCC unroll 16
#endif
for (int i=0; i < (N /2); i+=4)
{
/* Even slots of both pairs.  */
c[i] = a[i] - b[i+1];
c[i+2] = a[i+2] - b[i+3];
/* Odd slots of both pairs.  */
c[i+1] = a[i+1] + b[i];
c[i+3] = a[i+3] + b[i+2];
}
}
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "vect" } } */
/* For every even index i in [0, N), produce two outputs from the same
   inputs a and b:
     - c receives the add90 form: c[i] = a[i] - b[i+1],
       c[i+1] = a[i+1] + b[i];
     - d receives the plain element-wise difference: d[i] = a[i] - b[i],
       d[i+1] = a[i+1] - b[i+1].

   TYPE and N are not defined in the visible part of this file.  The loop
   touches index i+1, so it assumes N is even -- TODO confirm.  */
void add90Hybrid (TYPE a[restrict N], TYPE b[restrict N], TYPE c[restrict N],
TYPE d[restrict N])
{
/* When UNROLL is defined, request that GCC unroll the loop below with a
   factor of 16.  */
#if defined (UNROLL)
#pragma GCC unroll 16
#endif
for (int i=0; i < N; i+=2)
{
/* Pair-swapping form: each output slot reads the opposite slot of b.  */
c[i] = a[i] - b[i+1];
c[i+1] = a[i+1] + b[i];
/* Straight element-wise subtraction: each slot reads the same slot of b.  */
d[i] = a[i] - b[i];
d[i+1] = a[i+1] - b[i+1];
}
}
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 2 "vect" } } */
|