; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; Test file: llvm/test/Transforms/SLPVectorizer/semanticly-same.ll
; RUN: %if x86-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-50 < %s | FileCheck %s %}
; RUN: %if aarch64-registered-target %{ opt -S --passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu -slp-threshold=-50 < %s | FileCheck %s %}

; Profitability is irrelevant for these tests (hence -slp-threshold=-50); they
; only demonstrate that semantically equivalent opcodes can be combined into a
; single vector operation.

define void @sub_mul(ptr %p, ptr %s) {
; CHECK-LABEL: define void @sub_mul(
; CHECK-SAME: ptr [[P:%.*]], ptr [[S:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[P]], align 2
; CHECK-NEXT:    [[TMP1:%.*]] = mul <4 x i16> [[TMP0]], <i16 1, i16 5, i16 2, i16 3>
; CHECK-NEXT:    store <4 x i16> [[TMP1]], ptr [[S]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %p1 = getelementptr i16, ptr %p, i64 1
  %p2 = getelementptr i16, ptr %p, i64 2
  %p3 = getelementptr i16, ptr %p, i64 3

  %l0 = load i16, ptr %p
  %l1 = load i16, ptr %p1
  %l2 = load i16, ptr %p2
  %l3 = load i16, ptr %p3

  %mul0 = sub i16 %l0, 0
  %mul1 = mul i16 %l1, 5
  %mul2 = mul i16 %l2, 2
  %mul3 = mul i16 %l3, 3

  %s1 = getelementptr i16, ptr %s, i64 1
  %s2 = getelementptr i16, ptr %s, i64 2
  %s3 = getelementptr i16, ptr %s, i64 3

  store i16 %mul0, ptr %s
  store i16 %mul1, ptr %s1
  store i16 %mul2, ptr %s2
  store i16 %mul3, ptr %s3
  ret void
}

define void @add_mul(ptr %p, ptr %s) {
; CHECK-LABEL: define void @add_mul(
; CHECK-SAME: ptr [[P:%.*]], ptr [[S:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[P]], align 2
; CHECK-NEXT:    [[TMP1:%.*]] = mul <4 x i16> [[TMP0]], <i16 1, i16 5, i16 2, i16 3>
; CHECK-NEXT:    store <4 x i16> [[TMP1]], ptr [[S]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %p1 = getelementptr i16, ptr %p, i64 1
  %p2 = getelementptr i16, ptr %p, i64 2
  %p3 = getelementptr i16, ptr %p, i64 3

  %l0 = load i16, ptr %p
  %l1 = load i16, ptr %p1
  %l2 = load i16, ptr %p2
  %l3 = load i16, ptr %p3

  %mul0 = add i16 %l0, 0
  %mul1 = mul i16 %l1, 5
  %mul2 = mul i16 %l2, 2
  %mul3 = mul i16 %l3, 3

  %s1 = getelementptr i16, ptr %s, i64 1
  %s2 = getelementptr i16, ptr %s, i64 2
  %s3 = getelementptr i16, ptr %s, i64 3

  store i16 %mul0, ptr %s
  store i16 %mul1, ptr %s1
  store i16 %mul2, ptr %s2
  store i16 %mul3, ptr %s3
  ret void
}

define void @sub_and(ptr %p, ptr %s) {
; CHECK-LABEL: define void @sub_and(
; CHECK-SAME: ptr [[P:%.*]], ptr [[S:%.*]]) {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i16>, ptr [[P]], align 2
; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i16> [[TMP0]], <i16 -1, i16 5, i16 2, i16 3>
; CHECK-NEXT:    store <4 x i16> [[TMP1]], ptr [[S]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %p1 = getelementptr i16, ptr %p, i64 1
  %p2 = getelementptr i16, ptr %p, i64 2
  %p3 = getelementptr i16, ptr %p, i64 3

  %l0 = load i16, ptr %p
  %l1 = load i16, ptr %p1
  %l2 = load i16, ptr %p2
  %l3 = load i16, ptr %p3

  %mul0 = add i16 %l0, 0
  %mul1 = and i16 %l1, 5
  %mul2 = and i16 %l2, 2
  %mul3 = and i16 %l3, 3

  %s1 = getelementptr i16, ptr %s, i64 1
  %s2 = getelementptr i16, ptr %s, i64 2
  %s3 = getelementptr i16, ptr %s, i64 3

  store i16 %mul0, ptr %s
  store i16 %mul1, ptr %s1
  store i16 %mul2, ptr %s2
  store i16 %mul3, ptr %s3
  ret void
}