aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/Transforms/InstSimplify/add_vp.ll
blob: 320f3fbb303797a585473cbc1180e585bde20c51 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instsimplify -S | FileCheck %s

; Vector-predicated (VP) add/sub intrinsics exercised by this test. Every call
; in this file passes the two vector operands first, followed by a <2 x i1>
; mask and an i32 explicit vector length (named %mask and %evl in the tests).
declare <2 x i32> @llvm.vp.add.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32)
declare <2 x i32> @llvm.vp.sub.v2i32(<2 x i32>, <2 x i32>, <2 x i1>, i32)

; i8-element variants of the same intrinsics, used by @knownnegation.
declare <2 x i8> @llvm.vp.add.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)
declare <2 x i8> @llvm.vp.sub.v2i8(<2 x i8>, <2 x i8>, <2 x i1>, i32)

; Constant folding: vp.add of two all-constant vectors (3 and 7 in every lane)
; under an arbitrary mask and EVL.
; NOTE: the autogenerated checks below currently expect the call to be left in
; place (with the operands printed as splat constants), i.e. the test pins that
; no fold happens for this input today.
define <2 x i32> @constant_vp_add(<2 x i1> %mask, i32 %evl) {
; CHECK-LABEL: @constant_vp_add(
; CHECK-NEXT:    [[Q:%.*]] = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> splat (i32 3), <2 x i32> splat (i32 7), <2 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
; CHECK-NEXT:    ret <2 x i32> [[Q]]
;
  %Q = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> <i32 3, i32 3>, <2 x i32> <i32 7, i32 7>, <2 x i1> %mask, i32 %evl)
  ret <2 x i32> %Q
}

; Simplifying pure VP intrinsic patterns: (X - Y) + Y where both the sub and
; the add are VP intrinsics that share the same %mask and %evl.
; The checks below expect both calls to remain in the output.
define <2 x i32> @common_sub_operand(<2 x i32> %X, <2 x i32> %Y, <2 x i1> %mask, i32 %evl) {
; CHECK-LABEL: @common_sub_operand(
; CHECK-NEXT:    [[Z:%.*]] = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
; CHECK-NEXT:    [[Q:%.*]] = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> [[Z]], <2 x i32> [[Y]], <2 x i1> [[MASK]], i32 [[EVL]])
; CHECK-NEXT:    ret <2 x i32> [[Q]]
;
  ; Pseudo-IR: %Z = sub <2 x i32> %X, %Y, vp(%mask, %evl)
  %Z = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %X, <2 x i32> %Y, <2 x i1> %mask, i32 %evl)
  ; Pseudo-IR: %Q = add <2 x i32> %Z, %Y, vp(%mask, %evl)
  %Q = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %Z, <2 x i32> %Y, <2 x i1> %mask, i32 %evl)
  ret <2 x i32> %Q
}

; Mixing regular SIMD with vp intrinsics (vp add match root): the inner
; X - Y is an ordinary, unpredicated vector sub, while the outer add is a VP
; intrinsic using %mask and %evl.
; The checks below expect both instructions to remain in the output.
define <2 x i32> @common_sub_operand_vproot(<2 x i32> %X, <2 x i32> %Y, <2 x i1> %mask, i32 %evl) {
; CHECK-LABEL: @common_sub_operand_vproot(
; CHECK-NEXT:    [[Z:%.*]] = sub <2 x i32> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[Q:%.*]] = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> [[Z]], <2 x i32> [[Y]], <2 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
; CHECK-NEXT:    ret <2 x i32> [[Q]]
;
  %Z = sub <2 x i32> %X, %Y
  ; Pseudo-IR: %Q = add <2 x i32> %Z, %Y, vp(%mask, %evl)
  %Q = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %Z, <2 x i32> %Y, <2 x i1> %mask, i32 %evl)
  ret <2 x i32> %Q
}

; Mixing regular SIMD with vp intrinsics (vp inside pattern, regular instruction
; root): the inner X - Y is a VP intrinsic using %mask and %evl, while the outer
; add is an ordinary, unpredicated vector add.
; The checks below expect both instructions to remain in the output.
define <2 x i32> @common_sub_operand_vpinner(<2 x i32> %X, <2 x i32> %Y, <2 x i1> %mask, i32 %evl) {
; CHECK-LABEL: @common_sub_operand_vpinner(
; CHECK-NEXT:    [[Z:%.*]] = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]], <2 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
; CHECK-NEXT:    [[Q:%.*]] = add <2 x i32> [[Z]], [[Y]]
; CHECK-NEXT:    ret <2 x i32> [[Q]]
;
  ; Pseudo-IR: %Z = sub <2 x i32> %X, %Y, vp(%mask, %evl)
  %Z = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> %X, <2 x i32> %Y, <2 x i1> %mask, i32 %evl)
  %Q = add <2 x i32> %Z, %Y
  ret <2 x i32> %Q
}

; Negated operand: (0 - x) + x where both the negation and the add are VP
; intrinsics that share the same %mask and %evl.
; The checks below expect both calls to remain in the output.
define <2 x i32> @negated_operand(<2 x i32> %x, <2 x i1> %mask, i32 %evl) {
; CHECK-LABEL: @negated_operand(
; CHECK-NEXT:    [[NEGX:%.*]] = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> zeroinitializer, <2 x i32> [[X:%.*]], <2 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
; CHECK-NEXT:    [[R:%.*]] = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> [[NEGX]], <2 x i32> [[X]], <2 x i1> [[MASK]], i32 [[EVL]])
; CHECK-NEXT:    ret <2 x i32> [[R]]
;
  ; Pseudo-IR: %negx = sub <2 x i32> zeroinitializer, %x, vp(%mask, %evl)
  %negx = call <2 x i32> @llvm.vp.sub.v2i32(<2 x i32> zeroinitializer, <2 x i32> %x, <2 x i1> %mask, i32 %evl)
  ; Pseudo-IR: %r = add <2 x i32> %negx, %x, vp(%mask, %evl)
  %r = call <2 x i32> @llvm.vp.add.v2i32(<2 x i32> %negx, <2 x i32> %x, <2 x i1> %mask, i32 %evl)
  ret <2 x i32> %r
}

; Known negation: (x - y) + (y - x), with all three operations expressed as VP
; intrinsics that share the same %mask and %evl.
; TODO Lift InstSimplify::SimplifyAdd to the trait framework to optimize this.
; The TODO-CHECK lines record the desired result once that is done; they do not
; use the active CHECK prefix. The CHECK lines pin the current output, in which
; all three calls remain.
define <2 x i8> @knownnegation(<2 x i8> %x, <2 x i8> %y, <2 x i1> %mask, i32 %evl) {
; TODO-CHECK-LABEL: @knownnegation(
; TODO-CHECK-NEXT:    ret <2 x i8> zeroinitializer
;
; CHECK-LABEL: @knownnegation(
; CHECK-NEXT:    [[XY:%.*]] = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <2 x i1> [[MASK:%.*]], i32 [[EVL:%.*]])
; CHECK-NEXT:    [[YX:%.*]] = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> [[Y]], <2 x i8> [[X]], <2 x i1> [[MASK]], i32 [[EVL]])
; CHECK-NEXT:    [[R:%.*]] = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> [[XY]], <2 x i8> [[YX]], <2 x i1> [[MASK]], i32 [[EVL]])
; CHECK-NEXT:    ret <2 x i8> [[R]]
;
  ; Pseudo-IR: %xy = sub <2 x i8> %x, %y, vp(%mask, %evl)
  %xy = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i1> %mask, i32 %evl)
  ; Pseudo-IR: %yx = sub <2 x i8> %y, %x, vp(%mask, %evl)
  %yx = call <2 x i8> @llvm.vp.sub.v2i8(<2 x i8> %y, <2 x i8> %x, <2 x i1> %mask, i32 %evl)
  ; Pseudo-IR: %r = add <2 x i8> %xy, %yx, vp(%mask, %evl)
  %r = call <2 x i8> @llvm.vp.add.v2i8(<2 x i8> %xy, <2 x i8> %yx, <2 x i1> %mask, i32 %evl)
  ret <2 x i8> %r
}