llvm/test/CodeGen/AArch64/sve-insert-vector-to-predicate-load.ll


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110

; Exercises the aarch64-sve-intrinsic-opts pass on the pattern
;   fixed-length i8 vector load -> llvm.vector.insert -> bitcast to <vscale x 16 x i1>.
; The positive tests expect the sequence to become a single load of
; <vscale x 16 x i1>; the negative tests expect the llvm.vector.insert call to
; remain in the output.
; RUN: opt -S -aarch64-sve-intrinsic-opts < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

; Positive case: attribute group #0 pins vscale to 1, so the 16-bit <2 x i8>
; load has the same size as <vscale x 16 x i1>. The load/insert/bitcast chain is
; expected to be replaced by one predicate load from %addr.
define <vscale x 16 x i1> @pred_load_v2i8(ptr %addr) #0 {
; CHECK-LABEL: @pred_load_v2i8(
; CHECK-NEXT:    [[PRED:%.*]] = load <vscale x 16 x i1>, ptr %addr
; CHECK-NEXT:    ret <vscale x 16 x i1> [[PRED]]
  %fixed = load <2 x i8>, ptr %addr, align 4
  %widened = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> poison, <2 x i8> %fixed, i64 0)
  %pred = bitcast <vscale x 2 x i8> %widened to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %pred
}

; Positive case: attribute group #1 pins vscale to 2, so the 32-bit <4 x i8>
; load has the same size as <vscale x 16 x i1>. The chain is expected to be
; replaced by one predicate load from %addr.
define <vscale x 16 x i1> @pred_load_v4i8(ptr %addr) #1 {
; CHECK-LABEL: @pred_load_v4i8(
; CHECK-NEXT:    [[PRED:%.*]] = load <vscale x 16 x i1>, ptr %addr
; CHECK-NEXT:    ret <vscale x 16 x i1> [[PRED]]
  %fixed = load <4 x i8>, ptr %addr, align 4
  %widened = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> poison, <4 x i8> %fixed, i64 0)
  %pred = bitcast <vscale x 2 x i8> %widened to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %pred
}

; Positive case: attribute group #2 pins vscale to 4, so the 64-bit <8 x i8>
; load has the same size as <vscale x 16 x i1>. The chain is expected to be
; replaced by one predicate load from %addr.
define <vscale x 16 x i1> @pred_load_v8i8(ptr %addr) #2 {
; CHECK-LABEL: @pred_load_v8i8(
; CHECK-NEXT:    [[PRED:%.*]] = load <vscale x 16 x i1>, ptr %addr
; CHECK-NEXT:    ret <vscale x 16 x i1> [[PRED]]
  %fixed = load <8 x i8>, ptr %addr, align 4
  %widened = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> poison, <8 x i8> %fixed, i64 0)
  %pred = bitcast <vscale x 2 x i8> %widened to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %pred
}

; The fixed-length load sits in `entry` while the insert and bitcast sit in
; `bb1`. The expected output places the new predicate load in `entry`, i.e. at
; the position of the original load rather than at the bitcast.
define <vscale x 16 x i1> @pred_load_insertion_point(ptr %addr) #0 {
; CHECK-LABEL: @pred_load_insertion_point(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[PRED:%.*]] = load <vscale x 16 x i1>, ptr %addr
; CHECK-NEXT:    br label %bb1
; CHECK:       bb1:
; CHECK-NEXT:    ret <vscale x 16 x i1> [[PRED]]
entry:
  %fixed = load <2 x i8>, ptr %addr, align 4
  br label %bb1

bb1:
  %widened = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> poison, <2 x i8> %fixed, i64 0)
  %pred = bitcast <vscale x 2 x i8> %widened to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %pred
}

; Negative case: vscale is too small. Attribute group #0 pins vscale to 1, which
; makes <vscale x 16 x i1> 16 bits wide, but the load is a 32-bit <4 x i8>.
; The llvm.vector.insert call must survive.
define <vscale x 16 x i1> @pred_load_neg1(ptr %addr) #0 {
; CHECK-LABEL: @pred_load_neg1(
; CHECK:         call <vscale x 2 x i8> @llvm.vector.insert
  %fixed = load <4 x i8>, ptr %addr, align 4
  %widened = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> poison, <4 x i8> %fixed, i64 0)
  %pred = bitcast <vscale x 2 x i8> %widened to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %pred
}

; Negative case: vscale is too large. Attribute group #2 pins vscale to 4, which
; makes <vscale x 16 x i1> 64 bits wide, but the load is only a 32-bit <4 x i8>.
; The llvm.vector.insert call must survive.
define <vscale x 16 x i1> @pred_load_neg2(ptr %addr) #2 {
; CHECK-LABEL: @pred_load_neg2(
; CHECK:         call <vscale x 2 x i8> @llvm.vector.insert
  %fixed = load <4 x i8>, ptr %addr, align 4
  %widened = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> poison, <4 x i8> %fixed, i64 0)
  %pred = bitcast <vscale x 2 x i8> %widened to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %pred
}

; Negative case: non-zero insertion index. The sizes line up (<4 x i8> with
; vscale pinned to 2 by attribute group #1), but the subvector is inserted at
; index 4 instead of 0, so the llvm.vector.insert call must survive.
define <vscale x 16 x i1> @pred_load_neg3(ptr %addr) #1 {
; CHECK-LABEL: @pred_load_neg3(
; CHECK:         call <vscale x 2 x i8> @llvm.vector.insert
  %fixed = load <4 x i8>, ptr %addr, align 4
  %widened = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> poison, <4 x i8> %fixed, i64 4)
  %pred = bitcast <vscale x 2 x i8> %widened to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %pred
}

; Negative case: vscale is not known exactly. Attribute group #3 gives
; vscale_range(2,4), so the minimum and maximum differ and the
; llvm.vector.insert call must survive.
define <vscale x 16 x i1> @pred_load_neg4(ptr %addr) #3 {
; CHECK-LABEL: @pred_load_neg4(
; CHECK:         call <vscale x 2 x i8> @llvm.vector.insert
  %fixed = load <4 x i8>, ptr %addr, align 4
  %widened = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> poison, <4 x i8> %fixed, i64 0)
  %pred = bitcast <vscale x 2 x i8> %widened to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %pred
}

; Negative case: the insertion destination is the live argument %passthru rather
; than a poison/undef vector. Sizes and index match the positive v4i8 case, yet
; the llvm.vector.insert call must survive.
define <vscale x 16 x i1> @pred_load_neg5(ptr %addr, <vscale x 2 x i8> %passthru) #1 {
; CHECK-LABEL: @pred_load_neg5(
; CHECK:         call <vscale x 2 x i8> @llvm.vector.insert
  %fixed = load <4 x i8>, ptr %addr, align 4
  %widened = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> %passthru, <4 x i8> %fixed, i64 0)
  %pred = bitcast <vscale x 2 x i8> %widened to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %pred
}

; Overloads of the llvm.vector.insert intrinsic used by the tests in this file.
; Each takes a <vscale x 2 x i8> destination, a fixed-length i8 subvector
; (2, 4 or 8 elements) and an i64 index, and returns <vscale x 2 x i8>.
declare <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8>, <2 x i8>, i64)
declare <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8>, <4 x i8>, i64)
declare <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8>, <8 x i8>, i64)

; Attribute groups shared by the tests. All enable the SVE target feature and
; differ only in the vscale_range(min,max) bounds they declare.
; #0: vscale is exactly 1.
attributes #0 = { "target-features"="+sve" vscale_range(1,1) }
; #1: vscale is exactly 2.
attributes #1 = { "target-features"="+sve" vscale_range(2,2) }
; #2: vscale is exactly 4.
attributes #2 = { "target-features"="+sve" vscale_range(4,4) }
; #3: vscale is only bounded (2 to 4), used by the negative min/max test.
attributes #3 = { "target-features"="+sve" vscale_range(2,4) }