; path: root/llvm/test/CodeGen/Hexagon/bfloat_vec.ll
; blob: 47be92aaf0b31d0507d322728cfe0437eb68131c (plain)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=hexagon < %s | FileCheck %s

; Function Attrs: mustprogress nounwind

; bf16_vec_add: loads a <64 x bfloat> vector from %a and another from %b, adds
; them element-wise with a single fadd, and stores the <64 x bfloat> result to
; %c. All three memory accesses are declared with only 2-byte alignment.
;
; The assertions below pin the full instruction sequence emitted for this
; function, one `{ ... }` group at a time. Registers are matched through
; FileCheck variables ([[NAME:regex]] defines, [[NAME]] reuses) instead of
; hard-coded register numbers, so the test checks how values flow between
; instructions rather than the exact register assignment.
define dso_local void @bf16_vec_add(ptr noundef %c, ptr noundef %a, ptr noundef %b) local_unnamed_addr #0 {
; CHECK-LABEL: bf16_vec_add:
; CHECK:       // %bb.0: // %entry
; Both inputs are fetched with unaligned vector loads (vmemu), interleaved
; with the materialization of scalar constants.
; NOTE(review): the constants 32768, 131071, 32767 and the shift amount 16 look
; like the rounding / NaN-replacement values of an f32 -> bf16 narrowing;
; confirm against the Hexagon bf16 lowering before relying on this.
; CHECK-NEXT:    {
; CHECK-NEXT:     [[R7:r[0-9]+]] = #-4
; CHECK-NEXT:     [[V0:v[0-9]+]] = vmemu([[R2:r[0-9]+]]+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     [[R2]] = ##32768
; CHECK-NEXT:     [[V1:v[0-9]+]] = vmemu([[R1:r[0-9]+]]+#0)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     [[R6:r[0-9]+]] = ##131071
; CHECK-NEXT:     [[V2:v[0-9]+]] = vxor([[V0]],[[V0]])
; CHECK-NEXT:     [[V3:v[0-9]+]] = vxor([[V1]],[[V1]])
; CHECK-NEXT:    }
; Each loaded vector is combined with the vxor result through vshuffoe on
; halfword lanes and then vshuff, producing vector register pairs.
; CHECK-NEXT:    {
; CHECK-NEXT:     [[V25:v[0-9]+]] = vsplat([[R2]])
; CHECK-NEXT:     [[R5:r[0-9]+]] = #16
; CHECK-NEXT:     [[V5_4:v[0-9]+:[0-9]+]].h = vshuffoe([[V0]].h,[[V2]].h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     [[V26:v[0-9]+]] = vsplat([[R6]])
; CHECK-NEXT:     [[R4:r[0-9]+]] = #32767
; CHECK-NEXT:     [[V31_30:v[0-9]+:[0-9]+]].h = vshuffoe([[V1]].h,[[V3]].h)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     [[V5_4]] = vshuff([[V5:v[0-9]+]],[[V4:v[0-9]+]],[[R7]])
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     [[V31:v[0-9]+]].h = vsplat([[R4]])
; CHECK-NEXT:     [[V3_2:v[0-9]+:[0-9]+]] = vshuff([[V31]],[[V30:v[0-9]+]],[[R7]])
; CHECK-NEXT:    }
; The addition itself: two vadd operations take .sf inputs and produce .qf32
; results, each of which is then converted back to .sf.
; CHECK-NEXT:    {
; CHECK-NEXT:     [[V2]].qf32 = vadd([[V2]].sf,[[V4]].sf)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     [[V3]].qf32 = vadd([[V3]].sf,[[V5]].sf)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     [[V2]].sf = [[V2]].qf32
; CHECK-NEXT:    }
; Post-processing of both .sf results: masking with the splatted constants,
; word compares, a self-compare of each result (vcmp.eq of a register with
; itself), conditional selects (vmux) and a logical right shift by [[R5]].
; CHECK-NEXT:    {
; CHECK-NEXT:     [[V3]].sf = [[V3]].qf32
; CHECK-NEXT:     [[V27:v[0-9]+]] = vand([[V2]],[[V25]])
; CHECK-NEXT:     [[V28:v[0-9]+]] = vand([[V2]],[[V26]])
; CHECK-NEXT:     [[Q2:q[0-9]+]] = vcmp.eq([[V2]].sf,[[V2]].sf)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     [[V29:v[0-9]+]] = vand([[V3]],[[V25]])
; CHECK-NEXT:     [[V1]] = vand([[V3]],[[V26]])
; CHECK-NEXT:     [[Q0:q[0-9]+]] = vcmp.eq([[V28]].w,[[V25]].w)
; CHECK-NEXT:     [[V4]].w = vadd([[V2]].w,[[V27]].w)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     [[V5]].w = vadd([[V3]].w,[[V29]].w)
; CHECK-NEXT:     [[Q1:q[0-9]+]] = vcmp.eq([[V1]].w,[[V25]].w)
; CHECK-NEXT:     [[V30:v[0-9]+]] = vmux([[Q0]],[[V2]],[[V4]])
; CHECK-NEXT:     [[Q3:q[0-9]+]] = vcmp.eq([[V3]].sf,[[V3]].sf)
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     [[V1]] = vmux([[Q1]],[[V3]],[[V5]])
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     [[V0]].uw = vlsr([[V30]].uw,[[R5]])
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     [[V1]].uw = vlsr([[V1]].uw,[[R5]])
; CHECK-NEXT:     [[V0]] = vmux([[Q2]],[[V0]],[[V31]])
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     [[V1]] = vmux([[Q3]],[[V1]],[[V31]])
; CHECK-NEXT:    }
; The two word vectors are packed into one vector of unsigned halfwords with
; saturation, and that vector is stored unaligned in the same group as the
; return jump.
; CHECK-NEXT:    {
; CHECK-NEXT:     [[V0]].uh = vpack([[V1]].w,[[V0]].w):sat
; CHECK-NEXT:    }
; CHECK-NEXT:    {
; CHECK-NEXT:     jumpr [[R31:r[0-9]+]]
; CHECK-NEXT:     vmemu([[R0:r[0-9]+]]+#0) = [[V0]]
; CHECK-NEXT:    }


; IR under test: load both operands, add, store the sum.
entry:
  %0 = load <64 x bfloat>, ptr %a, align 2
  %1 = load <64 x bfloat>, ptr %b, align 2
  %add.ripple.vectorized = fadd <64 x bfloat> %0, %1
  store <64 x bfloat> %add.ripple.vectorized, ptr %c, align 2
  ret void
}

; copy1d: copies one <128 x half> value from %X to %Y with 2-byte-aligned
; accesses. The expectations require two unaligned vector loads from r0
; (offsets #1 and #0) and two unaligned vector stores to r1 that write back
; the same registers, with the return jump appearing before the final store.
define dso_local void @copy1d(ptr noundef readonly captures(none) %X, ptr noundef writeonly captures(none) %Y) local_unnamed_addr #0 {
; CHECK-LABEL: copy1d:
; CHECK: v[[HI:[0-9]+]] = vmemu(r0+#1)
; CHECK: v[[LO:[0-9]+]] = vmemu(r0+#0)
; CHECK: vmemu(r1+#1) = v[[HI]]
; CHECK: jumpr [[RET_REG:r[0-9]+]]
; CHECK: vmemu(r1+#0) = v[[LO]]
entry:
  ; Single wide load followed by a single wide store of the same value.
  %vec = load <128 x half>, ptr %X, align 2
  store <128 x half> %vec, ptr %Y, align 2
  ret void
}

; Attribute group #0, referenced by @bf16_vec_add and @copy1d. It selects the
; hexagonv81 CPU and enables the HVX vector features the expected code relies
; on: +hvx-length128b, +hvx-qfloat and +hvxv81.
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite, inaccessiblemem: readwrite) "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="hexagonv81" "target-features"="+hvx-length128b,+hvx-qfloat,+hvxv81,+v81,-long-calls" }