; path: root/llvm/test/Transforms/SLPVectorizer/NVPTX/v2f16.ll
; blob: 866b3170e98ea525a2716839ca876dede8f067dd (plain)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 | FileCheck %s -check-prefix=VECTOR
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=nvptx64-nvidia-cuda -mcpu=sm_80 | FileCheck %s -check-prefix=VECTOR
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=nvptx64-nvidia-cuda -mcpu=sm_70 | FileCheck %s -check-prefix=VECTOR
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=nvptx64-nvidia-cuda -mcpu=sm_50 | FileCheck %s -check-prefix=NOVECTOR

; @fusion: loads two neighbouring half elements from %arg1, multiplies each by
; 0xH5380 (60.0) and adds 0xH57F0 (127.0) with fast-math flags, then stores the
; results to the same element positions in %arg.
;
; The first element index is ((%arg2 << 6) | %arg3) << 2, zero-extended to
; i64; the second is that index with bit 0 set through `or disjoint`.
;
; What the autogenerated assertions below expect from slp-vectorizer:
;  * sm_70 / sm_80 / sm_90 runs: a single <2 x half> load, fmul, fadd and
;    store replace the two scalar chains.
;  * sm_50 run: the loads and arithmetic stay scalar half operations; only the
;    final store is a <2 x half>, assembled with insertelement.
define void @fusion(ptr noalias nocapture align 256 dereferenceable(19267584) %arg, ptr noalias nocapture readonly align 256 dereferenceable(19267584) %arg1, i32 %arg2, i32 %arg3) local_unnamed_addr #0 {
; VECTOR-LABEL: @fusion(
; VECTOR-NEXT:    [[TMP1:%.*]] = shl nuw nsw i32 [[ARG2:%.*]], 6
; VECTOR-NEXT:    [[TMP2:%.*]] = or i32 [[TMP1]], [[ARG3:%.*]]
; VECTOR-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP2]], 2
; VECTOR-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
; VECTOR-NEXT:    [[TMP5:%.*]] = getelementptr inbounds half, ptr [[ARG1:%.*]], i64 [[TMP4]]
; VECTOR-NEXT:    [[TMP6:%.*]] = getelementptr inbounds half, ptr [[ARG:%.*]], i64 [[TMP4]]
; VECTOR-NEXT:    [[TMP7:%.*]] = load <2 x half>, ptr [[TMP5]], align 8
; VECTOR-NEXT:    [[TMP8:%.*]] = fmul fast <2 x half> [[TMP7]], splat (half 0xH5380)
; VECTOR-NEXT:    [[TMP9:%.*]] = fadd fast <2 x half> [[TMP8]], splat (half 0xH57F0)
; VECTOR-NEXT:    store <2 x half> [[TMP9]], ptr [[TMP6]], align 8
; VECTOR-NEXT:    ret void
;
; NOVECTOR-LABEL: @fusion(
; NOVECTOR-NEXT:    [[TMP1:%.*]] = shl nuw nsw i32 [[ARG2:%.*]], 6
; NOVECTOR-NEXT:    [[TMP2:%.*]] = or i32 [[TMP1]], [[ARG3:%.*]]
; NOVECTOR-NEXT:    [[TMP3:%.*]] = shl nuw nsw i32 [[TMP2]], 2
; NOVECTOR-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
; NOVECTOR-NEXT:    [[TMP10:%.*]] = or disjoint i64 [[TMP4]], 1
; NOVECTOR-NEXT:    [[TMP5:%.*]] = getelementptr inbounds half, ptr [[ARG1:%.*]], i64 [[TMP4]]
; NOVECTOR-NEXT:    [[TMP6:%.*]] = getelementptr inbounds half, ptr [[ARG:%.*]], i64 [[TMP4]]
; NOVECTOR-NEXT:    [[TMP11:%.*]] = getelementptr inbounds half, ptr [[ARG1]], i64 [[TMP10]]
; NOVECTOR-NEXT:    [[TMP12:%.*]] = load half, ptr [[TMP11]], align 2
; NOVECTOR-NEXT:    [[TMP17:%.*]] = load half, ptr [[TMP5]], align 8
; NOVECTOR-NEXT:    [[TMP13:%.*]] = fmul fast half [[TMP12]], 0xH5380
; NOVECTOR-NEXT:    [[TMP18:%.*]] = fmul fast half [[TMP17]], 0xH5380
; NOVECTOR-NEXT:    [[TMP14:%.*]] = fadd fast half [[TMP13]], 0xH57F0
; NOVECTOR-NEXT:    [[TMP19:%.*]] = fadd fast half [[TMP18]], 0xH57F0
; NOVECTOR-NEXT:    [[TMP15:%.*]] = insertelement <2 x half> poison, half [[TMP19]], i32 0
; NOVECTOR-NEXT:    [[TMP16:%.*]] = insertelement <2 x half> [[TMP15]], half [[TMP14]], i32 1
; NOVECTOR-NEXT:    store <2 x half> [[TMP16]], ptr [[TMP6]], align 8
; NOVECTOR-NEXT:    ret void
;
  ; Base element index: ((%arg2 << 6) | %arg3) << 2, widened to i64.
  %1 = shl nuw nsw i32 %arg2, 6
  %4 = or i32 %1, %arg3
  %5 = shl nuw nsw i32 %4, 2
  %6 = zext i32 %5 to i64
  ; Neighbouring element index: the base index with bit 0 set. The shift by 2
  ; above leaves the low bits of %6 clear, matching the `disjoint` flag.
  %7 = or disjoint i64 %6, 1
  ; Element 0: load from %arg1, multiply, add, store to %arg (align 8).
  %11 = getelementptr inbounds half, ptr %arg1, i64 %6
  %12 = load half, ptr %11, align 8
  %13 = fmul fast half %12, 0xH5380
  %14 = fadd fast half %13, 0xH57F0
  %16 = getelementptr inbounds half, ptr %arg, i64 %6
  store half %14, ptr %16, align 8
  ; Element 1: the same sequence at the neighbouring index (align 2).
  %17 = getelementptr inbounds half, ptr %arg1, i64 %7
  %18 = load half, ptr %17, align 2
  %19 = fmul fast half %18, 0xH5380
  %20 = fadd fast half %19, 0xH57F0
  %21 = getelementptr inbounds half, ptr %arg, i64 %7
  store half %20, ptr %21, align 2
  ret void
}

; @add_f16: ptx_kernel that takes two { half, half } aggregates by value, adds
; them field by field with scalar fadd, packs the two sums into a <2 x half>
; and stores it through the addrspace(1) pointer %0 at a half-element offset
; of (tid.x << 1) & 62.
;
; What the autogenerated assertions below expect from slp-vectorizer:
;  * sm_70 / sm_80 / sm_90 runs: the extracted fields are packed into two
;    <2 x half> values with insertelement and added by one <2 x half> fadd,
;    whose result is stored directly.
;  * sm_50 run: the two scalar fadd instructions and the insertelement pair
;    feeding the store are left as written.
define ptx_kernel void @add_f16(ptr addrspace(1) %0, { half, half } %1, { half, half } %2) {
; VECTOR-LABEL: @add_f16(
; VECTOR-NEXT:    [[TMP4:%.*]] = extractvalue { half, half } [[TMP1:%.*]], 0
; VECTOR-NEXT:    [[TMP5:%.*]] = extractvalue { half, half } [[TMP1]], 1
; VECTOR-NEXT:    [[TMP6:%.*]] = extractvalue { half, half } [[TMP2:%.*]], 0
; VECTOR-NEXT:    [[TMP7:%.*]] = extractvalue { half, half } [[TMP2]], 1
; VECTOR-NEXT:    [[TMP8:%.*]] = insertelement <2 x half> poison, half [[TMP4]], i32 0
; VECTOR-NEXT:    [[TMP9:%.*]] = insertelement <2 x half> [[TMP8]], half [[TMP5]], i32 1
; VECTOR-NEXT:    [[TMP10:%.*]] = insertelement <2 x half> poison, half [[TMP6]], i32 0
; VECTOR-NEXT:    [[TMP11:%.*]] = insertelement <2 x half> [[TMP10]], half [[TMP7]], i32 1
; VECTOR-NEXT:    [[TMP12:%.*]] = fadd <2 x half> [[TMP9]], [[TMP11]]
; VECTOR-NEXT:    [[TMP13:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
; VECTOR-NEXT:    [[TMP14:%.*]] = shl i32 [[TMP13]], 1
; VECTOR-NEXT:    [[TMP15:%.*]] = and i32 [[TMP14]], 62
; VECTOR-NEXT:    [[TMP16:%.*]] = zext nneg i32 [[TMP15]] to i64
; VECTOR-NEXT:    [[TMP17:%.*]] = getelementptr half, ptr addrspace(1) [[TMP0:%.*]], i64 [[TMP16]]
; VECTOR-NEXT:    store <2 x half> [[TMP12]], ptr addrspace(1) [[TMP17]], align 4
; VECTOR-NEXT:    ret void
;
; NOVECTOR-LABEL: @add_f16(
; NOVECTOR-NEXT:    [[TMP4:%.*]] = extractvalue { half, half } [[TMP1:%.*]], 0
; NOVECTOR-NEXT:    [[TMP5:%.*]] = extractvalue { half, half } [[TMP1]], 1
; NOVECTOR-NEXT:    [[TMP6:%.*]] = extractvalue { half, half } [[TMP2:%.*]], 0
; NOVECTOR-NEXT:    [[TMP7:%.*]] = extractvalue { half, half } [[TMP2]], 1
; NOVECTOR-NEXT:    [[TMP8:%.*]] = fadd half [[TMP4]], [[TMP6]]
; NOVECTOR-NEXT:    [[TMP9:%.*]] = fadd half [[TMP5]], [[TMP7]]
; NOVECTOR-NEXT:    [[TMP13:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
; NOVECTOR-NEXT:    [[TMP14:%.*]] = shl i32 [[TMP13]], 1
; NOVECTOR-NEXT:    [[TMP15:%.*]] = and i32 [[TMP14]], 62
; NOVECTOR-NEXT:    [[TMP16:%.*]] = zext nneg i32 [[TMP15]] to i64
; NOVECTOR-NEXT:    [[TMP17:%.*]] = getelementptr half, ptr addrspace(1) [[TMP0:%.*]], i64 [[TMP16]]
; NOVECTOR-NEXT:    [[TMP19:%.*]] = insertelement <2 x half> poison, half [[TMP8]], i64 0
; NOVECTOR-NEXT:    [[TMP12:%.*]] = insertelement <2 x half> [[TMP19]], half [[TMP9]], i64 1
; NOVECTOR-NEXT:    store <2 x half> [[TMP12]], ptr addrspace(1) [[TMP17]], align 4
; NOVECTOR-NEXT:    ret void
;
  ; Unpack both fields of each aggregate argument.
  %5 = extractvalue { half, half } %1, 0
  %6 = extractvalue { half, half } %1, 1
  %7 = extractvalue { half, half } %2, 0
  %8 = extractvalue { half, half } %2, 1
  ; Field-wise sums; this scalar pair is the vectorization candidate.
  %9 = fadd half %5, %7
  %10 = fadd half %6, %8
  ; Destination offset in half elements: (tid.x << 1) & 62, widened to i64.
  %11 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
  %12 = shl i32 %11, 1
  %13 = and i32 %12, 62
  %14 = zext nneg i32 %13 to i64
  %15 = getelementptr half, ptr addrspace(1) %0, i64 %14
  ; Pack the two sums into one <2 x half> and store it with 4-byte alignment.
  %18 = insertelement <2 x half> poison, half %9, i64 0
  %19 = insertelement <2 x half> %18, half %10, i64 1
  store <2 x half> %19, ptr addrspace(1) %15, align 4
  ret void
}

; Intrinsic called by @add_f16; its i32 result feeds the store-offset
; computation there. Going by its name, it reads the PTX special register
; tid.x.
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare noundef i32 @llvm.nvvm.read.ptx.sreg.tid.x() #1

; Attribute group #0 is attached to @fusion; #1 is attached to the intrinsic
; declaration above.
attributes #0 = { nounwind }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }