; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; Test file path: llvm/test/CodeGen/NVPTX/i8x2-instructions.ll
; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all  \
; RUN:     -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=O0,COMMON
; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all  \
; RUN:     -verify-machineinstrs | FileCheck %s --check-prefixes=O3,COMMON
; RUN: %if ptxas %{                                                            \
; RUN:  llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \
; RUN:     -verify-machineinstrs -O0                                           \
; RUN:   | %ptxas-verify -arch=sm_90                                           \
; RUN: %}
; RUN: %if ptxas %{                                                            \
; RUN:  llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \
; RUN:     -verify-machineinstrs                                               \
; RUN:   | %ptxas-verify -arch=sm_90                                           \
; RUN: %}

target triple = "nvptx64-nvidia-cuda"
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"

; Bitcast of a <2 x i8> argument to a scalar i16 return value.
; Per the autogenerated expectations below: at -O0 the two bytes are loaded
; as a v2.b8 pair and recombined with shl/or, then widened to 32 bits for the
; return store; at the default optimization level the parameter is read with
; a single ld.param.b16 and stored straight to the return slot.
define i16 @test_bitcast_2xi8_i16(<2 x i8> %a) {
; O0-LABEL: test_bitcast_2xi8_i16(
; O0:       {
; O0-NEXT:    .reg .b16 %rs<5>;
; O0-NEXT:    .reg .b32 %r<3>;
; O0-EMPTY:
; O0-NEXT:  // %bb.0:
; O0-NEXT:    ld.param.v2.b8 {%rs1, %rs2}, [test_bitcast_2xi8_i16_param_0];
; O0-NEXT:    mov.b32 %r1, {%rs1, %rs2};
; O0-NEXT:    shl.b16 %rs3, %rs2, 8;
; O0-NEXT:    or.b16 %rs4, %rs1, %rs3;
; O0-NEXT:    cvt.u32.u16 %r2, %rs4;
; O0-NEXT:    st.param.b32 [func_retval0], %r2;
; O0-NEXT:    ret;
;
; O3-LABEL: test_bitcast_2xi8_i16(
; O3:       {
; O3-NEXT:    .reg .b32 %r<2>;
; O3-EMPTY:
; O3-NEXT:  // %bb.0:
; O3-NEXT:    ld.param.b16 %r1, [test_bitcast_2xi8_i16_param_0];
; O3-NEXT:    st.param.b32 [func_retval0], %r1;
; O3-NEXT:    ret;
  %res = bitcast <2 x i8> %a to i16
  ret i16 %res
}

; Bitcast of a scalar i16 argument to a <2 x i8> return value.
; Both run configurations expect the same code: one ld.param.b16 of the
; argument followed by one st.param.b16 to the return slot.
define <2 x i8> @test_bitcast_i16_2xi8(i16 %a) {
; O0-LABEL: test_bitcast_i16_2xi8(
; O0:       {
; O0-NEXT:    .reg .b16 %rs<2>;
; O0-EMPTY:
; O0-NEXT:  // %bb.0:
; O0-NEXT:    ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0];
; O0-NEXT:    st.param.b16 [func_retval0], %rs1;
; O0-NEXT:    ret;
;
; O3-LABEL: test_bitcast_i16_2xi8(
; O3:       {
; O3-NEXT:    .reg .b16 %rs<2>;
; O3-EMPTY:
; O3-NEXT:  // %bb.0:
; O3-NEXT:    ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0];
; O3-NEXT:    st.param.b16 [func_retval0], %rs1;
; O3-NEXT:    ret;
  %res = bitcast i16 %a to <2 x i8>
  ret <2 x i8> %res
}

; Passing and returning <2 x i8> across a call: the function calls itself
; with its own argument and returns the callee's result unchanged.
; Per the autogenerated expectations below, the argument is loaded as a v2.b8
; pair, stored into a 2-byte, 2-aligned param0, and the result is read back
; from a 2-byte, 2-aligned retval0 as a v2.b8 pair. The -O0 expectations also
; contain a mov.b32 that packs the two loaded bytes into a 32-bit register.
define <2 x i8> @test_call_2xi8(<2 x i8> %a) {
; O0-LABEL: test_call_2xi8(
; O0:       {
; O0-NEXT:    .reg .b16 %rs<7>;
; O0-NEXT:    .reg .b32 %r<2>;
; O0-EMPTY:
; O0-NEXT:  // %bb.0:
; O0-NEXT:    ld.param.v2.b8 {%rs1, %rs2}, [test_call_2xi8_param_0];
; O0-NEXT:    mov.b32 %r1, {%rs1, %rs2};
; O0-NEXT:    { // callseq 0, 0
; O0-NEXT:    .param .align 2 .b8 param0[2];
; O0-NEXT:    .param .align 2 .b8 retval0[2];
; O0-NEXT:    st.param.v2.b8 [param0], {%rs1, %rs2};
; O0-NEXT:    call.uni (retval0), test_call_2xi8, (param0);
; O0-NEXT:    ld.param.v2.b8 {%rs3, %rs4}, [retval0];
; O0-NEXT:    } // callseq 0
; O0-NEXT:    st.param.v2.b8 [func_retval0], {%rs3, %rs4};
; O0-NEXT:    ret;
;
; O3-LABEL: test_call_2xi8(
; O3:       {
; O3-NEXT:    .reg .b16 %rs<7>;
; O3-EMPTY:
; O3-NEXT:  // %bb.0:
; O3-NEXT:    ld.param.v2.b8 {%rs1, %rs2}, [test_call_2xi8_param_0];
; O3-NEXT:    { // callseq 0, 0
; O3-NEXT:    .param .align 2 .b8 param0[2];
; O3-NEXT:    .param .align 2 .b8 retval0[2];
; O3-NEXT:    st.param.v2.b8 [param0], {%rs1, %rs2};
; O3-NEXT:    call.uni (retval0), test_call_2xi8, (param0);
; O3-NEXT:    ld.param.v2.b8 {%rs3, %rs4}, [retval0];
; O3-NEXT:    } // callseq 0
; O3-NEXT:    st.param.v2.b8 [func_retval0], {%rs3, %rs4};
; O3-NEXT:    ret;
  %res = call <2 x i8> @test_call_2xi8(<2 x i8> %a)
  ret <2 x i8> %res
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; COMMON: {{.*}}