aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/NVPTX/i8x2-instructions.ll')
-rw-r--r--llvm/test/CodeGen/NVPTX/i8x2-instructions.ll121
1 files changed, 93 insertions, 28 deletions
diff --git a/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll
index 3edd4e4..98f94bb 100644
--- a/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll
@@ -1,42 +1,107 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -mattr=+ptx80 \
-; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
-; RUN: | FileCheck %s
-; RUN: %if ptxas %{ \
-; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -asm-verbose=false \
-; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
-; RUN: | %ptxas-verify -arch=sm_90 \
+; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \
+; RUN: -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=O0,COMMON
+; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=O3,COMMON
+; RUN: %if ptxas %{ \
+; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \
+; RUN: -verify-machineinstrs -O0 \
+; RUN: | %ptxas-verify -arch=sm_90 \
+; RUN: %}
+; RUN: %if ptxas %{ \
+; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \
+; RUN: -verify-machineinstrs \
+; RUN: | %ptxas-verify -arch=sm_90 \
; RUN: %}
+target triple = "nvptx64-nvidia-cuda"
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
define i16 @test_bitcast_2xi8_i16(<2 x i8> %a) {
-; CHECK-LABEL: test_bitcast_2xi8_i16(
-; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<5>;
-; CHECK-NEXT: .reg .b32 %r<3>;
-; CHECK-EMPTY:
-; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_bitcast_2xi8_i16_param_0];
-; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
-; CHECK-NEXT: shl.b16 %rs3, %rs2, 8;
-; CHECK-NEXT: or.b16 %rs4, %rs1, %rs3;
-; CHECK-NEXT: cvt.u32.u16 %r2, %rs4;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
-; CHECK-NEXT: ret;
+; O0-LABEL: test_bitcast_2xi8_i16(
+; O0: {
+; O0-NEXT: .reg .b16 %rs<5>;
+; O0-NEXT: .reg .b32 %r<3>;
+; O0-EMPTY:
+; O0-NEXT: // %bb.0:
+; O0-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_bitcast_2xi8_i16_param_0];
+; O0-NEXT: mov.b32 %r1, {%rs1, %rs2};
+; O0-NEXT: shl.b16 %rs3, %rs2, 8;
+; O0-NEXT: or.b16 %rs4, %rs1, %rs3;
+; O0-NEXT: cvt.u32.u16 %r2, %rs4;
+; O0-NEXT: st.param.b32 [func_retval0], %r2;
+; O0-NEXT: ret;
+;
+; O3-LABEL: test_bitcast_2xi8_i16(
+; O3: {
+; O3-NEXT: .reg .b32 %r<2>;
+; O3-EMPTY:
+; O3-NEXT: // %bb.0:
+; O3-NEXT: ld.param.b16 %r1, [test_bitcast_2xi8_i16_param_0];
+; O3-NEXT: st.param.b32 [func_retval0], %r1;
+; O3-NEXT: ret;
%res = bitcast <2 x i8> %a to i16
ret i16 %res
}
define <2 x i8> @test_bitcast_i16_2xi8(i16 %a) {
-; CHECK-LABEL: test_bitcast_i16_2xi8(
-; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
-; CHECK-EMPTY:
-; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0];
-; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
-; CHECK-NEXT: ret;
+; O0-LABEL: test_bitcast_i16_2xi8(
+; O0: {
+; O0-NEXT: .reg .b16 %rs<2>;
+; O0-EMPTY:
+; O0-NEXT: // %bb.0:
+; O0-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0];
+; O0-NEXT: st.param.b16 [func_retval0], %rs1;
+; O0-NEXT: ret;
+;
+; O3-LABEL: test_bitcast_i16_2xi8(
+; O3: {
+; O3-NEXT: .reg .b16 %rs<2>;
+; O3-EMPTY:
+; O3-NEXT: // %bb.0:
+; O3-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0];
+; O3-NEXT: st.param.b16 [func_retval0], %rs1;
+; O3-NEXT: ret;
%res = bitcast i16 %a to <2 x i8>
ret <2 x i8> %res
}
+
+define <2 x i8> @test_call_2xi8(<2 x i8> %a) {
+; O0-LABEL: test_call_2xi8(
+; O0: {
+; O0-NEXT: .reg .b16 %rs<7>;
+; O0-NEXT: .reg .b32 %r<2>;
+; O0-EMPTY:
+; O0-NEXT: // %bb.0:
+; O0-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_call_2xi8_param_0];
+; O0-NEXT: mov.b32 %r1, {%rs1, %rs2};
+; O0-NEXT: { // callseq 0, 0
+; O0-NEXT: .param .align 2 .b8 param0[2];
+; O0-NEXT: .param .align 2 .b8 retval0[2];
+; O0-NEXT: st.param.v2.b8 [param0], {%rs1, %rs2};
+; O0-NEXT: call.uni (retval0), test_call_2xi8, (param0);
+; O0-NEXT: ld.param.v2.b8 {%rs3, %rs4}, [retval0];
+; O0-NEXT: } // callseq 0
+; O0-NEXT: st.param.v2.b8 [func_retval0], {%rs3, %rs4};
+; O0-NEXT: ret;
+;
+; O3-LABEL: test_call_2xi8(
+; O3: {
+; O3-NEXT: .reg .b16 %rs<7>;
+; O3-EMPTY:
+; O3-NEXT: // %bb.0:
+; O3-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_call_2xi8_param_0];
+; O3-NEXT: { // callseq 0, 0
+; O3-NEXT: .param .align 2 .b8 param0[2];
+; O3-NEXT: .param .align 2 .b8 retval0[2];
+; O3-NEXT: st.param.v2.b8 [param0], {%rs1, %rs2};
+; O3-NEXT: call.uni (retval0), test_call_2xi8, (param0);
+; O3-NEXT: ld.param.v2.b8 {%rs3, %rs4}, [retval0];
+; O3-NEXT: } // callseq 0
+; O3-NEXT: st.param.v2.b8 [func_retval0], {%rs3, %rs4};
+; O3-NEXT: ret;
+ %res = call <2 x i8> @test_call_2xi8(<2 x i8> %a)
+ ret <2 x i8> %res
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; COMMON: {{.*}}