diff options
Diffstat (limited to 'llvm/test/CodeGen/NVPTX/i8x2-instructions.ll')
-rw-r--r-- | llvm/test/CodeGen/NVPTX/i8x2-instructions.ll | 121 |
1 files changed, 93 insertions, 28 deletions
diff --git a/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll index 3edd4e4..98f94bb 100644 --- a/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll @@ -1,42 +1,107 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -mattr=+ptx80 \ -; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \ -; RUN: | FileCheck %s -; RUN: %if ptxas %{ \ -; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -asm-verbose=false \ -; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \ -; RUN: | %ptxas-verify -arch=sm_90 \ +; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \ +; RUN: -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=O0,COMMON +; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=O3,COMMON +; RUN: %if ptxas %{ \ +; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \ +; RUN: -verify-machineinstrs -O0 \ +; RUN: | %ptxas-verify -arch=sm_90 \ +; RUN: %} +; RUN: %if ptxas %{ \ +; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \ +; RUN: -verify-machineinstrs \ +; RUN: | %ptxas-verify -arch=sm_90 \ ; RUN: %} +target triple = "nvptx64-nvidia-cuda" target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" define i16 @test_bitcast_2xi8_i16(<2 x i8> %a) { -; CHECK-LABEL: test_bitcast_2xi8_i16( -; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<5>; -; CHECK-NEXT: .reg .b32 %r<3>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_bitcast_2xi8_i16_param_0]; -; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2}; -; CHECK-NEXT: shl.b16 %rs3, %rs2, 8; -; CHECK-NEXT: or.b16 %rs4, %rs1, %rs3; -; CHECK-NEXT: cvt.u32.u16 %r2, %rs4; -; CHECK-NEXT: st.param.b32 [func_retval0], %r2; -; CHECK-NEXT: ret; +; O0-LABEL: test_bitcast_2xi8_i16( +; O0: { +; O0-NEXT: .reg .b16 %rs<5>; +; O0-NEXT: .reg .b32 %r<3>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_bitcast_2xi8_i16_param_0]; +; O0-NEXT: mov.b32 %r1, {%rs1, %rs2}; +; O0-NEXT: shl.b16 %rs3, %rs2, 8; +; O0-NEXT: or.b16 %rs4, %rs1, %rs3; +; O0-NEXT: cvt.u32.u16 %r2, %rs4; +; O0-NEXT: st.param.b32 [func_retval0], %r2; +; O0-NEXT: ret; +; +; O3-LABEL: test_bitcast_2xi8_i16( +; O3: { +; O3-NEXT: .reg .b32 %r<2>; +; O3-EMPTY: +; O3-NEXT: // %bb.0: +; O3-NEXT: ld.param.b16 %r1, [test_bitcast_2xi8_i16_param_0]; +; O3-NEXT: st.param.b32 [func_retval0], %r1; +; O3-NEXT: ret; %res = bitcast <2 x i8> %a to i16 ret i16 %res } define <2 x i8> @test_bitcast_i16_2xi8(i16 %a) { -; CHECK-LABEL: test_bitcast_i16_2xi8( -; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; -; CHECK-EMPTY: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0]; -; CHECK-NEXT: st.param.b16 [func_retval0], %rs1; -; CHECK-NEXT: ret; +; O0-LABEL: test_bitcast_i16_2xi8( +; O0: { +; O0-NEXT: .reg .b16 %rs<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0]; +; O0-NEXT: st.param.b16 [func_retval0], %rs1; +; O0-NEXT: ret; +; +; O3-LABEL: test_bitcast_i16_2xi8( +; O3: { +; O3-NEXT: .reg .b16 %rs<2>; +; O3-EMPTY: +; O3-NEXT: // %bb.0: +; O3-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0]; +; O3-NEXT: st.param.b16 [func_retval0], %rs1; +; O3-NEXT: ret; %res = bitcast i16 %a to <2 x i8> ret <2 x i8> %res } + +define <2 x i8> @test_call_2xi8(<2 x i8> %a) { +; O0-LABEL: test_call_2xi8( +; O0: { +; O0-NEXT: .reg .b16 %rs<7>; +; O0-NEXT: .reg .b32 %r<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_call_2xi8_param_0]; +; O0-NEXT: mov.b32 %r1, {%rs1, %rs2}; +; O0-NEXT: { // callseq 0, 0 +; O0-NEXT: .param .align 2 .b8 param0[2]; +; O0-NEXT: .param .align 2 .b8 retval0[2]; +; O0-NEXT: st.param.v2.b8 [param0], {%rs1, %rs2}; +; O0-NEXT: call.uni (retval0), test_call_2xi8, (param0); +; O0-NEXT: ld.param.v2.b8 {%rs3, %rs4}, [retval0]; +; O0-NEXT: } // callseq 0 +; O0-NEXT: st.param.v2.b8 [func_retval0], {%rs3, %rs4}; +; O0-NEXT: ret; +; +; O3-LABEL: test_call_2xi8( +; O3: { +; O3-NEXT: .reg .b16 %rs<7>; +; O3-EMPTY: +; O3-NEXT: // %bb.0: +; O3-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_call_2xi8_param_0]; +; O3-NEXT: { // callseq 0, 0 +; O3-NEXT: .param .align 2 .b8 param0[2]; +; O3-NEXT: .param .align 2 .b8 retval0[2]; +; O3-NEXT: st.param.v2.b8 [param0], {%rs1, %rs2}; +; O3-NEXT: call.uni (retval0), test_call_2xi8, (param0); +; O3-NEXT: ld.param.v2.b8 {%rs3, %rs4}, [retval0]; +; O3-NEXT: } // callseq 0 +; O3-NEXT: st.param.v2.b8 [func_retval0], {%rs3, %rs4}; +; O3-NEXT: ret; + %res = call <2 x i8> @test_call_2xi8(<2 x i8> %a) + ret <2 x i8> %res +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; COMMON: {{.*}} |