aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/NVPTX
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/NVPTX')
-rw-r--r--llvm/test/CodeGen/NVPTX/aggregate-return.ll48
-rw-r--r--llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll6
-rw-r--r--llvm/test/CodeGen/NVPTX/byval-const-global.ll8
-rw-r--r--llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll10
-rw-r--r--llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll16
-rw-r--r--llvm/test/CodeGen/NVPTX/combine-mad.ll4
-rw-r--r--llvm/test/CodeGen/NVPTX/compare-int.ll621
-rw-r--r--llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll172
-rw-r--r--llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll4
-rw-r--r--llvm/test/CodeGen/NVPTX/f16x2-instructions.ll12
-rw-r--r--llvm/test/CodeGen/NVPTX/f32x2-instructions.ll12
-rw-r--r--llvm/test/CodeGen/NVPTX/fma.ll8
-rw-r--r--llvm/test/CodeGen/NVPTX/forward-ld-param.ll2
-rw-r--r--llvm/test/CodeGen/NVPTX/i128-param.ll20
-rw-r--r--llvm/test/CodeGen/NVPTX/i16x2-instructions.ll12
-rw-r--r--llvm/test/CodeGen/NVPTX/i8x2-instructions.ll121
-rw-r--r--llvm/test/CodeGen/NVPTX/i8x4-instructions.ll30
-rw-r--r--llvm/test/CodeGen/NVPTX/idioms.ll2
-rw-r--r--llvm/test/CodeGen/NVPTX/indirect_byval.ll20
-rw-r--r--llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll33
-rw-r--r--llvm/test/CodeGen/NVPTX/lower-args.ll10
-rw-r--r--llvm/test/CodeGen/NVPTX/lower-byval-args.ll2
-rw-r--r--llvm/test/CodeGen/NVPTX/misched_func_call.ll15
-rw-r--r--llvm/test/CodeGen/NVPTX/param-add.ll8
-rw-r--r--llvm/test/CodeGen/NVPTX/param-load-store.ll280
-rw-r--r--llvm/test/CodeGen/NVPTX/param-overalign.ll4
-rw-r--r--llvm/test/CodeGen/NVPTX/param-vectorize-device.ll28
-rw-r--r--llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir4
-rw-r--r--llvm/test/CodeGen/NVPTX/st-param-imm.ll201
-rw-r--r--llvm/test/CodeGen/NVPTX/store-undef.ll4
-rw-r--r--llvm/test/CodeGen/NVPTX/tanhf.ll40
-rw-r--r--llvm/test/CodeGen/NVPTX/tex-read-cuda.ll2
-rw-r--r--llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll594
-rw-r--r--llvm/test/CodeGen/NVPTX/vaargs.ll16
-rw-r--r--llvm/test/CodeGen/NVPTX/variadics-backend.ll32
35 files changed, 1481 insertions, 920 deletions
diff --git a/llvm/test/CodeGen/NVPTX/aggregate-return.ll b/llvm/test/CodeGen/NVPTX/aggregate-return.ll
index 7f52e52..abc873e 100644
--- a/llvm/test/CodeGen/NVPTX/aggregate-return.ll
+++ b/llvm/test/CodeGen/NVPTX/aggregate-return.ll
@@ -16,8 +16,8 @@ define void @test_v2f32(<2 x float> %input, ptr %output) {
; CHECK-NEXT: ld.param.b64 %rd1, [test_v2f32_param_0];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: .param .align 8 .b8 retval0[8];
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: call.uni (retval0), barv, (param0);
; CHECK-NEXT: ld.param.b64 %rd2, [retval0];
; CHECK-NEXT: } // callseq 0
@@ -32,24 +32,24 @@ define void @test_v2f32(<2 x float> %input, ptr %output) {
define void @test_v3f32(<3 x float> %input, ptr %output) {
; CHECK-LABEL: test_v3f32(
; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<10>;
-; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_v3f32_param_0];
-; CHECK-NEXT: ld.param.b32 %r3, [test_v3f32_param_0+8];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_v3f32_param_0];
+; CHECK-NEXT: ld.param.b32 %r1, [test_v3f32_param_0+8];
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2};
-; CHECK-NEXT: st.param.b32 [param0+8], %r3;
; CHECK-NEXT: .param .align 16 .b8 retval0[16];
+; CHECK-NEXT: st.param.b32 [param0+8], %r1;
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: call.uni (retval0), barv3, (param0);
-; CHECK-NEXT: ld.param.v2.b32 {%r4, %r5}, [retval0];
-; CHECK-NEXT: ld.param.b32 %r6, [retval0+8];
+; CHECK-NEXT: ld.param.b32 %r2, [retval0+8];
+; CHECK-NEXT: ld.param.b64 %rd2, [retval0];
; CHECK-NEXT: } // callseq 1
-; CHECK-NEXT: ld.param.b64 %rd1, [test_v3f32_param_1];
-; CHECK-NEXT: st.v2.b32 [%rd1], {%r4, %r5};
-; CHECK-NEXT: st.b32 [%rd1+8], %r6;
+; CHECK-NEXT: ld.param.b64 %rd4, [test_v3f32_param_1];
+; CHECK-NEXT: st.b32 [%rd4+8], %r2;
+; CHECK-NEXT: st.b64 [%rd4], %rd2;
; CHECK-NEXT: ret;
%call = tail call <3 x float> @barv3(<3 x float> %input)
; Make sure we don't load more values than than we need to.
@@ -68,16 +68,16 @@ define void @test_a2f32([2 x float] %input, ptr %output) {
; CHECK-NEXT: ld.param.b32 %r2, [test_a2f32_param_0+4];
; CHECK-NEXT: { // callseq 2, 0
; CHECK-NEXT: .param .align 4 .b8 param0[8];
-; CHECK-NEXT: st.param.b32 [param0], %r1;
-; CHECK-NEXT: st.param.b32 [param0+4], %r2;
; CHECK-NEXT: .param .align 4 .b8 retval0[8];
+; CHECK-NEXT: st.param.b32 [param0+4], %r2;
+; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni (retval0), bara, (param0);
-; CHECK-NEXT: ld.param.b32 %r3, [retval0];
-; CHECK-NEXT: ld.param.b32 %r4, [retval0+4];
+; CHECK-NEXT: ld.param.b32 %r3, [retval0+4];
+; CHECK-NEXT: ld.param.b32 %r4, [retval0];
; CHECK-NEXT: } // callseq 2
; CHECK-NEXT: ld.param.b64 %rd1, [test_a2f32_param_1];
-; CHECK-NEXT: st.b32 [%rd1+4], %r4;
-; CHECK-NEXT: st.b32 [%rd1], %r3;
+; CHECK-NEXT: st.b32 [%rd1+4], %r3;
+; CHECK-NEXT: st.b32 [%rd1], %r4;
; CHECK-NEXT: ret;
%call = tail call [2 x float] @bara([2 x float] %input)
store [2 x float] %call, ptr %output, align 4
@@ -95,16 +95,16 @@ define void @test_s2f32({float, float} %input, ptr %output) {
; CHECK-NEXT: ld.param.b32 %r2, [test_s2f32_param_0+4];
; CHECK-NEXT: { // callseq 3, 0
; CHECK-NEXT: .param .align 4 .b8 param0[8];
-; CHECK-NEXT: st.param.b32 [param0], %r1;
-; CHECK-NEXT: st.param.b32 [param0+4], %r2;
; CHECK-NEXT: .param .align 4 .b8 retval0[8];
+; CHECK-NEXT: st.param.b32 [param0+4], %r2;
+; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni (retval0), bars, (param0);
-; CHECK-NEXT: ld.param.b32 %r3, [retval0];
-; CHECK-NEXT: ld.param.b32 %r4, [retval0+4];
+; CHECK-NEXT: ld.param.b32 %r3, [retval0+4];
+; CHECK-NEXT: ld.param.b32 %r4, [retval0];
; CHECK-NEXT: } // callseq 3
; CHECK-NEXT: ld.param.b64 %rd1, [test_s2f32_param_1];
-; CHECK-NEXT: st.b32 [%rd1+4], %r4;
-; CHECK-NEXT: st.b32 [%rd1], %r3;
+; CHECK-NEXT: st.b32 [%rd1+4], %r3;
+; CHECK-NEXT: st.b32 [%rd1], %r4;
; CHECK-NEXT: ret;
%call = tail call {float, float} @bars({float, float} %input)
store {float, float} %call, ptr %output, align 4
diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
index ba5813c..b4641d0 100644
--- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
@@ -208,13 +208,13 @@ define <2 x bfloat> @test_call(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_call_param_0];
-; CHECK-NEXT: ld.param.b32 %r2, [test_call_param_1];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: .param .align 4 .b8 param1[4];
-; CHECK-NEXT: st.param.b32 [param1], %r2;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
+; CHECK-NEXT: ld.param.b32 %r2, [test_call_param_1];
+; CHECK-NEXT: st.param.b32 [param1], %r2;
+; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 0
diff --git a/llvm/test/CodeGen/NVPTX/byval-const-global.ll b/llvm/test/CodeGen/NVPTX/byval-const-global.ll
index ad9e4b0..b4934e1a 100644
--- a/llvm/test/CodeGen/NVPTX/byval-const-global.ll
+++ b/llvm/test/CodeGen/NVPTX/byval-const-global.ll
@@ -13,12 +13,12 @@ define void @foo() {
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.global.b64 %rd1, [G];
-; CHECK-NEXT: ld.global.b64 %rd2, [G+8];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 8 .b8 param0[16];
-; CHECK-NEXT: st.param.b64 [param0], %rd1;
-; CHECK-NEXT: st.param.b64 [param0+8], %rd2;
+; CHECK-NEXT: ld.global.b64 %rd1, [G+8];
+; CHECK-NEXT: st.param.b64 [param0+8], %rd1;
+; CHECK-NEXT: ld.global.b64 %rd2, [G];
+; CHECK-NEXT: st.param.b64 [param0], %rd2;
; CHECK-NEXT: call.uni bar, (param0);
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: ret;
diff --git a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
index 0cd7058..0eb7f64 100644
--- a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
+++ b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
@@ -44,11 +44,11 @@ entry:
%arrayidx7 = getelementptr inbounds [16 x i8], ptr %buf, i64 0, i64 3
store float %3, ptr %arrayidx7, align 4
-; CHECK: .param .b64 param0;
-; CHECK-NEXT: st.param.b64 [param0], %rd[[A_REG]]
-; CHECK-NEXT: .param .b64 param1;
-; CHECK-NEXT: st.param.b64 [param1], %rd[[SP_REG]]
-; CHECK-NEXT: call.uni callee,
+; CHECK-DAG: .param .b64 param0;
+; CHECK-DAG: .param .b64 param1;
+; CHECK-DAG: st.param.b64 [param0], %rd[[A_REG]]
+; CHECK-DAG: st.param.b64 [param1], %rd[[SP_REG]]
+; CHECK: call.uni callee,
call void @callee(ptr %a, ptr %buf) #2
ret void
diff --git a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll
index f67145d..483d48a 100644
--- a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll
+++ b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll
@@ -14,11 +14,11 @@ target triple = "nvptx64-nvidia-cuda"
%complex_half = type { half, half }
; CHECK: .param .align 2 .b8 param2[4];
-; CHECK: st.param.b16 [param2], %rs1;
-; CHECK: st.param.b16 [param2+2], %rs2;
; CHECK: .param .align 2 .b8 retval0[4];
-; CHECK-NEXT: prototype_0 : .callprototype (.param .align 2 .b8 _[4]) _ (.param .b32 _, .param .b32 _, .param .align 2 .b8 _[4]);
-; CHECK-NEXT: call (retval0),
+; CHECK-DAG: st.param.b16 [param2], %rs{{[0-9]+}};
+; CHECK-DAG: st.param.b16 [param2+2], %rs{{[0-9]+}};
+; CHECK: prototype_0 : .callprototype (.param .align 2 .b8 _[4]) _ (.param .b32 _, .param .b32 _, .param .align 2 .b8 _[4]);
+; CHECK: call (retval0),
define weak_odr void @foo() {
entry:
%call.i.i.i = tail call %"class.complex" @_Z20__spirv_GroupCMulKHRjjN5__spv12complex_halfE(i32 0, i32 0, ptr byval(%"class.complex") null)
@@ -36,10 +36,10 @@ define internal void @callee(ptr byval(%"class.complex") %byval_arg) {
}
define void @boom() {
%fp = call ptr @usefp(ptr @callee)
- ; CHECK: .param .align 2 .b8 param0[4];
- ; CHECK: st.param.b16 [param0], %rs1;
- ; CHECK: st.param.b16 [param0+2], %rs2;
- ; CHECK: .callprototype ()_ (.param .align 2 .b8 _[4]);
+ ; CHECK-DAG: .param .align 2 .b8 param0[4];
+ ; CHECK-DAG: st.param.b16 [param0], %rs{{[0-9]+}};
+ ; CHECK-DAG: st.param.b16 [param0+2], %rs{{[0-9]+}};
+ ; CHECK-DAG: .callprototype ()_ (.param .align 2 .b8 _[4]);
call void %fp(ptr byval(%"class.complex") null)
ret void
}
diff --git a/llvm/test/CodeGen/NVPTX/combine-mad.ll b/llvm/test/CodeGen/NVPTX/combine-mad.ll
index 2232810..da303b7 100644
--- a/llvm/test/CodeGen/NVPTX/combine-mad.ll
+++ b/llvm/test/CodeGen/NVPTX/combine-mad.ll
@@ -199,10 +199,10 @@ define i32 @test_mad_multi_use(i32 %a, i32 %b, i32 %c) {
; CHECK-NEXT: add.s32 %r5, %r3, %r4;
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .b32 param0;
-; CHECK-NEXT: st.param.b32 [param0], %r3;
; CHECK-NEXT: .param .b32 param1;
-; CHECK-NEXT: st.param.b32 [param1], %r5;
; CHECK-NEXT: .param .b32 retval0;
+; CHECK-NEXT: st.param.b32 [param0], %r3;
+; CHECK-NEXT: st.param.b32 [param1], %r5;
; CHECK-NEXT: call.uni (retval0), use, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r6, [retval0];
; CHECK-NEXT: } // callseq 0
diff --git a/llvm/test/CodeGen/NVPTX/compare-int.ll b/llvm/test/CodeGen/NVPTX/compare-int.ll
index b44ae47d..9338172d 100644
--- a/llvm/test/CodeGen/NVPTX/compare-int.ll
+++ b/llvm/test/CodeGen/NVPTX/compare-int.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
@@ -11,90 +12,180 @@
;;; i64
define i64 @icmp_eq_i64(i64 %a, i64 %b) {
-; CHECK: setp.eq.b64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_eq_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [icmp_eq_i64_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [icmp_eq_i64_param_1];
+; CHECK-NEXT: setp.eq.b64 %p1, %rd1, %rd2;
+; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
%cmp = icmp eq i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_ne_i64(i64 %a, i64 %b) {
-; CHECK: setp.ne.b64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ne_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [icmp_ne_i64_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [icmp_ne_i64_param_1];
+; CHECK-NEXT: setp.ne.b64 %p1, %rd1, %rd2;
+; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
%cmp = icmp ne i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_ugt_i64(i64 %a, i64 %b) {
-; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ugt_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [icmp_ugt_i64_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [icmp_ugt_i64_param_1];
+; CHECK-NEXT: setp.gt.u64 %p1, %rd1, %rd2;
+; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
%cmp = icmp ugt i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_uge_i64(i64 %a, i64 %b) {
-; CHECK: setp.ge.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_uge_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [icmp_uge_i64_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [icmp_uge_i64_param_1];
+; CHECK-NEXT: setp.ge.u64 %p1, %rd1, %rd2;
+; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
%cmp = icmp uge i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_ult_i64(i64 %a, i64 %b) {
-; CHECK: setp.lt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ult_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [icmp_ult_i64_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [icmp_ult_i64_param_1];
+; CHECK-NEXT: setp.lt.u64 %p1, %rd1, %rd2;
+; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
%cmp = icmp ult i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_ule_i64(i64 %a, i64 %b) {
-; CHECK: setp.le.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ule_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [icmp_ule_i64_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [icmp_ule_i64_param_1];
+; CHECK-NEXT: setp.le.u64 %p1, %rd1, %rd2;
+; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
%cmp = icmp ule i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_sgt_i64(i64 %a, i64 %b) {
-; CHECK: setp.gt.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sgt_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [icmp_sgt_i64_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [icmp_sgt_i64_param_1];
+; CHECK-NEXT: setp.gt.s64 %p1, %rd1, %rd2;
+; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
%cmp = icmp sgt i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_sge_i64(i64 %a, i64 %b) {
-; CHECK: setp.ge.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sge_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [icmp_sge_i64_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [icmp_sge_i64_param_1];
+; CHECK-NEXT: setp.ge.s64 %p1, %rd1, %rd2;
+; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
%cmp = icmp sge i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_slt_i64(i64 %a, i64 %b) {
-; CHECK: setp.lt.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_slt_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [icmp_slt_i64_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [icmp_slt_i64_param_1];
+; CHECK-NEXT: setp.lt.s64 %p1, %rd1, %rd2;
+; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
%cmp = icmp slt i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_sle_i64(i64 %a, i64 %b) {
-; CHECK: setp.le.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sle_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [icmp_sle_i64_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [icmp_sle_i64_param_1];
+; CHECK-NEXT: setp.le.s64 %p1, %rd1, %rd2;
+; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
%cmp = icmp sle i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
@@ -103,90 +194,180 @@ define i64 @icmp_sle_i64(i64 %a, i64 %b) {
;;; i32
define i32 @icmp_eq_i32(i32 %a, i32 %b) {
-; CHECK: setp.eq.b32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_eq_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [icmp_eq_i32_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [icmp_eq_i32_param_1];
+; CHECK-NEXT: setp.eq.b32 %p1, %r1, %r2;
+; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
%cmp = icmp eq i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_ne_i32(i32 %a, i32 %b) {
-; CHECK: setp.ne.b32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ne_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [icmp_ne_i32_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [icmp_ne_i32_param_1];
+; CHECK-NEXT: setp.ne.b32 %p1, %r1, %r2;
+; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
%cmp = icmp ne i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_ugt_i32(i32 %a, i32 %b) {
-; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ugt_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [icmp_ugt_i32_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [icmp_ugt_i32_param_1];
+; CHECK-NEXT: setp.gt.u32 %p1, %r1, %r2;
+; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
%cmp = icmp ugt i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_uge_i32(i32 %a, i32 %b) {
-; CHECK: setp.ge.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_uge_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [icmp_uge_i32_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [icmp_uge_i32_param_1];
+; CHECK-NEXT: setp.ge.u32 %p1, %r1, %r2;
+; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
%cmp = icmp uge i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_ult_i32(i32 %a, i32 %b) {
-; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ult_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [icmp_ult_i32_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [icmp_ult_i32_param_1];
+; CHECK-NEXT: setp.lt.u32 %p1, %r1, %r2;
+; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
%cmp = icmp ult i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_ule_i32(i32 %a, i32 %b) {
-; CHECK: setp.le.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ule_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [icmp_ule_i32_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [icmp_ule_i32_param_1];
+; CHECK-NEXT: setp.le.u32 %p1, %r1, %r2;
+; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
%cmp = icmp ule i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_sgt_i32(i32 %a, i32 %b) {
-; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sgt_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [icmp_sgt_i32_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [icmp_sgt_i32_param_1];
+; CHECK-NEXT: setp.gt.s32 %p1, %r1, %r2;
+; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
%cmp = icmp sgt i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_sge_i32(i32 %a, i32 %b) {
-; CHECK: setp.ge.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sge_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [icmp_sge_i32_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [icmp_sge_i32_param_1];
+; CHECK-NEXT: setp.ge.s32 %p1, %r1, %r2;
+; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
%cmp = icmp sge i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_slt_i32(i32 %a, i32 %b) {
-; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_slt_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [icmp_slt_i32_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [icmp_slt_i32_param_1];
+; CHECK-NEXT: setp.lt.s32 %p1, %r1, %r2;
+; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
%cmp = icmp slt i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_sle_i32(i32 %a, i32 %b) {
-; CHECK: setp.le.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sle_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [icmp_sle_i32_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [icmp_sle_i32_param_1];
+; CHECK-NEXT: setp.le.s32 %p1, %r1, %r2;
+; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
%cmp = icmp sle i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
@@ -196,90 +377,190 @@ define i32 @icmp_sle_i32(i32 %a, i32 %b) {
;;; i16
define i16 @icmp_eq_i16(i16 %a, i16 %b) {
-; CHECK: setp.eq.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_eq_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [icmp_eq_i16_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [icmp_eq_i16_param_1];
+; CHECK-NEXT: setp.eq.b16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp eq i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_ne_i16(i16 %a, i16 %b) {
-; CHECK: setp.ne.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ne_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [icmp_ne_i16_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [icmp_ne_i16_param_1];
+; CHECK-NEXT: setp.ne.b16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp ne i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_ugt_i16(i16 %a, i16 %b) {
-; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ugt_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [icmp_ugt_i16_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [icmp_ugt_i16_param_1];
+; CHECK-NEXT: setp.gt.u16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp ugt i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_uge_i16(i16 %a, i16 %b) {
-; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_uge_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [icmp_uge_i16_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [icmp_uge_i16_param_1];
+; CHECK-NEXT: setp.ge.u16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp uge i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_ult_i16(i16 %a, i16 %b) {
-; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ult_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [icmp_ult_i16_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [icmp_ult_i16_param_1];
+; CHECK-NEXT: setp.lt.u16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp ult i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_ule_i16(i16 %a, i16 %b) {
-; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ule_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [icmp_ule_i16_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [icmp_ule_i16_param_1];
+; CHECK-NEXT: setp.le.u16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp ule i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_sgt_i16(i16 %a, i16 %b) {
-; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sgt_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [icmp_sgt_i16_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [icmp_sgt_i16_param_1];
+; CHECK-NEXT: setp.gt.s16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp sgt i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_sge_i16(i16 %a, i16 %b) {
-; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sge_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [icmp_sge_i16_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [icmp_sge_i16_param_1];
+; CHECK-NEXT: setp.ge.s16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp sge i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_slt_i16(i16 %a, i16 %b) {
-; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_slt_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [icmp_slt_i16_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [icmp_slt_i16_param_1];
+; CHECK-NEXT: setp.lt.s16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp slt i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_sle_i16(i16 %a, i16 %b) {
-; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sle_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [icmp_sle_i16_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [icmp_sle_i16_param_1];
+; CHECK-NEXT: setp.le.s16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp sle i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
@@ -290,9 +571,19 @@ define i16 @icmp_sle_i16(i16 %a, i16 %b) {
define i8 @icmp_eq_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.eq.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_eq_i8(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rs1, [icmp_eq_i8_param_0];
+; CHECK-NEXT: ld.param.b8 %rs2, [icmp_eq_i8_param_1];
+; CHECK-NEXT: setp.eq.b16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp eq i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
@@ -300,9 +591,19 @@ define i8 @icmp_eq_i8(i8 %a, i8 %b) {
define i8 @icmp_ne_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.ne.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ne_i8(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rs1, [icmp_ne_i8_param_0];
+; CHECK-NEXT: ld.param.b8 %rs2, [icmp_ne_i8_param_1];
+; CHECK-NEXT: setp.ne.b16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp ne i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
@@ -310,9 +611,19 @@ define i8 @icmp_ne_i8(i8 %a, i8 %b) {
define i8 @icmp_ugt_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ugt_i8(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rs1, [icmp_ugt_i8_param_0];
+; CHECK-NEXT: ld.param.b8 %rs2, [icmp_ugt_i8_param_1];
+; CHECK-NEXT: setp.gt.u16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp ugt i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
@@ -320,9 +631,19 @@ define i8 @icmp_ugt_i8(i8 %a, i8 %b) {
define i8 @icmp_uge_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_uge_i8(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rs1, [icmp_uge_i8_param_0];
+; CHECK-NEXT: ld.param.b8 %rs2, [icmp_uge_i8_param_1];
+; CHECK-NEXT: setp.ge.u16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp uge i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
@@ -330,9 +651,19 @@ define i8 @icmp_uge_i8(i8 %a, i8 %b) {
define i8 @icmp_ult_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ult_i8(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rs1, [icmp_ult_i8_param_0];
+; CHECK-NEXT: ld.param.b8 %rs2, [icmp_ult_i8_param_1];
+; CHECK-NEXT: setp.lt.u16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp ult i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
@@ -340,9 +671,19 @@ define i8 @icmp_ult_i8(i8 %a, i8 %b) {
define i8 @icmp_ule_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ule_i8(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rs1, [icmp_ule_i8_param_0];
+; CHECK-NEXT: ld.param.b8 %rs2, [icmp_ule_i8_param_1];
+; CHECK-NEXT: setp.le.u16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp ule i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
@@ -350,9 +691,19 @@ define i8 @icmp_ule_i8(i8 %a, i8 %b) {
define i8 @icmp_sgt_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sgt_i8(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.s8 %rs1, [icmp_sgt_i8_param_0];
+; CHECK-NEXT: ld.param.s8 %rs2, [icmp_sgt_i8_param_1];
+; CHECK-NEXT: setp.gt.s16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp sgt i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
@@ -360,9 +711,19 @@ define i8 @icmp_sgt_i8(i8 %a, i8 %b) {
define i8 @icmp_sge_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sge_i8(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.s8 %rs1, [icmp_sge_i8_param_0];
+; CHECK-NEXT: ld.param.s8 %rs2, [icmp_sge_i8_param_1];
+; CHECK-NEXT: setp.ge.s16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp sge i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
@@ -370,9 +731,19 @@ define i8 @icmp_sge_i8(i8 %a, i8 %b) {
define i8 @icmp_slt_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_slt_i8(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.s8 %rs1, [icmp_slt_i8_param_0];
+; CHECK-NEXT: ld.param.s8 %rs2, [icmp_slt_i8_param_1];
+; CHECK-NEXT: setp.lt.s16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp slt i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
@@ -380,9 +751,19 @@ define i8 @icmp_slt_i8(i8 %a, i8 %b) {
define i8 @icmp_sle_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sle_i8(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.s8 %rs1, [icmp_sle_i8_param_0];
+; CHECK-NEXT: ld.param.s8 %rs2, [icmp_sle_i8_param_1];
+; CHECK-NEXT: setp.le.s16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp sle i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
diff --git a/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll b/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll
index d1b478d..48209a8 100644
--- a/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll
+++ b/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_90 | %ptxas-verify -arch=sm_90 %}
@@ -7,52 +8,203 @@ declare i64 @callee_variadic(ptr %p, ...);
define %struct.64 @test_return_type_mismatch(ptr %p) {
; CHECK-LABEL: test_return_type_mismatch(
-; CHECK: .param .align 1 .b8 retval0[8];
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<40>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd2, [test_return_type_mismatch_param_0];
+; CHECK-NEXT: { // callseq 0, 0
+; CHECK-NEXT: .param .b64 param0;
+; CHECK-NEXT: .param .align 1 .b8 retval0[8];
+; CHECK-NEXT: st.param.b64 [param0], %rd2;
; CHECK-NEXT: prototype_0 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _);
-; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_0;
+; CHECK-NEXT: mov.b64 %rd1, callee;
+; CHECK-NEXT: call (retval0), %rd1, (param0), prototype_0;
+; CHECK-NEXT: ld.param.b8 %rd3, [retval0+7];
+; CHECK-NEXT: ld.param.b8 %rd4, [retval0+6];
+; CHECK-NEXT: ld.param.b8 %rd5, [retval0+5];
+; CHECK-NEXT: ld.param.b8 %rd6, [retval0+4];
+; CHECK-NEXT: ld.param.b8 %rd7, [retval0+3];
+; CHECK-NEXT: ld.param.b8 %rd8, [retval0+2];
+; CHECK-NEXT: ld.param.b8 %rd9, [retval0+1];
+; CHECK-NEXT: ld.param.b8 %rd10, [retval0];
+; CHECK-NEXT: } // callseq 0
+; CHECK-NEXT: shl.b64 %rd13, %rd9, 8;
+; CHECK-NEXT: or.b64 %rd14, %rd13, %rd10;
+; CHECK-NEXT: shl.b64 %rd16, %rd8, 16;
+; CHECK-NEXT: shl.b64 %rd18, %rd7, 24;
+; CHECK-NEXT: or.b64 %rd19, %rd18, %rd16;
+; CHECK-NEXT: or.b64 %rd20, %rd19, %rd14;
+; CHECK-NEXT: shl.b64 %rd23, %rd5, 8;
+; CHECK-NEXT: or.b64 %rd24, %rd23, %rd6;
+; CHECK-NEXT: shl.b64 %rd26, %rd4, 16;
+; CHECK-NEXT: shl.b64 %rd28, %rd3, 24;
+; CHECK-NEXT: or.b64 %rd29, %rd28, %rd26;
+; CHECK-NEXT: or.b64 %rd30, %rd29, %rd24;
+; CHECK-NEXT: shl.b64 %rd31, %rd30, 32;
+; CHECK-NEXT: or.b64 %rd32, %rd31, %rd20;
+; CHECK-NEXT: st.param.b8 [func_retval0], %rd10;
+; CHECK-NEXT: shr.u64 %rd33, %rd32, 56;
+; CHECK-NEXT: st.param.b8 [func_retval0+7], %rd33;
+; CHECK-NEXT: shr.u64 %rd34, %rd32, 48;
+; CHECK-NEXT: st.param.b8 [func_retval0+6], %rd34;
+; CHECK-NEXT: shr.u64 %rd35, %rd32, 40;
+; CHECK-NEXT: st.param.b8 [func_retval0+5], %rd35;
+; CHECK-NEXT: shr.u64 %rd36, %rd32, 32;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], %rd36;
+; CHECK-NEXT: shr.u64 %rd37, %rd32, 24;
+; CHECK-NEXT: st.param.b8 [func_retval0+3], %rd37;
+; CHECK-NEXT: shr.u64 %rd38, %rd32, 16;
+; CHECK-NEXT: st.param.b8 [func_retval0+2], %rd38;
+; CHECK-NEXT: shr.u64 %rd39, %rd32, 8;
+; CHECK-NEXT: st.param.b8 [func_retval0+1], %rd39;
+; CHECK-NEXT: ret;
%ret = call %struct.64 @callee(ptr %p)
ret %struct.64 %ret
}
define i64 @test_param_type_mismatch(ptr %p) {
; CHECK-LABEL: test_param_type_mismatch(
-; CHECK: .param .b64 retval0;
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: { // callseq 1, 0
+; CHECK-NEXT: .param .b64 param0;
+; CHECK-NEXT: .param .b64 retval0;
; CHECK-NEXT: prototype_1 : .callprototype (.param .b64 _) _ (.param .b64 _);
-; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_1;
+; CHECK-NEXT: st.param.b64 [param0], 7;
+; CHECK-NEXT: mov.b64 %rd1, callee;
+; CHECK-NEXT: call (retval0), %rd1, (param0), prototype_1;
+; CHECK-NEXT: ld.param.b64 %rd2, [retval0];
+; CHECK-NEXT: } // callseq 1
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
+; CHECK-NEXT: ret;
%ret = call i64 @callee(i64 7)
ret i64 %ret
}
define i64 @test_param_count_mismatch(ptr %p) {
; CHECK-LABEL: test_param_count_mismatch(
-; CHECK: .param .b64 retval0;
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd2, [test_param_count_mismatch_param_0];
+; CHECK-NEXT: { // callseq 2, 0
+; CHECK-NEXT: .param .b64 param0;
+; CHECK-NEXT: .param .b64 param1;
+; CHECK-NEXT: .param .b64 retval0;
+; CHECK-NEXT: st.param.b64 [param0], %rd2;
; CHECK-NEXT: prototype_2 : .callprototype (.param .b64 _) _ (.param .b64 _, .param .b64 _);
-; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0, param1), prototype_2;
+; CHECK-NEXT: st.param.b64 [param1], 7;
+; CHECK-NEXT: mov.b64 %rd1, callee;
+; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_2;
+; CHECK-NEXT: ld.param.b64 %rd3, [retval0];
+; CHECK-NEXT: } // callseq 2
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
%ret = call i64 @callee(ptr %p, i64 7)
ret i64 %ret
}
define %struct.64 @test_return_type_mismatch_variadic(ptr %p) {
; CHECK-LABEL: test_return_type_mismatch_variadic(
-; CHECK: .param .align 1 .b8 retval0[8];
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<40>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd2, [test_return_type_mismatch_variadic_param_0];
+; CHECK-NEXT: { // callseq 3, 0
+; CHECK-NEXT: .param .b64 param0;
+; CHECK-NEXT: .param .align 1 .b8 retval0[8];
+; CHECK-NEXT: st.param.b64 [param0], %rd2;
; CHECK-NEXT: prototype_3 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _);
-; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_3;
+; CHECK-NEXT: mov.b64 %rd1, callee_variadic;
+; CHECK-NEXT: call (retval0), %rd1, (param0), prototype_3;
+; CHECK-NEXT: ld.param.b8 %rd3, [retval0+7];
+; CHECK-NEXT: ld.param.b8 %rd4, [retval0+6];
+; CHECK-NEXT: ld.param.b8 %rd5, [retval0+5];
+; CHECK-NEXT: ld.param.b8 %rd6, [retval0+4];
+; CHECK-NEXT: ld.param.b8 %rd7, [retval0+3];
+; CHECK-NEXT: ld.param.b8 %rd8, [retval0+2];
+; CHECK-NEXT: ld.param.b8 %rd9, [retval0+1];
+; CHECK-NEXT: ld.param.b8 %rd10, [retval0];
+; CHECK-NEXT: } // callseq 3
+; CHECK-NEXT: shl.b64 %rd13, %rd9, 8;
+; CHECK-NEXT: or.b64 %rd14, %rd13, %rd10;
+; CHECK-NEXT: shl.b64 %rd16, %rd8, 16;
+; CHECK-NEXT: shl.b64 %rd18, %rd7, 24;
+; CHECK-NEXT: or.b64 %rd19, %rd18, %rd16;
+; CHECK-NEXT: or.b64 %rd20, %rd19, %rd14;
+; CHECK-NEXT: shl.b64 %rd23, %rd5, 8;
+; CHECK-NEXT: or.b64 %rd24, %rd23, %rd6;
+; CHECK-NEXT: shl.b64 %rd26, %rd4, 16;
+; CHECK-NEXT: shl.b64 %rd28, %rd3, 24;
+; CHECK-NEXT: or.b64 %rd29, %rd28, %rd26;
+; CHECK-NEXT: or.b64 %rd30, %rd29, %rd24;
+; CHECK-NEXT: shl.b64 %rd31, %rd30, 32;
+; CHECK-NEXT: or.b64 %rd32, %rd31, %rd20;
+; CHECK-NEXT: st.param.b8 [func_retval0], %rd10;
+; CHECK-NEXT: shr.u64 %rd33, %rd32, 56;
+; CHECK-NEXT: st.param.b8 [func_retval0+7], %rd33;
+; CHECK-NEXT: shr.u64 %rd34, %rd32, 48;
+; CHECK-NEXT: st.param.b8 [func_retval0+6], %rd34;
+; CHECK-NEXT: shr.u64 %rd35, %rd32, 40;
+; CHECK-NEXT: st.param.b8 [func_retval0+5], %rd35;
+; CHECK-NEXT: shr.u64 %rd36, %rd32, 32;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], %rd36;
+; CHECK-NEXT: shr.u64 %rd37, %rd32, 24;
+; CHECK-NEXT: st.param.b8 [func_retval0+3], %rd37;
+; CHECK-NEXT: shr.u64 %rd38, %rd32, 16;
+; CHECK-NEXT: st.param.b8 [func_retval0+2], %rd38;
+; CHECK-NEXT: shr.u64 %rd39, %rd32, 8;
+; CHECK-NEXT: st.param.b8 [func_retval0+1], %rd39;
+; CHECK-NEXT: ret;
%ret = call %struct.64 (ptr, ...) @callee_variadic(ptr %p)
ret %struct.64 %ret
}
define i64 @test_param_type_mismatch_variadic(ptr %p) {
; CHECK-LABEL: test_param_type_mismatch_variadic(
-; CHECK: .param .b64 retval0;
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_param_type_mismatch_variadic_param_0];
+; CHECK-NEXT: { // callseq 4, 0
+; CHECK-NEXT: .param .align 8 .b8 param1[8];
+; CHECK-NEXT: .param .b64 param0;
+; CHECK-NEXT: .param .b64 retval0;
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
+; CHECK-NEXT: st.param.b64 [param1], 7;
; CHECK-NEXT: call.uni (retval0), callee_variadic, (param0, param1);
+; CHECK-NEXT: ld.param.b64 %rd2, [retval0];
+; CHECK-NEXT: } // callseq 4
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
+; CHECK-NEXT: ret;
%ret = call i64 (ptr, ...) @callee_variadic(ptr %p, i64 7)
ret i64 %ret
}
define i64 @test_param_count_mismatch_variadic(ptr %p) {
; CHECK-LABEL: test_param_count_mismatch_variadic(
-; CHECK: .param .b64 retval0;
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_param_count_mismatch_variadic_param_0];
+; CHECK-NEXT: { // callseq 5, 0
+; CHECK-NEXT: .param .align 8 .b8 param1[8];
+; CHECK-NEXT: .param .b64 param0;
+; CHECK-NEXT: .param .b64 retval0;
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
+; CHECK-NEXT: st.param.b64 [param1], 7;
; CHECK-NEXT: call.uni (retval0), callee_variadic, (param0, param1);
+; CHECK-NEXT: ld.param.b64 %rd2, [retval0];
+; CHECK-NEXT: } // callseq 5
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
+; CHECK-NEXT: ret;
%ret = call i64 (ptr, ...) @callee_variadic(ptr %p, i64 7)
ret i64 %ret
}
diff --git a/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll b/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
index 4d2ba7d..06fb8d2 100644
--- a/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
+++ b/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
@@ -22,8 +22,8 @@ define i32 @test_dynamic_stackalloc(i64 %n) {
; CHECK-32-NEXT: cvta.local.u32 %r5, %r4;
; CHECK-32-NEXT: { // callseq 0, 0
; CHECK-32-NEXT: .param .b32 param0;
-; CHECK-32-NEXT: st.param.b32 [param0], %r5;
; CHECK-32-NEXT: .param .b32 retval0;
+; CHECK-32-NEXT: st.param.b32 [param0], %r5;
; CHECK-32-NEXT: call.uni (retval0), bar, (param0);
; CHECK-32-NEXT: ld.param.b32 %r6, [retval0];
; CHECK-32-NEXT: } // callseq 0
@@ -43,8 +43,8 @@ define i32 @test_dynamic_stackalloc(i64 %n) {
; CHECK-64-NEXT: cvta.local.u64 %rd5, %rd4;
; CHECK-64-NEXT: { // callseq 0, 0
; CHECK-64-NEXT: .param .b64 param0;
-; CHECK-64-NEXT: st.param.b64 [param0], %rd5;
; CHECK-64-NEXT: .param .b32 retval0;
+; CHECK-64-NEXT: st.param.b64 [param0], %rd5;
; CHECK-64-NEXT: call.uni (retval0), bar, (param0);
; CHECK-64-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-64-NEXT: } // callseq 0
diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
index 8918fbd..d4fcea3 100644
--- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -462,10 +462,10 @@ define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NEXT: ld.param.b32 %r1, [test_call_param_0];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: .param .align 4 .b8 param1[4];
-; CHECK-NEXT: st.param.b32 [param1], %r2;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
+; CHECK-NEXT: st.param.b32 [param1], %r2;
+; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 0
@@ -485,10 +485,10 @@ define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NEXT: ld.param.b32 %r1, [test_call_flipped_param_0];
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.b32 [param0], %r2;
; CHECK-NEXT: .param .align 4 .b8 param1[4];
-; CHECK-NEXT: st.param.b32 [param1], %r1;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
+; CHECK-NEXT: st.param.b32 [param1], %r1;
+; CHECK-NEXT: st.param.b32 [param0], %r2;
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 1
@@ -508,10 +508,10 @@ define <2 x half> @test_tailcall_flipped(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NEXT: ld.param.b32 %r1, [test_tailcall_flipped_param_0];
; CHECK-NEXT: { // callseq 2, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.b32 [param0], %r2;
; CHECK-NEXT: .param .align 4 .b8 param1[4];
-; CHECK-NEXT: st.param.b32 [param1], %r1;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
+; CHECK-NEXT: st.param.b32 [param1], %r1;
+; CHECK-NEXT: st.param.b32 [param0], %r2;
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 2
diff --git a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
index 30afd69..b84a0ec 100644
--- a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
@@ -859,10 +859,10 @@ define <2 x float> @test_call(<2 x float> %a, <2 x float> %b) #0 {
; CHECK-NEXT: ld.param.b64 %rd1, [test_call_param_0];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: .param .align 8 .b8 param1[8];
-; CHECK-NEXT: st.param.b64 [param1], %rd2;
; CHECK-NEXT: .param .align 8 .b8 retval0[8];
+; CHECK-NEXT: st.param.b64 [param1], %rd2;
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b64 %rd3, [retval0];
; CHECK-NEXT: } // callseq 0
@@ -882,10 +882,10 @@ define <2 x float> @test_call_flipped(<2 x float> %a, <2 x float> %b) #0 {
; CHECK-NEXT: ld.param.b64 %rd1, [test_call_flipped_param_0];
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.b64 [param0], %rd2;
; CHECK-NEXT: .param .align 8 .b8 param1[8];
-; CHECK-NEXT: st.param.b64 [param1], %rd1;
; CHECK-NEXT: .param .align 8 .b8 retval0[8];
+; CHECK-NEXT: st.param.b64 [param1], %rd1;
+; CHECK-NEXT: st.param.b64 [param0], %rd2;
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b64 %rd3, [retval0];
; CHECK-NEXT: } // callseq 1
@@ -905,10 +905,10 @@ define <2 x float> @test_tailcall_flipped(<2 x float> %a, <2 x float> %b) #0 {
; CHECK-NEXT: ld.param.b64 %rd1, [test_tailcall_flipped_param_0];
; CHECK-NEXT: { // callseq 2, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.b64 [param0], %rd2;
; CHECK-NEXT: .param .align 8 .b8 param1[8];
-; CHECK-NEXT: st.param.b64 [param1], %rd1;
; CHECK-NEXT: .param .align 8 .b8 retval0[8];
+; CHECK-NEXT: st.param.b64 [param1], %rd1;
+; CHECK-NEXT: st.param.b64 [param0], %rd2;
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b64 %rd3, [retval0];
; CHECK-NEXT: } // callseq 2
diff --git a/llvm/test/CodeGen/NVPTX/fma.ll b/llvm/test/CodeGen/NVPTX/fma.ll
index 5aa12b0..87274aa 100644
--- a/llvm/test/CodeGen/NVPTX/fma.ll
+++ b/llvm/test/CodeGen/NVPTX/fma.ll
@@ -36,10 +36,10 @@ define ptx_device float @t2_f32(float %x, float %y, float %z, float %w) {
; CHECK-NEXT: fma.rn.f32 %r6, %r1, %r2, %r5;
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .b32 param0;
-; CHECK-NEXT: st.param.b32 [param0], %r4;
; CHECK-NEXT: .param .b32 param1;
-; CHECK-NEXT: st.param.b32 [param1], %r6;
; CHECK-NEXT: .param .b32 retval0;
+; CHECK-NEXT: st.param.b32 [param1], %r6;
+; CHECK-NEXT: st.param.b32 [param0], %r4;
; CHECK-NEXT: call.uni (retval0), dummy_f32, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r7, [retval0];
; CHECK-NEXT: } // callseq 0
@@ -83,10 +83,10 @@ define ptx_device double @t2_f64(double %x, double %y, double %z, double %w) {
; CHECK-NEXT: fma.rn.f64 %rd6, %rd1, %rd2, %rd5;
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .b64 param0;
-; CHECK-NEXT: st.param.b64 [param0], %rd4;
; CHECK-NEXT: .param .b64 param1;
-; CHECK-NEXT: st.param.b64 [param1], %rd6;
; CHECK-NEXT: .param .b64 retval0;
+; CHECK-NEXT: st.param.b64 [param1], %rd6;
+; CHECK-NEXT: st.param.b64 [param0], %rd4;
; CHECK-NEXT: call.uni (retval0), dummy_f64, (param0, param1);
; CHECK-NEXT: ld.param.b64 %rd7, [retval0];
; CHECK-NEXT: } // callseq 1
diff --git a/llvm/test/CodeGen/NVPTX/forward-ld-param.ll b/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
index ed8f6b4..636e12b 100644
--- a/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
+++ b/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
@@ -64,9 +64,9 @@ define void @test_ld_param_byval(ptr byval(i32) %a) {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [test_ld_param_byval_param_0];
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
+; CHECK-NEXT: ld.param.b32 %r1, [test_ld_param_byval_param_0];
; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni byval_user, (param0);
; CHECK-NEXT: } // callseq 1
diff --git a/llvm/test/CodeGen/NVPTX/i128-param.ll b/llvm/test/CodeGen/NVPTX/i128-param.ll
index 4f4c2fe..79abca0 100644
--- a/llvm/test/CodeGen/NVPTX/i128-param.ll
+++ b/llvm/test/CodeGen/NVPTX/i128-param.ll
@@ -29,11 +29,11 @@ start:
; CHECK-DAG: ld.param.v2.b64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_kernel_param_1];
; CHECK: { // callseq [[CALLSEQ_ID:[0-9]]], 0
- ; CHECK: .param .align 16 .b8 param0[16];
- ; CHECK-NEXT: st.param.v2.b64 [param0], {%[[REG0]], %[[REG1]]}
- ; CHECK: .param .align 16 .b8 param1[16];
- ; CHECK-NEXT: st.param.v2.b64 [param1], {%[[REG2]], %[[REG3]]}
- ; CHECK: } // callseq [[CALLSEQ_ID]]
+ ; CHECK-DAG: .param .align 16 .b8 param0[16];
+ ; CHECK-DAG: .param .align 16 .b8 param1[16];
+ ; CHECK-DAG: st.param.v2.b64 [param0], {%[[REG0]], %[[REG1]]}
+ ; CHECK-DAG: st.param.v2.b64 [param1], {%[[REG2]], %[[REG3]]}
+ ; CHECK: } // callseq [[CALLSEQ_ID]]
call void @callee(i128 %0, i128 %1, ptr %2)
ret void
@@ -48,11 +48,11 @@ start:
; CHECK-DAG: ld.param.v2.b64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_func_param_1]
; CHECK: { // callseq [[CALLSEQ_ID:[0-9]]], 0
- ; CHECK: .param .align 16 .b8 param0[16];
- ; CHECK: st.param.v2.b64 [param0], {%[[REG0]], %[[REG1]]}
- ; CHECK: .param .align 16 .b8 param1[16];
- ; CHECK: st.param.v2.b64 [param1], {%[[REG2]], %[[REG3]]}
- ; CHECK: } // callseq [[CALLSEQ_ID]]
+ ; CHECK-DAG: .param .align 16 .b8 param0[16];
+ ; CHECK-DAG: .param .align 16 .b8 param1[16];
+ ; CHECK-DAG: st.param.v2.b64 [param0], {%[[REG0]], %[[REG1]]}
+ ; CHECK-DAG: st.param.v2.b64 [param1], {%[[REG2]], %[[REG3]]}
+ ; CHECK: } // callseq [[CALLSEQ_ID]]
call void @callee(i128 %0, i128 %1, ptr %2)
ret void
diff --git a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
index 2b7a06c..74136bb 100644
--- a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
@@ -642,10 +642,10 @@ define <2 x i16> @test_call(<2 x i16> %a, <2 x i16> %b) #0 {
; COMMON-NEXT: ld.param.b32 %r1, [test_call_param_0];
; COMMON-NEXT: { // callseq 0, 0
; COMMON-NEXT: .param .align 4 .b8 param0[4];
-; COMMON-NEXT: st.param.b32 [param0], %r1;
; COMMON-NEXT: .param .align 4 .b8 param1[4];
-; COMMON-NEXT: st.param.b32 [param1], %r2;
; COMMON-NEXT: .param .align 4 .b8 retval0[4];
+; COMMON-NEXT: st.param.b32 [param1], %r2;
+; COMMON-NEXT: st.param.b32 [param0], %r1;
; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1);
; COMMON-NEXT: ld.param.b32 %r3, [retval0];
; COMMON-NEXT: } // callseq 0
@@ -665,10 +665,10 @@ define <2 x i16> @test_call_flipped(<2 x i16> %a, <2 x i16> %b) #0 {
; COMMON-NEXT: ld.param.b32 %r1, [test_call_flipped_param_0];
; COMMON-NEXT: { // callseq 1, 0
; COMMON-NEXT: .param .align 4 .b8 param0[4];
-; COMMON-NEXT: st.param.b32 [param0], %r2;
; COMMON-NEXT: .param .align 4 .b8 param1[4];
-; COMMON-NEXT: st.param.b32 [param1], %r1;
; COMMON-NEXT: .param .align 4 .b8 retval0[4];
+; COMMON-NEXT: st.param.b32 [param1], %r1;
+; COMMON-NEXT: st.param.b32 [param0], %r2;
; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1);
; COMMON-NEXT: ld.param.b32 %r3, [retval0];
; COMMON-NEXT: } // callseq 1
@@ -688,10 +688,10 @@ define <2 x i16> @test_tailcall_flipped(<2 x i16> %a, <2 x i16> %b) #0 {
; COMMON-NEXT: ld.param.b32 %r1, [test_tailcall_flipped_param_0];
; COMMON-NEXT: { // callseq 2, 0
; COMMON-NEXT: .param .align 4 .b8 param0[4];
-; COMMON-NEXT: st.param.b32 [param0], %r2;
; COMMON-NEXT: .param .align 4 .b8 param1[4];
-; COMMON-NEXT: st.param.b32 [param1], %r1;
; COMMON-NEXT: .param .align 4 .b8 retval0[4];
+; COMMON-NEXT: st.param.b32 [param1], %r1;
+; COMMON-NEXT: st.param.b32 [param0], %r2;
; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1);
; COMMON-NEXT: ld.param.b32 %r3, [retval0];
; COMMON-NEXT: } // callseq 2
diff --git a/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll
index 3edd4e4..98f94bb 100644
--- a/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll
@@ -1,42 +1,107 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -mattr=+ptx80 \
-; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
-; RUN: | FileCheck %s
-; RUN: %if ptxas %{ \
-; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -asm-verbose=false \
-; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
-; RUN: | %ptxas-verify -arch=sm_90 \
+; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \
+; RUN: -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=O0,COMMON
+; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=O3,COMMON
+; RUN: %if ptxas %{ \
+; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \
+; RUN: -verify-machineinstrs -O0 \
+; RUN: | %ptxas-verify -arch=sm_90 \
+; RUN: %}
+; RUN: %if ptxas %{ \
+; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \
+; RUN: -verify-machineinstrs \
+; RUN: | %ptxas-verify -arch=sm_90 \
; RUN: %}
+target triple = "nvptx64-nvidia-cuda"
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
define i16 @test_bitcast_2xi8_i16(<2 x i8> %a) {
-; CHECK-LABEL: test_bitcast_2xi8_i16(
-; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<5>;
-; CHECK-NEXT: .reg .b32 %r<3>;
-; CHECK-EMPTY:
-; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_bitcast_2xi8_i16_param_0];
-; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
-; CHECK-NEXT: shl.b16 %rs3, %rs2, 8;
-; CHECK-NEXT: or.b16 %rs4, %rs1, %rs3;
-; CHECK-NEXT: cvt.u32.u16 %r2, %rs4;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
-; CHECK-NEXT: ret;
+; O0-LABEL: test_bitcast_2xi8_i16(
+; O0: {
+; O0-NEXT: .reg .b16 %rs<5>;
+; O0-NEXT: .reg .b32 %r<3>;
+; O0-EMPTY:
+; O0-NEXT: // %bb.0:
+; O0-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_bitcast_2xi8_i16_param_0];
+; O0-NEXT: mov.b32 %r1, {%rs1, %rs2};
+; O0-NEXT: shl.b16 %rs3, %rs2, 8;
+; O0-NEXT: or.b16 %rs4, %rs1, %rs3;
+; O0-NEXT: cvt.u32.u16 %r2, %rs4;
+; O0-NEXT: st.param.b32 [func_retval0], %r2;
+; O0-NEXT: ret;
+;
+; O3-LABEL: test_bitcast_2xi8_i16(
+; O3: {
+; O3-NEXT: .reg .b32 %r<2>;
+; O3-EMPTY:
+; O3-NEXT: // %bb.0:
+; O3-NEXT: ld.param.b16 %r1, [test_bitcast_2xi8_i16_param_0];
+; O3-NEXT: st.param.b32 [func_retval0], %r1;
+; O3-NEXT: ret;
%res = bitcast <2 x i8> %a to i16
ret i16 %res
}
define <2 x i8> @test_bitcast_i16_2xi8(i16 %a) {
-; CHECK-LABEL: test_bitcast_i16_2xi8(
-; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
-; CHECK-EMPTY:
-; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0];
-; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
-; CHECK-NEXT: ret;
+; O0-LABEL: test_bitcast_i16_2xi8(
+; O0: {
+; O0-NEXT: .reg .b16 %rs<2>;
+; O0-EMPTY:
+; O0-NEXT: // %bb.0:
+; O0-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0];
+; O0-NEXT: st.param.b16 [func_retval0], %rs1;
+; O0-NEXT: ret;
+;
+; O3-LABEL: test_bitcast_i16_2xi8(
+; O3: {
+; O3-NEXT: .reg .b16 %rs<2>;
+; O3-EMPTY:
+; O3-NEXT: // %bb.0:
+; O3-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0];
+; O3-NEXT: st.param.b16 [func_retval0], %rs1;
+; O3-NEXT: ret;
%res = bitcast i16 %a to <2 x i8>
ret <2 x i8> %res
}
+
+define <2 x i8> @test_call_2xi8(<2 x i8> %a) {
+; O0-LABEL: test_call_2xi8(
+; O0: {
+; O0-NEXT: .reg .b16 %rs<7>;
+; O0-NEXT: .reg .b32 %r<2>;
+; O0-EMPTY:
+; O0-NEXT: // %bb.0:
+; O0-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_call_2xi8_param_0];
+; O0-NEXT: mov.b32 %r1, {%rs1, %rs2};
+; O0-NEXT: { // callseq 0, 0
+; O0-NEXT: .param .align 2 .b8 param0[2];
+; O0-NEXT: .param .align 2 .b8 retval0[2];
+; O0-NEXT: st.param.v2.b8 [param0], {%rs1, %rs2};
+; O0-NEXT: call.uni (retval0), test_call_2xi8, (param0);
+; O0-NEXT: ld.param.v2.b8 {%rs3, %rs4}, [retval0];
+; O0-NEXT: } // callseq 0
+; O0-NEXT: st.param.v2.b8 [func_retval0], {%rs3, %rs4};
+; O0-NEXT: ret;
+;
+; O3-LABEL: test_call_2xi8(
+; O3: {
+; O3-NEXT: .reg .b16 %rs<7>;
+; O3-EMPTY:
+; O3-NEXT: // %bb.0:
+; O3-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_call_2xi8_param_0];
+; O3-NEXT: { // callseq 0, 0
+; O3-NEXT: .param .align 2 .b8 param0[2];
+; O3-NEXT: .param .align 2 .b8 retval0[2];
+; O3-NEXT: st.param.v2.b8 [param0], {%rs1, %rs2};
+; O3-NEXT: call.uni (retval0), test_call_2xi8, (param0);
+; O3-NEXT: ld.param.v2.b8 {%rs3, %rs4}, [retval0];
+; O3-NEXT: } // callseq 0
+; O3-NEXT: st.param.v2.b8 [func_retval0], {%rs3, %rs4};
+; O3-NEXT: ret;
+ %res = call <2 x i8> @test_call_2xi8(<2 x i8> %a)
+ ret <2 x i8> %res
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; COMMON: {{.*}}
diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
index da99cec..06c2cc8 100644
--- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
@@ -1273,10 +1273,10 @@ define <4 x i8> @test_call(<4 x i8> %a, <4 x i8> %b) #0 {
; O0-NEXT: ld.param.b32 %r1, [test_call_param_0];
; O0-NEXT: { // callseq 0, 0
; O0-NEXT: .param .align 4 .b8 param0[4];
-; O0-NEXT: st.param.b32 [param0], %r1;
; O0-NEXT: .param .align 4 .b8 param1[4];
-; O0-NEXT: st.param.b32 [param1], %r2;
; O0-NEXT: .param .align 4 .b8 retval0[4];
+; O0-NEXT: st.param.b32 [param1], %r2;
+; O0-NEXT: st.param.b32 [param0], %r1;
; O0-NEXT: call.uni (retval0), test_callee, (param0, param1);
; O0-NEXT: ld.param.b32 %r3, [retval0];
; O0-NEXT: } // callseq 0
@@ -1289,13 +1289,13 @@ define <4 x i8> @test_call(<4 x i8> %a, <4 x i8> %b) #0 {
; O3-EMPTY:
; O3-NEXT: // %bb.0:
; O3-NEXT: ld.param.b32 %r1, [test_call_param_0];
-; O3-NEXT: ld.param.b32 %r2, [test_call_param_1];
; O3-NEXT: { // callseq 0, 0
; O3-NEXT: .param .align 4 .b8 param0[4];
-; O3-NEXT: st.param.b32 [param0], %r1;
; O3-NEXT: .param .align 4 .b8 param1[4];
-; O3-NEXT: st.param.b32 [param1], %r2;
; O3-NEXT: .param .align 4 .b8 retval0[4];
+; O3-NEXT: ld.param.b32 %r2, [test_call_param_1];
+; O3-NEXT: st.param.b32 [param1], %r2;
+; O3-NEXT: st.param.b32 [param0], %r1;
; O3-NEXT: call.uni (retval0), test_callee, (param0, param1);
; O3-NEXT: ld.param.b32 %r3, [retval0];
; O3-NEXT: } // callseq 0
@@ -1315,10 +1315,10 @@ define <4 x i8> @test_call_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
; O0-NEXT: ld.param.b32 %r1, [test_call_flipped_param_0];
; O0-NEXT: { // callseq 1, 0
; O0-NEXT: .param .align 4 .b8 param0[4];
-; O0-NEXT: st.param.b32 [param0], %r2;
; O0-NEXT: .param .align 4 .b8 param1[4];
-; O0-NEXT: st.param.b32 [param1], %r1;
; O0-NEXT: .param .align 4 .b8 retval0[4];
+; O0-NEXT: st.param.b32 [param1], %r1;
+; O0-NEXT: st.param.b32 [param0], %r2;
; O0-NEXT: call.uni (retval0), test_callee, (param0, param1);
; O0-NEXT: ld.param.b32 %r3, [retval0];
; O0-NEXT: } // callseq 1
@@ -1331,13 +1331,13 @@ define <4 x i8> @test_call_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
; O3-EMPTY:
; O3-NEXT: // %bb.0:
; O3-NEXT: ld.param.b32 %r1, [test_call_flipped_param_0];
-; O3-NEXT: ld.param.b32 %r2, [test_call_flipped_param_1];
; O3-NEXT: { // callseq 1, 0
; O3-NEXT: .param .align 4 .b8 param0[4];
-; O3-NEXT: st.param.b32 [param0], %r2;
; O3-NEXT: .param .align 4 .b8 param1[4];
-; O3-NEXT: st.param.b32 [param1], %r1;
; O3-NEXT: .param .align 4 .b8 retval0[4];
+; O3-NEXT: st.param.b32 [param1], %r1;
+; O3-NEXT: ld.param.b32 %r2, [test_call_flipped_param_1];
+; O3-NEXT: st.param.b32 [param0], %r2;
; O3-NEXT: call.uni (retval0), test_callee, (param0, param1);
; O3-NEXT: ld.param.b32 %r3, [retval0];
; O3-NEXT: } // callseq 1
@@ -1357,10 +1357,10 @@ define <4 x i8> @test_tailcall_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
; O0-NEXT: ld.param.b32 %r1, [test_tailcall_flipped_param_0];
; O0-NEXT: { // callseq 2, 0
; O0-NEXT: .param .align 4 .b8 param0[4];
-; O0-NEXT: st.param.b32 [param0], %r2;
; O0-NEXT: .param .align 4 .b8 param1[4];
-; O0-NEXT: st.param.b32 [param1], %r1;
; O0-NEXT: .param .align 4 .b8 retval0[4];
+; O0-NEXT: st.param.b32 [param1], %r1;
+; O0-NEXT: st.param.b32 [param0], %r2;
; O0-NEXT: call.uni (retval0), test_callee, (param0, param1);
; O0-NEXT: ld.param.b32 %r3, [retval0];
; O0-NEXT: } // callseq 2
@@ -1373,13 +1373,13 @@ define <4 x i8> @test_tailcall_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
; O3-EMPTY:
; O3-NEXT: // %bb.0:
; O3-NEXT: ld.param.b32 %r1, [test_tailcall_flipped_param_0];
-; O3-NEXT: ld.param.b32 %r2, [test_tailcall_flipped_param_1];
; O3-NEXT: { // callseq 2, 0
; O3-NEXT: .param .align 4 .b8 param0[4];
-; O3-NEXT: st.param.b32 [param0], %r2;
; O3-NEXT: .param .align 4 .b8 param1[4];
-; O3-NEXT: st.param.b32 [param1], %r1;
; O3-NEXT: .param .align 4 .b8 retval0[4];
+; O3-NEXT: st.param.b32 [param1], %r1;
+; O3-NEXT: ld.param.b32 %r2, [test_tailcall_flipped_param_1];
+; O3-NEXT: st.param.b32 [param0], %r2;
; O3-NEXT: call.uni (retval0), test_callee, (param0, param1);
; O3-NEXT: ld.param.b32 %r3, [retval0];
; O3-NEXT: } // callseq 2
diff --git a/llvm/test/CodeGen/NVPTX/idioms.ll b/llvm/test/CodeGen/NVPTX/idioms.ll
index be84f9b..a3bf892 100644
--- a/llvm/test/CodeGen/NVPTX/idioms.ll
+++ b/llvm/test/CodeGen/NVPTX/idioms.ll
@@ -173,8 +173,8 @@ define %struct.S16 @i32_to_2xi16_shr(i32 noundef %i){
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: shr.s32 %r2, %r1, 16;
; CHECK-NEXT: shr.u32 %r3, %r2, 16;
-; CHECK-NEXT: st.param.b16 [func_retval0], %r2;
; CHECK-NEXT: st.param.b16 [func_retval0+2], %r3;
+; CHECK-NEXT: st.param.b16 [func_retval0], %r2;
; CHECK-NEXT: ret;
call void @escape_int(i32 %i); // Force %i to be loaded completely.
%i1 = ashr i32 %i, 16
diff --git a/llvm/test/CodeGen/NVPTX/indirect_byval.ll b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
index eae0321..782e672 100644
--- a/llvm/test/CodeGen/NVPTX/indirect_byval.ll
+++ b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
@@ -23,15 +23,15 @@ define internal i32 @foo() {
; CHECK-NEXT: mov.b64 %SPL, __local_depot0;
; CHECK-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-NEXT: ld.global.b64 %rd1, [ptr];
-; CHECK-NEXT: add.u64 %rd3, %SPL, 1;
-; CHECK-NEXT: ld.local.b8 %rs1, [%rd3];
-; CHECK-NEXT: add.u64 %rd4, %SP, 0;
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 1 .b8 param0[1];
-; CHECK-NEXT: st.param.b8 [param0], %rs1;
; CHECK-NEXT: .param .b64 param1;
-; CHECK-NEXT: st.param.b64 [param1], %rd4;
; CHECK-NEXT: .param .b32 retval0;
+; CHECK-NEXT: add.u64 %rd2, %SP, 0;
+; CHECK-NEXT: st.param.b64 [param1], %rd2;
+; CHECK-NEXT: add.u64 %rd4, %SPL, 1;
+; CHECK-NEXT: ld.local.b8 %rs1, [%rd4];
+; CHECK-NEXT: st.param.b8 [param0], %rs1;
; CHECK-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .align 1 .b8 _[1], .param .b64 _);
; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_0;
; CHECK-NEXT: ld.param.b32 %r1, [retval0];
@@ -60,15 +60,15 @@ define internal i32 @bar() {
; CHECK-NEXT: mov.b64 %SPL, __local_depot1;
; CHECK-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-NEXT: ld.global.b64 %rd1, [ptr];
-; CHECK-NEXT: add.u64 %rd3, %SPL, 8;
-; CHECK-NEXT: ld.local.b64 %rd4, [%rd3];
-; CHECK-NEXT: add.u64 %rd5, %SP, 0;
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.b64 [param0], %rd4;
; CHECK-NEXT: .param .b64 param1;
-; CHECK-NEXT: st.param.b64 [param1], %rd5;
; CHECK-NEXT: .param .b32 retval0;
+; CHECK-NEXT: add.u64 %rd2, %SP, 0;
+; CHECK-NEXT: st.param.b64 [param1], %rd2;
+; CHECK-NEXT: add.u64 %rd4, %SPL, 8;
+; CHECK-NEXT: ld.local.b64 %rd5, [%rd4];
+; CHECK-NEXT: st.param.b64 [param0], %rd5;
; CHECK-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .align 8 .b8 _[8], .param .b64 _);
; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_1;
; CHECK-NEXT: ld.param.b32 %r1, [retval0];
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index 321a624..38185c7b 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -121,20 +121,18 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
; PTX-LABEL: grid_const_escape(
; PTX: {
-; PTX-NEXT: .reg .b32 %r<2>;
; PTX-NEXT: .reg .b64 %rd<4>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
; PTX-NEXT: mov.b64 %rd2, grid_const_escape_param_0;
; PTX-NEXT: cvta.param.u64 %rd3, %rd2;
-; PTX-NEXT: mov.b64 %rd1, escape;
; PTX-NEXT: { // callseq 0, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0], %rd3;
; PTX-NEXT: .param .b32 retval0;
+; PTX-NEXT: st.param.b64 [param0], %rd3;
; PTX-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .b64 _);
+; PTX-NEXT: mov.b64 %rd1, escape;
; PTX-NEXT: call (retval0), %rd1, (param0), prototype_0;
-; PTX-NEXT: ld.param.b32 %r1, [retval0];
; PTX-NEXT: } // callseq 0
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_escape(
@@ -153,7 +151,7 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
; PTX-NEXT: .local .align 4 .b8 __local_depot4[4];
; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
-; PTX-NEXT: .reg .b32 %r<3>;
+; PTX-NEXT: .reg .b32 %r<2>;
; PTX-NEXT: .reg .b64 %rd<8>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
@@ -167,18 +165,17 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
; PTX-NEXT: add.u64 %rd6, %SP, 0;
; PTX-NEXT: add.u64 %rd7, %SPL, 0;
; PTX-NEXT: st.local.b32 [%rd7], %r1;
-; PTX-NEXT: mov.b64 %rd1, escape3;
; PTX-NEXT: { // callseq 1, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0], %rd5;
; PTX-NEXT: .param .b64 param1;
-; PTX-NEXT: st.param.b64 [param1], %rd6;
; PTX-NEXT: .param .b64 param2;
-; PTX-NEXT: st.param.b64 [param2], %rd4;
; PTX-NEXT: .param .b32 retval0;
+; PTX-NEXT: st.param.b64 [param2], %rd4;
+; PTX-NEXT: st.param.b64 [param1], %rd6;
+; PTX-NEXT: st.param.b64 [param0], %rd5;
; PTX-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b64 _, .param .b64 _, .param .b64 _);
+; PTX-NEXT: mov.b64 %rd1, escape3;
; PTX-NEXT: call (retval0), %rd1, (param0, param1, param2), prototype_1;
-; PTX-NEXT: ld.param.b32 %r2, [retval0];
; PTX-NEXT: } // callseq 1
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @multiple_grid_const_escape(
@@ -255,7 +252,7 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) {
; PTX-LABEL: grid_const_partial_escape(
; PTX: {
-; PTX-NEXT: .reg .b32 %r<4>;
+; PTX-NEXT: .reg .b32 %r<3>;
; PTX-NEXT: .reg .b64 %rd<6>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
@@ -266,14 +263,13 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
; PTX-NEXT: ld.param.b32 %r1, [grid_const_partial_escape_param_0];
; PTX-NEXT: add.s32 %r2, %r1, %r1;
; PTX-NEXT: st.global.b32 [%rd4], %r2;
-; PTX-NEXT: mov.b64 %rd1, escape;
; PTX-NEXT: { // callseq 2, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0], %rd5;
; PTX-NEXT: .param .b32 retval0;
+; PTX-NEXT: st.param.b64 [param0], %rd5;
; PTX-NEXT: prototype_2 : .callprototype (.param .b32 _) _ (.param .b64 _);
+; PTX-NEXT: mov.b64 %rd1, escape;
; PTX-NEXT: call (retval0), %rd1, (param0), prototype_2;
-; PTX-NEXT: ld.param.b32 %r3, [retval0];
; PTX-NEXT: } // callseq 2
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_partial_escape(
@@ -295,7 +291,7 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %output) {
; PTX-LABEL: grid_const_partial_escapemem(
; PTX: {
-; PTX-NEXT: .reg .b32 %r<5>;
+; PTX-NEXT: .reg .b32 %r<4>;
; PTX-NEXT: .reg .b64 %rd<6>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
@@ -307,14 +303,13 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input,
; PTX-NEXT: ld.param.b32 %r2, [grid_const_partial_escapemem_param_0+4];
; PTX-NEXT: st.global.b64 [%rd4], %rd5;
; PTX-NEXT: add.s32 %r3, %r1, %r2;
-; PTX-NEXT: mov.b64 %rd1, escape;
; PTX-NEXT: { // callseq 3, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0], %rd5;
; PTX-NEXT: .param .b32 retval0;
+; PTX-NEXT: st.param.b64 [param0], %rd5;
; PTX-NEXT: prototype_3 : .callprototype (.param .b32 _) _ (.param .b64 _);
+; PTX-NEXT: mov.b64 %rd1, escape;
; PTX-NEXT: call (retval0), %rd1, (param0), prototype_3;
-; PTX-NEXT: ld.param.b32 %r4, [retval0];
; PTX-NEXT: } // callseq 3
; PTX-NEXT: st.param.b32 [func_retval0], %r3;
; PTX-NEXT: ret;
@@ -535,9 +530,9 @@ define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
; PTX-NEXT: .reg .b32 %r<2>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
-; PTX-NEXT: ld.param.b32 %r1, [test_forward_byval_arg_param_0];
; PTX-NEXT: { // callseq 4, 0
; PTX-NEXT: .param .align 4 .b8 param0[4];
+; PTX-NEXT: ld.param.b32 %r1, [test_forward_byval_arg_param_0];
; PTX-NEXT: st.param.b32 [param0], %r1;
; PTX-NEXT: call.uni device_func, (param0);
; PTX-NEXT: } // callseq 4
diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll
index c165de7..7c029ab 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args.ll
@@ -31,7 +31,7 @@ define void @load_alignment(ptr nocapture readonly byval(%class.outer) align 8 %
; PTX-LABEL: load_alignment(
; PTX: {
; PTX-NEXT: .reg .b32 %r<4>;
-; PTX-NEXT: .reg .b64 %rd<7>;
+; PTX-NEXT: .reg .b64 %rd<6>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0: // %entry
; PTX-NEXT: mov.b64 %rd1, load_alignment_param_0;
@@ -45,10 +45,9 @@ define void @load_alignment(ptr nocapture readonly byval(%class.outer) align 8 %
; PTX-NEXT: st.b32 [%rd3], %r3;
; PTX-NEXT: { // callseq 0, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0], %rd5;
; PTX-NEXT: .param .b64 retval0;
+; PTX-NEXT: st.param.b64 [param0], %rd5;
; PTX-NEXT: call.uni (retval0), escape, (param0);
-; PTX-NEXT: ld.param.b64 %rd6, [retval0];
; PTX-NEXT: } // callseq 0
; PTX-NEXT: ret;
entry:
@@ -76,17 +75,16 @@ define void @load_padding(ptr nocapture readonly byval(%class.padded) %arg) {
;
; PTX-LABEL: load_padding(
; PTX: {
-; PTX-NEXT: .reg .b64 %rd<4>;
+; PTX-NEXT: .reg .b64 %rd<3>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
; PTX-NEXT: mov.b64 %rd1, load_padding_param_0;
; PTX-NEXT: cvta.local.u64 %rd2, %rd1;
; PTX-NEXT: { // callseq 1, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0], %rd2;
; PTX-NEXT: .param .b64 retval0;
+; PTX-NEXT: st.param.b64 [param0], %rd2;
; PTX-NEXT: call.uni (retval0), escape, (param0);
-; PTX-NEXT: ld.param.b64 %rd3, [retval0];
; PTX-NEXT: } // callseq 1
; PTX-NEXT: ret;
%tmp = call ptr @escape(ptr nonnull align 16 %arg)
diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
index 4784d70..20a3519 100644
--- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
@@ -911,9 +911,9 @@ define void @device_func(ptr byval(i32) align 4 %input) {
; PTX-NEXT: .reg .b64 %rd<2>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
-; PTX-NEXT: ld.param.b32 %r1, [device_func_param_0];
; PTX-NEXT: { // callseq 3, 0
; PTX-NEXT: .param .align 4 .b8 param0[4];
+; PTX-NEXT: ld.param.b32 %r1, [device_func_param_0];
; PTX-NEXT: st.param.b32 [param0], %r1;
; PTX-NEXT: call.uni device_func, (param0);
; PTX-NEXT: } // callseq 3
diff --git a/llvm/test/CodeGen/NVPTX/misched_func_call.ll b/llvm/test/CodeGen/NVPTX/misched_func_call.ll
index 8401f45..b2994c0 100644
--- a/llvm/test/CodeGen/NVPTX/misched_func_call.ll
+++ b/llvm/test/CodeGen/NVPTX/misched_func_call.ll
@@ -8,7 +8,7 @@ define ptx_kernel void @wombat(i32 %arg, i32 %arg1, i32 %arg2) {
; CHECK-LABEL: wombat(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<11>;
-; CHECK-NEXT: .reg .b64 %rd<6>;
+; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %bb
; CHECK-NEXT: ld.param.b32 %r4, [wombat_param_2];
@@ -19,19 +19,18 @@ define ptx_kernel void @wombat(i32 %arg, i32 %arg1, i32 %arg2) {
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .b64 param0;
-; CHECK-NEXT: st.param.b64 [param0], 0d0000000000000000;
; CHECK-NEXT: .param .b64 retval0;
+; CHECK-NEXT: st.param.b64 [param0], 0;
; CHECK-NEXT: call.uni (retval0), quux, (param0);
-; CHECK-NEXT: ld.param.b64 %rd1, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: mul.lo.s32 %r7, %r10, %r3;
; CHECK-NEXT: or.b32 %r8, %r4, %r7;
; CHECK-NEXT: mul.lo.s32 %r9, %r2, %r8;
-; CHECK-NEXT: cvt.rn.f64.s32 %rd2, %r9;
-; CHECK-NEXT: cvt.rn.f64.u32 %rd3, %r10;
-; CHECK-NEXT: add.rn.f64 %rd4, %rd3, %rd2;
-; CHECK-NEXT: mov.b64 %rd5, 0;
-; CHECK-NEXT: st.global.b64 [%rd5], %rd4;
+; CHECK-NEXT: cvt.rn.f64.s32 %rd1, %r9;
+; CHECK-NEXT: cvt.rn.f64.u32 %rd2, %r10;
+; CHECK-NEXT: add.rn.f64 %rd3, %rd2, %rd1;
+; CHECK-NEXT: mov.b64 %rd4, 0;
+; CHECK-NEXT: st.global.b64 [%rd4], %rd3;
; CHECK-NEXT: mov.b32 %r10, 1;
; CHECK-NEXT: bra.uni $L__BB0_1;
bb:
diff --git a/llvm/test/CodeGen/NVPTX/param-add.ll b/llvm/test/CodeGen/NVPTX/param-add.ll
index 4fa1235..c5ea9f8 100644
--- a/llvm/test/CodeGen/NVPTX/param-add.ll
+++ b/llvm/test/CodeGen/NVPTX/param-add.ll
@@ -18,16 +18,16 @@ define i32 @test(%struct.1float alignstack(32) %data) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_param_0];
-; CHECK-NEXT: shr.u32 %r2, %r1, 8;
-; CHECK-NEXT: shr.u32 %r3, %r1, 16;
-; CHECK-NEXT: shr.u32 %r4, %r1, 24;
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 1 .b8 param0[4];
+; CHECK-NEXT: .param .b32 retval0;
; CHECK-NEXT: st.param.b8 [param0], %r1;
+; CHECK-NEXT: shr.u32 %r2, %r1, 8;
; CHECK-NEXT: st.param.b8 [param0+1], %r2;
+; CHECK-NEXT: shr.u32 %r3, %r1, 16;
; CHECK-NEXT: st.param.b8 [param0+2], %r3;
+; CHECK-NEXT: shr.u32 %r4, %r3, 8;
; CHECK-NEXT: st.param.b8 [param0+3], %r4;
-; CHECK-NEXT: .param .b32 retval0;
; CHECK-NEXT: call.uni (retval0), callee, (param0);
; CHECK-NEXT: ld.param.b32 %r5, [retval0];
; CHECK-NEXT: } // callseq 0
diff --git a/llvm/test/CodeGen/NVPTX/param-load-store.ll b/llvm/test/CodeGen/NVPTX/param-load-store.ll
index 6c52bfd..db3fbbc 100644
--- a/llvm/test/CodeGen/NVPTX/param-load-store.ll
+++ b/llvm/test/CodeGen/NVPTX/param-load-store.ll
@@ -27,10 +27,10 @@
; CHECK: ld.param.b8 [[A8:%rs[0-9]+]], [test_i1_param_0];
; CHECK: and.b16 [[A:%rs[0-9]+]], [[A8]], 1;
; CHECK: setp.ne.b16 %p1, [[A]], 0
+; CHECK-DAG: .param .b32 param0;
+; CHECK-DAG: .param .b32 retval0;
; CHECK: cvt.u32.u16 [[B:%r[0-9]+]], [[A8]]
-; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], [[B]]
-; CHECK: .param .b32 retval0;
+; CHECK-DAG: st.param.b32 [param0], [[B]]
; CHECK: call.uni (retval0), test_i1,
; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R8]];
@@ -47,11 +47,11 @@ define i1 @test_i1(i1 %a) {
; CHECK-NEXT: .param .b32 test_i1s_param_0
; CHECK: ld.param.b8 [[A8:%rs[0-9]+]], [test_i1s_param_0];
; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
+; CHECK: .param .b32 param0;
+; CHECK: .param .b32 retval0;
; CHECK: and.b32 [[A1:%r[0-9]+]], [[A32]], 1;
; CHECK: neg.s32 [[A:%r[0-9]+]], [[A1]];
-; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], [[A]];
-; CHECK: .param .b32 retval0;
; CHECK: call.uni
; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0];
; CHECK: and.b32 [[R1:%r[0-9]+]], [[R8]], 1;
@@ -70,9 +70,9 @@ define signext i1 @test_i1s(i1 signext %a) {
; CHECK-DAG: ld.param.b8 [[E2:%rs[0-9]+]], [test_v3i1_param_0+2];
; CHECK-DAG: ld.param.b8 [[E0:%rs[0-9]+]], [test_v3i1_param_0]
; CHECK: .param .align 1 .b8 param0[1];
+; CHECK: .param .align 1 .b8 retval0[1];
; CHECK-DAG: st.param.b8 [param0], [[E0]];
; CHECK-DAG: st.param.b8 [param0+2], [[E2]];
-; CHECK: .param .align 1 .b8 retval0[1];
; CHECK: call.uni (retval0), test_v3i1,
; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
@@ -89,8 +89,8 @@ define <3 x i1> @test_v3i1(<3 x i1> %a) {
; CHECK-NEXT: .param .align 1 .b8 test_v4i1_param_0[1]
; CHECK: ld.param.b8 [[E0:%rs[0-9]+]], [test_v4i1_param_0]
; CHECK: .param .align 1 .b8 param0[1];
-; CHECK: st.param.b8 [param0], [[E0]];
; CHECK: .param .align 1 .b8 retval0[1];
+; CHECK: st.param.b8 [param0], [[E0]];
; CHECK: call.uni (retval0), test_v4i1,
; CHECK: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0];
; CHECK: ld.param.b8 [[RE1:%rs[0-9]+]], [retval0+1];
@@ -112,9 +112,9 @@ define <4 x i1> @test_v4i1(<4 x i1> %a) {
; CHECK-DAG: ld.param.b8 [[E4:%rs[0-9]+]], [test_v5i1_param_0+4];
; CHECK-DAG: ld.param.b8 [[E0:%rs[0-9]+]], [test_v5i1_param_0]
; CHECK: .param .align 1 .b8 param0[1];
+; CHECK: .param .align 1 .b8 retval0[1];
; CHECK-DAG: st.param.b8 [param0], [[E0]];
; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
-; CHECK: .param .align 1 .b8 retval0[1];
; CHECK: call.uni (retval0), test_v5i1,
; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
@@ -131,8 +131,8 @@ define <5 x i1> @test_v5i1(<5 x i1> %a) {
; CHECK-NEXT: .param .b32 test_i2_param_0
; CHECK: ld.param.b8 {{%rs[0-9]+}}, [test_i2_param_0];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: call.uni (retval0), test_i2,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
@@ -147,8 +147,8 @@ define i2 @test_i2(i2 %a) {
; CHECK-NEXT: .param .b32 test_i3_param_0
; CHECK: ld.param.b8 {{%rs[0-9]+}}, [test_i3_param_0];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: call.uni (retval0), test_i3,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
@@ -163,10 +163,10 @@ define i3 @test_i3(i3 %a) {
; CHECK-LABEL: test_i8(
; CHECK-NEXT: .param .b32 test_i8_param_0
; CHECK: ld.param.b8 [[A8:%rs[0-9]+]], [test_i8_param_0];
-; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], [[A32]];
; CHECK: .param .b32 retval0;
+; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
+; CHECK: st.param.b32 [param0], [[A32]];
; CHECK: call.uni (retval0), test_i8,
; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R32]];
@@ -181,10 +181,10 @@ define i8 @test_i8(i8 %a) {
; CHECK-LABEL: test_i8s(
; CHECK-NEXT: .param .b32 test_i8s_param_0
; CHECK: ld.param.s8 [[A8:%rs[0-9]+]], [test_i8s_param_0];
-; CHECK: cvt.s32.s16 [[A:%r[0-9]+]], [[A8]];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], [[A]];
; CHECK: .param .b32 retval0;
+; CHECK: cvt.s32.s16 [[A:%r[0-9]+]], [[A8]];
+; CHECK: st.param.b32 [param0], [[A]];
; CHECK: call.uni (retval0), test_i8s,
; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0];
; -- This is suspicious (though correct) -- why not cvt.u8.u32, cvt.s8.s32 ?
@@ -202,8 +202,8 @@ define signext i8 @test_i8s(i8 signext %a) {
; CHECK-NEXT: .param .align 4 .b8 test_v3i8_param_0[4]
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [test_v3i8_param_0];
; CHECK: .param .align 4 .b8 param0[4];
-; CHECK: st.param.b32 [param0], [[R]]
; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: st.param.b32 [param0], [[R]]
; CHECK: call.uni (retval0), test_v3i8,
; CHECK: ld.param.b32 [[RE:%r[0-9]+]], [retval0];
; v4i8/i32->{v3i8 elements}->v4i8/i32 conversion is messy and not very
@@ -220,8 +220,8 @@ define <3 x i8> @test_v3i8(<3 x i8> %a) {
; CHECK-NEXT: .param .align 4 .b8 test_v4i8_param_0[4]
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [test_v4i8_param_0]
; CHECK: .param .align 4 .b8 param0[4];
-; CHECK: st.param.b32 [param0], [[R]];
; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: st.param.b32 [param0], [[R]];
; CHECK: call.uni (retval0), test_v4i8,
; CHECK: ld.param.b32 [[RET:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[RET]];
@@ -237,20 +237,13 @@ define <4 x i8> @test_v4i8(<4 x i8> %a) {
; CHECK-DAG: ld.param.b32 [[E0:%r[0-9]+]], [test_v5i8_param_0]
; CHECK-DAG: ld.param.b8 [[E4:%rs[0-9]+]], [test_v5i8_param_0+4];
; CHECK: .param .align 8 .b8 param0[8];
-; CHECK-DAG: st.param.v4.b8 [param0],
-; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK-DAG: st.param.b32 [param0], [[E0]];
+; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
; CHECK: call.uni (retval0), test_v5i8,
-; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0];
+; CHECK-DAG: ld.param.b32 [[RE0:%r[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
-; CHECK-DAG: cvt.u32.u16 [[R3:%r[0-9]+]], [[RE3]];
-; CHECK-DAG: cvt.u32.u16 [[R2:%r[0-9]+]], [[RE2]];
-; CHECK-DAG: prmt.b32 [[P0:%r[0-9]+]], [[R2]], [[R3]], 0x3340U;
-; CHECK-DAG: cvt.u32.u16 [[R1:%r[0-9]+]], [[RE1]];
-; CHECK-DAG: cvt.u32.u16 [[R0:%r[0-9]+]], [[RE0]];
-; CHECK-DAG: prmt.b32 [[P1:%r[0-9]+]], [[R0]], [[R1]], 0x3340U;
-; CHECK-DAG: prmt.b32 [[P2:%r[0-9]+]], [[P1]], [[P0]], 0x5410U;
-; CHECK-DAG: st.param.b32 [func_retval0], [[P2]];
+; CHECK-DAG: st.param.b32 [func_retval0], [[RE0]];
; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]];
; CHECK-NEXT: ret;
define <5 x i8> @test_v5i8(<5 x i8> %a) {
@@ -262,8 +255,8 @@ define <5 x i8> @test_v5i8(<5 x i8> %a) {
; CHECK-LABEL: test_i11(
; CHECK-NEXT: .param .b32 test_i11_param_0
; CHECK: ld.param.b16 {{%rs[0-9]+}}, [test_i11_param_0];
-; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: call.uni (retval0), test_i11,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
@@ -277,10 +270,10 @@ define i11 @test_i11(i11 %a) {
; CHECK-LABEL: test_i16(
; CHECK-NEXT: .param .b32 test_i16_param_0
; CHECK: ld.param.b16 [[E16:%rs[0-9]+]], [test_i16_param_0];
-; CHECK: cvt.u32.u16 [[E32:%r[0-9]+]], [[E16]];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], [[E32]];
; CHECK: .param .b32 retval0;
+; CHECK: cvt.u32.u16 [[E32:%r[0-9]+]], [[E16]];
+; CHECK: st.param.b32 [param0], [[E32]];
; CHECK: call.uni (retval0), test_i16,
; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[RE32]];
@@ -294,10 +287,10 @@ define i16 @test_i16(i16 %a) {
; CHECK-LABEL: test_i16s(
; CHECK-NEXT: .param .b32 test_i16s_param_0
; CHECK: ld.param.b16 [[E16:%rs[0-9]+]], [test_i16s_param_0];
-; CHECK: cvt.s32.s16 [[E32:%r[0-9]+]], [[E16]];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], [[E32]];
; CHECK: .param .b32 retval0;
+; CHECK: cvt.s32.s16 [[E32:%r[0-9]+]], [[E16]];
+; CHECK: st.param.b32 [param0], [[E32]];
; CHECK: call.uni (retval0), test_i16s,
; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0];
; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[RE32]];
@@ -312,14 +305,15 @@ define signext i16 @test_i16s(i16 signext %a) {
; CHECK-LABEL: test_v3i16(
; CHECK-NEXT: .param .align 8 .b8 test_v3i16_param_0[8]
; CHECK-DAG: ld.param.b16 [[E2:%rs[0-9]+]], [test_v3i16_param_0+4];
-; CHECK-DAG: ld.param.v2.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i16_param_0];
+; CHECK-DAG: ld.param.b32 [[E0:%r[0-9]+]], [test_v3i16_param_0];
; CHECK: .param .align 8 .b8 param0[8];
-; CHECK: st.param.v2.b16 [param0], {[[E0]], [[E1]]};
-; CHECK: st.param.b16 [param0+4], [[E2]];
; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK-DAG: st.param.b32 [param0], [[E0]];
+; CHECK-DAG: st.param.b16 [param0+4], [[E2]];
; CHECK: call.uni (retval0), test_v3i16,
-; CHECK: ld.param.v2.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0];
+; CHECK: ld.param.b32 [[RE:%r[0-9]+]], [retval0];
; CHECK: ld.param.b16 [[RE2:%rs[0-9]+]], [retval0+4];
+; CHECK-DAG: mov.b32 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [[RE]];
; CHECK-DAG: st.param.v2.b16 [func_retval0], {[[RE0]], [[RE1]]};
; CHECK-DAG: st.param.b16 [func_retval0+4], [[RE2]];
; CHECK-NEXT: ret;
@@ -333,8 +327,8 @@ define <3 x i16> @test_v3i16(<3 x i16> %a) {
; CHECK-NEXT: .param .align 8 .b8 test_v4i16_param_0[8]
; CHECK: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v4i16_param_0]
; CHECK: .param .align 8 .b8 param0[8];
-; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
; CHECK: call.uni (retval0), test_v4i16,
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]}
@@ -348,15 +342,15 @@ define <4 x i16> @test_v4i16(<4 x i16> %a) {
; CHECK-LABEL: test_v5i16(
; CHECK-NEXT: .param .align 16 .b8 test_v5i16_param_0[16]
; CHECK-DAG: ld.param.b16 [[E4:%rs[0-9]+]], [test_v5i16_param_0+8];
-; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i16_param_0]
+; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v5i16_param_0]
; CHECK: .param .align 16 .b8 param0[16];
-; CHECK-DAG: st.param.v4.b16 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
-; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
; CHECK: .param .align 16 .b8 retval0[16];
+; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
; CHECK: call.uni (retval0), test_v5i16,
-; CHECK-DAG: ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0];
+; CHECK-DAG: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.b16 [[RE4:%rs[0-9]+]], [retval0+8];
-; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
+; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]}
; CHECK-DAG: st.param.b16 [func_retval0+8], [[RE4]];
; CHECK-NEXT: ret;
define <5 x i16> @test_v5i16(<5 x i16> %a) {
@@ -369,8 +363,8 @@ define <5 x i16> @test_v5i16(<5 x i16> %a) {
; CHECK-NEXT: .param .align 2 .b8 test_f16_param_0[2]
; CHECK: ld.param.b16 [[E:%rs[0-9]+]], [test_f16_param_0];
; CHECK: .param .align 2 .b8 param0[2];
-; CHECK: st.param.b16 [param0], [[E]];
; CHECK: .param .align 2 .b8 retval0[2];
+; CHECK: st.param.b16 [param0], [[E]];
; CHECK: call.uni (retval0), test_f16,
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b16 [func_retval0], [[R]]
@@ -385,8 +379,8 @@ define half @test_f16(half %a) {
; CHECK-NEXT: .param .align 4 .b8 test_v2f16_param_0[4]
; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_v2f16_param_0];
; CHECK: .param .align 4 .b8 param0[4];
-; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: st.param.b32 [param0], [[E]];
; CHECK: call.uni (retval0), test_v2f16,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]]
@@ -401,8 +395,8 @@ define <2 x half> @test_v2f16(<2 x half> %a) {
; CHECK-NEXT: .param .align 2 .b8 test_bf16_param_0[2]
; CHECK: ld.param.b16 [[E:%rs[0-9]+]], [test_bf16_param_0];
; CHECK: .param .align 2 .b8 param0[2];
-; CHECK: st.param.b16 [param0], [[E]];
; CHECK: .param .align 2 .b8 retval0[2];
+; CHECK: st.param.b16 [param0], [[E]];
; CHECK: call.uni (retval0), test_bf16,
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b16 [func_retval0], [[R]]
@@ -417,8 +411,8 @@ define bfloat @test_bf16(bfloat %a) {
; CHECK-NEXT: .param .align 4 .b8 test_v2bf16_param_0[4]
; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_v2bf16_param_0];
; CHECK: .param .align 4 .b8 param0[4];
-; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: st.param.b32 [param0], [[E]];
; CHECK: call.uni (retval0), test_v2bf16,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]]
@@ -432,15 +426,16 @@ define <2 x bfloat> @test_v2bf16(<2 x bfloat> %a) {
; CHECK:.func (.param .align 8 .b8 func_retval0[8])
; CHECK-LABEL: test_v3f16(
; CHECK: .param .align 8 .b8 test_v3f16_param_0[8]
-; CHECK-DAG: ld.param.v2.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3f16_param_0];
+; CHECK-DAG: ld.param.b32 [[E0:%r[0-9]+]], [test_v3f16_param_0];
; CHECK-DAG: ld.param.b16 [[E2:%rs[0-9]+]], [test_v3f16_param_0+4];
; CHECK: .param .align 8 .b8 param0[8];
-; CHECK-DAG: st.param.v2.b16 [param0], {[[E0]], [[E1]]};
-; CHECK-DAG: st.param.b16 [param0+4], [[E2]];
; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK-DAG: st.param.b32 [param0], [[E0]];
+; CHECK-DAG: st.param.b16 [param0+4], [[E2]];
; CHECK: call.uni (retval0), test_v3f16,
-; CHECK-DAG: ld.param.v2.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [retval0];
+; CHECK-DAG: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b16 [[R2:%rs[0-9]+]], [retval0+4];
+; CHECK-DAG: mov.b32 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [[R]];
; CHECK-DAG: st.param.v2.b16 [func_retval0], {[[R0]], [[R1]]};
; CHECK-DAG: st.param.b16 [func_retval0+4], [[R2]];
; CHECK: ret;
@@ -454,8 +449,8 @@ define <3 x half> @test_v3f16(<3 x half> %a) {
; CHECK: .param .align 8 .b8 test_v4f16_param_0[8]
; CHECK: ld.param.v2.b32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]]}, [test_v4f16_param_0];
; CHECK: .param .align 8 .b8 param0[8];
-; CHECK: st.param.v2.b32 [param0], {[[R01]], [[R23]]};
; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK: st.param.v2.b32 [param0], {[[R01]], [[R23]]};
; CHECK: call.uni (retval0), test_v4f16,
; CHECK: ld.param.v2.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]]}, [retval0];
; CHECK: st.param.v2.b32 [func_retval0], {[[RH01]], [[RH23]]};
@@ -468,16 +463,16 @@ define <4 x half> @test_v4f16(<4 x half> %a) {
; CHECK:.func (.param .align 16 .b8 func_retval0[16])
; CHECK-LABEL: test_v5f16(
; CHECK: .param .align 16 .b8 test_v5f16_param_0[16]
-; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5f16_param_0];
+; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v5f16_param_0];
; CHECK-DAG: ld.param.b16 [[E4:%rs[0-9]+]], [test_v5f16_param_0+8];
; CHECK: .param .align 16 .b8 param0[16];
-; CHECK-DAG: st.param.v4.b16 [param0],
-; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
; CHECK: .param .align 16 .b8 retval0[16];
+; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
; CHECK: call.uni (retval0), test_v5f16,
-; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0];
+; CHECK-DAG: ld.param.v2.b32 {[[R0:%r[0-9]+]], [[R1:%r[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.b16 [[R4:%rs[0-9]+]], [retval0+8];
-; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[R0]], [[R1]], [[R2]], [[R3]]};
+; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[R0]], [[R1]]};
; CHECK-DAG: st.param.b16 [func_retval0+8], [[R4]];
; CHECK: ret;
define <5 x half> @test_v5f16(<5 x half> %a) {
@@ -490,8 +485,8 @@ define <5 x half> @test_v5f16(<5 x half> %a) {
; CHECK: .param .align 16 .b8 test_v8f16_param_0[16]
; CHECK: ld.param.v4.b32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]], [[R45:%r[0-9]+]], [[R67:%r[0-9]+]]}, [test_v8f16_param_0];
; CHECK: .param .align 16 .b8 param0[16];
-; CHECK: st.param.v4.b32 [param0], {[[R01]], [[R23]], [[R45]], [[R67]]};
; CHECK: .param .align 16 .b8 retval0[16];
+; CHECK: st.param.v4.b32 [param0], {[[R01]], [[R23]], [[R45]], [[R67]]};
; CHECK: call.uni (retval0), test_v8f16,
; CHECK: ld.param.v4.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]], [[RH45:%r[0-9]+]], [[RH67:%r[0-9]+]]}, [retval0];
; CHECK: st.param.v4.b32 [func_retval0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]};
@@ -504,20 +499,20 @@ define <8 x half> @test_v8f16(<8 x half> %a) {
; CHECK:.func (.param .align 32 .b8 func_retval0[32])
; CHECK-LABEL: test_v9f16(
; CHECK: .param .align 32 .b8 test_v9f16_param_0[32]
-; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v9f16_param_0];
-; CHECK-DAG: ld.param.v4.b16 {[[E4:%rs[0-9]+]], [[E5:%rs[0-9]+]], [[E6:%rs[0-9]+]], [[E7:%rs[0-9]+]]}, [test_v9f16_param_0+8];
+; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v9f16_param_0];
+; CHECK-DAG: ld.param.v2.b32 {[[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v9f16_param_0+8];
; CHECK-DAG: ld.param.b16 [[E8:%rs[0-9]+]], [test_v9f16_param_0+16];
; CHECK: .param .align 32 .b8 param0[32];
-; CHECK-DAG: st.param.v4.b16 [param0],
-; CHECK-DAG: st.param.v4.b16 [param0+8],
-; CHECK-DAG: st.param.b16 [param0+16], [[E8]];
; CHECK: .param .align 32 .b8 retval0[32];
+; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
+; CHECK-DAG: st.param.b16 [param0+16], [[E8]];
; CHECK: call.uni (retval0), test_v9f16,
-; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0];
-; CHECK-DAG: ld.param.v4.b16 {[[R4:%rs[0-9]+]], [[R5:%rs[0-9]+]], [[R6:%rs[0-9]+]], [[R7:%rs[0-9]+]]}, [retval0+8];
+; CHECK-DAG: ld.param.v2.b32 {[[R0:%r[0-9]+]], [[R1:%r[0-9]+]]}, [retval0];
+; CHECK-DAG: ld.param.v2.b32 {[[R2:%r[0-9]+]], [[R3:%r[0-9]+]]}, [retval0+8];
; CHECK-DAG: ld.param.b16 [[R8:%rs[0-9]+]], [retval0+16];
-; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[R0]], [[R1]], [[R2]], [[R3]]};
-; CHECK-DAG: st.param.v4.b16 [func_retval0+8], {[[R4]], [[R5]], [[R6]], [[R7]]};
+; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[R0]], [[R1]]};
+; CHECK-DAG: st.param.v2.b32 [func_retval0+8], {[[R2]], [[R3]]};
; CHECK-DAG: st.param.b16 [func_retval0+16], [[R8]];
; CHECK: ret;
define <9 x half> @test_v9f16(<9 x half> %a) {
@@ -531,8 +526,8 @@ define <9 x half> @test_v9f16(<9 x half> %a) {
; CHECK-DAG: ld.param.b16 {{%r[0-9]+}}, [test_i19_param_0];
; CHECK-DAG: ld.param.b8 {{%r[0-9]+}}, [test_i19_param_0+2];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: call.uni (retval0), test_i19,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
@@ -548,8 +543,8 @@ define i19 @test_i19(i19 %a) {
; CHECK-DAG: ld.param.b16 {{%r[0-9]+}}, [test_i23_param_0];
; CHECK-DAG: ld.param.b8 {{%r[0-9]+}}, [test_i23_param_0+2];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: call.uni (retval0), test_i23,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
@@ -565,8 +560,8 @@ define i23 @test_i23(i23 %a) {
; CHECK-DAG: ld.param.b8 {{%r[0-9]+}}, [test_i24_param_0+2];
; CHECK-DAG: ld.param.b16 {{%r[0-9]+}}, [test_i24_param_0];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: call.uni (retval0), test_i24,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
@@ -581,8 +576,8 @@ define i24 @test_i24(i24 %a) {
; CHECK-NEXT: .param .b32 test_i29_param_0
; CHECK: ld.param.b32 {{%r[0-9]+}}, [test_i29_param_0];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: call.uni (retval0), test_i29,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
@@ -597,8 +592,8 @@ define i29 @test_i29(i29 %a) {
; CHECK-NEXT: .param .b32 test_i32_param_0
; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_i32_param_0];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], [[E]];
; CHECK: call.uni (retval0), test_i32,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]];
@@ -613,10 +608,10 @@ define i32 @test_i32(i32 %a) {
; CHECK-NEXT: .param .align 16 .b8 test_v3i32_param_0[16]
; CHECK-DAG: ld.param.b32 [[E2:%r[0-9]+]], [test_v3i32_param_0+8];
; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v3i32_param_0];
-; CHECK: .param .align 16 .b8 param0[16];
-; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
-; CHECK: st.param.b32 [param0+8], [[E2]];
-; CHECK: .param .align 16 .b8 retval0[16];
+; CHECK-DAG: .param .align 16 .b8 param0[16];
+; CHECK-DAG: .param .align 16 .b8 retval0[16];
+; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.b32 [param0+8], [[E2]];
; CHECK: call.uni (retval0), test_v3i32,
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
@@ -632,9 +627,9 @@ define <3 x i32> @test_v3i32(<3 x i32> %a) {
; CHECK-LABEL: test_v4i32(
; CHECK-NEXT: .param .align 16 .b8 test_v4i32_param_0[16]
; CHECK: ld.param.v4.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v4i32_param_0]
-; CHECK: .param .align 16 .b8 param0[16];
-; CHECK: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
-; CHECK: .param .align 16 .b8 retval0[16];
+; CHECK-DAG: .param .align 16 .b8 param0[16];
+; CHECK-DAG: .param .align 16 .b8 retval0[16];
+; CHECK-DAG: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK: call.uni (retval0), test_v4i32,
; CHECK: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0];
; CHECK: st.param.v4.b32 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
@@ -650,9 +645,9 @@ define <4 x i32> @test_v4i32(<4 x i32> %a) {
; CHECK-DAG: ld.param.b32 [[E4:%r[0-9]+]], [test_v5i32_param_0+16];
; CHECK-DAG: ld.param.v4.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v5i32_param_0]
; CHECK: .param .align 32 .b8 param0[32];
+; CHECK: .param .align 32 .b8 retval0[32];
; CHECK-DAG: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK-DAG: st.param.b32 [param0+16], [[E4]];
-; CHECK: .param .align 32 .b8 retval0[32];
; CHECK: call.uni (retval0), test_v5i32,
; CHECK-DAG: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16];
@@ -669,8 +664,8 @@ define <5 x i32> @test_v5i32(<5 x i32> %a) {
; CHECK-NEXT: .param .b32 test_f32_param_0
; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_f32_param_0];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], [[E]];
; CHECK: call.uni (retval0), test_f32,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]];
@@ -686,8 +681,8 @@ define float @test_f32(float %a) {
; CHECK-DAG: ld.param.b8 {{%rd[0-9]+}}, [test_i40_param_0+4];
; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i40_param_0];
; CHECK: .param .b64 param0;
-; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
+; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: call.uni (retval0), test_i40,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
@@ -703,8 +698,8 @@ define i40 @test_i40(i40 %a) {
; CHECK-DAG: ld.param.b16 {{%rd[0-9]+}}, [test_i47_param_0+4];
; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i47_param_0];
; CHECK: .param .b64 param0;
-; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
+; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: call.uni (retval0), test_i47,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
@@ -720,8 +715,8 @@ define i47 @test_i47(i47 %a) {
; CHECK-DAG: ld.param.b16 {{%rd[0-9]+}}, [test_i48_param_0+4];
; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i48_param_0];
; CHECK: .param .b64 param0;
-; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
+; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: call.uni (retval0), test_i48,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
@@ -738,8 +733,8 @@ define i48 @test_i48(i48 %a) {
; CHECK-DAG: ld.param.b16 {{%rd[0-9]+}}, [test_i51_param_0+4];
; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i51_param_0];
; CHECK: .param .b64 param0;
-; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
+; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: call.uni (retval0), test_i51,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
@@ -756,8 +751,8 @@ define i51 @test_i51(i51 %a) {
; CHECK-DAG: ld.param.b16 {{%rd[0-9]+}}, [test_i56_param_0+4];
; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i56_param_0];
; CHECK: .param .b64 param0;
-; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
+; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: call.uni (retval0), test_i56,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
@@ -772,8 +767,8 @@ define i56 @test_i56(i56 %a) {
; CHECK-NEXT: .param .b64 test_i57_param_0
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [test_i57_param_0];
; CHECK: .param .b64 param0;
-; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
+; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: call.uni (retval0), test_i57,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
@@ -788,8 +783,8 @@ define i57 @test_i57(i57 %a) {
; CHECK-NEXT: .param .b64 test_i64_param_0
; CHECK: ld.param.b64 [[E:%rd[0-9]+]], [test_i64_param_0];
; CHECK: .param .b64 param0;
-; CHECK: st.param.b64 [param0], [[E]];
; CHECK: .param .b64 retval0;
+; CHECK: st.param.b64 [param0], [[E]];
; CHECK: call.uni (retval0), test_i64,
; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0];
; CHECK: st.param.b64 [func_retval0], [[R]];
@@ -805,9 +800,9 @@ define i64 @test_i64(i64 %a) {
; CHECK-DAG: ld.param.b64 [[E2:%rd[0-9]+]], [test_v3i64_param_0+16];
; CHECK-DAG: ld.param.v2.b64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v3i64_param_0];
; CHECK: .param .align 32 .b8 param0[32];
-; CHECK: st.param.v2.b64 [param0], {[[E0]], [[E1]]};
-; CHECK: st.param.b64 [param0+16], [[E2]];
; CHECK: .param .align 32 .b8 retval0[32];
+; CHECK-DAG: st.param.v2.b64 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.b64 [param0+16], [[E2]];
; CHECK: call.uni (retval0), test_v3i64,
; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0];
; CHECK: ld.param.b64 [[RE2:%rd[0-9]+]], [retval0+16];
@@ -828,9 +823,9 @@ define <3 x i64> @test_v3i64(<3 x i64> %a) {
; CHECK-DAG: ld.param.v2.b64 {[[E2:%rd[0-9]+]], [[E3:%rd[0-9]+]]}, [test_v4i64_param_0+16];
; CHECK-DAG: ld.param.v2.b64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v4i64_param_0];
; CHECK: .param .align 32 .b8 param0[32];
-; CHECK: st.param.v2.b64 [param0], {[[E0]], [[E1]]};
-; CHECK: st.param.v2.b64 [param0+16], {[[E2]], [[E3]]};
; CHECK: .param .align 32 .b8 retval0[32];
+; CHECK-DAG: st.param.v2.b64 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.v2.b64 [param0+16], {[[E2]], [[E3]]};
; CHECK: call.uni (retval0), test_v4i64,
; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0];
; CHECK: ld.param.v2.b64 {[[RE2:%rd[0-9]+]], [[RE3:%rd[0-9]+]]}, [retval0+16];
@@ -849,8 +844,8 @@ define <4 x i64> @test_v4i64(<4 x i64> %a) {
; CHECK-NEXT: .align 1 .b8 test_s_i1_param_0[1]
; CHECK: ld.param.b8 [[A:%rs[0-9]+]], [test_s_i1_param_0];
; CHECK: .param .align 1 .b8 param0[1];
-; CHECK: st.param.b8 [param0], [[A]]
; CHECK: .param .align 1 .b8 retval0[1];
+; CHECK: st.param.b8 [param0], [[A]]
; CHECK: call.uni (retval0), test_s_i1,
; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b8 [func_retval0], [[R]];
@@ -865,8 +860,8 @@ define %s_i1 @test_s_i1(%s_i1 %a) {
; CHECK-NEXT: .param .align 1 .b8 test_s_i8_param_0[1]
; CHECK: ld.param.b8 [[A:%rs[0-9]+]], [test_s_i8_param_0];
; CHECK: .param .align 1 .b8 param0[1];
-; CHECK: st.param.b8 [param0], [[A]]
; CHECK: .param .align 1 .b8 retval0[1];
+; CHECK: st.param.b8 [param0], [[A]]
; CHECK: call.uni (retval0), test_s_i8,
; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b8 [func_retval0], [[R]];
@@ -881,8 +876,8 @@ define %s_i8 @test_s_i8(%s_i8 %a) {
; CHECK-NEXT: .param .align 2 .b8 test_s_i16_param_0[2]
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_s_i16_param_0];
; CHECK: .param .align 2 .b8 param0[2];
-; CHECK: st.param.b16 [param0], [[A]]
; CHECK: .param .align 2 .b8 retval0[2];
+; CHECK: st.param.b16 [param0], [[A]]
; CHECK: call.uni (retval0), test_s_i16,
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b16 [func_retval0], [[R]];
@@ -897,8 +892,8 @@ define %s_i16 @test_s_i16(%s_i16 %a) {
; CHECK-NEXT: .param .align 2 .b8 test_s_f16_param_0[2]
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_s_f16_param_0];
; CHECK: .param .align 2 .b8 param0[2];
-; CHECK: st.param.b16 [param0], [[A]]
; CHECK: .param .align 2 .b8 retval0[2];
+; CHECK: st.param.b16 [param0], [[A]]
; CHECK: call.uni (retval0), test_s_f16,
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b16 [func_retval0], [[R]];
@@ -913,8 +908,8 @@ define %s_f16 @test_s_f16(%s_f16 %a) {
; CHECK-NEXT: .param .align 4 .b8 test_s_i32_param_0[4]
; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_s_i32_param_0];
; CHECK: .param .align 4 .b8 param0[4]
-; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: st.param.b32 [param0], [[E]];
; CHECK: call.uni (retval0), test_s_i32,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]];
@@ -929,8 +924,8 @@ define %s_i32 @test_s_i32(%s_i32 %a) {
; CHECK-NEXT: .param .align 4 .b8 test_s_f32_param_0[4]
; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_s_f32_param_0];
; CHECK: .param .align 4 .b8 param0[4]
-; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: st.param.b32 [param0], [[E]];
; CHECK: call.uni (retval0), test_s_f32,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]];
@@ -945,8 +940,8 @@ define %s_f32 @test_s_f32(%s_f32 %a) {
; CHECK-NEXT: .param .align 8 .b8 test_s_i64_param_0[8]
; CHECK: ld.param.b64 [[E:%rd[0-9]+]], [test_s_i64_param_0];
; CHECK: .param .align 8 .b8 param0[8];
-; CHECK: st.param.b64 [param0], [[E]];
; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK: st.param.b64 [param0], [[E]];
; CHECK: call.uni (retval0), test_s_i64,
; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0];
; CHECK: st.param.b64 [func_retval0], [[R]];
@@ -966,12 +961,12 @@ define %s_i64 @test_s_i64(%s_i64 %a) {
; CHECK-DAG: ld.param.b32 [[E1:%r[0-9]+]], [test_s_i32f32_param_0+4];
; CHECK-DAG: ld.param.b32 [[E0:%r[0-9]+]], [test_s_i32f32_param_0];
; CHECK: .param .align 8 .b8 param0[24];
+; CHECK: .param .align 8 .b8 retval0[24];
; CHECK-DAG: st.param.b32 [param0], [[E0]];
; CHECK-DAG: st.param.b32 [param0+4], [[E1]];
; CHECK-DAG: st.param.b32 [param0+8], [[E2]];
; CHECK-DAG: st.param.b32 [param0+12], [[E3]];
; CHECK-DAG: st.param.b64 [param0+16], [[E4]];
-; CHECK: .param .align 8 .b8 retval0[24];
; CHECK: call.uni (retval0), test_s_i32f32,
; CHECK-DAG: ld.param.b32 [[RE0:%r[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b32 [[RE1:%r[0-9]+]], [retval0+4];
@@ -997,10 +992,10 @@ define %s_i32f32 @test_s_i32f32(%s_i32f32 %a) {
; CHECK-DAG: ld.param.v2.b32 {[[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_s_i32x4_param_0+8];
; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i32x4_param_0];
; CHECK: .param .align 8 .b8 param0[24];
-; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
-; CHECK: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
-; CHECK: st.param.b64 [param0+16], [[E4]];
; CHECK: .param .align 8 .b8 retval0[24];
+; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
+; CHECK-DAG: st.param.b64 [param0+16], [[E4]];
; CHECK: call.uni (retval0), test_s_i32x4,
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK: ld.param.v2.b32 {[[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+8];
@@ -1024,16 +1019,13 @@ define %s_i32x4 @test_s_i32x4(%s_i32x4 %a) {
; CHECK: ld.param.b8 [[E2:%rs[0-9]+]], [test_s_i1i32x4_param_0+8];
; CHECK: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i1i32x4_param_0];
; CHECK: .param .align 8 .b8 param0[32];
-; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
-; CHECK: st.param.b8 [param0+8], [[E2]];
-; CHECK: st.param.b32 [param0+12], [[E3]];
-; CHECK: st.param.b32 [param0+16], [[E4]];
-; CHECK: st.param.b64 [param0+24], [[E5]];
; CHECK: .param .align 8 .b8 retval0[32];
-; CHECK: call.uni (retval0), test_s_i1i32x4,
-; CHECK: (
-; CHECK: param0
-; CHECK: );
+; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.b8 [param0+8], [[E2]];
+; CHECK-DAG: st.param.b32 [param0+12], [[E3]];
+; CHECK-DAG: st.param.b32 [param0+16], [[E4]];
+; CHECK-DAG: st.param.b64 [param0+24], [[E5]];
+; CHECK: call.uni (retval0), test_s_i1i32x4, (param0);
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+8];
; CHECK: ld.param.b32 [[RE3:%r[0-9]+]], [retval0+12];
@@ -1082,6 +1074,7 @@ define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) {
; CHECK-DAG: ld.param.b8 %r{{.*}}, [test_s_i1i32x4p_param_0+1];
; CHECK-DAG: ld.param.b8 %r{{.*}}, [test_s_i1i32x4p_param_0];
; CHECK: .param .align 1 .b8 param0[25];
+; CHECK: .param .align 1 .b8 retval0[25];
; CHECK-DAG: st.param.b8 [param0],
; CHECK-DAG: st.param.b8 [param0+1],
; CHECK-DAG: st.param.b8 [param0+2],
@@ -1107,33 +1100,32 @@ define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) {
; CHECK-DAG: st.param.b8 [param0+22],
; CHECK-DAG: st.param.b8 [param0+23],
; CHECK-DAG: st.param.b8 [param0+24],
-; CHECK: .param .align 1 .b8 retval0[25];
-; CHECK: call.uni (retval0), test_s_i1i32x4p,
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+1];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+2];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+3];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+4];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+5];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+6];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+7];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+8];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+9];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+10];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+11];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+12];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+13];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+14];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+15];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+16];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+17];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+18];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+19];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+20];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+21];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+22];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+23];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+24];
+; CHECK: call.uni (retval0), test_s_i1i32x4p, (param0);
+; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+8];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+3];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+2];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+1];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+7];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+6];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+5];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+4];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+12];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+11];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+10];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+9];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+16];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+15];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+14];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+13];
+; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+24];
+; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+23];
+; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+22];
+; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+21];
+; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+20];
+; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+19];
+; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+18];
+; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+17];
; CHECK: } // callseq
; CHECK-DAG: st.param.b8 [func_retval0],
; CHECK-DAG: st.param.b8 [func_retval0+1],
@@ -1177,13 +1169,13 @@ define %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a) {
; CHECK: ld.param.b32 [[E2:%r[0-9]+]], [test_s_crossfield_param_0+8];
; CHECK: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_crossfield_param_0];
; CHECK: .param .align 16 .b8 param0[80];
-; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
-; CHECK: st.param.b32 [param0+8], [[E2]];
-; CHECK: st.param.v4.b32 [param0+16], {[[E3]], [[E4]], [[E5]], [[E6]]};
-; CHECK: st.param.v4.b32 [param0+32], {[[E7]], [[E8]], [[E9]], [[E10]]};
-; CHECK: st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]};
-; CHECK: st.param.b32 [param0+64], [[E15]];
; CHECK: .param .align 16 .b8 retval0[80];
+; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.b32 [param0+8], [[E2]];
+; CHECK-DAG: st.param.v4.b32 [param0+16], {[[E3]], [[E4]], [[E5]], [[E6]]};
+; CHECK-DAG: st.param.v4.b32 [param0+32], {[[E7]], [[E8]], [[E9]], [[E10]]};
+; CHECK-DAG: st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]};
+; CHECK-DAG: st.param.b32 [param0+64], [[E15]];
; CHECK: call.uni (retval0), test_s_crossfield,
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
diff --git a/llvm/test/CodeGen/NVPTX/param-overalign.ll b/llvm/test/CodeGen/NVPTX/param-overalign.ll
index 88ad0b0..2155fb4 100644
--- a/llvm/test/CodeGen/NVPTX/param-overalign.ll
+++ b/llvm/test/CodeGen/NVPTX/param-overalign.ll
@@ -28,8 +28,8 @@ define float @caller_md(float %a, float %b) {
; CHECK-NEXT: ld.param.b32 %r2, [caller_md_param_1];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2};
; CHECK-NEXT: .param .b32 retval0;
+; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2};
; CHECK-NEXT: call.uni (retval0), callee_md, (param0);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 0
@@ -69,8 +69,8 @@ define float @caller(float %a, float %b) {
; CHECK-NEXT: ld.param.b32 %r2, [caller_param_1];
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2};
; CHECK-NEXT: .param .b32 retval0;
+; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2};
; CHECK-NEXT: call.uni (retval0), callee, (param0);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 1
diff --git a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll
index a480984a..a592b82 100644
--- a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll
+++ b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll
@@ -84,8 +84,8 @@ define dso_local void @caller_St4x1(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St4x1_param_1
; CHECK: )
; CHECK: .param .b32 param0;
- ; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .align 16 .b8 retval0[4];
+ ; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: call.uni (retval0), callee_St4x1, (param0);
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
%1 = load i32, ptr %in, align 4
@@ -112,8 +112,8 @@ define dso_local void @caller_St4x2(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St4x2_param_1
; CHECK: )
; CHECK: .param .align 16 .b8 param0[8];
- ; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: .param .align 16 .b8 retval0[8];
+ ; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: call.uni (retval0), callee_St4x2, (param0);
; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
%agg.tmp = alloca %struct.St4x2, align 8
@@ -149,9 +149,9 @@ define dso_local void @caller_St4x3(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St4x3_param_1
; CHECK: )
; CHECK: .param .align 16 .b8 param0[12];
+ ; CHECK: .param .align 16 .b8 retval0[12];
; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: st.param.b32 [param0+8], {{%r[0-9]+}};
- ; CHECK: .param .align 16 .b8 retval0[12];
; CHECK: call.uni (retval0), callee_St4x3, (param0);
; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+8];
@@ -193,8 +193,8 @@ define dso_local void @caller_St4x4(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St4x4_param_1
; CHECK: )
; CHECK: .param .align 16 .b8 param0[16];
- ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: .param .align 16 .b8 retval0[16];
+ ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: call.uni (retval0), callee_St4x4, (param0);
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
%call = tail call fastcc [4 x i32] @callee_St4x4(ptr noundef nonnull byval(%struct.St4x4) align 4 %in) #2
@@ -239,9 +239,9 @@ define dso_local void @caller_St4x5(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St4x5_param_1
; CHECK: )
; CHECK: .param .align 16 .b8 param0[20];
+ ; CHECK: .param .align 16 .b8 retval0[20];
; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: st.param.b32 [param0+16], {{%r[0-9]+}};
- ; CHECK: .param .align 16 .b8 retval0[20];
; CHECK: call.uni (retval0), callee_St4x5, (param0);
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+16];
@@ -295,9 +295,9 @@ define dso_local void @caller_St4x6(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St4x6_param_1
; CHECK: )
; CHECK: .param .align 16 .b8 param0[24];
+ ; CHECK: .param .align 16 .b8 retval0[24];
; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}};
- ; CHECK: .param .align 16 .b8 retval0[24];
; CHECK: call.uni (retval0), callee_St4x6, (param0);
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16];
@@ -357,10 +357,10 @@ define dso_local void @caller_St4x7(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St4x7_param_1
; CHECK: )
; CHECK: .param .align 16 .b8 param0[28];
+ ; CHECK: .param .align 16 .b8 retval0[28];
; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: st.param.b32 [param0+24], {{%r[0-9]+}};
- ; CHECK: .param .align 16 .b8 retval0[28];
; CHECK: call.uni (retval0), callee_St4x7, (param0);
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16];
@@ -429,9 +429,9 @@ define dso_local void @caller_St4x8(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St4x8_param_1
; CHECK: )
; CHECK: .param .align 16 .b8 param0[32];
- ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
- ; CHECK: st.param.v4.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: .param .align 16 .b8 retval0[32];
+ ; CHECK-DAG: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
+ ; CHECK-DAG: st.param.v4.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: call.uni (retval0), callee_St4x8, (param0);
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16];
@@ -503,8 +503,8 @@ define dso_local void @caller_St8x1(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St8x1_param_1
; CHECK: )
; CHECK: .param .b64 param0;
- ; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .align 16 .b8 retval0[8];
+ ; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: call.uni (retval0), callee_St8x1, (param0);
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
%1 = load i64, ptr %in, align 8
@@ -531,8 +531,8 @@ define dso_local void @caller_St8x2(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St8x2_param_1
; CHECK: )
; CHECK: .param .align 16 .b8 param0[16];
- ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
; CHECK: .param .align 16 .b8 retval0[16];
+ ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
; CHECK: call.uni (retval0), callee_St8x2, (param0);
; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0];
%call = tail call fastcc [2 x i64] @callee_St8x2(ptr noundef nonnull byval(%struct.St8x2) align 8 %in) #2
@@ -565,9 +565,9 @@ define dso_local void @caller_St8x3(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St8x3_param_1
; CHECK: )
; CHECK: .param .align 16 .b8 param0[24];
+ ; CHECK: .param .align 16 .b8 retval0[24];
; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
; CHECK: st.param.b64 [param0+16], {{%rd[0-9]+}};
- ; CHECK: .param .align 16 .b8 retval0[24];
; CHECK: call.uni (retval0), callee_St8x3, (param0);
; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0];
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+16];
@@ -609,9 +609,9 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St8x4_param_1
; CHECK: )
; CHECK: .param .align 16 .b8 param0[32];
- ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
- ; CHECK: st.param.v2.b64 [param0+16], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
; CHECK: .param .align 16 .b8 retval0[32];
+ ; CHECK-DAG: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
+ ; CHECK-DAG: st.param.v2.b64 [param0+16], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
; CHECK: call.uni (retval0), callee_St8x4, (param0);
; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0];
; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0+16];
diff --git a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir
index 5d0d6f6..4a53152 100644
--- a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir
+++ b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir
@@ -77,7 +77,7 @@ constants: []
machineFunctionInfo: {}
body: |
bb.0:
- %0:b32, %1:b32, %2:b32, %3:b32 = LoadParamMemV4I32 0
+ %0:b32, %1:b32, %2:b32, %3:b32 = LDV_i32_v4 0, 0, 101, 3, 32, &retval0, 0 :: (load (s128), addrspace 101)
; CHECK-NOT: ProxyReg
%4:b32 = ProxyRegB32 killed %0
%5:b32 = ProxyRegB32 killed %1
@@ -86,7 +86,7 @@ body: |
; CHECK: STV_i32_v4 killed %0, killed %1, killed %2, killed %3
STV_i32_v4 killed %4, killed %5, killed %6, killed %7, 0, 0, 101, 32, &func_retval0, 0 :: (store (s128), addrspace 101)
- %8:b32 = LoadParamMemI32 0
+ %8:b32 = LD_i32 0, 0, 101, 3, 32, &retval0, 0 :: (load (s32), addrspace 101)
; CHECK-NOT: ProxyReg
%9:b32 = ProxyRegB32 killed %8
%10:b32 = ProxyRegB32 killed %9
diff --git a/llvm/test/CodeGen/NVPTX/st-param-imm.ll b/llvm/test/CodeGen/NVPTX/st-param-imm.ll
index 6aa1119..f90435a 100644
--- a/llvm/test/CodeGen/NVPTX/st-param-imm.ll
+++ b/llvm/test/CodeGen/NVPTX/st-param-imm.ll
@@ -26,8 +26,8 @@ define void @st_param_i8_i16() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 2 .b8 param0[4];
-; CHECK-NEXT: st.param.b8 [param0], 1;
; CHECK-NEXT: st.param.b16 [param0+2], 2;
+; CHECK-NEXT: st.param.b8 [param0], 1;
; CHECK-NEXT: call.uni call_i8_i16, (param0);
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: ret;
@@ -75,7 +75,7 @@ define void @st_param_f32() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 3, 0
; CHECK-NEXT: .param .b32 param0;
-; CHECK-NEXT: st.param.b32 [param0], 0f40A00000;
+; CHECK-NEXT: st.param.b32 [param0], 1084227584;
; CHECK-NEXT: call.uni call_f32, (param0);
; CHECK-NEXT: } // callseq 3
; CHECK-NEXT: ret;
@@ -91,7 +91,7 @@ define void @st_param_f64() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 4, 0
; CHECK-NEXT: .param .b64 param0;
-; CHECK-NEXT: st.param.b64 [param0], 0d4018000000000000;
+; CHECK-NEXT: st.param.b64 [param0], 4618441417868443648;
; CHECK-NEXT: call.uni call_f64, (param0);
; CHECK-NEXT: } // callseq 4
; CHECK-NEXT: ret;
@@ -165,7 +165,7 @@ define void @st_param_v2_i16_ii() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 8, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v2.b16 [param0], {1, 2};
+; CHECK-NEXT: st.param.b32 [param0], 131073;
; CHECK-NEXT: call.uni call_v2_i16, (param0);
; CHECK-NEXT: } // callseq 8
; CHECK-NEXT: ret;
@@ -432,7 +432,7 @@ define void @st_param_v4_i8_iiii() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 23, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, 3, 4};
+; CHECK-NEXT: st.param.b32 [param0], 67305985;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 23
; CHECK-NEXT: ret;
@@ -442,15 +442,18 @@ define void @st_param_v4_i8_iiii() {
define void @st_param_v4_i8_irrr(i8 %b, i8 %c, i8 %d) {
; CHECK-LABEL: st_param_v4_i8_irrr(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<4>;
+; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irrr_param_2];
-; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_irrr_param_1];
-; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_irrr_param_0];
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_irrr_param_2];
+; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_irrr_param_1];
+; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
+; CHECK-NEXT: ld.param.b8 %r4, [st_param_v4_i8_irrr_param_0];
+; CHECK-NEXT: prmt.b32 %r5, 1, %r4, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x5410U;
; CHECK-NEXT: { // callseq 24, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs3, %rs2, %rs1};
+; CHECK-NEXT: st.param.b32 [param0], %r6;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 24
; CHECK-NEXT: ret;
@@ -464,15 +467,18 @@ define void @st_param_v4_i8_irrr(i8 %b, i8 %c, i8 %d) {
define void @st_param_v4_i8_rirr(i8 %a, i8 %c, i8 %d) {
; CHECK-LABEL: st_param_v4_i8_rirr(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<4>;
+; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rirr_param_2];
-; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rirr_param_1];
-; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_rirr_param_0];
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_rirr_param_2];
+; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_rirr_param_1];
+; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
+; CHECK-NEXT: ld.param.b8 %r4, [st_param_v4_i8_rirr_param_0];
+; CHECK-NEXT: prmt.b32 %r5, %r4, 2, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x5410U;
; CHECK-NEXT: { // callseq 25, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {%rs3, 2, %rs2, %rs1};
+; CHECK-NEXT: st.param.b32 [param0], %r6;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 25
; CHECK-NEXT: ret;
@@ -486,15 +492,18 @@ define void @st_param_v4_i8_rirr(i8 %a, i8 %c, i8 %d) {
define void @st_param_v4_i8_rrir(i8 %a, i8 %b, i8 %d) {
; CHECK-LABEL: st_param_v4_i8_rrir(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<4>;
+; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rrir_param_2];
-; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rrir_param_1];
-; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_rrir_param_0];
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_rrir_param_1];
+; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_rrir_param_0];
+; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
+; CHECK-NEXT: ld.param.b8 %r4, [st_param_v4_i8_rrir_param_2];
+; CHECK-NEXT: prmt.b32 %r5, 3, %r4, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r6, %r3, %r5, 0x5410U;
; CHECK-NEXT: { // callseq 26, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {%rs3, %rs2, 3, %rs1};
+; CHECK-NEXT: st.param.b32 [param0], %r6;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 26
; CHECK-NEXT: ret;
@@ -508,15 +517,18 @@ define void @st_param_v4_i8_rrir(i8 %a, i8 %b, i8 %d) {
define void @st_param_v4_i8_rrri(i8 %a, i8 %b, i8 %c) {
; CHECK-LABEL: st_param_v4_i8_rrri(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<4>;
+; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rrri_param_2];
-; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rrri_param_1];
-; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_rrri_param_0];
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_rrri_param_1];
+; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_rrri_param_0];
+; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
+; CHECK-NEXT: ld.param.b8 %r4, [st_param_v4_i8_rrri_param_2];
+; CHECK-NEXT: prmt.b32 %r5, %r4, 4, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r6, %r3, %r5, 0x5410U;
; CHECK-NEXT: { // callseq 27, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {%rs3, %rs2, %rs1, 4};
+; CHECK-NEXT: st.param.b32 [param0], %r6;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 27
; CHECK-NEXT: ret;
@@ -530,14 +542,16 @@ define void @st_param_v4_i8_rrri(i8 %a, i8 %b, i8 %c) {
define void @st_param_v4_i8_iirr(i8 %c, i8 %d) {
; CHECK-LABEL: st_param_v4_i8_iirr(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_iirr_param_1];
-; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_iirr_param_0];
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_iirr_param_1];
+; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_iirr_param_0];
+; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r4, 513, %r3, 0x5410U;
; CHECK-NEXT: { // callseq 28, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, %rs2, %rs1};
+; CHECK-NEXT: st.param.b32 [param0], %r4;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 28
; CHECK-NEXT: ret;
@@ -551,14 +565,17 @@ define void @st_param_v4_i8_iirr(i8 %c, i8 %d) {
define void @st_param_v4_i8_irir(i8 %b, i8 %d) {
; CHECK-LABEL: st_param_v4_i8_irir(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irir_param_1];
-; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_irir_param_0];
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_irir_param_1];
+; CHECK-NEXT: prmt.b32 %r2, 3, %r1, 0x3340U;
+; CHECK-NEXT: ld.param.b8 %r3, [st_param_v4_i8_irir_param_0];
+; CHECK-NEXT: prmt.b32 %r4, 1, %r3, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r5, %r4, %r2, 0x5410U;
; CHECK-NEXT: { // callseq 29, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs2, 3, %rs1};
+; CHECK-NEXT: st.param.b32 [param0], %r5;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 29
; CHECK-NEXT: ret;
@@ -572,14 +589,17 @@ define void @st_param_v4_i8_irir(i8 %b, i8 %d) {
define void @st_param_v4_i8_irri(i8 %b, i8 %c) {
; CHECK-LABEL: st_param_v4_i8_irri(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irri_param_1];
-; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_irri_param_0];
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_irri_param_1];
+; CHECK-NEXT: prmt.b32 %r2, %r1, 4, 0x3340U;
+; CHECK-NEXT: ld.param.b8 %r3, [st_param_v4_i8_irri_param_0];
+; CHECK-NEXT: prmt.b32 %r4, 1, %r3, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r5, %r4, %r2, 0x5410U;
; CHECK-NEXT: { // callseq 30, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs2, %rs1, 4};
+; CHECK-NEXT: st.param.b32 [param0], %r5;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 30
; CHECK-NEXT: ret;
@@ -593,14 +613,17 @@ define void @st_param_v4_i8_irri(i8 %b, i8 %c) {
define void @st_param_v4_i8_riir(i8 %a, i8 %d) {
; CHECK-LABEL: st_param_v4_i8_riir(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_riir_param_1];
-; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_riir_param_0];
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_riir_param_1];
+; CHECK-NEXT: prmt.b32 %r2, 3, %r1, 0x3340U;
+; CHECK-NEXT: ld.param.b8 %r3, [st_param_v4_i8_riir_param_0];
+; CHECK-NEXT: prmt.b32 %r4, %r3, 2, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r5, %r4, %r2, 0x5410U;
; CHECK-NEXT: { // callseq 31, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {%rs2, 2, 3, %rs1};
+; CHECK-NEXT: st.param.b32 [param0], %r5;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 31
; CHECK-NEXT: ret;
@@ -614,14 +637,17 @@ define void @st_param_v4_i8_riir(i8 %a, i8 %d) {
define void @st_param_v4_i8_riri(i8 %a, i8 %c) {
; CHECK-LABEL: st_param_v4_i8_riri(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_riri_param_1];
-; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_riri_param_0];
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_riri_param_1];
+; CHECK-NEXT: prmt.b32 %r2, %r1, 4, 0x3340U;
+; CHECK-NEXT: ld.param.b8 %r3, [st_param_v4_i8_riri_param_0];
+; CHECK-NEXT: prmt.b32 %r4, %r3, 2, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r5, %r4, %r2, 0x5410U;
; CHECK-NEXT: { // callseq 32, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {%rs2, 2, %rs1, 4};
+; CHECK-NEXT: st.param.b32 [param0], %r5;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 32
; CHECK-NEXT: ret;
@@ -635,14 +661,16 @@ define void @st_param_v4_i8_riri(i8 %a, i8 %c) {
define void @st_param_v4_i8_rrii(i8 %a, i8 %b) {
; CHECK-LABEL: st_param_v4_i8_rrii(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rrii_param_1];
-; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rrii_param_0];
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_rrii_param_1];
+; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_rrii_param_0];
+; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r4, %r3, 1027, 0x5410U;
; CHECK-NEXT: { // callseq 33, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {%rs2, %rs1, 3, 4};
+; CHECK-NEXT: st.param.b32 [param0], %r4;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 33
; CHECK-NEXT: ret;
@@ -656,13 +684,15 @@ define void @st_param_v4_i8_rrii(i8 %a, i8 %b) {
define void @st_param_v4_i8_iiir(i8 %d) {
; CHECK-LABEL: st_param_v4_i8_iiir(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_iiir_param_0];
; CHECK-NEXT: { // callseq 34, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, 3, %rs1};
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_iiir_param_0];
+; CHECK-NEXT: prmt.b32 %r2, 3, %r1, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r3, 513, %r2, 0x5410U;
+; CHECK-NEXT: st.param.b32 [param0], %r3;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 34
; CHECK-NEXT: ret;
@@ -676,13 +706,15 @@ define void @st_param_v4_i8_iiir(i8 %d) {
define void @st_param_v4_i8_iiri(i8 %c) {
; CHECK-LABEL: st_param_v4_i8_iiri(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_iiri_param_0];
; CHECK-NEXT: { // callseq 35, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, %rs1, 4};
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_iiri_param_0];
+; CHECK-NEXT: prmt.b32 %r2, %r1, 4, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r3, 513, %r2, 0x5410U;
+; CHECK-NEXT: st.param.b32 [param0], %r3;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 35
; CHECK-NEXT: ret;
@@ -696,13 +728,15 @@ define void @st_param_v4_i8_iiri(i8 %c) {
define void @st_param_v4_i8_irii(i8 %b) {
; CHECK-LABEL: st_param_v4_i8_irii(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irii_param_0];
; CHECK-NEXT: { // callseq 36, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs1, 3, 4};
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_irii_param_0];
+; CHECK-NEXT: prmt.b32 %r2, 1, %r1, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r3, %r2, 1027, 0x5410U;
+; CHECK-NEXT: st.param.b32 [param0], %r3;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 36
; CHECK-NEXT: ret;
@@ -716,13 +750,15 @@ define void @st_param_v4_i8_irii(i8 %b) {
define void @st_param_v4_i8_riii(i8 %a) {
; CHECK-LABEL: st_param_v4_i8_riii(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_riii_param_0];
; CHECK-NEXT: { // callseq 37, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, 2, 3, 4};
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_riii_param_0];
+; CHECK-NEXT: prmt.b32 %r2, %r1, 2, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r3, %r2, 1027, 0x5410U;
+; CHECK-NEXT: st.param.b32 [param0], %r3;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 37
; CHECK-NEXT: ret;
@@ -742,7 +778,7 @@ define void @st_param_v4_i16_iiii() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 38, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, 3, 4};
+; CHECK-NEXT: st.param.v2.b32 [param0], {131073, 262147};
; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 38
; CHECK-NEXT: ret;
@@ -841,13 +877,15 @@ define void @st_param_v4_i16_iirr(i16 %c, i16 %d) {
; CHECK-LABEL: st_param_v4_i16_iirr(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_iirr_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [st_param_v4_i16_iirr_param_1];
+; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NEXT: { // callseq 43, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, %rs1, %rs2};
+; CHECK-NEXT: st.param.v2.b32 [param0], {131073, %r1};
; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 43
; CHECK-NEXT: ret;
@@ -946,13 +984,15 @@ define void @st_param_v4_i16_rrii(i16 %a, i16 %b) {
; CHECK-LABEL: st_param_v4_i16_rrii(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_rrii_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [st_param_v4_i16_rrii_param_1];
+; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NEXT: { // callseq 48, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, %rs2, 3, 4};
+; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, 262147};
; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 48
; CHECK-NEXT: ret;
@@ -966,13 +1006,16 @@ define void @st_param_v4_i16_rrii(i16 %a, i16 %b) {
define void @st_param_v4_i16_iiir(i16 %d) {
; CHECK-LABEL: st_param_v4_i16_iiir(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_iiir_param_0];
+; CHECK-NEXT: mov.b16 %rs2, 3;
+; CHECK-NEXT: mov.b32 %r1, {%rs2, %rs1};
; CHECK-NEXT: { // callseq 49, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, 3, %rs1};
+; CHECK-NEXT: st.param.v2.b32 [param0], {131073, %r1};
; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 49
; CHECK-NEXT: ret;
@@ -986,13 +1029,16 @@ define void @st_param_v4_i16_iiir(i16 %d) {
define void @st_param_v4_i16_iiri(i16 %c) {
; CHECK-LABEL: st_param_v4_i16_iiri(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_iiri_param_0];
+; CHECK-NEXT: mov.b16 %rs2, 4;
+; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NEXT: { // callseq 50, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, %rs1, 4};
+; CHECK-NEXT: st.param.v2.b32 [param0], {131073, %r1};
; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 50
; CHECK-NEXT: ret;
@@ -1006,13 +1052,16 @@ define void @st_param_v4_i16_iiri(i16 %c) {
define void @st_param_v4_i16_irii(i16 %b) {
; CHECK-LABEL: st_param_v4_i16_irii(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_irii_param_0];
+; CHECK-NEXT: mov.b16 %rs2, 1;
+; CHECK-NEXT: mov.b32 %r1, {%rs2, %rs1};
; CHECK-NEXT: { // callseq 51, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0], {1, %rs1, 3, 4};
+; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, 262147};
; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 51
; CHECK-NEXT: ret;
@@ -1026,13 +1075,16 @@ define void @st_param_v4_i16_irii(i16 %b) {
define void @st_param_v4_i16_riii(i16 %a) {
; CHECK-LABEL: st_param_v4_i16_riii(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_riii_param_0];
+; CHECK-NEXT: mov.b16 %rs2, 2;
+; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NEXT: { // callseq 52, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, 2, 3, 4};
+; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, 262147};
; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 52
; CHECK-NEXT: ret;
@@ -1672,13 +1724,12 @@ declare void @call_v4_f32(%struct.float4 alignstack(16))
define void @st_param_bfloat() {
; CHECK-LABEL: st_param_bfloat(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0x4100;
; CHECK-NEXT: { // callseq 83, 0
; CHECK-NEXT: .param .align 2 .b8 param0[2];
-; CHECK-NEXT: st.param.b16 [param0], %rs1;
+; CHECK-NEXT: st.param.b16 [param0], 0x4100;
; CHECK-NEXT: call.uni call_bfloat, (param0);
; CHECK-NEXT: } // callseq 83
; CHECK-NEXT: ret;
diff --git a/llvm/test/CodeGen/NVPTX/store-undef.ll b/llvm/test/CodeGen/NVPTX/store-undef.ll
index 5b31b5e..c8ca6b6 100644
--- a/llvm/test/CodeGen/NVPTX/store-undef.ll
+++ b/llvm/test/CodeGen/NVPTX/store-undef.ll
@@ -34,9 +34,9 @@ define void @test_store_param_def(i64 %param0, i32 %param1) {
; CHECK-NEXT: ld.param.b32 %r1, [test_store_param_def_param_1];
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 16 .b8 param0[32];
+; CHECK-NEXT: st.param.v4.b32 [param0+16], {%r2, %r1, %r3, %r4};
+; CHECK-NEXT: st.param.v2.b32 [param0+8], {%r5, %r1};
; CHECK-NEXT: st.param.b64 [param0], %rd1;
-; CHECK-NEXT: st.param.v2.b32 [param0+8], {%r2, %r1};
-; CHECK-NEXT: st.param.v4.b32 [param0+16], {%r3, %r1, %r4, %r5};
; CHECK-NEXT: call.uni test_call, (param0);
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: ret;
diff --git a/llvm/test/CodeGen/NVPTX/tanhf.ll b/llvm/test/CodeGen/NVPTX/tanhf.ll
new file mode 100644
index 0000000..6f4eb22
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/tanhf.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mcpu=sm_75 -mattr=+ptx70 | FileCheck %s
+; RUN: %if ptxas-11.0 %{ llc < %s -mcpu=sm_75 -mattr=+ptx70 | %ptxas-verify -arch=sm_75 %}
+
+target triple = "nvptx64-nvidia-cuda"
+
+define float @test1(float %in) local_unnamed_addr {
+; CHECK-LABEL: test1(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [test1_param_0];
+; CHECK-NEXT: tanh.approx.f32 %r2, %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT: ret;
+ %call = call afn float @llvm.tanh.f32(float %in)
+ ret float %call
+}
+
+define half @test2(half %in) local_unnamed_addr {
+; CHECK-LABEL: test2(
+; CHECK: {
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [test2_param_0];
+; CHECK-NEXT: cvt.f32.f16 %r1, %rs1;
+; CHECK-NEXT: tanh.approx.f32 %r2, %r1;
+; CHECK-NEXT: cvt.rn.f16.f32 %rs2, %r2;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs2;
+; CHECK-NEXT: ret;
+ %call = call afn half @llvm.tanh.f16(half %in)
+ ret half %call
+}
+
+declare float @llvm.tanh.f32(float)
+declare half @llvm.tanh.f16(half)
+
diff --git a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
index d6961a9..3138d7c 100644
--- a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
@@ -69,8 +69,8 @@ define ptx_kernel void @baz(ptr %red, i32 %idx) {
; CHECK-NEXT: tex.1d.v4.f32.s32 {%r2, %r3, %r4, %r5}, [tex0, {%r1}];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .b64 param0;
-; CHECK-NEXT: st.param.b64 [param0], %rd3;
; CHECK-NEXT: .param .b32 retval0;
+; CHECK-NEXT: st.param.b64 [param0], %rd3;
; CHECK-NEXT: call.uni (retval0), texfunc, (param0);
; CHECK-NEXT: ld.param.b32 %r6, [retval0];
; CHECK-NEXT: } // callseq 0
diff --git a/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll b/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll
index 87e46b1..697eb90 100644
--- a/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll
+++ b/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; Verifies correctness of load/store of parameters and return values.
-; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | %ptxas-verify %}
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | %ptxas-verify %}
%s_i8i16p = type { <{ i16, i8, i16 }>, i64 }
%s_i8i32p = type { <{ i32, i8, i32 }>, i64 }
@@ -24,37 +24,35 @@
define %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) {
; CHECK-LABEL: test_s_i8i16p(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<15>;
+; CHECK-NEXT: .reg .b16 %rs<13>;
+; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs4, [test_s_i8i16p_param_0+4];
-; CHECK-NEXT: shl.b16 %rs5, %rs4, 8;
-; CHECK-NEXT: ld.param.b8 %rs6, [test_s_i8i16p_param_0+3];
-; CHECK-NEXT: or.b16 %rs3, %rs5, %rs6;
+; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8i16p_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i16p_param_0+8];
-; CHECK-NEXT: ld.param.b8 %rs2, [test_s_i8i16p_param_0+2];
-; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8i16p_param_0];
+; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8i16p_param_0+4];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 8 .b8 param0[16];
-; CHECK-NEXT: st.param.b16 [param0], %rs1;
-; CHECK-NEXT: st.param.b8 [param0+2], %rs2;
-; CHECK-NEXT: st.param.b8 [param0+3], %rs3;
-; CHECK-NEXT: st.param.b8 [param0+4], %rs4;
-; CHECK-NEXT: st.param.b64 [param0+8], %rd1;
; CHECK-NEXT: .param .align 8 .b8 retval0[16];
+; CHECK-NEXT: st.param.b8 [param0+4], %rs1;
+; CHECK-NEXT: st.param.b64 [param0+8], %rd1;
+; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni (retval0), test_s_i8i16p, (param0);
-; CHECK-NEXT: ld.param.b16 %rs7, [retval0];
-; CHECK-NEXT: ld.param.b8 %rs8, [retval0+2];
-; CHECK-NEXT: ld.param.b8 %rs9, [retval0+3];
-; CHECK-NEXT: ld.param.b8 %rs10, [retval0+4];
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+8];
+; CHECK-NEXT: ld.param.b8 %rs2, [retval0+2];
+; CHECK-NEXT: ld.param.b16 %rs3, [retval0];
+; CHECK-NEXT: ld.param.b8 %rs4, [retval0+4];
+; CHECK-NEXT: ld.param.b8 %rs5, [retval0+3];
; CHECK-NEXT: } // callseq 0
-; CHECK-NEXT: st.param.b16 [func_retval0], %rs7;
-; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs8;
-; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs10;
-; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs9;
+; CHECK-NEXT: shl.b16 %rs8, %rs4, 8;
+; CHECK-NEXT: or.b16 %rs9, %rs8, %rs5;
+; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs5;
; CHECK-NEXT: st.param.b64 [func_retval0+8], %rd2;
+; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs2;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs3;
+; CHECK-NEXT: shr.u16 %rs12, %rs9, 8;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs12;
; CHECK-NEXT: ret;
%r = tail call %s_i8i16p @test_s_i8i16p(%s_i8i16p %a)
ret %s_i8i16p %r
@@ -64,56 +62,51 @@ define %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) {
define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) {
; CHECK-LABEL: test_s_i8i32p(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<12>;
-; CHECK-NEXT: .reg .b32 %r<20>;
+; CHECK-NEXT: .reg .b16 %rs<4>;
+; CHECK-NEXT: .reg .b32 %r<24>;
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8i32p_param_0+6];
-; CHECK-NEXT: shl.b32 %r4, %r3, 8;
-; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8i32p_param_0+5];
-; CHECK-NEXT: or.b32 %r6, %r4, %r5;
-; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8i32p_param_0+7];
-; CHECK-NEXT: shl.b32 %r8, %r7, 16;
-; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8i32p_param_0+8];
-; CHECK-NEXT: shl.b32 %r10, %r9, 24;
-; CHECK-NEXT: or.b32 %r11, %r10, %r8;
-; CHECK-NEXT: or.b32 %r2, %r11, %r6;
-; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i32p_param_0+16];
-; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8i32p_param_0+4];
; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8i32p_param_0];
-; CHECK-NEXT: shr.u32 %r12, %r2, 8;
-; CHECK-NEXT: shr.u32 %r13, %r11, 16;
+; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8i32p_param_0+4];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i32p_param_0+16];
+; CHECK-NEXT: ld.param.b8 %r2, [test_s_i8i32p_param_0+6];
+; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8i32p_param_0+7];
+; CHECK-NEXT: ld.param.b8 %r4, [test_s_i8i32p_param_0+8];
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 8 .b8 param0[24];
-; CHECK-NEXT: st.param.b32 [param0], %r1;
-; CHECK-NEXT: st.param.b8 [param0+4], %rs1;
-; CHECK-NEXT: st.param.b8 [param0+5], %r2;
-; CHECK-NEXT: st.param.b8 [param0+6], %r12;
-; CHECK-NEXT: st.param.b8 [param0+7], %r13;
-; CHECK-NEXT: st.param.b8 [param0+8], %r9;
-; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
; CHECK-NEXT: .param .align 8 .b8 retval0[24];
+; CHECK-NEXT: st.param.b8 [param0+8], %r4;
+; CHECK-NEXT: st.param.b8 [param0+7], %r3;
+; CHECK-NEXT: st.param.b8 [param0+6], %r2;
+; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
+; CHECK-NEXT: st.param.b16 [param0+4], %rs1;
+; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni (retval0), test_s_i8i32p, (param0);
-; CHECK-NEXT: ld.param.b32 %r14, [retval0];
-; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
-; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5];
-; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6];
-; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7];
-; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8];
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16];
+; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
+; CHECK-NEXT: ld.param.b32 %r5, [retval0];
+; CHECK-NEXT: ld.param.b8 %r6, [retval0+8];
+; CHECK-NEXT: ld.param.b8 %r7, [retval0+7];
+; CHECK-NEXT: ld.param.b8 %r8, [retval0+6];
+; CHECK-NEXT: ld.param.b8 %r9, [retval0+5];
; CHECK-NEXT: } // callseq 1
-; CHECK-NEXT: cvt.u32.u16 %r15, %rs3;
-; CHECK-NEXT: cvt.u32.u16 %r16, %rs4;
-; CHECK-NEXT: cvt.u32.u16 %r17, %rs5;
-; CHECK-NEXT: cvt.u32.u16 %r18, %rs6;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r14;
-; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
-; CHECK-NEXT: st.param.b8 [func_retval0+8], %r18;
-; CHECK-NEXT: st.param.b8 [func_retval0+7], %r17;
-; CHECK-NEXT: st.param.b8 [func_retval0+6], %r16;
-; CHECK-NEXT: st.param.b8 [func_retval0+5], %r15;
+; CHECK-NEXT: shl.b32 %r12, %r8, 8;
+; CHECK-NEXT: or.b32 %r13, %r12, %r9;
+; CHECK-NEXT: shl.b32 %r15, %r7, 16;
+; CHECK-NEXT: shl.b32 %r17, %r6, 24;
+; CHECK-NEXT: or.b32 %r18, %r17, %r15;
+; CHECK-NEXT: or.b32 %r19, %r18, %r13;
+; CHECK-NEXT: st.param.b8 [func_retval0+5], %r9;
; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
+; CHECK-NEXT: shr.u32 %r21, %r19, 24;
+; CHECK-NEXT: st.param.b8 [func_retval0+8], %r21;
+; CHECK-NEXT: shr.u32 %r22, %r19, 16;
+; CHECK-NEXT: st.param.b8 [func_retval0+7], %r22;
+; CHECK-NEXT: shr.u32 %r23, %r19, 8;
+; CHECK-NEXT: st.param.b8 [func_retval0+6], %r23;
; CHECK-NEXT: ret;
%r = tail call %s_i8i32p @test_s_i8i32p(%s_i8i32p %a)
ret %s_i8i32p %r
@@ -123,112 +116,66 @@ define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) {
define %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) {
; CHECK-LABEL: test_s_i8i64p(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<20>;
-; CHECK-NEXT: .reg .b64 %rd<68>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b64 %rd<46>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8i64p_param_0+10];
-; CHECK-NEXT: shl.b64 %rd5, %rd4, 8;
-; CHECK-NEXT: ld.param.b8 %rd6, [test_s_i8i64p_param_0+9];
-; CHECK-NEXT: or.b64 %rd7, %rd5, %rd6;
-; CHECK-NEXT: ld.param.b8 %rd8, [test_s_i8i64p_param_0+11];
-; CHECK-NEXT: shl.b64 %rd9, %rd8, 16;
-; CHECK-NEXT: ld.param.b8 %rd10, [test_s_i8i64p_param_0+12];
-; CHECK-NEXT: shl.b64 %rd11, %rd10, 24;
-; CHECK-NEXT: or.b64 %rd12, %rd11, %rd9;
-; CHECK-NEXT: or.b64 %rd13, %rd12, %rd7;
-; CHECK-NEXT: ld.param.b8 %rd14, [test_s_i8i64p_param_0+14];
-; CHECK-NEXT: shl.b64 %rd15, %rd14, 8;
-; CHECK-NEXT: ld.param.b8 %rd16, [test_s_i8i64p_param_0+13];
-; CHECK-NEXT: or.b64 %rd17, %rd15, %rd16;
-; CHECK-NEXT: ld.param.b8 %rd18, [test_s_i8i64p_param_0+15];
-; CHECK-NEXT: shl.b64 %rd19, %rd18, 16;
-; CHECK-NEXT: ld.param.b8 %rd20, [test_s_i8i64p_param_0+16];
-; CHECK-NEXT: shl.b64 %rd21, %rd20, 24;
-; CHECK-NEXT: or.b64 %rd22, %rd21, %rd19;
-; CHECK-NEXT: or.b64 %rd23, %rd22, %rd17;
-; CHECK-NEXT: shl.b64 %rd24, %rd23, 32;
-; CHECK-NEXT: or.b64 %rd2, %rd24, %rd13;
-; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8i64p_param_0+24];
-; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8i64p_param_0+8];
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i64p_param_0];
-; CHECK-NEXT: shr.u64 %rd25, %rd2, 8;
-; CHECK-NEXT: shr.u64 %rd26, %rd2, 16;
-; CHECK-NEXT: shr.u64 %rd27, %rd2, 24;
-; CHECK-NEXT: bfe.u64 %rd28, %rd23, 8, 24;
-; CHECK-NEXT: bfe.u64 %rd29, %rd23, 16, 16;
-; CHECK-NEXT: bfe.u64 %rd30, %rd23, 24, 8;
+; CHECK-NEXT: ld.param.b64 %rd2, [test_s_i8i64p_param_0+8];
+; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8i64p_param_0+24];
+; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8i64p_param_0+16];
; CHECK-NEXT: { // callseq 2, 0
; CHECK-NEXT: .param .align 8 .b8 param0[32];
-; CHECK-NEXT: st.param.b64 [param0], %rd1;
-; CHECK-NEXT: st.param.b8 [param0+8], %rs1;
-; CHECK-NEXT: st.param.b8 [param0+9], %rd2;
-; CHECK-NEXT: st.param.b8 [param0+10], %rd25;
-; CHECK-NEXT: st.param.b8 [param0+11], %rd26;
-; CHECK-NEXT: st.param.b8 [param0+12], %rd27;
-; CHECK-NEXT: st.param.b8 [param0+13], %rd23;
-; CHECK-NEXT: st.param.b8 [param0+14], %rd28;
-; CHECK-NEXT: st.param.b8 [param0+15], %rd29;
-; CHECK-NEXT: st.param.b8 [param0+16], %rd30;
-; CHECK-NEXT: st.param.b64 [param0+24], %rd3;
; CHECK-NEXT: .param .align 8 .b8 retval0[32];
+; CHECK-NEXT: st.param.b8 [param0+16], %rd4;
+; CHECK-NEXT: st.param.b64 [param0+24], %rd3;
+; CHECK-NEXT: st.param.b64 [param0+8], %rd2;
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: call.uni (retval0), test_s_i8i64p, (param0);
-; CHECK-NEXT: ld.param.b64 %rd31, [retval0];
-; CHECK-NEXT: ld.param.b8 %rs2, [retval0+8];
-; CHECK-NEXT: ld.param.b8 %rs3, [retval0+9];
-; CHECK-NEXT: ld.param.b8 %rs4, [retval0+10];
-; CHECK-NEXT: ld.param.b8 %rs5, [retval0+11];
-; CHECK-NEXT: ld.param.b8 %rs6, [retval0+12];
-; CHECK-NEXT: ld.param.b8 %rs7, [retval0+13];
-; CHECK-NEXT: ld.param.b8 %rs8, [retval0+14];
-; CHECK-NEXT: ld.param.b8 %rs9, [retval0+15];
-; CHECK-NEXT: ld.param.b8 %rs10, [retval0+16];
-; CHECK-NEXT: ld.param.b64 %rd32, [retval0+24];
+; CHECK-NEXT: ld.param.b64 %rd5, [retval0+24];
+; CHECK-NEXT: ld.param.b8 %rs1, [retval0+8];
+; CHECK-NEXT: ld.param.b64 %rd6, [retval0];
+; CHECK-NEXT: ld.param.b8 %rd7, [retval0+16];
+; CHECK-NEXT: ld.param.b8 %rd8, [retval0+15];
+; CHECK-NEXT: ld.param.b8 %rd9, [retval0+14];
+; CHECK-NEXT: ld.param.b8 %rd10, [retval0+13];
+; CHECK-NEXT: ld.param.b8 %rd11, [retval0+12];
+; CHECK-NEXT: ld.param.b8 %rd12, [retval0+11];
+; CHECK-NEXT: ld.param.b8 %rd13, [retval0+10];
+; CHECK-NEXT: ld.param.b8 %rd14, [retval0+9];
; CHECK-NEXT: } // callseq 2
-; CHECK-NEXT: cvt.u64.u16 %rd33, %rs3;
-; CHECK-NEXT: and.b64 %rd34, %rd33, 255;
-; CHECK-NEXT: cvt.u64.u16 %rd35, %rs4;
-; CHECK-NEXT: and.b64 %rd36, %rd35, 255;
-; CHECK-NEXT: shl.b64 %rd37, %rd36, 8;
-; CHECK-NEXT: or.b64 %rd38, %rd34, %rd37;
-; CHECK-NEXT: cvt.u64.u16 %rd39, %rs5;
-; CHECK-NEXT: and.b64 %rd40, %rd39, 255;
-; CHECK-NEXT: shl.b64 %rd41, %rd40, 16;
-; CHECK-NEXT: or.b64 %rd42, %rd38, %rd41;
-; CHECK-NEXT: cvt.u64.u16 %rd43, %rs6;
-; CHECK-NEXT: and.b64 %rd44, %rd43, 255;
-; CHECK-NEXT: shl.b64 %rd45, %rd44, 24;
-; CHECK-NEXT: or.b64 %rd46, %rd42, %rd45;
-; CHECK-NEXT: cvt.u64.u16 %rd47, %rs7;
-; CHECK-NEXT: and.b64 %rd48, %rd47, 255;
-; CHECK-NEXT: shl.b64 %rd49, %rd48, 32;
-; CHECK-NEXT: or.b64 %rd50, %rd46, %rd49;
-; CHECK-NEXT: cvt.u64.u16 %rd51, %rs8;
-; CHECK-NEXT: and.b64 %rd52, %rd51, 255;
-; CHECK-NEXT: shl.b64 %rd53, %rd52, 40;
-; CHECK-NEXT: or.b64 %rd54, %rd50, %rd53;
-; CHECK-NEXT: cvt.u64.u16 %rd55, %rs9;
-; CHECK-NEXT: and.b64 %rd56, %rd55, 255;
-; CHECK-NEXT: shl.b64 %rd57, %rd56, 48;
-; CHECK-NEXT: or.b64 %rd58, %rd54, %rd57;
-; CHECK-NEXT: cvt.u64.u16 %rd59, %rs10;
-; CHECK-NEXT: shl.b64 %rd60, %rd59, 56;
-; CHECK-NEXT: or.b64 %rd61, %rd58, %rd60;
-; CHECK-NEXT: st.param.b64 [func_retval0], %rd31;
-; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs2;
+; CHECK-NEXT: shl.b64 %rd17, %rd13, 8;
+; CHECK-NEXT: or.b64 %rd18, %rd17, %rd14;
+; CHECK-NEXT: shl.b64 %rd20, %rd12, 16;
+; CHECK-NEXT: shl.b64 %rd22, %rd11, 24;
+; CHECK-NEXT: or.b64 %rd23, %rd22, %rd20;
+; CHECK-NEXT: or.b64 %rd24, %rd23, %rd18;
+; CHECK-NEXT: shl.b64 %rd27, %rd9, 8;
+; CHECK-NEXT: or.b64 %rd28, %rd27, %rd10;
+; CHECK-NEXT: shl.b64 %rd30, %rd8, 16;
+; CHECK-NEXT: shl.b64 %rd32, %rd7, 24;
+; CHECK-NEXT: or.b64 %rd33, %rd32, %rd30;
+; CHECK-NEXT: or.b64 %rd34, %rd33, %rd28;
+; CHECK-NEXT: shl.b64 %rd35, %rd34, 32;
+; CHECK-NEXT: or.b64 %rd36, %rd35, %rd24;
+; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd14;
+; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd5;
+; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd6;
+; CHECK-NEXT: shr.u64 %rd39, %rd36, 56;
+; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd39;
+; CHECK-NEXT: shr.u64 %rd40, %rd36, 48;
+; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd40;
+; CHECK-NEXT: shr.u64 %rd41, %rd36, 40;
+; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd41;
+; CHECK-NEXT: shr.u64 %rd42, %rd36, 32;
+; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd42;
+; CHECK-NEXT: shr.u64 %rd43, %rd36, 24;
; CHECK-NEXT: st.param.b8 [func_retval0+12], %rd43;
-; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd39;
-; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd35;
-; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd33;
-; CHECK-NEXT: shr.u64 %rd64, %rd50, 32;
-; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd64;
-; CHECK-NEXT: shr.u64 %rd65, %rd54, 40;
-; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd65;
-; CHECK-NEXT: shr.u64 %rd66, %rd58, 48;
-; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd66;
-; CHECK-NEXT: shr.u64 %rd67, %rd61, 56;
-; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd67;
-; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd32;
+; CHECK-NEXT: shr.u64 %rd44, %rd36, 16;
+; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd44;
+; CHECK-NEXT: shr.u64 %rd45, %rd36, 8;
+; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd45;
; CHECK-NEXT: ret;
%r = tail call %s_i8i64p @test_s_i8i64p(%s_i8i64p %a)
ret %s_i8i64p %r
@@ -242,33 +189,32 @@ define %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs4, [test_s_i8f16p_param_0+4];
-; CHECK-NEXT: shl.b16 %rs5, %rs4, 8;
-; CHECK-NEXT: ld.param.b8 %rs6, [test_s_i8f16p_param_0+3];
-; CHECK-NEXT: or.b16 %rs3, %rs5, %rs6;
-; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16p_param_0+8];
-; CHECK-NEXT: ld.param.b8 %rs2, [test_s_i8f16p_param_0+2];
; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8f16p_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [test_s_i8f16p_param_0+2];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16p_param_0+8];
+; CHECK-NEXT: ld.param.b8 %rs3, [test_s_i8f16p_param_0+4];
; CHECK-NEXT: { // callseq 3, 0
; CHECK-NEXT: .param .align 8 .b8 param0[16];
-; CHECK-NEXT: st.param.b16 [param0], %rs1;
-; CHECK-NEXT: st.param.b8 [param0+2], %rs2;
-; CHECK-NEXT: st.param.b8 [param0+3], %rs3;
-; CHECK-NEXT: st.param.b8 [param0+4], %rs4;
-; CHECK-NEXT: st.param.b64 [param0+8], %rd1;
; CHECK-NEXT: .param .align 8 .b8 retval0[16];
+; CHECK-NEXT: st.param.b8 [param0+4], %rs3;
+; CHECK-NEXT: st.param.b64 [param0+8], %rd1;
+; CHECK-NEXT: st.param.b16 [param0+2], %rs2;
+; CHECK-NEXT: st.param.b16 [param0], %rs1;
; CHECK-NEXT: call.uni (retval0), test_s_i8f16p, (param0);
-; CHECK-NEXT: ld.param.b16 %rs7, [retval0];
-; CHECK-NEXT: ld.param.b8 %rs8, [retval0+2];
-; CHECK-NEXT: ld.param.b8 %rs9, [retval0+3];
-; CHECK-NEXT: ld.param.b8 %rs10, [retval0+4];
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+8];
+; CHECK-NEXT: ld.param.b8 %rs4, [retval0+2];
+; CHECK-NEXT: ld.param.b16 %rs5, [retval0];
+; CHECK-NEXT: ld.param.b8 %rs6, [retval0+4];
+; CHECK-NEXT: ld.param.b8 %rs7, [retval0+3];
; CHECK-NEXT: } // callseq 3
-; CHECK-NEXT: st.param.b16 [func_retval0], %rs7;
-; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs8;
-; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs10;
-; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs9;
+; CHECK-NEXT: shl.b16 %rs10, %rs6, 8;
+; CHECK-NEXT: or.b16 %rs11, %rs10, %rs7;
+; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs7;
; CHECK-NEXT: st.param.b64 [func_retval0+8], %rd2;
+; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs4;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs5;
+; CHECK-NEXT: shr.u16 %rs14, %rs11, 8;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs14;
; CHECK-NEXT: ret;
%r = tail call %s_i8f16p @test_s_i8f16p(%s_i8f16p %a)
ret %s_i8f16p %r
@@ -278,56 +224,51 @@ define %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) {
define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) {
; CHECK-LABEL: test_s_i8f16x2p(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<12>;
-; CHECK-NEXT: .reg .b32 %r<20>;
+; CHECK-NEXT: .reg .b16 %rs<4>;
+; CHECK-NEXT: .reg .b32 %r<24>;
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f16x2p_param_0+6];
-; CHECK-NEXT: shl.b32 %r4, %r3, 8;
-; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8f16x2p_param_0+5];
-; CHECK-NEXT: or.b32 %r6, %r4, %r5;
-; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8f16x2p_param_0+7];
-; CHECK-NEXT: shl.b32 %r8, %r7, 16;
-; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8f16x2p_param_0+8];
-; CHECK-NEXT: shl.b32 %r10, %r9, 24;
-; CHECK-NEXT: or.b32 %r11, %r10, %r8;
-; CHECK-NEXT: or.b32 %r2, %r11, %r6;
-; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16x2p_param_0+16];
-; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f16x2p_param_0+4];
; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8f16x2p_param_0];
-; CHECK-NEXT: shr.u32 %r12, %r2, 8;
-; CHECK-NEXT: shr.u32 %r13, %r11, 16;
+; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8f16x2p_param_0+4];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16x2p_param_0+16];
+; CHECK-NEXT: ld.param.b8 %r2, [test_s_i8f16x2p_param_0+6];
+; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f16x2p_param_0+7];
+; CHECK-NEXT: ld.param.b8 %r4, [test_s_i8f16x2p_param_0+8];
; CHECK-NEXT: { // callseq 4, 0
; CHECK-NEXT: .param .align 8 .b8 param0[24];
-; CHECK-NEXT: st.param.b32 [param0], %r1;
-; CHECK-NEXT: st.param.b8 [param0+4], %rs1;
-; CHECK-NEXT: st.param.b8 [param0+5], %r2;
-; CHECK-NEXT: st.param.b8 [param0+6], %r12;
-; CHECK-NEXT: st.param.b8 [param0+7], %r13;
-; CHECK-NEXT: st.param.b8 [param0+8], %r9;
-; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
; CHECK-NEXT: .param .align 8 .b8 retval0[24];
+; CHECK-NEXT: st.param.b8 [param0+8], %r4;
+; CHECK-NEXT: st.param.b8 [param0+7], %r3;
+; CHECK-NEXT: st.param.b8 [param0+6], %r2;
+; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
+; CHECK-NEXT: st.param.b16 [param0+4], %rs1;
+; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni (retval0), test_s_i8f16x2p, (param0);
-; CHECK-NEXT: ld.param.b32 %r14, [retval0];
-; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
-; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5];
-; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6];
-; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7];
-; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8];
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16];
+; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
+; CHECK-NEXT: ld.param.b32 %r5, [retval0];
+; CHECK-NEXT: ld.param.b8 %r6, [retval0+8];
+; CHECK-NEXT: ld.param.b8 %r7, [retval0+7];
+; CHECK-NEXT: ld.param.b8 %r8, [retval0+6];
+; CHECK-NEXT: ld.param.b8 %r9, [retval0+5];
; CHECK-NEXT: } // callseq 4
-; CHECK-NEXT: cvt.u32.u16 %r15, %rs3;
-; CHECK-NEXT: cvt.u32.u16 %r16, %rs4;
-; CHECK-NEXT: cvt.u32.u16 %r17, %rs5;
-; CHECK-NEXT: cvt.u32.u16 %r18, %rs6;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r14;
-; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
-; CHECK-NEXT: st.param.b8 [func_retval0+8], %r18;
-; CHECK-NEXT: st.param.b8 [func_retval0+7], %r17;
-; CHECK-NEXT: st.param.b8 [func_retval0+6], %r16;
-; CHECK-NEXT: st.param.b8 [func_retval0+5], %r15;
+; CHECK-NEXT: shl.b32 %r12, %r8, 8;
+; CHECK-NEXT: or.b32 %r13, %r12, %r9;
+; CHECK-NEXT: shl.b32 %r15, %r7, 16;
+; CHECK-NEXT: shl.b32 %r17, %r6, 24;
+; CHECK-NEXT: or.b32 %r18, %r17, %r15;
+; CHECK-NEXT: or.b32 %r19, %r18, %r13;
+; CHECK-NEXT: st.param.b8 [func_retval0+5], %r9;
; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
+; CHECK-NEXT: shr.u32 %r21, %r19, 24;
+; CHECK-NEXT: st.param.b8 [func_retval0+8], %r21;
+; CHECK-NEXT: shr.u32 %r22, %r19, 16;
+; CHECK-NEXT: st.param.b8 [func_retval0+7], %r22;
+; CHECK-NEXT: shr.u32 %r23, %r19, 8;
+; CHECK-NEXT: st.param.b8 [func_retval0+6], %r23;
; CHECK-NEXT: ret;
%r = tail call %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a)
ret %s_i8f16x2p %r
@@ -337,56 +278,51 @@ define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) {
define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) {
; CHECK-LABEL: test_s_i8f32p(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<12>;
-; CHECK-NEXT: .reg .b32 %r<20>;
+; CHECK-NEXT: .reg .b16 %rs<4>;
+; CHECK-NEXT: .reg .b32 %r<24>;
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f32p_param_0+6];
-; CHECK-NEXT: shl.b32 %r4, %r3, 8;
-; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8f32p_param_0+5];
-; CHECK-NEXT: or.b32 %r6, %r4, %r5;
-; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8f32p_param_0+7];
-; CHECK-NEXT: shl.b32 %r8, %r7, 16;
-; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8f32p_param_0+8];
-; CHECK-NEXT: shl.b32 %r10, %r9, 24;
-; CHECK-NEXT: or.b32 %r11, %r10, %r8;
-; CHECK-NEXT: or.b32 %r2, %r11, %r6;
-; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f32p_param_0+16];
-; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f32p_param_0+4];
; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8f32p_param_0];
-; CHECK-NEXT: shr.u32 %r12, %r2, 8;
-; CHECK-NEXT: shr.u32 %r13, %r11, 16;
+; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8f32p_param_0+4];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f32p_param_0+16];
+; CHECK-NEXT: ld.param.b8 %r2, [test_s_i8f32p_param_0+6];
+; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f32p_param_0+7];
+; CHECK-NEXT: ld.param.b8 %r4, [test_s_i8f32p_param_0+8];
; CHECK-NEXT: { // callseq 5, 0
; CHECK-NEXT: .param .align 8 .b8 param0[24];
-; CHECK-NEXT: st.param.b32 [param0], %r1;
-; CHECK-NEXT: st.param.b8 [param0+4], %rs1;
-; CHECK-NEXT: st.param.b8 [param0+5], %r2;
-; CHECK-NEXT: st.param.b8 [param0+6], %r12;
-; CHECK-NEXT: st.param.b8 [param0+7], %r13;
-; CHECK-NEXT: st.param.b8 [param0+8], %r9;
-; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
; CHECK-NEXT: .param .align 8 .b8 retval0[24];
+; CHECK-NEXT: st.param.b8 [param0+8], %r4;
+; CHECK-NEXT: st.param.b8 [param0+7], %r3;
+; CHECK-NEXT: st.param.b8 [param0+6], %r2;
+; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
+; CHECK-NEXT: st.param.b16 [param0+4], %rs1;
+; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni (retval0), test_s_i8f32p, (param0);
-; CHECK-NEXT: ld.param.b32 %r14, [retval0];
-; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
-; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5];
-; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6];
-; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7];
-; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8];
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16];
+; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
+; CHECK-NEXT: ld.param.b32 %r5, [retval0];
+; CHECK-NEXT: ld.param.b8 %r6, [retval0+8];
+; CHECK-NEXT: ld.param.b8 %r7, [retval0+7];
+; CHECK-NEXT: ld.param.b8 %r8, [retval0+6];
+; CHECK-NEXT: ld.param.b8 %r9, [retval0+5];
; CHECK-NEXT: } // callseq 5
-; CHECK-NEXT: cvt.u32.u16 %r15, %rs3;
-; CHECK-NEXT: cvt.u32.u16 %r16, %rs4;
-; CHECK-NEXT: cvt.u32.u16 %r17, %rs5;
-; CHECK-NEXT: cvt.u32.u16 %r18, %rs6;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r14;
-; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
-; CHECK-NEXT: st.param.b8 [func_retval0+8], %r18;
-; CHECK-NEXT: st.param.b8 [func_retval0+7], %r17;
-; CHECK-NEXT: st.param.b8 [func_retval0+6], %r16;
-; CHECK-NEXT: st.param.b8 [func_retval0+5], %r15;
+; CHECK-NEXT: shl.b32 %r12, %r8, 8;
+; CHECK-NEXT: or.b32 %r13, %r12, %r9;
+; CHECK-NEXT: shl.b32 %r15, %r7, 16;
+; CHECK-NEXT: shl.b32 %r17, %r6, 24;
+; CHECK-NEXT: or.b32 %r18, %r17, %r15;
+; CHECK-NEXT: or.b32 %r19, %r18, %r13;
+; CHECK-NEXT: st.param.b8 [func_retval0+5], %r9;
; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
+; CHECK-NEXT: shr.u32 %r21, %r19, 24;
+; CHECK-NEXT: st.param.b8 [func_retval0+8], %r21;
+; CHECK-NEXT: shr.u32 %r22, %r19, 16;
+; CHECK-NEXT: st.param.b8 [func_retval0+7], %r22;
+; CHECK-NEXT: shr.u32 %r23, %r19, 8;
+; CHECK-NEXT: st.param.b8 [func_retval0+6], %r23;
; CHECK-NEXT: ret;
%r = tail call %s_i8f32p @test_s_i8f32p(%s_i8f32p %a)
ret %s_i8f32p %r
@@ -396,112 +332,66 @@ define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) {
define %s_i8f64p @test_s_i8f64p(%s_i8f64p %a) {
; CHECK-LABEL: test_s_i8f64p(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<20>;
-; CHECK-NEXT: .reg .b64 %rd<68>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b64 %rd<46>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8f64p_param_0+10];
-; CHECK-NEXT: shl.b64 %rd5, %rd4, 8;
-; CHECK-NEXT: ld.param.b8 %rd6, [test_s_i8f64p_param_0+9];
-; CHECK-NEXT: or.b64 %rd7, %rd5, %rd6;
-; CHECK-NEXT: ld.param.b8 %rd8, [test_s_i8f64p_param_0+11];
-; CHECK-NEXT: shl.b64 %rd9, %rd8, 16;
-; CHECK-NEXT: ld.param.b8 %rd10, [test_s_i8f64p_param_0+12];
-; CHECK-NEXT: shl.b64 %rd11, %rd10, 24;
-; CHECK-NEXT: or.b64 %rd12, %rd11, %rd9;
-; CHECK-NEXT: or.b64 %rd13, %rd12, %rd7;
-; CHECK-NEXT: ld.param.b8 %rd14, [test_s_i8f64p_param_0+14];
-; CHECK-NEXT: shl.b64 %rd15, %rd14, 8;
-; CHECK-NEXT: ld.param.b8 %rd16, [test_s_i8f64p_param_0+13];
-; CHECK-NEXT: or.b64 %rd17, %rd15, %rd16;
-; CHECK-NEXT: ld.param.b8 %rd18, [test_s_i8f64p_param_0+15];
-; CHECK-NEXT: shl.b64 %rd19, %rd18, 16;
-; CHECK-NEXT: ld.param.b8 %rd20, [test_s_i8f64p_param_0+16];
-; CHECK-NEXT: shl.b64 %rd21, %rd20, 24;
-; CHECK-NEXT: or.b64 %rd22, %rd21, %rd19;
-; CHECK-NEXT: or.b64 %rd23, %rd22, %rd17;
-; CHECK-NEXT: shl.b64 %rd24, %rd23, 32;
-; CHECK-NEXT: or.b64 %rd2, %rd24, %rd13;
-; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8f64p_param_0+24];
-; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f64p_param_0+8];
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f64p_param_0];
-; CHECK-NEXT: shr.u64 %rd25, %rd2, 8;
-; CHECK-NEXT: shr.u64 %rd26, %rd2, 16;
-; CHECK-NEXT: shr.u64 %rd27, %rd2, 24;
-; CHECK-NEXT: bfe.u64 %rd28, %rd23, 8, 24;
-; CHECK-NEXT: bfe.u64 %rd29, %rd23, 16, 16;
-; CHECK-NEXT: bfe.u64 %rd30, %rd23, 24, 8;
+; CHECK-NEXT: ld.param.b64 %rd2, [test_s_i8f64p_param_0+8];
+; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8f64p_param_0+24];
+; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8f64p_param_0+16];
; CHECK-NEXT: { // callseq 6, 0
; CHECK-NEXT: .param .align 8 .b8 param0[32];
-; CHECK-NEXT: st.param.b64 [param0], %rd1;
-; CHECK-NEXT: st.param.b8 [param0+8], %rs1;
-; CHECK-NEXT: st.param.b8 [param0+9], %rd2;
-; CHECK-NEXT: st.param.b8 [param0+10], %rd25;
-; CHECK-NEXT: st.param.b8 [param0+11], %rd26;
-; CHECK-NEXT: st.param.b8 [param0+12], %rd27;
-; CHECK-NEXT: st.param.b8 [param0+13], %rd23;
-; CHECK-NEXT: st.param.b8 [param0+14], %rd28;
-; CHECK-NEXT: st.param.b8 [param0+15], %rd29;
-; CHECK-NEXT: st.param.b8 [param0+16], %rd30;
-; CHECK-NEXT: st.param.b64 [param0+24], %rd3;
; CHECK-NEXT: .param .align 8 .b8 retval0[32];
+; CHECK-NEXT: st.param.b8 [param0+16], %rd4;
+; CHECK-NEXT: st.param.b64 [param0+24], %rd3;
+; CHECK-NEXT: st.param.b64 [param0+8], %rd2;
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: call.uni (retval0), test_s_i8f64p, (param0);
-; CHECK-NEXT: ld.param.b64 %rd31, [retval0];
-; CHECK-NEXT: ld.param.b8 %rs2, [retval0+8];
-; CHECK-NEXT: ld.param.b8 %rs3, [retval0+9];
-; CHECK-NEXT: ld.param.b8 %rs4, [retval0+10];
-; CHECK-NEXT: ld.param.b8 %rs5, [retval0+11];
-; CHECK-NEXT: ld.param.b8 %rs6, [retval0+12];
-; CHECK-NEXT: ld.param.b8 %rs7, [retval0+13];
-; CHECK-NEXT: ld.param.b8 %rs8, [retval0+14];
-; CHECK-NEXT: ld.param.b8 %rs9, [retval0+15];
-; CHECK-NEXT: ld.param.b8 %rs10, [retval0+16];
-; CHECK-NEXT: ld.param.b64 %rd32, [retval0+24];
+; CHECK-NEXT: ld.param.b64 %rd5, [retval0+24];
+; CHECK-NEXT: ld.param.b8 %rs1, [retval0+8];
+; CHECK-NEXT: ld.param.b64 %rd6, [retval0];
+; CHECK-NEXT: ld.param.b8 %rd7, [retval0+16];
+; CHECK-NEXT: ld.param.b8 %rd8, [retval0+15];
+; CHECK-NEXT: ld.param.b8 %rd9, [retval0+14];
+; CHECK-NEXT: ld.param.b8 %rd10, [retval0+13];
+; CHECK-NEXT: ld.param.b8 %rd11, [retval0+12];
+; CHECK-NEXT: ld.param.b8 %rd12, [retval0+11];
+; CHECK-NEXT: ld.param.b8 %rd13, [retval0+10];
+; CHECK-NEXT: ld.param.b8 %rd14, [retval0+9];
; CHECK-NEXT: } // callseq 6
-; CHECK-NEXT: cvt.u64.u16 %rd33, %rs3;
-; CHECK-NEXT: and.b64 %rd34, %rd33, 255;
-; CHECK-NEXT: cvt.u64.u16 %rd35, %rs4;
-; CHECK-NEXT: and.b64 %rd36, %rd35, 255;
-; CHECK-NEXT: shl.b64 %rd37, %rd36, 8;
-; CHECK-NEXT: or.b64 %rd38, %rd34, %rd37;
-; CHECK-NEXT: cvt.u64.u16 %rd39, %rs5;
-; CHECK-NEXT: and.b64 %rd40, %rd39, 255;
-; CHECK-NEXT: shl.b64 %rd41, %rd40, 16;
-; CHECK-NEXT: or.b64 %rd42, %rd38, %rd41;
-; CHECK-NEXT: cvt.u64.u16 %rd43, %rs6;
-; CHECK-NEXT: and.b64 %rd44, %rd43, 255;
-; CHECK-NEXT: shl.b64 %rd45, %rd44, 24;
-; CHECK-NEXT: or.b64 %rd46, %rd42, %rd45;
-; CHECK-NEXT: cvt.u64.u16 %rd47, %rs7;
-; CHECK-NEXT: and.b64 %rd48, %rd47, 255;
-; CHECK-NEXT: shl.b64 %rd49, %rd48, 32;
-; CHECK-NEXT: or.b64 %rd50, %rd46, %rd49;
-; CHECK-NEXT: cvt.u64.u16 %rd51, %rs8;
-; CHECK-NEXT: and.b64 %rd52, %rd51, 255;
-; CHECK-NEXT: shl.b64 %rd53, %rd52, 40;
-; CHECK-NEXT: or.b64 %rd54, %rd50, %rd53;
-; CHECK-NEXT: cvt.u64.u16 %rd55, %rs9;
-; CHECK-NEXT: and.b64 %rd56, %rd55, 255;
-; CHECK-NEXT: shl.b64 %rd57, %rd56, 48;
-; CHECK-NEXT: or.b64 %rd58, %rd54, %rd57;
-; CHECK-NEXT: cvt.u64.u16 %rd59, %rs10;
-; CHECK-NEXT: shl.b64 %rd60, %rd59, 56;
-; CHECK-NEXT: or.b64 %rd61, %rd58, %rd60;
-; CHECK-NEXT: st.param.b64 [func_retval0], %rd31;
-; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs2;
+; CHECK-NEXT: shl.b64 %rd17, %rd13, 8;
+; CHECK-NEXT: or.b64 %rd18, %rd17, %rd14;
+; CHECK-NEXT: shl.b64 %rd20, %rd12, 16;
+; CHECK-NEXT: shl.b64 %rd22, %rd11, 24;
+; CHECK-NEXT: or.b64 %rd23, %rd22, %rd20;
+; CHECK-NEXT: or.b64 %rd24, %rd23, %rd18;
+; CHECK-NEXT: shl.b64 %rd27, %rd9, 8;
+; CHECK-NEXT: or.b64 %rd28, %rd27, %rd10;
+; CHECK-NEXT: shl.b64 %rd30, %rd8, 16;
+; CHECK-NEXT: shl.b64 %rd32, %rd7, 24;
+; CHECK-NEXT: or.b64 %rd33, %rd32, %rd30;
+; CHECK-NEXT: or.b64 %rd34, %rd33, %rd28;
+; CHECK-NEXT: shl.b64 %rd35, %rd34, 32;
+; CHECK-NEXT: or.b64 %rd36, %rd35, %rd24;
+; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd14;
+; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd5;
+; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd6;
+; CHECK-NEXT: shr.u64 %rd39, %rd36, 56;
+; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd39;
+; CHECK-NEXT: shr.u64 %rd40, %rd36, 48;
+; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd40;
+; CHECK-NEXT: shr.u64 %rd41, %rd36, 40;
+; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd41;
+; CHECK-NEXT: shr.u64 %rd42, %rd36, 32;
+; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd42;
+; CHECK-NEXT: shr.u64 %rd43, %rd36, 24;
; CHECK-NEXT: st.param.b8 [func_retval0+12], %rd43;
-; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd39;
-; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd35;
-; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd33;
-; CHECK-NEXT: shr.u64 %rd64, %rd50, 32;
-; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd64;
-; CHECK-NEXT: shr.u64 %rd65, %rd54, 40;
-; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd65;
-; CHECK-NEXT: shr.u64 %rd66, %rd58, 48;
-; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd66;
-; CHECK-NEXT: shr.u64 %rd67, %rd61, 56;
-; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd67;
-; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd32;
+; CHECK-NEXT: shr.u64 %rd44, %rd36, 16;
+; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd44;
+; CHECK-NEXT: shr.u64 %rd45, %rd36, 8;
+; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd45;
; CHECK-NEXT: ret;
%r = tail call %s_i8f64p @test_s_i8f64p(%s_i8f64p %a)
ret %s_i8f64p %r
diff --git a/llvm/test/CodeGen/NVPTX/vaargs.ll b/llvm/test/CodeGen/NVPTX/vaargs.ll
index 3ca729f..9e312a2 100644
--- a/llvm/test/CodeGen/NVPTX/vaargs.ll
+++ b/llvm/test/CodeGen/NVPTX/vaargs.ll
@@ -89,14 +89,14 @@ define i32 @test_foo(i32 %i, i64 %l, double %d, ptr %p) {
; CHECK-NEXT: ld.param.b32 [[ARG_I32:%r[0-9]+]], [test_foo_param_0];
; Store arguments to an array
-; CHECK32: .param .align 8 .b8 param1[28];
-; CHECK64: .param .align 8 .b8 param1[32];
-; CHECK-NEXT: st.param.b32 [param1], [[ARG_I32]];
-; CHECK-NEXT: st.param.b64 [param1+8], [[ARG_I64]];
-; CHECK-NEXT: st.param.b64 [param1+16], [[ARG_DOUBLE]];
-; CHECK-NEXT: st.param.b[[BITS]] [param1+24], [[ARG_VOID_PTR]];
-; CHECK-NEXT: .param .b32 retval0;
-; CHECK-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .align 8 .b8 _[]
+; CHECK32: .param .align 8 .b8 param1[28];
+; CHECK64: .param .align 8 .b8 param1[32];
+; CHECK-DAG: .param .b32 retval0;
+; CHECK-DAG: st.param.b32 [param1], [[ARG_I32]];
+; CHECK-DAG: st.param.b64 [param1+8], [[ARG_I64]];
+; CHECK-DAG: st.param.b64 [param1+16], [[ARG_DOUBLE]];
+; CHECK-DAG: st.param.b[[BITS]] [param1+24], [[ARG_VOID_PTR]];
+; CHECK-DAG: prototype_1 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .align 8 .b8 _[]
entry:
%ptr = load ptr, ptr addrspacecast (ptr addrspace(1) @foo_ptr to ptr), align 8
diff --git a/llvm/test/CodeGen/NVPTX/variadics-backend.ll b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
index ad2e704..a9b3675 100644
--- a/llvm/test/CodeGen/NVPTX/variadics-backend.ll
+++ b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
@@ -115,13 +115,13 @@ define dso_local i32 @foo() {
; CHECK-PTX-NEXT: st.b64 [%SP+16], 1;
; CHECK-PTX-NEXT: st.b64 [%SP+24], 4607182418800017408;
; CHECK-PTX-NEXT: st.b64 [%SP+32], 4607182418800017408;
-; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0;
; CHECK-PTX-NEXT: { // callseq 0, 0
; CHECK-PTX-NEXT: .param .b32 param0;
-; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: .param .b64 param1;
-; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1;
; CHECK-PTX-NEXT: .param .b32 retval0;
+; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0;
+; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1;
+; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: call.uni (retval0), variadics1, (param0, param1);
; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-PTX-NEXT: } // callseq 0
@@ -218,13 +218,13 @@ define dso_local i32 @bar() {
; CHECK-PTX-NEXT: st.b32 [%SP+8], 1;
; CHECK-PTX-NEXT: st.b8 [%SP+12], 1;
; CHECK-PTX-NEXT: st.b64 [%SP+16], 1;
-; CHECK-PTX-NEXT: add.u64 %rd3, %SP, 8;
; CHECK-PTX-NEXT: { // callseq 1, 0
; CHECK-PTX-NEXT: .param .b32 param0;
-; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: .param .b64 param1;
-; CHECK-PTX-NEXT: st.param.b64 [param1], %rd3;
; CHECK-PTX-NEXT: .param .b32 retval0;
+; CHECK-PTX-NEXT: add.u64 %rd3, %SP, 8;
+; CHECK-PTX-NEXT: st.param.b64 [param1], %rd3;
+; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: call.uni (retval0), variadics2, (param0, param1);
; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-PTX-NEXT: } // callseq 1
@@ -289,13 +289,13 @@ define dso_local i32 @baz() {
; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot5;
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: st.v4.b32 [%SP], {1, 1, 1, 1};
-; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0;
; CHECK-PTX-NEXT: { // callseq 2, 0
; CHECK-PTX-NEXT: .param .b32 param0;
-; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: .param .b64 param1;
-; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1;
; CHECK-PTX-NEXT: .param .b32 retval0;
+; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0;
+; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1;
+; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: call.uni (retval0), variadics3, (param0, param1);
; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-PTX-NEXT: } // callseq 2
@@ -348,7 +348,6 @@ define dso_local void @qux() {
; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot7[24];
; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
-; CHECK-PTX-NEXT: .reg .b32 %r<2>;
; CHECK-PTX-NEXT: .reg .b64 %rd<8>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
@@ -360,18 +359,17 @@ define dso_local void @qux() {
; CHECK-PTX-NEXT: ld.global.nc.b64 %rd4, [__const_$_qux_$_s];
; CHECK-PTX-NEXT: st.local.b64 [%rd2], %rd4;
; CHECK-PTX-NEXT: st.b64 [%SP+16], 1;
-; CHECK-PTX-NEXT: ld.local.b64 %rd5, [%rd2];
-; CHECK-PTX-NEXT: ld.local.b64 %rd6, [%rd2+8];
-; CHECK-PTX-NEXT: add.u64 %rd7, %SP, 16;
; CHECK-PTX-NEXT: { // callseq 3, 0
; CHECK-PTX-NEXT: .param .align 8 .b8 param0[16];
-; CHECK-PTX-NEXT: st.param.b64 [param0], %rd5;
-; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd6;
; CHECK-PTX-NEXT: .param .b64 param1;
-; CHECK-PTX-NEXT: st.param.b64 [param1], %rd7;
; CHECK-PTX-NEXT: .param .b32 retval0;
+; CHECK-PTX-NEXT: add.u64 %rd5, %SP, 16;
+; CHECK-PTX-NEXT: st.param.b64 [param1], %rd5;
+; CHECK-PTX-NEXT: ld.local.b64 %rd6, [%rd2+8];
+; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd6;
+; CHECK-PTX-NEXT: ld.local.b64 %rd7, [%rd2];
+; CHECK-PTX-NEXT: st.param.b64 [param0], %rd7;
; CHECK-PTX-NEXT: call.uni (retval0), variadics4, (param0, param1);
-; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-PTX-NEXT: } // callseq 3
; CHECK-PTX-NEXT: ret;
entry: