diff options
Diffstat (limited to 'llvm/test/CodeGen/PowerPC')
43 files changed, 2757 insertions, 1678 deletions
diff --git a/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessNoProfileData.mir b/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessNoProfileData.mir index d3d3b19..a23f0af 100644 --- a/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessNoProfileData.mir +++ b/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessNoProfileData.mir @@ -123,8 +123,8 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: [] stack: [] constants: [] diff --git a/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessProfileData.mir b/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessProfileData.mir index e8ad54b..7d80c02 100644 --- a/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessProfileData.mir +++ b/llvm/test/CodeGen/PowerPC/DisableHoistingDueToBlockHotnessProfileData.mir @@ -168,8 +168,8 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: [] stack: [] constants: [] diff --git a/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir b/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir index 41e2124..60a399d 100644 --- a/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir +++ b/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir @@ -1,6 +1,12 @@ # RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -start-after \ # RUN: virtregrewriter -ppc-asm-full-reg-names -verify-machineinstrs %s \ # RUN: -o - | FileCheck %s +# RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -start-after \ +# RUN: virtregrewriter -ppc-asm-full-reg-names -verify-machineinstrs %s \ +# RUN: -o - | FileCheck %s +# RUN: llc -mcpu=pwr10 -mtriple=powerpc64le-unknown-linux-gnu -start-after \ +# RUN: virtregrewriter -ppc-asm-full-reg-names -verify-machineinstrs %s \ +# RUN: -o - | FileCheck %s --- | ; ModuleID = 'a.ll' @@ -30,7 +36,7 @@ ; Function Attrs: nounwind declare void @llvm.stackprotector(ptr, ptr) #1 - attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+htm,+power8-vector,+vsx,-power9-vector" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind } !llvm.ident = !{!0} @@ -71,8 +77,8 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: [] stack: - { id: 0, name: '', type: spill-slot, offset: 0, size: 4, alignment: 4, diff --git a/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll b/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll index 9ffb4fd..258ddf6 100644 --- a/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll +++ b/llvm/test/CodeGen/PowerPC/aix-cc-abi-mir.ll @@ -37,9 +37,9 @@ define signext i8 @test_chars(i8 signext %c1, i8 signext %c2, i8 signext %c3, i8 ; 32BIT: bb.0.entry: ; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6 ; 32BIT-NEXT: {{ $}} - ; 32BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, killed renamable $r4 - ; 32BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, killed renamable $r5 - ; 32BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, killed renamable $r6 + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4 + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r5 + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r6 ; 32BIT-NEXT: renamable $r3 = EXTSB killed renamable $r3 ; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $r3 ; @@ -47,9 +47,9 @@ define signext i8 @test_chars(i8 signext %c1, i8 signext %c2, i8 signext %c3, i8 ; 64BIT: bb.0.entry: ; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6 ; 64BIT-NEXT: {{ $}} - ; 64BIT-NEXT: renamable $r3 = ADD4 renamable $r3, renamable $r4, implicit killed $x4, implicit killed $x3 - ; 64BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, renamable $r5, implicit killed $x5 - ; 64BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, renamable $r6, implicit killed $x6, implicit-def $x3 + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 renamable $r3, renamable $r4, implicit killed $x4, implicit killed $x3 + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r5, implicit killed $x5 + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r6, implicit killed $x6, implicit-def $x3 ; 64BIT-NEXT: renamable $x3 = EXTSB8 killed renamable $x3 ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3 entry: @@ -96,9 +96,9 @@ define signext i8 @test_chars_mix(i8 signext %c1, i8 zeroext %c2, i8 zeroext %c3 ; 32BIT: bb.0.entry: ; 32BIT-NEXT: liveins: $r3, $r4, $r5, $r6 ; 32BIT-NEXT: {{ $}} - ; 32BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, killed renamable $r4 - ; 32BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, killed renamable $r5 - ; 32BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, killed renamable $r6 + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r4 + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r5 + ; 32BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, killed renamable $r6 ; 32BIT-NEXT: renamable $r3 = EXTSB killed renamable $r3 ; 32BIT-NEXT: BLR implicit $lr, implicit $rm, implicit $r3 ; @@ -106,9 +106,9 @@ define signext i8 @test_chars_mix(i8 signext %c1, i8 zeroext %c2, i8 zeroext %c3 ; 64BIT: bb.0.entry: ; 64BIT-NEXT: liveins: $x3, $x4, $x5, $x6 ; 64BIT-NEXT: {{ $}} - ; 64BIT-NEXT: renamable $r3 = ADD4 renamable $r3, renamable $r4, implicit killed $x4, implicit killed $x3 - ; 64BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, renamable $r5, implicit killed $x5 - ; 64BIT-NEXT: renamable $r3 = ADD4 killed renamable $r3, renamable $r6, implicit killed $x6, implicit-def $x3 + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 renamable $r3, renamable $r4, implicit killed $x4, implicit killed $x3 + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r5, implicit killed $x5 + ; 64BIT-NEXT: renamable $r3 = nsw ADD4 killed renamable $r3, renamable $r6, implicit killed $x6, implicit-def $x3 ; 64BIT-NEXT: renamable $x3 = EXTSB8 killed renamable $x3 ; 64BIT-NEXT: BLR8 implicit $lr8, implicit $rm, implicit $x3 entry: diff --git a/llvm/test/CodeGen/PowerPC/aix-nest-param.ll b/llvm/test/CodeGen/PowerPC/aix-nest-param.ll index 1863eaf..bfc7fbb 100644 --- a/llvm/test/CodeGen/PowerPC/aix-nest-param.ll +++ b/llvm/test/CodeGen/PowerPC/aix-nest-param.ll @@ -1,5 +1,5 @@ -; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 | FileCheck %s -; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck %s +; RUN: llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 | FileCheck %s +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck %s define ptr @nest_receiver(ptr nest %arg) nounwind { ret ptr %arg @@ -9,5 +9,10 @@ define ptr @nest_caller(ptr %arg) nounwind { %result = call ptr @nest_receiver(ptr nest %arg) ret ptr %result } +; CHECK-LABEL: .nest_receiver: +; CHECK: mr 3, 11 +; CHECK: blr -; CHECK: LLVM ERROR: Nest arguments are unimplemented. +; CHECK-LABEL: .nest_caller: +; CHECK: mr 11, 3 +; CHECK: bl .nest_receiver diff --git a/llvm/test/CodeGen/PowerPC/aix-trampoline.ll b/llvm/test/CodeGen/PowerPC/aix-trampoline.ll index b71f6b5..19df220 100644 --- a/llvm/test/CodeGen/PowerPC/aix-trampoline.ll +++ b/llvm/test/CodeGen/PowerPC/aix-trampoline.ll @@ -1,7 +1,7 @@ -; RUN: not --crash llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 | FileCheck %s -; RUN: not --crash llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 | FileCheck %s - -; CHECK: LLVM ERROR: INIT_TRAMPOLINE operation is not supported on AIX. +; RUN: llc -mtriple powerpc-ibm-aix-xcoff < %s 2>&1 | \ +; RUN: FileCheck %s --check-prefix=32BIT +; RUN: llc -mtriple powerpc64-ibm-aix-xcoff < %s 2>&1 -mattr=-altivec | \ +; RUN: FileCheck %s --check-prefix=64BIT define void @create_trampoline(ptr %buffer, ptr %nval) nounwind { entry: @@ -12,3 +12,17 @@ entry: declare i32 @nested(i32); declare void @llvm.init.trampoline(ptr, ptr, ptr) nounwind + +; 32BIT: stw 4, 8(3) +; 32BIT: lwz [[FuncDesc:[0-9]+]], L..C0(2) +; 32BIT-DAG: lwz [[SCRATCH1:[0-9]+]], 0([[FuncDesc]]) +; 32BIT-DAG: lwz [[SCRATCH2:[0-9]+]], 4([[FuncDesc]]) +; 32BIT-DAG: stw [[SCRATCH1]], 0(3) +; 32BIT-DAG: stw [[SCRATCH2]], 4(3) + +; 64BIT: std 4, 16(3) +; 64BIT-DAG: ld [[FuncDesc:[0-9]+]], L..C0(2) +; 64BIT-DAG: ld [[SCRATCH1:[0-9]+]], 0([[FuncDesc]]) +; 64BIT-DAG: ld [[SCRATCH2:[0-9]+]], 8([[FuncDesc]]) +; 64BIT-DAG: std [[SCRATCH1]], 0(3) +; 64BIT-DAG: std [[SCRATCH2]], 8(3) diff --git a/llvm/test/CodeGen/PowerPC/alignlongjumptest.mir b/llvm/test/CodeGen/PowerPC/alignlongjumptest.mir index 314844a..f1ae4a2 100644 --- a/llvm/test/CodeGen/PowerPC/alignlongjumptest.mir +++ b/llvm/test/CodeGen/PowerPC/alignlongjumptest.mir @@ -43,8 +43,8 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: [] stack: [] callSites: [] diff --git a/llvm/test/CodeGen/PowerPC/block-placement-1.mir b/llvm/test/CodeGen/PowerPC/block-placement-1.mir index f91ab63..a74af48 100644 --- a/llvm/test/CodeGen/PowerPC/block-placement-1.mir +++ b/llvm/test/CodeGen/PowerPC/block-placement-1.mir @@ -140,8 +140,8 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: [] stack: [] callSites: [] @@ -186,8 +186,8 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: - { id: 0, type: spill-slot, offset: -80, size: 8, alignment: 16, stack-id: default, callee-saved-register: '$x30', callee-saved-restored: true, debug-info-variable: '', diff --git a/llvm/test/CodeGen/PowerPC/block-placement.mir b/llvm/test/CodeGen/PowerPC/block-placement.mir index dab8dfb..99d399d 100644 --- a/llvm/test/CodeGen/PowerPC/block-placement.mir +++ b/llvm/test/CodeGen/PowerPC/block-placement.mir @@ -111,8 +111,8 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: [] stack: [] callSites: [] diff --git a/llvm/test/CodeGen/PowerPC/builtins-bcd-format-conversion.ll b/llvm/test/CodeGen/PowerPC/builtins-bcd-format-conversion.ll new file mode 100644 index 0000000..ede8625 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/builtins-bcd-format-conversion.ll @@ -0,0 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown -mcpu=pwr9 \ +; RUN: --ppc-asm-full-reg-names < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-unknown -mcpu=pwr9 \ +; RUN: --ppc-asm-full-reg-names < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64-ibm-aix-xcoff \ +; RUN: -ppc-asm-full-reg-names < %s | FileCheck %s + +define dso_local <16 x i8> @test_bcdcopysign(<16 x i8> noundef %a, <16 x i8> noundef %b) { +; CHECK-LABEL: test_bcdcopysign: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: bcdcpsgn. v2, v2, v3 +; CHECK-NEXT: blr +entry: + %0 = tail call <16 x i8> @llvm.ppc.bcdcopysign(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %0 +} + +define dso_local <16 x i8> @test_bcdsetsign_imm0(<16 x i8> noundef %a) { +; CHECK-LABEL: test_bcdsetsign_imm0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: bcdsetsgn. v2, v2, 0 +; CHECK-NEXT: blr +entry: + %0 = tail call <16 x i8> @llvm.ppc.bcdsetsign(<16 x i8> %a, i32 0) + ret <16 x i8> %0 +} + +define dso_local <16 x i8> @test_bcdsetsign_imm1(<16 x i8> noundef %a) { +; CHECK-LABEL: test_bcdsetsign_imm1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: bcdsetsgn. v2, v2, 1 +; CHECK-NEXT: blr +entry: + %0 = tail call <16 x i8> @llvm.ppc.bcdsetsign(<16 x i8> %a, i32 1) + ret <16 x i8> %0 +} + +declare <16 x i8> @llvm.ppc.bcdcopysign(<16 x i8>, <16 x i8>) +declare <16 x i8> @llvm.ppc.bcdsetsign(<16 x i8>, i32) diff --git a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll b/llvm/test/CodeGen/PowerPC/check-zero-vector.ll index 59173e2..d8e66d6 100644 --- a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll +++ b/llvm/test/CodeGen/PowerPC/check-zero-vector.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: < %s | FileCheck %s --check-prefix=POWERPC_64LE @@ -7,240 +8,90 @@ ; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc-ibm-aix \ ; RUN: < %s | FileCheck %s --check-prefix=POWERPC_32 -define i32 @test_Greater_than(ptr %colauths, i32 signext %ncols) { -; This testcase is manually reduced to isolate the critical code blocks. -; It is designed to check for vector comparison specifically for zero vectors. -; In the vector.body section, we are expecting a comparison instruction (vcmpequh), -; merge instructions (vmrghh and vmrglh) which use exactly 2 vectors. -; The output of the merge instruction is being used by xxland and finally -; accumulated by vadduwm instruction. - +define i32 @test_Greater_than(ptr %colauths) { +; This testcase is for the special case of zero-vector comparisons. +; Currently the generated code does a comparison (vcmpequh) and then a negation (xxlnor). +; This pattern is expected to be optimized in a future patch. ; POWERPC_64LE-LABEL: test_Greater_than: -; POWERPC_64LE: .LBB0_6: # %vector.body -; POWERPC_64LE-NEXT: # -; POWERPC_64LE-NEXT: lxv [[R1:[0-9]+]], -64(4) -; POWERPC_64LE-NEXT: vcmpequh [[R2:[0-9]+]], [[R2]], [[R3:[0-9]+]] -; POWERPC_64LE-NEXT: xxlnor [[R1]], [[R1]], [[R1]] -; POWERPC_64LE-NEXT: vmrghh [[R4:[0-9]+]], [[R2]], [[R2]] -; POWERPC_64LE-NEXT: vmrglh [[R2]], [[R2]], [[R2]] -; POWERPC_64LE-NEXT: xxland [[R5:[0-9]+]], [[R5]], [[R6:[0-9]+]] -; POWERPC_64LE-NEXT: xxland [[R1]], [[R1]], [[R6]] -; POWERPC_64LE-NEXT: vadduwm [[R7:[0-9]+]], [[R7]], [[R4]] -; POWERPC_64LE: .LBB0_10: # %vec.epilog.vector.body -; POWERPC_64LE-NEXT: # -; POWERPC_64LE-NEXT: lxv [[R8:[0-9]+]], 0(4) -; POWERPC_64LE-NEXT: addi 4, 4, 16 -; POWERPC_64LE-NEXT: vcmpequh [[R9:[0-9]+]], [[R9]], [[R10:[0-9]+]] -; POWERPC_64LE-NEXT: xxlnor [[R8]], [[R8]], [[R8]] -; POWERPC_64LE-NEXT: vmrglh [[R11:[0-9]+]], [[R9]], [[R9]] -; POWERPC_64LE-NEXT: vmrghh [[R9]], [[R9]], [[R9]] -; POWERPC_64LE-NEXT: xxland [[R12:[0-9]+]], [[R12]], [[R6]] -; POWERPC_64LE-NEXT: xxland [[R8]], [[R8]], [[R6]] -; POWERPC_64LE-NEXT: vadduwm [[R7]], [[R7]], [[R9]] -; POWERPC_64LE-NEXT: vadduwm [[R3]], [[R3]], [[R11]] -; POWERPC_64LE-NEXT: bdnz .LBB0_10 -; POWERPC_64LE: blr +; POWERPC_64LE: # %bb.0: # %entry +; POWERPC_64LE-NEXT: lfd 0, 0(3) +; POWERPC_64LE-NEXT: xxlxor 35, 35, 35 +; POWERPC_64LE-NEXT: li 4, 0 +; POWERPC_64LE-NEXT: li 3, 4 +; POWERPC_64LE-NEXT: xxswapd 34, 0 +; POWERPC_64LE-NEXT: vcmpequh 2, 2, 3 +; POWERPC_64LE-NEXT: xxlnor 34, 34, 34 +; POWERPC_64LE-NEXT: vmrglh 3, 2, 2 +; POWERPC_64LE-NEXT: vextuwrx 4, 4, 2 +; POWERPC_64LE-NEXT: vextuwrx 3, 3, 3 +; POWERPC_64LE-NEXT: clrlwi 4, 4, 31 +; POWERPC_64LE-NEXT: rlwimi 4, 3, 1, 30, 30 +; POWERPC_64LE-NEXT: mfvsrwz 3, 35 +; POWERPC_64LE-NEXT: rlwimi 4, 3, 2, 29, 29 +; POWERPC_64LE-NEXT: li 3, 12 +; POWERPC_64LE-NEXT: vextuwrx 3, 3, 3 +; POWERPC_64LE-NEXT: rlwimi 4, 3, 3, 28, 28 +; POWERPC_64LE-NEXT: stb 4, -1(1) +; POWERPC_64LE-NEXT: lbz 3, -1(1) +; POWERPC_64LE-NEXT: popcntd 3, 3 +; POWERPC_64LE-NEXT: blr ; ; POWERPC_64-LABEL: test_Greater_than: -; POWERPC_64: L..BB0_6: # %vector.body -; POWERPC_64-NEXT: # -; POWERPC_64-NEXT: lxv [[R1:[0-9]+]], -64(4) -; POWERPC_64-NEXT: vcmpequh [[R2:[0-9]+]], [[R2]], [[R3:[0-9]+]] -; POWERPC_64-NEXT: xxlnor [[R1]], [[R1]], [[R1]] -; POWERPC_64-NEXT: vmrglh [[R4:[0-9]+]], [[R2]], [[R2]] -; POWERPC_64-NEXT: vmrghh [[R2]], [[R2]], [[R2]] -; POWERPC_64-NEXT: xxland [[R5:[0-9]+]], [[R5]], [[R6:[0-9]+]] -; POWERPC_64-NEXT: xxland [[R1]], [[R1]], [[R6]] -; POWERPC_64-NEXT: vadduwm [[R7:[0-9]+]], [[R7]], [[R4]] -; POWERPC_64: L..BB0_10: # %vec.epilog.vector.body -; POWERPC_64-NEXT: # -; POWERPC_64-NEXT: lxv [[R8:[0-9]+]], 0(4) -; POWERPC_64-NEXT: addi 4, 4, 16 -; POWERPC_64-NEXT: vcmpequh [[R9:[0-9]+]], [[R9]], [[R10:[0-9]+]] -; POWERPC_64-NEXT: xxlnor [[R8]], [[R8]], [[R8]] -; POWERPC_64-NEXT: vmrghh [[R11:[0-9]+]], [[R9]], [[R9]] -; POWERPC_64-NEXT: vmrglh [[R9]], [[R9]], [[R9]] -; POWERPC_64-NEXT: xxland [[R12:[0-9]+]], [[R12]], [[R6]] -; POWERPC_64-NEXT: xxland [[R8]], [[R8]], [[R6]] -; POWERPC_64-NEXT: vadduwm [[R7]], [[R7]], [[R9]] -; POWERPC_64-NEXT: vadduwm [[R3]], [[R3]], [[R11]] -; POWERPC_64-NEXT: bdnz L..BB0_10 -; POWERPC_64: blr +; POWERPC_64: # %bb.0: # %entry +; POWERPC_64-NEXT: lxsd 2, 0(3) +; POWERPC_64-NEXT: xxlxor 35, 35, 35 +; POWERPC_64-NEXT: li 4, 12 +; POWERPC_64-NEXT: li 3, 8 +; POWERPC_64-NEXT: vcmpequh 2, 2, 3 +; POWERPC_64-NEXT: xxlnor 34, 34, 34 +; POWERPC_64-NEXT: vmrghh 2, 2, 2 +; POWERPC_64-NEXT: vextuwlx 4, 4, 2 +; POWERPC_64-NEXT: vextuwlx 3, 3, 2 +; POWERPC_64-NEXT: clrlwi 4, 4, 31 +; POWERPC_64-NEXT: rlwimi 4, 3, 1, 30, 30 +; POWERPC_64-NEXT: mfvsrwz 3, 34 +; POWERPC_64-NEXT: rlwimi 4, 3, 2, 29, 29 +; POWERPC_64-NEXT: li 3, 0 +; POWERPC_64-NEXT: vextuwlx 3, 3, 2 +; POWERPC_64-NEXT: rlwimi 4, 3, 3, 28, 28 +; POWERPC_64-NEXT: stb 4, -1(1) +; POWERPC_64-NEXT: lbz 3, -1(1) +; POWERPC_64-NEXT: popcntd 3, 3 +; POWERPC_64-NEXT: blr ; ; POWERPC_32-LABEL: test_Greater_than: -; POWERPC_32: L..BB0_7: # %vector.body -; POWERPC_32-NEXT: # -; POWERPC_32-NEXT: lxv [[R1:[0-9]+]], 0(10) -; POWERPC_32-NEXT: addic [[R13:[0-9]+]], [[R13]], 64 -; POWERPC_32-NEXT: addze [[R14:[0-9]+]], [[R14]] -; POWERPC_32-NEXT: xor [[R15:[0-9]+]], [[R13]], [[R16:[0-9]+]] -; POWERPC_32-NEXT: or. [[R15]], [[R15]], [[R14]] -; POWERPC_32-NEXT: vcmpequh [[R2:[0-9]+]], [[R2]], [[R3:[0-9]+]] -; POWERPC_32-NEXT: xxlnor [[R1]], [[R1]], [[R1]] -; POWERPC_32-NEXT: vmrglh [[R4:[0-9]+]], [[R2]], [[R2]] -; POWERPC_32-NEXT: vmrghh [[R2]], [[R2]], [[R2]] -; POWERPC_32-NEXT: xxland [[R5:[0-9]+]], [[R5]], [[R6:[0-9]+]] -; POWERPC_32-NEXT: xxland [[R1]], [[R1]], [[R6]] -; POWERPC_32-NEXT: vadduwm [[R7:[0-9]+]], [[R7]], [[R4]] -; POWERPC_32: L..BB0_11: # %vec.epilog.vector.body -; POWERPC_32-NEXT: # -; POWERPC_32-NEXT: slwi [[R14]], [[R13]], 1 -; POWERPC_32-NEXT: addic [[R13]], [[R13]], 8 -; POWERPC_32-NEXT: addze [[R17:[0-9]+]], [[R17]] -; POWERPC_32-NEXT: lxvx [[R8:[0-9]+]], [[R18:[0-9]+]], [[R14]] -; POWERPC_32-NEXT: xor [[R14]], [[R13]], [[R16]] -; POWERPC_32-NEXT: or. [[R14]], [[R14]], [[R17]] -; POWERPC_32-NEXT: vcmpequh [[R9:[0-9]+]], [[R9]], [[R3]] -; POWERPC_32-NEXT: xxlnor [[R8]], [[R8]], [[R8]] -; POWERPC_32-NEXT: vmrghh [[R11:[0-9]+]], [[R9]], [[R9]] -; POWERPC_32-NEXT: vmrglh [[R9]], [[R9]], [[R9]] -; POWERPC_32-NEXT: xxland [[R12:[0-9]+]], [[R12]], [[R6]] -; POWERPC_32-NEXT: xxland [[R8]], [[R8]], [[R6]] -; POWERPC_32-NEXT: vadduwm [[R7]], [[R7]], [[R9]] -; POWERPC_32-NEXT: vadduwm [[R19:[0-9]+]], [[R19]], [[R11]] -; POWERPC_32-NEXT: bne 0, L..BB0_11 -; POWERPC_32: blr - entry: - %cmp5 = icmp sgt i32 %ncols, 0 - br i1 %cmp5, label %iter.check, label %for.cond.cleanup - -iter.check: ; preds = %entry - %wide.trip.count = zext nneg i32 %ncols to i64 - %min.iters.check = icmp ult i32 %ncols, 8 - br i1 %min.iters.check, label %for.body.preheader, label %vector.main.loop.iter.check - -for.body.preheader: ; preds = %vec.epilog.iter.check, %vec.epilog.middle.block, %iter.check - %indvars.iv.ph = phi i64 [ 0, %iter.check ], [ %n.vec, %vec.epilog.iter.check ], [ %n.vec31, %vec.epilog.middle.block ] - %num_cols_needed.06.ph = phi i32 [ 0, %iter.check ], [ %33, %vec.epilog.iter.check ], [ %40, %vec.epilog.middle.block ] - br label %for.body - -vector.main.loop.iter.check: ; preds = %iter.check - %min.iters.check9 = icmp ult i32 %ncols, 64 - br i1 %min.iters.check9, label %vec.epilog.ph, label %vector.ph - -vector.ph: ; preds = %vector.main.loop.iter.check - %n.vec = and i64 %wide.trip.count, 2147483584 - br label %vector.body - -vector.body: ; preds = %vector.body, %vector.ph - %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] - %vec.phi = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %24, %vector.body ] - %vec.phi10 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %25, %vector.body ] - %vec.phi11 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %26, %vector.body ] - %vec.phi12 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %27, %vector.body ] - %vec.phi13 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %28, %vector.body ] - %vec.phi14 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %29, %vector.body ] - %vec.phi15 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %30, %vector.body ] - %vec.phi16 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %31, %vector.body ] - %0 = getelementptr inbounds nuw i16, ptr %colauths, i64 %index - %1 = getelementptr inbounds nuw i8, ptr %0, i64 16 - %2 = getelementptr inbounds nuw i8, ptr %0, i64 32 - %3 = getelementptr inbounds nuw i8, ptr %0, i64 48 - %4 = getelementptr inbounds nuw i8, ptr %0, i64 64 - %5 = getelementptr inbounds nuw i8, ptr %0, i64 80 - %6 = getelementptr inbounds nuw i8, ptr %0, i64 96 - %7 = getelementptr inbounds nuw i8, ptr %0, i64 112 - %wide.load = load <8 x i16>, ptr %0, align 2, !tbaa !5 - %wide.load17 = load <8 x i16>, ptr %1, align 2, !tbaa !5 - %wide.load18 = load <8 x i16>, ptr %2, align 2, !tbaa !5 - %wide.load19 = load <8 x i16>, ptr %3, align 2, !tbaa !5 - %wide.load20 = load <8 x i16>, ptr %4, align 2, !tbaa !5 - %wide.load21 = load <8 x i16>, ptr %5, align 2, !tbaa !5 - %wide.load22 = load <8 x i16>, ptr %6, align 2, !tbaa !5 - %wide.load23 = load <8 x i16>, ptr %7, align 2, !tbaa !5 - %8 = icmp ne <8 x i16> %wide.load, zeroinitializer - %9 = icmp ne <8 x i16> %wide.load17, zeroinitializer - %10 = icmp ne <8 x i16> %wide.load18, zeroinitializer - %11 = icmp ne <8 x i16> %wide.load19, zeroinitializer - %12 = icmp ne <8 x i16> %wide.load20, zeroinitializer - %13 = icmp ne <8 x i16> %wide.load21, zeroinitializer - %14 = icmp ne <8 x i16> %wide.load22, zeroinitializer - %15 = icmp ne <8 x i16> %wide.load23, zeroinitializer - %16 = zext <8 x i1> %8 to <8 x i32> - %17 = zext <8 x i1> %9 to <8 x i32> - %18 = zext <8 x i1> %10 to <8 x i32> - %19 = zext <8 x i1> %11 to <8 x i32> - %20 = zext <8 x i1> %12 to <8 x i32> - %21 = zext <8 x i1> %13 to <8 x i32> - %22 = zext <8 x i1> %14 to <8 x i32> - %23 = zext <8 x i1> %15 to <8 x i32> - %24 = add <8 x i32> %vec.phi, %16 - %25 = add <8 x i32> %vec.phi10, %17 - %26 = add <8 x i32> %vec.phi11, %18 - %27 = add <8 x i32> %vec.phi12, %19 - %28 = add <8 x i32> %vec.phi13, %20 - %29 = add <8 x i32> %vec.phi14, %21 - %30 = add <8 x i32> %vec.phi15, %22 - %31 = add <8 x i32> %vec.phi16, %23 - %index.next = add nuw i64 %index, 64 - %32 = icmp eq i64 %index.next, %n.vec - br i1 %32, label %middle.block, label %vector.body, !llvm.loop !9 - -middle.block: ; preds = %vector.body - %bin.rdx = add <8 x i32> %25, %24 - %bin.rdx24 = add <8 x i32> %26, %bin.rdx - %bin.rdx25 = add <8 x i32> %27, %bin.rdx24 - %bin.rdx26 = add <8 x i32> %28, %bin.rdx25 - %bin.rdx27 = add <8 x i32> %29, %bin.rdx26 - %bin.rdx28 = add <8 x i32> %30, %bin.rdx27 - %bin.rdx29 = add <8 x i32> %31, %bin.rdx28 - %33 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %bin.rdx29) - %cmp.n = icmp eq i64 %n.vec, %wide.trip.count - br i1 %cmp.n, label %for.cond.cleanup, label %vec.epilog.iter.check - -vec.epilog.iter.check: ; preds = %middle.block - %n.vec.remaining = and i64 %wide.trip.count, 56 - %min.epilog.iters.check = icmp eq i64 %n.vec.remaining, 0 - br i1 %min.epilog.iters.check, label %for.body.preheader, label %vec.epilog.ph - -vec.epilog.ph: ; preds = %vec.epilog.iter.check, %vector.main.loop.iter.check - %vec.epilog.resume.val = phi i64 [ %n.vec, %vec.epilog.iter.check ], [ 0, %vector.main.loop.iter.check ] - %bc.merge.rdx = phi i32 [ %33, %vec.epilog.iter.check ], [ 0, %vector.main.loop.iter.check ] - %n.vec31 = and i64 %wide.trip.count, 2147483640 - %34 = insertelement <8 x i32> <i32 poison, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 %bc.merge.rdx, i64 0 - br label %vec.epilog.vector.body - -vec.epilog.vector.body: ; preds = %vec.epilog.vector.body, %vec.epilog.ph - %index32 = phi i64 [ %vec.epilog.resume.val, %vec.epilog.ph ], [ %index.next35, %vec.epilog.vector.body ] - %vec.phi33 = phi <8 x i32> [ %34, %vec.epilog.ph ], [ %38, %vec.epilog.vector.body ] - %35 = getelementptr inbounds nuw i16, ptr %colauths, i64 %index32 - %wide.load34 = load <8 x i16>, ptr %35, align 2, !tbaa !5 - %36 = icmp ne <8 x i16> %wide.load34, zeroinitializer - %37 = zext <8 x i1> %36 to <8 x i32> - %38 = add <8 x i32> %vec.phi33, %37 - %index.next35 = add nuw i64 %index32, 8 - %39 = icmp eq i64 %index.next35, %n.vec31 - br i1 %39, label %vec.epilog.middle.block, label %vec.epilog.vector.body, !llvm.loop !13 - -vec.epilog.middle.block: ; preds = %vec.epilog.vector.body - %40 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %38) - %cmp.n36 = icmp eq i64 %n.vec31, %wide.trip.count - br i1 %cmp.n36, label %for.cond.cleanup, label %for.body.preheader - -for.cond.cleanup: ; preds = %for.body, %middle.block, %vec.epilog.middle.block, %entry - %num_cols_needed.0.lcssa = phi i32 [ 0, %entry ], [ %33, %middle.block ], [ %40, %vec.epilog.middle.block ], [ %spec.select, %for.body ] - ret i32 %num_cols_needed.0.lcssa - -for.body: ; preds = %for.body.preheader, %for.body - %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ] - %num_cols_needed.06 = phi i32 [ %spec.select, %for.body ], [ %num_cols_needed.06.ph, %for.body.preheader ] - %arrayidx = getelementptr inbounds nuw i16, ptr %colauths, i64 %indvars.iv - %41 = load i16, ptr %arrayidx, align 2, !tbaa !5 - %tobool.not = icmp ne i16 %41, 0 - %inc = zext i1 %tobool.not to i32 - %spec.select = add nuw nsw i32 %num_cols_needed.06, %inc - %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 - %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count - br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !14 +; POWERPC_32: # %bb.0: # %entry +; POWERPC_32-NEXT: li 4, 4 +; POWERPC_32-NEXT: lxvwsx 1, 0, 3 +; POWERPC_32-NEXT: xxlxor 35, 35, 35 +; POWERPC_32-NEXT: lxvwsx 0, 3, 4 +; POWERPC_32-NEXT: xxmrghw 34, 1, 0 +; POWERPC_32-NEXT: vcmpequh 2, 2, 3 +; POWERPC_32-NEXT: xxlnor 34, 34, 34 +; POWERPC_32-NEXT: vmrghh 2, 2, 2 +; POWERPC_32-NEXT: stxv 34, -32(1) +; POWERPC_32-NEXT: lwz 3, -20(1) +; POWERPC_32-NEXT: lwz 4, -24(1) +; POWERPC_32-NEXT: clrlwi 3, 3, 31 +; POWERPC_32-NEXT: rlwimi 3, 4, 1, 30, 30 +; POWERPC_32-NEXT: lwz 4, -28(1) +; POWERPC_32-NEXT: rlwimi 3, 4, 2, 29, 29 +; POWERPC_32-NEXT: lwz 4, -32(1) +; POWERPC_32-NEXT: rlwimi 3, 4, 3, 28, 28 +; POWERPC_32-NEXT: popcntw 3, 3 +; POWERPC_32-NEXT: blr +entry: + %0 = load <4 x i16>, ptr %colauths, align 2, !tbaa !5 + %1 = icmp ne <4 x i16> %0, zeroinitializer + %2 = bitcast <4 x i1> %1 to i4 + %3 = tail call range(i4 0, 5) i4 @llvm.ctpop.i4(i4 %2) + %4 = zext nneg i4 %3 to i32 + ret i32 %4 } +declare i4 @llvm.ctpop.i4(i4) #1 + !5 = !{!6, !6, i64 0} !6 = !{!"short", !7, i64 0} !7 = !{!"omnipotent char", !8, i64 0} !8 = !{!"Simple C/C++ TBAA"} -!9 = distinct !{!9, !10, !11, !12} -!10 = !{!"llvm.loop.mustprogress"} -!11 = !{!"llvm.loop.isvectorized", i32 1} -!12 = !{!"llvm.loop.unroll.runtime.disable"} -!13 = distinct !{!13, !10, !11, !12} -!14 = distinct !{!14, !10, !12, !11} diff --git a/llvm/test/CodeGen/PowerPC/collapse-rotates.mir b/llvm/test/CodeGen/PowerPC/collapse-rotates.mir index 938b27f..b30b161 100644 --- a/llvm/test/CodeGen/PowerPC/collapse-rotates.mir +++ b/llvm/test/CodeGen/PowerPC/collapse-rotates.mir @@ -45,8 +45,8 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: [] stack: [] constants: [] diff --git a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-R0-special-handling.mir b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-R0-special-handling.mir index e1d0285..fac09d2 100644 --- a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-R0-special-handling.mir +++ b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-R0-special-handling.mir @@ -111,8 +111,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -165,8 +165,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -219,8 +219,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -272,8 +272,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -322,8 +322,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -370,8 +370,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -417,8 +417,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: diff --git a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir index cdd6be5..0b61455 100644 --- a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir +++ b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir @@ -242,8 +242,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -292,8 +292,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -348,8 +348,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -411,8 +411,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -470,8 +470,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -528,8 +528,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -590,8 +590,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -648,8 +648,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -707,8 +707,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -765,8 +765,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -821,8 +821,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -876,8 +876,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -931,8 +931,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -986,8 +986,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -1040,8 +1040,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -1093,8 +1093,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -1146,8 +1146,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -1199,8 +1199,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -1252,8 +1252,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -1305,8 +1305,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: diff --git a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir index fa06dd5..61c0da6 100644 --- a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir +++ b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir @@ -1044,8 +1044,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -1100,8 +1100,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -1160,8 +1160,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -1221,8 +1221,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -1280,8 +1280,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -1335,8 +1335,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -1385,8 +1385,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -1439,8 +1439,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -1494,8 +1494,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -1548,8 +1548,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -1602,8 +1602,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -1655,8 +1655,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -1708,8 +1708,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -1762,8 +1762,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -1818,8 +1818,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -1877,8 +1877,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -1938,8 +1938,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -2002,8 +2002,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -2072,8 +2072,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -2149,8 +2149,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -2229,8 +2229,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -2306,8 +2306,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -2384,8 +2384,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -2461,8 +2461,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -2542,8 +2542,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -2621,8 +2621,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -2697,8 +2697,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -2772,8 +2772,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -2845,8 +2845,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -2920,8 +2920,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -2993,8 +2993,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -3077,8 +3077,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: - { id: 0, name: '', type: default, offset: 0, size: 16, alignment: 16, @@ -3183,8 +3183,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -3256,8 +3256,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -3329,8 +3329,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -3402,8 +3402,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -3465,8 +3465,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -3515,8 +3515,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -3562,8 +3562,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -3608,8 +3608,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -3658,8 +3658,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -3713,8 +3713,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -3768,8 +3768,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -3823,8 +3823,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -3874,8 +3874,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -3920,8 +3920,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -3970,8 +3970,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -4024,8 +4024,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -4078,8 +4078,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -4131,8 +4131,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -4183,8 +4183,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -4235,8 +4235,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -4284,8 +4284,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -4339,8 +4339,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -4404,8 +4404,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -4467,8 +4467,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -4522,8 +4522,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -4575,8 +4575,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -4628,8 +4628,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -4681,8 +4681,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -4739,8 +4739,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -4797,8 +4797,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -4859,8 +4859,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -4917,8 +4917,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -4976,8 +4976,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -5036,8 +5036,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -5091,8 +5091,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -5144,8 +5144,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -5209,8 +5209,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -5282,8 +5282,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -5357,8 +5357,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -5430,8 +5430,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -5505,8 +5505,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -5578,8 +5578,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -5652,8 +5652,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -5723,8 +5723,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -5794,8 +5794,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -5867,8 +5867,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -5938,8 +5938,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -6011,8 +6011,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -6074,8 +6074,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -6126,8 +6126,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -6178,8 +6178,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -6236,8 +6236,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -6297,8 +6297,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -6351,8 +6351,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -6401,8 +6401,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -6448,8 +6448,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -6494,8 +6494,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: diff --git a/llvm/test/CodeGen/PowerPC/ctrloop-do-not-duplicate-mi.mir b/llvm/test/CodeGen/PowerPC/ctrloop-do-not-duplicate-mi.mir index 651869d..668e7fe 100644 --- a/llvm/test/CodeGen/PowerPC/ctrloop-do-not-duplicate-mi.mir +++ b/llvm/test/CodeGen/PowerPC/ctrloop-do-not-duplicate-mi.mir @@ -117,8 +117,8 @@ frameInfo: hasMustTailInVarArgFunc: false hasTailCall: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: [] stack: [] callSites: [] diff --git a/llvm/test/CodeGen/PowerPC/livevars-crash2.mir b/llvm/test/CodeGen/PowerPC/livevars-crash2.mir index deaae39..c7a98f8 100644 --- a/llvm/test/CodeGen/PowerPC/livevars-crash2.mir +++ b/llvm/test/CodeGen/PowerPC/livevars-crash2.mir @@ -136,8 +136,8 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: [] stack: [] callSites: [] diff --git a/llvm/test/CodeGen/PowerPC/llrint-conv.ll b/llvm/test/CodeGen/PowerPC/llrint-conv.ll index daadf85..8e49ddc 100644 --- a/llvm/test/CodeGen/PowerPC/llrint-conv.ll +++ b/llvm/test/CodeGen/PowerPC/llrint-conv.ll @@ -1,10 +1,25 @@ ; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s +; RUN: llc < %s -mtriple=powerpc | FileCheck %s + +; FIXME: crash "Input type needs to be promoted!" +; define signext i32 @testmswh(half %x) { +; entry: +; %0 = tail call i64 @llvm.llrint.i64.f16(half %x) +; %conv = trunc i64 %0 to i32 +; ret i32 %conv +; } + +; define i64 @testmsxh(half %x) { +; entry: +; %0 = tail call i64 @llvm.llrint.i64.f16(half %x) +; ret i64 %0 +; } ; CHECK-LABEL: testmsws: ; CHECK: bl llrintf define signext i32 @testmsws(float %x) { entry: - %0 = tail call i64 @llvm.llrint.f32(float %x) + %0 = tail call i64 @llvm.llrint.i64.f32(float %x) %conv = trunc i64 %0 to i32 ret i32 %conv } @@ -13,7 +28,7 @@ entry: ; CHECK: bl llrintf define i64 @testmsxs(float %x) { entry: - %0 = tail call i64 @llvm.llrint.f32(float %x) + %0 = tail call i64 @llvm.llrint.i64.f32(float %x) ret i64 %0 } @@ -21,7 +36,7 @@ entry: ; CHECK: bl llrint define signext i32 @testmswd(double %x) { entry: - %0 = tail call i64 @llvm.llrint.f64(double %x) + %0 = tail call i64 @llvm.llrint.i64.f64(double %x) %conv = trunc i64 %0 to i32 ret i32 %conv } @@ -30,7 +45,7 @@ entry: ; CHECK: bl llrint define i64 @testmsxd(double %x) { entry: - %0 = tail call i64 @llvm.llrint.f64(double %x) + %0 = tail call i64 @llvm.llrint.i64.f64(double %x) ret i64 %0 } @@ -38,7 +53,7 @@ entry: ; CHECK: bl llrintl define signext i32 @testmswl(ppc_fp128 %x) { entry: - %0 = tail call i64 @llvm.llrint.ppcf128(ppc_fp128 %x) + %0 = tail call i64 @llvm.llrint.i64.ppcf128(ppc_fp128 %x) %conv = trunc i64 %0 to i32 ret i32 %conv } @@ -47,10 +62,27 @@ entry: ; CHECK: bl llrintl define i64 @testmsll(ppc_fp128 %x) { entry: - %0 = tail call i64 @llvm.llrint.ppcf128(ppc_fp128 %x) + %0 = tail call i64 @llvm.llrint.i64.ppcf128(ppc_fp128 %x) + ret i64 %0 +} + +; CHECK-LABEL: testmswq: +; CHECK: bl llrintf128 +define signext i32 @testmswq(fp128 %x) { +entry: + %0 = tail call i64 @llvm.llrint.i64.f128(fp128 %x) + %conv = trunc i64 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: testmslq: +; CHECK: bl llrintf128 +define i64 @testmslq(fp128 %x) { +entry: + %0 = tail call i64 @llvm.llrint.i64.f128(fp128 %x) ret i64 %0 } -declare i64 @llvm.llrint.f32(float) nounwind readnone -declare i64 @llvm.llrint.f64(double) nounwind readnone -declare i64 @llvm.llrint.ppcf128(ppc_fp128) nounwind readnone +declare i64 @llvm.llrint.i64.f32(float) nounwind readnone +declare i64 @llvm.llrint.i64.f64(double) nounwind readnone +declare i64 @llvm.llrint.i64.ppcf128(ppc_fp128) nounwind readnone diff --git a/llvm/test/CodeGen/PowerPC/lrint-conv.ll b/llvm/test/CodeGen/PowerPC/lrint-conv.ll index adfc994..bc77a20 100644 --- a/llvm/test/CodeGen/PowerPC/lrint-conv.ll +++ b/llvm/test/CodeGen/PowerPC/lrint-conv.ll @@ -1,4 +1,19 @@ ; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s +; RUN: llc < %s -mtriple=powerpc | FileCheck %s + +; FIXME: crash "Input type needs to be promoted!" +; define signext i32 @testmswh(half %x) { +; entry: +; %0 = tail call i64 @llvm.lrint.i64.f16(half %x) +; %conv = trunc i64 %0 to i32 +; ret i32 %conv +; } + +; define i64 @testmsxh(half %x) { +; entry: +; %0 = tail call i64 @llvm.lrint.i64.f16(half %x) +; ret i64 %0 +; } ; CHECK-LABEL: testmsws: ; CHECK: bl lrintf @@ -51,6 +66,23 @@ entry: ret i64 %0 } +; CHECK-LABEL: testmswq: +; CHECK: bl lrintf128 +define signext i32 @testmswq(fp128 %x) { +entry: + %0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x) + %conv = trunc i64 %0 to i32 + ret i32 %conv +} + +; CHECK-LABEL: testmslq: +; CHECK: bl lrintf128 +define i64 @testmslq(fp128 %x) { +entry: + %0 = tail call i64 @llvm.lrint.i64.f128(fp128 %x) + ret i64 %0 +} + declare i64 @llvm.lrint.i64.f32(float) nounwind readnone declare i64 @llvm.lrint.i64.f64(double) nounwind readnone declare i64 @llvm.lrint.i64.ppcf128(ppc_fp128) nounwind readnone diff --git a/llvm/test/CodeGen/PowerPC/memintr32.ll b/llvm/test/CodeGen/PowerPC/milicode32.ll index c07a5af..a2af6d4 100644 --- a/llvm/test/CodeGen/PowerPC/memintr32.ll +++ b/llvm/test/CodeGen/PowerPC/milicode32.ll @@ -11,7 +11,7 @@ define i32 @memcmp_test(ptr nocapture noundef readonly %ptr1, ptr nocapture noun ; CHECK-AIX-32-P9-NEXT: mflr r0 ; CHECK-AIX-32-P9-NEXT: stwu r1, -64(r1) ; CHECK-AIX-32-P9-NEXT: stw r0, 72(r1) -; CHECK-AIX-32-P9-NEXT: bl .memcmp[PR] +; CHECK-AIX-32-P9-NEXT: bl .___memcmp[PR] ; CHECK-AIX-32-P9-NEXT: nop ; CHECK-AIX-32-P9-NEXT: addi r1, r1, 64 ; CHECK-AIX-32-P9-NEXT: lwz r0, 8(r1) @@ -35,5 +35,37 @@ entry: declare i32 @memcmp(ptr noundef captures(none), ptr noundef captures(none), i32 noundef) nounwind +define i32 @strlen_test(ptr noundef %str) nounwind { +; CHECK-AIX-32-P9-LABEL: strlen_test: +; CHECK-AIX-32-P9: # %bb.0: # %entry +; CHECK-AIX-32-P9-NEXT: mflr r0 +; CHECK-AIX-32-P9-NEXT: stwu r1, -64(r1) +; CHECK-AIX-32-P9-NEXT: stw r0, 72(r1) +; CHECK-AIX-32-P9-NEXT: stw r3, 60(r1) +; CHECK-AIX-32-P9-NEXT: bl .strlen[PR] +; CHECK-AIX-32-P9-NEXT: nop +; CHECK-AIX-32-P9-NEXT: addi r1, r1, 64 +; CHECK-AIX-32-P9-NEXT: lwz r0, 8(r1) +; CHECK-AIX-32-P9-NEXT: mtlr r0 +; CHECK-AIX-32-P9-NEXT: blr +; +; CHECK-LINUX32-P9-LABEL: strlen_test: +; CHECK-LINUX32-P9: # %bb.0: # %entry +; CHECK-LINUX32-P9-NEXT: mflr r0 +; CHECK-LINUX32-P9-NEXT: stwu r1, -16(r1) +; CHECK-LINUX32-P9-NEXT: stw r0, 20(r1) +; CHECK-LINUX32-P9-NEXT: stw r3, 12(r1) +; CHECK-LINUX32-P9-NEXT: bl strlen +; CHECK-LINUX32-P9-NEXT: lwz r0, 20(r1) +; CHECK-LINUX32-P9-NEXT: addi r1, r1, 16 +; CHECK-LINUX32-P9-NEXT: mtlr r0 +; CHECK-LINUX32-P9-NEXT: blr +entry: + %str.addr = alloca ptr, align 4 + store ptr %str, ptr %str.addr, align 4 + %0 = load ptr, ptr %str.addr, align 4 + %call = call i32 @strlen(ptr noundef %0) + ret i32 %call +} - +declare i32 @strlen(ptr noundef) nounwind diff --git a/llvm/test/CodeGen/PowerPC/memintr64.ll b/llvm/test/CodeGen/PowerPC/milicode64.ll index b3a6650..0f0585d9 100644 --- a/llvm/test/CodeGen/PowerPC/memintr64.ll +++ b/llvm/test/CodeGen/PowerPC/milicode64.ll @@ -39,7 +39,7 @@ define noundef i32 @_Z11memcmp_testPKvS0_m(ptr noundef readonly captures(none) % ; CHECK-AIX-64-P9-NEXT: mflr r0 ; CHECK-AIX-64-P9-NEXT: stdu r1, -112(r1) ; CHECK-AIX-64-P9-NEXT: std r0, 128(r1) -; CHECK-AIX-64-P9-NEXT: bl .memcmp[PR] +; CHECK-AIX-64-P9-NEXT: bl .___memcmp64[PR] ; CHECK-AIX-64-P9-NEXT: nop ; CHECK-AIX-64-P9-NEXT: addi r1, r1, 112 ; CHECK-AIX-64-P9-NEXT: ld r0, 16(r1) @@ -52,4 +52,51 @@ entry: declare i32 @memcmp(ptr noundef captures(none), ptr noundef captures(none), i64 noundef) nounwind +define i64 @strlen_test(ptr noundef %str) nounwind { +; CHECK-LE-P9-LABEL: strlen_test: +; CHECK-LE-P9: # %bb.0: # %entry +; CHECK-LE-P9-NEXT: mflr r0 +; CHECK-LE-P9-NEXT: stdu r1, -48(r1) +; CHECK-LE-P9-NEXT: std r0, 64(r1) +; CHECK-LE-P9-NEXT: std r3, 40(r1) +; CHECK-LE-P9-NEXT: bl strlen +; CHECK-LE-P9-NEXT: nop +; CHECK-LE-P9-NEXT: addi r1, r1, 48 +; CHECK-LE-P9-NEXT: ld r0, 16(r1) +; CHECK-LE-P9-NEXT: mtlr r0 +; CHECK-LE-P9-NEXT: blr +; +; CHECK-BE-P9-LABEL: strlen_test: +; CHECK-BE-P9: # %bb.0: # %entry +; CHECK-BE-P9-NEXT: mflr r0 +; CHECK-BE-P9-NEXT: stdu r1, -128(r1) +; CHECK-BE-P9-NEXT: std r0, 144(r1) +; CHECK-BE-P9-NEXT: std r3, 120(r1) +; CHECK-BE-P9-NEXT: bl strlen +; CHECK-BE-P9-NEXT: nop +; CHECK-BE-P9-NEXT: addi r1, r1, 128 +; CHECK-BE-P9-NEXT: ld r0, 16(r1) +; CHECK-BE-P9-NEXT: mtlr r0 +; CHECK-BE-P9-NEXT: blr +; +; CHECK-AIX-64-P9-LABEL: strlen_test: +; CHECK-AIX-64-P9: # %bb.0: # %entry +; CHECK-AIX-64-P9-NEXT: mflr r0 +; CHECK-AIX-64-P9-NEXT: stdu r1, -128(r1) +; CHECK-AIX-64-P9-NEXT: std r0, 144(r1) +; CHECK-AIX-64-P9-NEXT: std r3, 120(r1) +; CHECK-AIX-64-P9-NEXT: bl .strlen[PR] +; CHECK-AIX-64-P9-NEXT: nop +; CHECK-AIX-64-P9-NEXT: addi r1, r1, 128 +; CHECK-AIX-64-P9-NEXT: ld r0, 16(r1) +; CHECK-AIX-64-P9-NEXT: mtlr r0 +; CHECK-AIX-64-P9-NEXT: blr +entry: + %str.addr = alloca ptr, align 8 + store ptr %str, ptr %str.addr, align 8 + %0 = load ptr, ptr %str.addr, align 8 + %call = call i64 @strlen(ptr noundef %0) + ret i64 %call +} +declare i64 @strlen(ptr noundef) nounwind diff --git a/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll b/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll index 232014d..a9503f7 100644 --- a/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll +++ b/llvm/test/CodeGen/PowerPC/mtvsrbmi.ll @@ -2,22 +2,87 @@ ; Verify whether the generated assembly for the following function includes the mtvsrbmi instruction. ; vector unsigned char v00FF() ; { -; vector unsigned char x = { 0xFF, 0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 }; -; return x; +; vector unsigned char x = { 0xFF, 0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0 }; +; return x; +; } +; vector unsigned short short00FF() +; { +; vector unsigned short x = { 0xFF, 0,0,0, 0,0,0,0}; +; return x; +; } +; vector unsigned int int00FF() +; { +; vector unsigned int x = { 0xFF, 0,0,0}; +; return x; +; } +; vector unsigned long long longlong00FF() +; { +; vector unsigned long long x = { 0xFF, 0}; +; return x; ; } ; RUN: llc < %s -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix -mcpu=pwr10 -verify-machineinstrs \ -; RUN: | FileCheck %s --check-prefix=CHECK +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-BE + +; RUN: llc < %s -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-gnu-linux -mcpu=pwr10 -verify-machineinstrs \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-LE + +; CHECK-NOT: .byte 255 +; CHECK-NOT: .byte 0 define dso_local noundef range(i8 -1, 1) <16 x i8> @_Z5v00FFv() { -; CHECK-NOT: L..CPI0_0: -; CHECK-NOT: .byte 255 # 0xff -; CHECK-NOT: .byte 0 # 0x0 - -; CHECK-LABEL: _Z5v00FFv: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: mtvsrbmi v2, 1 -; CHECK-NEXT: blr +; CHECK-BE-LABEL: _Z5v00FFv: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mtvsrbmi v2, 32768 +; CHECK-BE-NEXT: blr +; +; CHECK-LE-LABEL: _Z5v00FFv: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mtvsrbmi v2, 1 +; CHECK-LE-NEXT: blr + entry: ret <16 x i8> <i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> } + +define dso_local noundef range(i16 0, 256) <8 x i16> @_Z9short00FFv() { +; CHECK-BE-LABEL: _Z9short00FFv: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mtvsrbmi v2, 16384 +; CHECK-BE-NEXT: blr +; +; CHECK-LE-LABEL: _Z9short00FFv: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mtvsrbmi v2, 1 +; CHECK-LE-NEXT: blr +entry: + ret <8 x i16> <i16 255, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0> +} + +define dso_local noundef range(i32 0, 256) <4 x i32> @_Z7int00FFv() { +; CHECK-BE-LABEL: _Z7int00FFv: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mtvsrbmi v2, 4096 +; CHECK-BE-NEXT: blr +; +; CHECK-LE-LABEL: _Z7int00FFv: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mtvsrbmi v2, 1 +; CHECK-LE-NEXT: blr +entry: + ret <4 x i32> <i32 255, i32 0, i32 0, i32 0> +} + +define dso_local noundef range(i64 0, 256) <2 x i64> @_Z12longlong00FFv() { +; CHECK-BE-LABEL: _Z12longlong00FFv: +; CHECK-BE: # %bb.0: # %entry +; CHECK-BE-NEXT: mtvsrbmi v2, 256 +; CHECK-BE-NEXT: blr +; +; CHECK-LE-LABEL: _Z12longlong00FFv: +; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: mtvsrbmi v2, 1 +; CHECK-LE-NEXT: blr +entry: + ret <2 x i64> <i64 255, i64 0> +} diff --git a/llvm/test/CodeGen/PowerPC/nofpclass.ll b/llvm/test/CodeGen/PowerPC/nofpclass.ll new file mode 100644 index 0000000..b08e810 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/nofpclass.ll @@ -0,0 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64-ibm-aix-xcoff < %s | FileCheck %s + +; TODO: Update this test after adding the proper expansion of nofpclass for +; ppc_fp128 to test with more masks and to demonstrate preserving nofpclass +; after legalization. + +define ppc_fp128 @f(ppc_fp128 nofpclass(nan) %s) { +; CHECK-LABEL: f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: blr +entry: + ret ppc_fp128 %s +} diff --git a/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll b/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll index c733a01..4b03278 100644 --- a/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll +++ b/llvm/test/CodeGen/PowerPC/p10-spill-crlt.ll @@ -30,16 +30,14 @@ define dso_local void @P10_Spill_CR_LT() local_unnamed_addr { ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: std r0, 16(r1) ; CHECK-NEXT: stw r12, 8(r1) -; CHECK-NEXT: stdu r1, -64(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r29, -24 ; CHECK-NEXT: .cfi_offset r30, -16 ; CHECK-NEXT: .cfi_offset cr2, 8 ; CHECK-NEXT: .cfi_offset cr3, 8 ; CHECK-NEXT: .cfi_offset cr4, 8 -; CHECK-NEXT: std r29, 40(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, 48(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, 32(r1) # 8-byte Folded Spill ; CHECK-NEXT: bl call_2@notoc ; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB0_13 ; CHECK-NEXT: # %bb.1: # %bb @@ -67,11 +65,10 @@ define dso_local void @P10_Spill_CR_LT() local_unnamed_addr { ; CHECK-NEXT: bc 12, 4*cr3+eq, .LBB0_11 ; CHECK-NEXT: # %bb.6: # %bb32 ; CHECK-NEXT: # +; CHECK-NEXT: rlwinm r30, r30, 0, 24, 22 ; CHECK-NEXT: andi. r3, r30, 2 -; CHECK-NEXT: rlwinm r29, r30, 0, 24, 22 ; CHECK-NEXT: mcrf cr2, cr0 ; CHECK-NEXT: bl call_4@notoc -; CHECK-NEXT: mr r30, r29 ; CHECK-NEXT: beq+ cr2, .LBB0_3 ; CHECK-NEXT: # %bb.7: # %bb37 ; CHECK-NEXT: .LBB0_8: # %bb22 @@ -92,13 +89,11 @@ define dso_local void @P10_Spill_CR_LT() local_unnamed_addr { ; CHECK-BE-NEXT: stdu r1, -144(r1) ; CHECK-BE-NEXT: .cfi_def_cfa_offset 144 ; CHECK-BE-NEXT: .cfi_offset lr, 16 -; CHECK-BE-NEXT: .cfi_offset r28, -32 ; CHECK-BE-NEXT: .cfi_offset r29, -24 ; CHECK-BE-NEXT: .cfi_offset r30, -16 ; CHECK-BE-NEXT: .cfi_offset cr2, 8 ; CHECK-BE-NEXT: .cfi_offset cr2, 8 ; CHECK-BE-NEXT: .cfi_offset cr2, 8 -; CHECK-BE-NEXT: std r28, 112(r1) # 8-byte Folded Spill ; CHECK-BE-NEXT: std r29, 120(r1) # 8-byte Folded Spill ; CHECK-BE-NEXT: std r30, 128(r1) # 8-byte Folded Spill ; CHECK-BE-NEXT: bl call_2 @@ -131,12 +126,11 @@ define dso_local void @P10_Spill_CR_LT() local_unnamed_addr { ; CHECK-BE-NEXT: bc 12, 4*cr3+eq, .LBB0_11 ; CHECK-BE-NEXT: # %bb.6: # %bb32 ; CHECK-BE-NEXT: # +; CHECK-BE-NEXT: rlwinm r29, r29, 0, 24, 22 ; CHECK-BE-NEXT: andi. r3, r29, 2 -; CHECK-BE-NEXT: rlwinm r28, r29, 0, 24, 22 ; CHECK-BE-NEXT: mcrf cr2, cr0 ; CHECK-BE-NEXT: bl call_4 ; CHECK-BE-NEXT: nop -; CHECK-BE-NEXT: mr r29, r28 ; CHECK-BE-NEXT: beq+ cr2, .LBB0_3 ; CHECK-BE-NEXT: # %bb.7: # %bb37 ; CHECK-BE-NEXT: .LBB0_8: # %bb22 diff --git a/llvm/test/CodeGen/PowerPC/peephole-phi-acc.mir b/llvm/test/CodeGen/PowerPC/peephole-phi-acc.mir index f615fcf..4333473 100644 --- a/llvm/test/CodeGen/PowerPC/peephole-phi-acc.mir +++ b/llvm/test/CodeGen/PowerPC/peephole-phi-acc.mir @@ -223,8 +223,8 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: [] stack: [] callSites: [] @@ -337,8 +337,8 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: [] stack: [] callSites: [] @@ -485,8 +485,8 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: [] stack: [] callSites: [] @@ -696,8 +696,8 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: [] stack: [] callSites: [] diff --git a/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir b/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir index 1d898a4..ceecdc5 100644 --- a/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir +++ b/llvm/test/CodeGen/PowerPC/peephole-replaceInstr-after-eliminate-extsw.mir @@ -447,8 +447,8 @@ frameInfo: hasMustTailInVarArgFunc: false hasTailCall: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: [] stack: [] entry_values: [] diff --git a/llvm/test/CodeGen/PowerPC/phi-eliminate.mir b/llvm/test/CodeGen/PowerPC/phi-eliminate.mir index 72f7782..17060a8 100644 --- a/llvm/test/CodeGen/PowerPC/phi-eliminate.mir +++ b/llvm/test/CodeGen/PowerPC/phi-eliminate.mir @@ -122,8 +122,8 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: [] stack: [] callSites: [] diff --git a/llvm/test/CodeGen/PowerPC/ppc_reduce_cr_logicals.ll b/llvm/test/CodeGen/PowerPC/ppc_reduce_cr_logicals.ll new file mode 100644 index 0000000..585ce89 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/ppc_reduce_cr_logicals.ll @@ -0,0 +1,88 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=powerpc64-unknown-gnu-linux < %s | FileCheck %s -check-prefix=CHECK +; RUN: llc -mtriple=powerpc-unknown-gnu-linux < %s | FileCheck %s -check-prefix=CHECKBE + +define i32 @xe_migrate_copy(ptr %m, ptr %dst, ptr %tile, ptr %0, ptr %primary_gt, i1 %tobool4, i1 %tobool9, i64 %1, i32 %conv55, i1 %tobool37.not) nounwind { +; CHECK-LABEL: xe_migrate_copy: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr 0 +; CHECK-NEXT: stdu 1, -128(1) +; CHECK-NEXT: lbz 4, 255(1) +; CHECK-NEXT: andi. 4, 4, 1 +; CHECK-NEXT: std 0, 144(1) +; CHECK-NEXT: crmove 20, 1 +; CHECK-NEXT: andi. 4, 9, 1 +; CHECK-NEXT: lwz 9, 244(1) +; CHECK-NEXT: crmove 21, 1 +; CHECK-NEXT: andi. 4, 8, 1 +; CHECK-NEXT: li 4, 0 +; CHECK-NEXT: std 4, 112(1) +; CHECK-NEXT: crandc 21, 21, 20 +; CHECK-NEXT: bc 12, 21, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %while.body +; CHECK-NEXT: crand 20, 20, 1 +; CHECK-NEXT: li 8, 0 +; CHECK-NEXT: bc 4, 20, .LBB0_3 +; CHECK-NEXT: .LBB0_2: # %while.body +; CHECK-NEXT: li 8, 1 +; CHECK-NEXT: .LBB0_3: # %while.body +; CHECK-NEXT: li 5, 0 +; CHECK-NEXT: li 6, 0 +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: li 7, 0 +; CHECK-NEXT: li 10, 0 +; CHECK-NEXT: bl xe_migrate_ccs_copy +; CHECK-NEXT: nop +; CHECK-NEXT: addi 1, 1, 128 +; CHECK-NEXT: ld 0, 16(1) +; CHECK-NEXT: mtlr 0 +; CHECK-NEXT: blr +; +; CHECKBE-LABEL: xe_migrate_copy: +; CHECKBE: # %bb.0: # %entry +; CHECKBE-NEXT: mflr 0 +; CHECKBE-NEXT: stwu 1, -32(1) +; CHECKBE-NEXT: lbz 4, 55(1) +; CHECKBE-NEXT: li 5, 0 +; CHECKBE-NEXT: stw 0, 36(1) +; CHECKBE-NEXT: andi. 4, 4, 1 +; CHECKBE-NEXT: crmove 20, 1 +; CHECKBE-NEXT: andi. 4, 9, 1 +; CHECKBE-NEXT: crmove 21, 1 +; CHECKBE-NEXT: andi. 4, 8, 1 +; CHECKBE-NEXT: lwz 4, 48(1) +; CHECKBE-NEXT: crandc 21, 21, 20 +; CHECKBE-NEXT: stw 5, 24(1) +; CHECKBE-NEXT: stw 5, 20(1) +; CHECKBE-NEXT: stw 5, 16(1) +; CHECKBE-NEXT: stw 4, 12(1) +; CHECKBE-NEXT: bc 12, 21, .LBB0_2 +; CHECKBE-NEXT: # %bb.1: # %while.body +; CHECKBE-NEXT: crand 20, 20, 1 +; CHECKBE-NEXT: li 8, 0 +; CHECKBE-NEXT: bc 4, 20, .LBB0_3 +; CHECKBE-NEXT: .LBB0_2: # %while.body +; CHECKBE-NEXT: li 8, 1 +; CHECKBE-NEXT: .LBB0_3: # %while.body +; CHECKBE-NEXT: mr 4, 3 +; CHECKBE-NEXT: li 6, 0 +; CHECKBE-NEXT: li 7, 0 +; CHECKBE-NEXT: li 9, 0 +; CHECKBE-NEXT: li 10, 0 +; CHECKBE-NEXT: stw 8, 8(1) +; CHECKBE-NEXT: bl xe_migrate_ccs_copy +; CHECKBE-NEXT: lwz 0, 36(1) +; CHECKBE-NEXT: addi 1, 1, 32 +; CHECKBE-NEXT: mtlr 0 +; CHECKBE-NEXT: blr + +entry: + br label %while.body + +while.body: + %cond53.in = select i1 %tobool37.not, i1 %tobool4, i1 %tobool9 + %call57 = call zeroext i32 @xe_migrate_ccs_copy(ptr noundef %m, ptr noundef %m, i64 0, i1 false, i64 0, i1 %cond53.in, i32 %conv55, i64 0, i1 false) + ret i32 %call57 +} + +declare i32 @xe_migrate_ccs_copy(ptr, ptr, i64, i1, i64, i1, i32, i64, i1) diff --git a/llvm/test/CodeGen/PowerPC/remove-copy-crunsetcrbit.mir b/llvm/test/CodeGen/PowerPC/remove-copy-crunsetcrbit.mir index f3ef95b..05b3056 100644 --- a/llvm/test/CodeGen/PowerPC/remove-copy-crunsetcrbit.mir +++ b/llvm/test/CodeGen/PowerPC/remove-copy-crunsetcrbit.mir @@ -99,8 +99,8 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: [] stack: [] constants: [] diff --git a/llvm/test/CodeGen/PowerPC/remove-implicit-use.mir b/llvm/test/CodeGen/PowerPC/remove-implicit-use.mir index f5b931e..f89475e 100644 --- a/llvm/test/CodeGen/PowerPC/remove-implicit-use.mir +++ b/llvm/test/CodeGen/PowerPC/remove-implicit-use.mir @@ -56,8 +56,8 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: [] stack: [] constants: [] diff --git a/llvm/test/CodeGen/PowerPC/remove-redundant-li-skip-imp-kill.mir b/llvm/test/CodeGen/PowerPC/remove-redundant-li-skip-imp-kill.mir index 913877b..2040494 100644 --- a/llvm/test/CodeGen/PowerPC/remove-redundant-li-skip-imp-kill.mir +++ b/llvm/test/CodeGen/PowerPC/remove-redundant-li-skip-imp-kill.mir @@ -55,8 +55,8 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: - { id: 0, type: spill-slot, offset: -80, size: 8, alignment: 16, stack-id: default, callee-saved-register: '$x30', callee-saved-restored: true, debug-info-variable: '', diff --git a/llvm/test/CodeGen/PowerPC/remove-self-copies.mir b/llvm/test/CodeGen/PowerPC/remove-self-copies.mir index b5713a9..6c08390 100644 --- a/llvm/test/CodeGen/PowerPC/remove-self-copies.mir +++ b/llvm/test/CodeGen/PowerPC/remove-self-copies.mir @@ -65,8 +65,8 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: diff --git a/llvm/test/CodeGen/PowerPC/rlwinm_rldicl_to_andi.mir b/llvm/test/CodeGen/PowerPC/rlwinm_rldicl_to_andi.mir index a1d8539d..584f2a9 100644 --- a/llvm/test/CodeGen/PowerPC/rlwinm_rldicl_to_andi.mir +++ b/llvm/test/CodeGen/PowerPC/rlwinm_rldicl_to_andi.mir @@ -107,8 +107,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -167,8 +167,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -227,8 +227,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -284,8 +284,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -338,8 +338,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: @@ -392,8 +392,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: diff --git a/llvm/test/CodeGen/PowerPC/schedule-addi-load.mir b/llvm/test/CodeGen/PowerPC/schedule-addi-load.mir index 1717238..4414dfa 100644 --- a/llvm/test/CodeGen/PowerPC/schedule-addi-load.mir +++ b/llvm/test/CodeGen/PowerPC/schedule-addi-load.mir @@ -68,8 +68,8 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: [] stack: [] constants: [] diff --git a/llvm/test/CodeGen/PowerPC/setcr_bc.mir b/llvm/test/CodeGen/PowerPC/setcr_bc.mir index bc8bb55..76f9d5e 100644 --- a/llvm/test/CodeGen/PowerPC/setcr_bc.mir +++ b/llvm/test/CodeGen/PowerPC/setcr_bc.mir @@ -64,8 +64,8 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16, stack-id: default, callee-saved-register: '$x30', callee-saved-restored: true, debug-info-variable: '', diff --git a/llvm/test/CodeGen/PowerPC/setcr_bc2.mir b/llvm/test/CodeGen/PowerPC/setcr_bc2.mir index 5986c88..433ea63 100644 --- a/llvm/test/CodeGen/PowerPC/setcr_bc2.mir +++ b/llvm/test/CodeGen/PowerPC/setcr_bc2.mir @@ -64,8 +64,8 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16, stack-id: default, callee-saved-register: '$x30', callee-saved-restored: true, debug-info-variable: '', diff --git a/llvm/test/CodeGen/PowerPC/setcr_bc3.mir b/llvm/test/CodeGen/PowerPC/setcr_bc3.mir index 2d037d0..942ac69 100644 --- a/llvm/test/CodeGen/PowerPC/setcr_bc3.mir +++ b/llvm/test/CodeGen/PowerPC/setcr_bc3.mir @@ -37,8 +37,8 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: - { id: 0, type: spill-slot, offset: -16, size: 8, alignment: 16, stack-id: default, callee-saved-register: '$x30', callee-saved-restored: true, debug-info-variable: '', diff --git a/llvm/test/CodeGen/PowerPC/swaps-le-1.ll b/llvm/test/CodeGen/PowerPC/swaps-le-1.ll index f3e3410..5d5445f 100644 --- a/llvm/test/CodeGen/PowerPC/swaps-le-1.ll +++ b/llvm/test/CodeGen/PowerPC/swaps-le-1.ll @@ -187,34 +187,34 @@ define void @foo() { ; CHECK-P9-NEXT: .p2align 4 ; CHECK-P9-NEXT: .LBB0_1: # %vector.body ; CHECK-P9-NEXT: # -; CHECK-P9-NEXT: lxv 2, -32(6) -; CHECK-P9-NEXT: lxv 3, -32(5) -; CHECK-P9-NEXT: lxv 4, -16(5) -; CHECK-P9-NEXT: vadduwm 2, 3, 2 +; CHECK-P9-NEXT: lxv 2, -32(3) ; CHECK-P9-NEXT: lxv 3, -32(4) +; CHECK-P9-NEXT: lxv 4, -16(4) +; CHECK-P9-NEXT: vadduwm 2, 3, 2 +; CHECK-P9-NEXT: lxv 3, -32(5) ; CHECK-P9-NEXT: vmuluwm 2, 2, 3 -; CHECK-P9-NEXT: lxv 3, -16(6) -; CHECK-P9-NEXT: vadduwm 3, 4, 3 -; CHECK-P9-NEXT: lxv 4, 0(5) -; CHECK-P9-NEXT: stxv 2, -32(3) -; CHECK-P9-NEXT: lxv 2, -16(4) -; CHECK-P9-NEXT: vmuluwm 2, 3, 2 -; CHECK-P9-NEXT: lxv 3, 0(6) +; CHECK-P9-NEXT: lxv 3, -16(3) ; CHECK-P9-NEXT: vadduwm 3, 4, 3 -; CHECK-P9-NEXT: lxv 4, 16(5) -; CHECK-P9-NEXT: addi 5, 5, 64 -; CHECK-P9-NEXT: stxv 2, -16(3) -; CHECK-P9-NEXT: lxv 2, 0(4) +; CHECK-P9-NEXT: lxv 4, 0(4) +; CHECK-P9-NEXT: stxv 2, -32(6) +; CHECK-P9-NEXT: lxv 2, -16(5) ; CHECK-P9-NEXT: vmuluwm 2, 3, 2 -; CHECK-P9-NEXT: lxv 3, 16(6) -; CHECK-P9-NEXT: addi 6, 6, 64 +; CHECK-P9-NEXT: lxv 3, 0(3) ; CHECK-P9-NEXT: vadduwm 3, 4, 3 -; CHECK-P9-NEXT: stxv 2, 0(3) -; CHECK-P9-NEXT: lxv 2, 16(4) +; CHECK-P9-NEXT: lxv 4, 16(4) ; CHECK-P9-NEXT: addi 4, 4, 64 +; CHECK-P9-NEXT: stxv 2, -16(6) +; CHECK-P9-NEXT: lxv 2, 0(5) ; CHECK-P9-NEXT: vmuluwm 2, 3, 2 -; CHECK-P9-NEXT: stxv 2, 16(3) +; CHECK-P9-NEXT: lxv 3, 16(3) ; CHECK-P9-NEXT: addi 3, 3, 64 +; CHECK-P9-NEXT: vadduwm 3, 4, 3 +; CHECK-P9-NEXT: stxv 2, 0(6) +; CHECK-P9-NEXT: lxv 2, 16(5) +; CHECK-P9-NEXT: addi 5, 5, 64 +; CHECK-P9-NEXT: vmuluwm 2, 3, 2 +; CHECK-P9-NEXT: stxv 2, 16(6) +; CHECK-P9-NEXT: addi 6, 6, 64 ; CHECK-P9-NEXT: bdnz .LBB0_1 ; CHECK-P9-NEXT: # %bb.2: # %for.end ; CHECK-P9-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/tls_get_addr_fence1.mir b/llvm/test/CodeGen/PowerPC/tls_get_addr_fence1.mir index 73bd475..a55cb04 100644 --- a/llvm/test/CodeGen/PowerPC/tls_get_addr_fence1.mir +++ b/llvm/test/CodeGen/PowerPC/tls_get_addr_fence1.mir @@ -43,8 +43,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: diff --git a/llvm/test/CodeGen/PowerPC/tls_get_addr_fence2.mir b/llvm/test/CodeGen/PowerPC/tls_get_addr_fence2.mir index ffeb066..49d4a15 100644 --- a/llvm/test/CodeGen/PowerPC/tls_get_addr_fence2.mir +++ b/llvm/test/CodeGen/PowerPC/tls_get_addr_fence2.mir @@ -43,8 +43,8 @@ frameInfo: hasOpaqueSPAdjustment: false hasVAStart: false hasMustTailInVarArgFunc: false - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: stack: constants: diff --git a/llvm/test/CodeGen/PowerPC/two-address-crash.mir b/llvm/test/CodeGen/PowerPC/two-address-crash.mir index cd2e69d..21f08cb0 100644 --- a/llvm/test/CodeGen/PowerPC/two-address-crash.mir +++ b/llvm/test/CodeGen/PowerPC/two-address-crash.mir @@ -62,8 +62,8 @@ frameInfo: hasVAStart: false hasMustTailInVarArgFunc: false localFrameSize: 0 - savePoint: '' - restorePoint: '' + savePoint: [] + restorePoint: [] fixedStack: [] stack: [] callSites: [] diff --git a/llvm/test/CodeGen/PowerPC/vector-llrint.ll b/llvm/test/CodeGen/PowerPC/vector-llrint.ll index 9229fef..8a9e48e 100644 --- a/llvm/test/CodeGen/PowerPC/vector-llrint.ll +++ b/llvm/test/CodeGen/PowerPC/vector-llrint.ll @@ -1,4 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; FIXME: crash "Input type needs to be promoted!" +; SKIP: llc -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; SKIP: -mtriple=powerpc-unknown-unknown -verify-machineinstrs < %s | \ +; SKIP: FileCheck %s --check-prefix=PPC32 ; RUN: llc -mcpu=pwr7 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ ; RUN: -mtriple=powerpc64-unknown-unknown -verify-machineinstrs < %s | \ ; RUN: FileCheck %s --check-prefix=BE @@ -9,14 +13,12 @@ ; RUN: -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs < %s \ ; RUN: --enable-unsafe-fp-math | FileCheck %s --check-prefix=FAST -define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) { +define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) nounwind { ; BE-LABEL: llrint_v1i64_v1f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -112(r1) ; BE-NEXT: std r0, 128(r1) -; BE-NEXT: .cfi_def_cfa_offset 112 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: bl __truncsfhf2 ; BE-NEXT: nop ; BE-NEXT: clrldi r3, r3, 48 @@ -34,8 +36,6 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) { ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -32(r1) ; CHECK-NEXT: std r0, 48(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop ; CHECK-NEXT: clrldi r3, r3, 48 @@ -53,8 +53,6 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) { ; FAST-NEXT: mflr r0 ; FAST-NEXT: stdu r1, -32(r1) ; FAST-NEXT: std r0, 48(r1) -; FAST-NEXT: .cfi_def_cfa_offset 32 -; FAST-NEXT: .cfi_offset lr, 16 ; FAST-NEXT: bl __truncsfhf2 ; FAST-NEXT: nop ; FAST-NEXT: clrldi r3, r3, 48 @@ -71,16 +69,12 @@ define <1 x i64> @llrint_v1i64_v1f16(<1 x half> %x) { } declare <1 x i64> @llvm.llrint.v1i64.v1f16(<1 x half>) -define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) { +define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) nounwind { ; BE-LABEL: llrint_v1i64_v2f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -160(r1) ; BE-NEXT: std r0, 176(r1) -; BE-NEXT: .cfi_def_cfa_offset 160 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset r30, -24 -; BE-NEXT: .cfi_offset f31, -8 ; BE-NEXT: stfd f31, 152(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f31, f1 ; BE-NEXT: fmr f1, f2 @@ -118,17 +112,12 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -96(r1) -; CHECK-NEXT: std r0, 112(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 96 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r30, -24 -; CHECK-NEXT: .cfi_offset f31, -8 -; CHECK-NEXT: .cfi_offset v31, -48 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 112(r1) ; CHECK-NEXT: std r30, 72(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f31, 88(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f31, f2 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop ; CHECK-NEXT: fmr f1, f31 @@ -153,7 +142,7 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) { ; CHECK-NEXT: lfd f31, 88(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r30, 72(r1) # 8-byte Folded Reload ; CHECK-NEXT: xxmrghd v2, vs0, v31 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 96 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -162,10 +151,6 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) { ; FAST-LABEL: llrint_v1i64_v2f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: .cfi_def_cfa_offset 48 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset f30, -16 -; FAST-NEXT: .cfi_offset f31, -8 ; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill ; FAST-NEXT: stdu r1, -48(r1) @@ -202,20 +187,12 @@ define <2 x i64> @llrint_v1i64_v2f16(<2 x half> %x) { } declare <2 x i64> @llvm.llrint.v2i64.v2f16(<2 x half>) -define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { +define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) nounwind { ; BE-LABEL: llrint_v4i64_v4f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -208(r1) ; BE-NEXT: std r0, 224(r1) -; BE-NEXT: .cfi_def_cfa_offset 208 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset r28, -56 -; BE-NEXT: .cfi_offset r29, -48 -; BE-NEXT: .cfi_offset r30, -40 -; BE-NEXT: .cfi_offset f29, -24 -; BE-NEXT: .cfi_offset f30, -16 -; BE-NEXT: .cfi_offset f31, -8 ; BE-NEXT: stfd f29, 184(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f29, f1 ; BE-NEXT: fmr f1, f2 @@ -289,18 +266,8 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -144(r1) -; CHECK-NEXT: std r0, 160(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 144 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r28, -56 -; CHECK-NEXT: .cfi_offset r29, -48 -; CHECK-NEXT: .cfi_offset r30, -40 -; CHECK-NEXT: .cfi_offset f29, -24 -; CHECK-NEXT: .cfi_offset f30, -16 -; CHECK-NEXT: .cfi_offset f31, -8 -; CHECK-NEXT: .cfi_offset v30, -96 -; CHECK-NEXT: .cfi_offset v31, -80 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 160(r1) ; CHECK-NEXT: std r28, 88(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, 96(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, 104(r1) # 8-byte Folded Spill @@ -308,11 +275,11 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { ; CHECK-NEXT: fmr f29, f2 ; CHECK-NEXT: stfd f30, 128(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f30, f3 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: stfd f31, 136(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f31, f4 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop ; CHECK-NEXT: fmr f1, f29 @@ -365,11 +332,11 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { ; CHECK-NEXT: lfd f29, 120(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r30, 104(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, 96(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 ; CHECK-NEXT: ld r28, 88(r1) # 8-byte Folded Reload ; CHECK-NEXT: xxmrghd v3, vs0, v30 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 144 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -378,12 +345,6 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { ; FAST-LABEL: llrint_v4i64_v4f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: .cfi_def_cfa_offset 64 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset f28, -32 -; FAST-NEXT: .cfi_offset f29, -24 -; FAST-NEXT: .cfi_offset f30, -16 -; FAST-NEXT: .cfi_offset f31, -8 ; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill @@ -447,28 +408,12 @@ define <4 x i64> @llrint_v4i64_v4f16(<4 x half> %x) { } declare <4 x i64> @llvm.llrint.v4i64.v4f16(<4 x half>) -define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { +define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) nounwind { ; BE-LABEL: llrint_v8i64_v8f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -304(r1) ; BE-NEXT: std r0, 320(r1) -; BE-NEXT: .cfi_def_cfa_offset 304 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset r24, -120 -; BE-NEXT: .cfi_offset r25, -112 -; BE-NEXT: .cfi_offset r26, -104 -; BE-NEXT: .cfi_offset r27, -96 -; BE-NEXT: .cfi_offset r28, -88 -; BE-NEXT: .cfi_offset r29, -80 -; BE-NEXT: .cfi_offset r30, -72 -; BE-NEXT: .cfi_offset f25, -56 -; BE-NEXT: .cfi_offset f26, -48 -; BE-NEXT: .cfi_offset f27, -40 -; BE-NEXT: .cfi_offset f28, -32 -; BE-NEXT: .cfi_offset f29, -24 -; BE-NEXT: .cfi_offset f30, -16 -; BE-NEXT: .cfi_offset f31, -8 ; BE-NEXT: stfd f25, 248(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f25, f1 ; BE-NEXT: fmr f1, f2 @@ -614,44 +559,24 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -240(r1) -; CHECK-NEXT: std r0, 256(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 240 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r24, -120 -; CHECK-NEXT: .cfi_offset r25, -112 -; CHECK-NEXT: .cfi_offset r26, -104 -; CHECK-NEXT: .cfi_offset r27, -96 -; CHECK-NEXT: .cfi_offset r28, -88 -; CHECK-NEXT: .cfi_offset r29, -80 -; CHECK-NEXT: .cfi_offset r30, -72 -; CHECK-NEXT: .cfi_offset f25, -56 -; CHECK-NEXT: .cfi_offset f26, -48 -; CHECK-NEXT: .cfi_offset f27, -40 -; CHECK-NEXT: .cfi_offset f28, -32 -; CHECK-NEXT: .cfi_offset f29, -24 -; CHECK-NEXT: .cfi_offset f30, -16 -; CHECK-NEXT: .cfi_offset f31, -8 -; CHECK-NEXT: .cfi_offset v28, -192 -; CHECK-NEXT: .cfi_offset v29, -176 -; CHECK-NEXT: .cfi_offset v30, -160 -; CHECK-NEXT: .cfi_offset v31, -144 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 256(r1) ; CHECK-NEXT: std r24, 120(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r25, 128(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r26, 136(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r27, 144(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r28, 152(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, 160(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, 168(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: std r30, 168(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f25, 184(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f25, f2 ; CHECK-NEXT: stfd f26, 192(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f26, f3 ; CHECK-NEXT: stfd f27, 200(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f27, f4 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: stfd f28, 208(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f28, f5 @@ -659,11 +584,11 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { ; CHECK-NEXT: fmr f29, f6 ; CHECK-NEXT: stfd f30, 224(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f30, f7 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: stfd f31, 232(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f31, f8 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop ; CHECK-NEXT: fmr f1, f25 @@ -766,7 +691,7 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { ; CHECK-NEXT: vmr v4, v29 ; CHECK-NEXT: lfd f30, 224(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f29, 216(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: lfd f28, 208(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f27, 200(r1) # 8-byte Folded Reload @@ -774,7 +699,7 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { ; CHECK-NEXT: lfd f25, 184(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r30, 168(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, 160(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: ld r28, 152(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r27, 144(r1) # 8-byte Folded Reload @@ -782,9 +707,9 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { ; CHECK-NEXT: ld r26, 136(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r25, 128(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r24, 120(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 240 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -793,16 +718,6 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { ; FAST-LABEL: llrint_v8i64_v8f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: .cfi_def_cfa_offset 96 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset f24, -64 -; FAST-NEXT: .cfi_offset f25, -56 -; FAST-NEXT: .cfi_offset f26, -48 -; FAST-NEXT: .cfi_offset f27, -40 -; FAST-NEXT: .cfi_offset f28, -32 -; FAST-NEXT: .cfi_offset f29, -24 -; FAST-NEXT: .cfi_offset f30, -16 -; FAST-NEXT: .cfi_offset f31, -8 ; FAST-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f25, -56(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill @@ -920,44 +835,12 @@ define <8 x i64> @llrint_v8i64_v8f16(<8 x half> %x) { } declare <8 x i64> @llvm.llrint.v8i64.v8f16(<8 x half>) -define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { +define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) nounwind { ; BE-LABEL: llrint_v16i64_v16f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -496(r1) ; BE-NEXT: std r0, 512(r1) -; BE-NEXT: .cfi_def_cfa_offset 496 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset r16, -248 -; BE-NEXT: .cfi_offset r17, -240 -; BE-NEXT: .cfi_offset r18, -232 -; BE-NEXT: .cfi_offset r19, -224 -; BE-NEXT: .cfi_offset r20, -216 -; BE-NEXT: .cfi_offset r21, -208 -; BE-NEXT: .cfi_offset r22, -200 -; BE-NEXT: .cfi_offset r23, -192 -; BE-NEXT: .cfi_offset r24, -184 -; BE-NEXT: .cfi_offset r25, -176 -; BE-NEXT: .cfi_offset r26, -168 -; BE-NEXT: .cfi_offset r27, -160 -; BE-NEXT: .cfi_offset r28, -152 -; BE-NEXT: .cfi_offset r29, -144 -; BE-NEXT: .cfi_offset r30, -136 -; BE-NEXT: .cfi_offset f17, -120 -; BE-NEXT: .cfi_offset f18, -112 -; BE-NEXT: .cfi_offset f19, -104 -; BE-NEXT: .cfi_offset f20, -96 -; BE-NEXT: .cfi_offset f21, -88 -; BE-NEXT: .cfi_offset f22, -80 -; BE-NEXT: .cfi_offset f23, -72 -; BE-NEXT: .cfi_offset f24, -64 -; BE-NEXT: .cfi_offset f25, -56 -; BE-NEXT: .cfi_offset f26, -48 -; BE-NEXT: .cfi_offset f27, -40 -; BE-NEXT: .cfi_offset f28, -32 -; BE-NEXT: .cfi_offset f29, -24 -; BE-NEXT: .cfi_offset f30, -16 -; BE-NEXT: .cfi_offset f31, -8 ; BE-NEXT: stfd f20, 400(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f20, f1 ; BE-NEXT: fmr f1, f2 @@ -1244,105 +1127,65 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -432(r1) -; CHECK-NEXT: std r0, 448(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 432 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r16, -248 -; CHECK-NEXT: .cfi_offset r17, -240 -; CHECK-NEXT: .cfi_offset r18, -232 -; CHECK-NEXT: .cfi_offset r19, -224 -; CHECK-NEXT: .cfi_offset r20, -216 -; CHECK-NEXT: .cfi_offset r21, -208 -; CHECK-NEXT: .cfi_offset r22, -200 -; CHECK-NEXT: .cfi_offset r23, -192 -; CHECK-NEXT: .cfi_offset r24, -184 -; CHECK-NEXT: .cfi_offset r25, -176 -; CHECK-NEXT: .cfi_offset r26, -168 -; CHECK-NEXT: .cfi_offset r27, -160 -; CHECK-NEXT: .cfi_offset r28, -152 -; CHECK-NEXT: .cfi_offset r29, -144 -; CHECK-NEXT: .cfi_offset r30, -136 -; CHECK-NEXT: .cfi_offset f17, -120 -; CHECK-NEXT: .cfi_offset f18, -112 -; CHECK-NEXT: .cfi_offset f19, -104 -; CHECK-NEXT: .cfi_offset f20, -96 -; CHECK-NEXT: .cfi_offset f21, -88 -; CHECK-NEXT: .cfi_offset f22, -80 -; CHECK-NEXT: .cfi_offset f23, -72 -; CHECK-NEXT: .cfi_offset f24, -64 -; CHECK-NEXT: .cfi_offset f25, -56 -; CHECK-NEXT: .cfi_offset f26, -48 -; CHECK-NEXT: .cfi_offset f27, -40 -; CHECK-NEXT: .cfi_offset f28, -32 -; CHECK-NEXT: .cfi_offset f29, -24 -; CHECK-NEXT: .cfi_offset f30, -16 -; CHECK-NEXT: .cfi_offset f31, -8 -; CHECK-NEXT: .cfi_offset v24, -384 -; CHECK-NEXT: .cfi_offset v25, -368 -; CHECK-NEXT: .cfi_offset v26, -352 -; CHECK-NEXT: .cfi_offset v27, -336 -; CHECK-NEXT: .cfi_offset v28, -320 -; CHECK-NEXT: .cfi_offset v29, -304 -; CHECK-NEXT: .cfi_offset v30, -288 -; CHECK-NEXT: .cfi_offset v31, -272 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 448(r1) ; CHECK-NEXT: std r16, 184(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r17, 192(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r18, 200(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r19, 208(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r20, 216(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r21, 224(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r22, 232(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: std r22, 232(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r23, 240(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r24, 248(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r25, 256(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r26, 264(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r27, 272(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r28, 280(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: std r28, 280(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, 288(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, 296(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f17, 312(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f18, 320(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f19, 328(r1) # 8-byte Folded Spill +; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: stfd f20, 336(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f20, f2 -; CHECK-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: stfd f21, 344(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f21, f3 ; CHECK-NEXT: stfd f22, 352(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f22, f4 +; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 112 ; CHECK-NEXT: stfd f23, 360(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f23, f5 -; CHECK-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 112 ; CHECK-NEXT: stfd f24, 368(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f24, f6 ; CHECK-NEXT: stfd f25, 376(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f25, f7 +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 128 ; CHECK-NEXT: stfd f26, 384(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f26, f8 -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 128 ; CHECK-NEXT: stfd f27, 392(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f27, f9 ; CHECK-NEXT: stfd f28, 400(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f28, f10 +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 144 ; CHECK-NEXT: stfd f29, 408(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f29, f11 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 144 ; CHECK-NEXT: stfd f30, 416(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f30, f12 ; CHECK-NEXT: stfd f31, 424(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f31, f13 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 160 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop ; CHECK-NEXT: fmr f1, f20 @@ -1545,7 +1388,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: vmr v4, v29 ; CHECK-NEXT: lfd f30, 416(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f29, 408(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 144 ; CHECK-NEXT: vmr v5, v28 ; CHECK-NEXT: vmr v6, v27 @@ -1553,7 +1396,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: vmr v8, v25 ; CHECK-NEXT: lfd f28, 400(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f27, 392(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 128 ; CHECK-NEXT: lfd f26, 384(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f25, 376(r1) # 8-byte Folded Reload @@ -1561,7 +1404,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: lfd f24, 368(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f23, 360(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f22, 352(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 112 ; CHECK-NEXT: lfd f21, 344(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r30, 296(r1) # 8-byte Folded Reload @@ -1569,7 +1412,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: lfd f19, 328(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, 288(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, 280(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: lfd f18, 320(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r27, 272(r1) # 8-byte Folded Reload @@ -1577,7 +1420,7 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: ld r26, 264(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r25, 256(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r24, 248(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: ld r23, 240(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r22, 232(r1) # 8-byte Folded Reload @@ -1585,13 +1428,13 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: ld r20, 216(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r19, 208(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r18, 200(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: ld r17, 192(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r16, 184(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 432 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -1600,24 +1443,6 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { ; FAST-LABEL: llrint_v16i64_v16f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: .cfi_def_cfa_offset 160 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset f16, -128 -; FAST-NEXT: .cfi_offset f17, -120 -; FAST-NEXT: .cfi_offset f18, -112 -; FAST-NEXT: .cfi_offset f19, -104 -; FAST-NEXT: .cfi_offset f20, -96 -; FAST-NEXT: .cfi_offset f21, -88 -; FAST-NEXT: .cfi_offset f22, -80 -; FAST-NEXT: .cfi_offset f23, -72 -; FAST-NEXT: .cfi_offset f24, -64 -; FAST-NEXT: .cfi_offset f25, -56 -; FAST-NEXT: .cfi_offset f26, -48 -; FAST-NEXT: .cfi_offset f27, -40 -; FAST-NEXT: .cfi_offset f28, -32 -; FAST-NEXT: .cfi_offset f29, -24 -; FAST-NEXT: .cfi_offset f30, -16 -; FAST-NEXT: .cfi_offset f31, -8 ; FAST-NEXT: stfd f16, -128(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f17, -120(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f18, -112(r1) # 8-byte Folded Spill @@ -1841,50 +1666,12 @@ define <16 x i64> @llrint_v16i64_v16f16(<16 x half> %x) { } declare <16 x i64> @llvm.llrint.v16i64.v16f16(<16 x half>) -define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { +define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) nounwind { ; BE-LABEL: llrint_v32i64_v32f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -864(r1) ; BE-NEXT: std r0, 880(r1) -; BE-NEXT: .cfi_def_cfa_offset 864 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset r14, -288 -; BE-NEXT: .cfi_offset r15, -280 -; BE-NEXT: .cfi_offset r16, -272 -; BE-NEXT: .cfi_offset r17, -264 -; BE-NEXT: .cfi_offset r18, -256 -; BE-NEXT: .cfi_offset r19, -248 -; BE-NEXT: .cfi_offset r20, -240 -; BE-NEXT: .cfi_offset r21, -232 -; BE-NEXT: .cfi_offset r22, -224 -; BE-NEXT: .cfi_offset r23, -216 -; BE-NEXT: .cfi_offset r24, -208 -; BE-NEXT: .cfi_offset r25, -200 -; BE-NEXT: .cfi_offset r26, -192 -; BE-NEXT: .cfi_offset r27, -184 -; BE-NEXT: .cfi_offset r28, -176 -; BE-NEXT: .cfi_offset r29, -168 -; BE-NEXT: .cfi_offset r30, -160 -; BE-NEXT: .cfi_offset r31, -152 -; BE-NEXT: .cfi_offset f14, -144 -; BE-NEXT: .cfi_offset f15, -136 -; BE-NEXT: .cfi_offset f16, -128 -; BE-NEXT: .cfi_offset f17, -120 -; BE-NEXT: .cfi_offset f18, -112 -; BE-NEXT: .cfi_offset f19, -104 -; BE-NEXT: .cfi_offset f20, -96 -; BE-NEXT: .cfi_offset f21, -88 -; BE-NEXT: .cfi_offset f22, -80 -; BE-NEXT: .cfi_offset f23, -72 -; BE-NEXT: .cfi_offset f24, -64 -; BE-NEXT: .cfi_offset f25, -56 -; BE-NEXT: .cfi_offset f26, -48 -; BE-NEXT: .cfi_offset f27, -40 -; BE-NEXT: .cfi_offset f28, -32 -; BE-NEXT: .cfi_offset f29, -24 -; BE-NEXT: .cfi_offset f30, -16 -; BE-NEXT: .cfi_offset f31, -8 ; BE-NEXT: stfd f20, 768(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f20, f1 ; BE-NEXT: fmr f1, f2 @@ -1924,6 +1711,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; BE-NEXT: stfd f30, 848(r1) # 8-byte Folded Spill ; BE-NEXT: stfd f31, 856(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f31, f13 +; BE-NEXT: mr r30, r3 ; BE-NEXT: fmr f29, f12 ; BE-NEXT: fmr f30, f11 ; BE-NEXT: fmr f28, f10 @@ -1934,7 +1722,6 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; BE-NEXT: fmr f23, f5 ; BE-NEXT: fmr f22, f4 ; BE-NEXT: fmr f21, f3 -; BE-NEXT: mr r30, r3 ; BE-NEXT: bl __truncsfhf2 ; BE-NEXT: nop ; BE-NEXT: fmr f1, f20 @@ -2437,98 +2224,48 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -688(r1) -; CHECK-NEXT: std r0, 704(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 688 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r14, -288 -; CHECK-NEXT: .cfi_offset r15, -280 -; CHECK-NEXT: .cfi_offset r16, -272 -; CHECK-NEXT: .cfi_offset r17, -264 -; CHECK-NEXT: .cfi_offset r18, -256 -; CHECK-NEXT: .cfi_offset r19, -248 -; CHECK-NEXT: .cfi_offset r20, -240 -; CHECK-NEXT: .cfi_offset r21, -232 -; CHECK-NEXT: .cfi_offset r22, -224 -; CHECK-NEXT: .cfi_offset r23, -216 -; CHECK-NEXT: .cfi_offset r24, -208 -; CHECK-NEXT: .cfi_offset r25, -200 -; CHECK-NEXT: .cfi_offset r26, -192 -; CHECK-NEXT: .cfi_offset r27, -184 -; CHECK-NEXT: .cfi_offset r28, -176 -; CHECK-NEXT: .cfi_offset r29, -168 -; CHECK-NEXT: .cfi_offset r30, -160 -; CHECK-NEXT: .cfi_offset r31, -152 -; CHECK-NEXT: .cfi_offset f14, -144 -; CHECK-NEXT: .cfi_offset f15, -136 -; CHECK-NEXT: .cfi_offset f16, -128 -; CHECK-NEXT: .cfi_offset f17, -120 -; CHECK-NEXT: .cfi_offset f18, -112 -; CHECK-NEXT: .cfi_offset f19, -104 -; CHECK-NEXT: .cfi_offset f20, -96 -; CHECK-NEXT: .cfi_offset f21, -88 -; CHECK-NEXT: .cfi_offset f22, -80 -; CHECK-NEXT: .cfi_offset f23, -72 -; CHECK-NEXT: .cfi_offset f24, -64 -; CHECK-NEXT: .cfi_offset f25, -56 -; CHECK-NEXT: .cfi_offset f26, -48 -; CHECK-NEXT: .cfi_offset f27, -40 -; CHECK-NEXT: .cfi_offset f28, -32 -; CHECK-NEXT: .cfi_offset f29, -24 -; CHECK-NEXT: .cfi_offset f30, -16 -; CHECK-NEXT: .cfi_offset f31, -8 -; CHECK-NEXT: .cfi_offset v20, -480 -; CHECK-NEXT: .cfi_offset v21, -464 -; CHECK-NEXT: .cfi_offset v22, -448 -; CHECK-NEXT: .cfi_offset v23, -432 -; CHECK-NEXT: .cfi_offset v24, -416 -; CHECK-NEXT: .cfi_offset v25, -400 -; CHECK-NEXT: .cfi_offset v26, -384 -; CHECK-NEXT: .cfi_offset v27, -368 -; CHECK-NEXT: .cfi_offset v28, -352 -; CHECK-NEXT: .cfi_offset v29, -336 -; CHECK-NEXT: .cfi_offset v30, -320 -; CHECK-NEXT: .cfi_offset v31, -304 ; CHECK-NEXT: li r4, 208 +; CHECK-NEXT: std r0, 704(r1) ; CHECK-NEXT: std r14, 400(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r15, 408(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r16, 416(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r17, 424(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r18, 432(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r19, 440(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r20, 448(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v20, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 224 +; CHECK-NEXT: std r20, 448(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r21, 456(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r22, 464(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r23, 472(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r24, 480(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r25, 488(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r26, 496(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v21, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 240 +; CHECK-NEXT: std r26, 496(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r27, 504(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r28, 512(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, 520(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, 528(r1) # 8-byte Folded Spill ; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: std r31, 536(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v22, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 256 +; CHECK-NEXT: std r31, 536(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f14, 544(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f15, 552(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f16, 560(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f17, 568(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f18, 576(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f19, 584(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v23, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 272 +; CHECK-NEXT: stfd f19, 584(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f20, 592(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f20, f2 ; CHECK-NEXT: stfd f21, 600(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f21, f3 ; CHECK-NEXT: stfd f22, 608(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f22, f4 -; CHECK-NEXT: stvx v24, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 288 ; CHECK-NEXT: stfd f23, 616(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f23, f5 @@ -2536,7 +2273,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: fmr f24, f6 ; CHECK-NEXT: stfd f25, 632(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f25, f7 -; CHECK-NEXT: stvx v25, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 304 ; CHECK-NEXT: stfd f26, 640(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f26, f8 @@ -2544,7 +2281,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: fmr f27, f9 ; CHECK-NEXT: stfd f28, 656(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f28, f10 -; CHECK-NEXT: stvx v26, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 320 ; CHECK-NEXT: stfd f29, 664(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f29, f11 @@ -2552,15 +2289,15 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: fmr f30, f12 ; CHECK-NEXT: stfd f31, 680(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f31, f13 -; CHECK-NEXT: stvx v27, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 336 -; CHECK-NEXT: stvx v28, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 352 -; CHECK-NEXT: stvx v29, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 368 -; CHECK-NEXT: stvx v30, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 384 -; CHECK-NEXT: stvx v31, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop ; CHECK-NEXT: fmr f1, f20 @@ -3039,7 +2776,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: li r3, 384 ; CHECK-NEXT: xxswapd vs4, vs4 ; CHECK-NEXT: stxvd2x vs4, 0, r30 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 368 ; CHECK-NEXT: lfd f31, 680(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f30, 672(r1) # 8-byte Folded Reload @@ -3057,7 +2794,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: lfd f18, 576(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f17, 568(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f16, 560(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 352 ; CHECK-NEXT: lfd f15, 552(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f14, 544(r1) # 8-byte Folded Reload @@ -3065,7 +2802,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: ld r30, 528(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, 520(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, 512(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 336 ; CHECK-NEXT: ld r27, 504(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r26, 496(r1) # 8-byte Folded Reload @@ -3073,7 +2810,7 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: ld r24, 480(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r23, 472(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r22, 464(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 320 ; CHECK-NEXT: ld r21, 456(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r20, 448(r1) # 8-byte Folded Reload @@ -3081,23 +2818,23 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: ld r18, 432(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r17, 424(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r16, 416(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 304 ; CHECK-NEXT: ld r15, 408(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r14, 400(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 288 -; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 272 -; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 256 -; CHECK-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 240 -; CHECK-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 224 -; CHECK-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 208 -; CHECK-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 688 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -3107,95 +2844,62 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 ; FAST-NEXT: stdu r1, -480(r1) -; FAST-NEXT: std r0, 496(r1) -; FAST-NEXT: .cfi_def_cfa_offset 480 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset r30, -160 -; FAST-NEXT: .cfi_offset f14, -144 -; FAST-NEXT: .cfi_offset f15, -136 -; FAST-NEXT: .cfi_offset f16, -128 -; FAST-NEXT: .cfi_offset f17, -120 -; FAST-NEXT: .cfi_offset f18, -112 -; FAST-NEXT: .cfi_offset f19, -104 -; FAST-NEXT: .cfi_offset f20, -96 -; FAST-NEXT: .cfi_offset f21, -88 -; FAST-NEXT: .cfi_offset f22, -80 -; FAST-NEXT: .cfi_offset f23, -72 -; FAST-NEXT: .cfi_offset f24, -64 -; FAST-NEXT: .cfi_offset f25, -56 -; FAST-NEXT: .cfi_offset f26, -48 -; FAST-NEXT: .cfi_offset f27, -40 -; FAST-NEXT: .cfi_offset f28, -32 -; FAST-NEXT: .cfi_offset f29, -24 -; FAST-NEXT: .cfi_offset f30, -16 -; FAST-NEXT: .cfi_offset f31, -8 -; FAST-NEXT: .cfi_offset v20, -352 -; FAST-NEXT: .cfi_offset v21, -336 -; FAST-NEXT: .cfi_offset v22, -320 -; FAST-NEXT: .cfi_offset v23, -304 -; FAST-NEXT: .cfi_offset v24, -288 -; FAST-NEXT: .cfi_offset v25, -272 -; FAST-NEXT: .cfi_offset v26, -256 -; FAST-NEXT: .cfi_offset v27, -240 -; FAST-NEXT: .cfi_offset v28, -224 -; FAST-NEXT: .cfi_offset v29, -208 -; FAST-NEXT: .cfi_offset v30, -192 -; FAST-NEXT: .cfi_offset v31, -176 ; FAST-NEXT: li r4, 128 +; FAST-NEXT: std r0, 496(r1) ; FAST-NEXT: std r30, 320(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r30, r3 ; FAST-NEXT: stfd f14, 336(r1) # 8-byte Folded Spill -; FAST-NEXT: fmr f14, f5 ; FAST-NEXT: stfd f15, 344(r1) # 8-byte Folded Spill +; FAST-NEXT: fmr f14, f5 ; FAST-NEXT: stfd f16, 352(r1) # 8-byte Folded Spill -; FAST-NEXT: fmr f16, f4 -; FAST-NEXT: mr r30, r3 -; FAST-NEXT: stvx v20, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 144 +; FAST-NEXT: fmr f16, f4 ; FAST-NEXT: stfd f17, 360(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f18, 368(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f19, 376(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f20, 384(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f21, 392(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f22, 400(r1) # 8-byte Folded Spill -; FAST-NEXT: stvx v21, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 160 +; FAST-NEXT: stfd f22, 400(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f23, 408(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f24, 416(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f25, 424(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f26, 432(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f27, 440(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f28, 448(r1) # 8-byte Folded Spill -; FAST-NEXT: stvx v22, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 176 ; FAST-NEXT: xxlor v22, f3, f3 +; FAST-NEXT: stfd f28, 448(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f29, 456(r1) # 8-byte Folded Spill ; FAST-NEXT: fmr f29, f9 ; FAST-NEXT: stfd f30, 464(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f31, 472(r1) # 8-byte Folded Spill -; FAST-NEXT: stvx v23, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 192 ; FAST-NEXT: xxlor v23, f2, f2 -; FAST-NEXT: stvx v24, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 208 -; FAST-NEXT: stvx v25, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 224 ; FAST-NEXT: xxlor v25, f13, f13 -; FAST-NEXT: stvx v26, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 240 ; FAST-NEXT: xxlor v26, f12, f12 -; FAST-NEXT: stvx v27, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 256 ; FAST-NEXT: xxlor v27, f11, f11 -; FAST-NEXT: stvx v28, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 272 ; FAST-NEXT: xxlor v28, f10, f10 -; FAST-NEXT: stvx v29, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 288 ; FAST-NEXT: xxlor v29, f8, f8 -; FAST-NEXT: stvx v30, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 304 ; FAST-NEXT: xxlor v30, f7, f7 -; FAST-NEXT: stvx v31, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 44 ; FAST-NEXT: xxlor v31, f6, f6 ; FAST-NEXT: stxsspx f1, r1, r4 # 4-byte Folded Spill @@ -3624,30 +3328,30 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { ; FAST-NEXT: lfd f16, 352(r1) # 8-byte Folded Reload ; FAST-NEXT: lfd f15, 344(r1) # 8-byte Folded Reload ; FAST-NEXT: lfd f14, 336(r1) # 8-byte Folded Reload -; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 288 ; FAST-NEXT: ld r30, 320(r1) # 8-byte Folded Reload -; FAST-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 272 -; FAST-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 256 -; FAST-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 240 -; FAST-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 224 -; FAST-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 208 -; FAST-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 192 -; FAST-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 176 -; FAST-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 160 -; FAST-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 144 -; FAST-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 128 -; FAST-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: addi r1, r1, 480 ; FAST-NEXT: ld r0, 16(r1) ; FAST-NEXT: mtlr r0 @@ -3657,14 +3361,12 @@ define <32 x i64> @llrint_v32i64_v32f16(<32 x half> %x) { } declare <32 x i64> @llvm.llrint.v32i64.v32f16(<32 x half>) -define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { +define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) nounwind { ; BE-LABEL: llrint_v1i64_v1f32: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -112(r1) ; BE-NEXT: std r0, 128(r1) -; BE-NEXT: .cfi_def_cfa_offset 112 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: bl llrintf ; BE-NEXT: nop ; BE-NEXT: addi r1, r1, 112 @@ -3677,8 +3379,6 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -32(r1) ; CHECK-NEXT: std r0, 48(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 @@ -3696,15 +3396,13 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { } declare <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float>) -define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { +define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) nounwind { ; BE-LABEL: llrint_v2i64_v2f32: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -144(r1) -; BE-NEXT: std r0, 160(r1) -; BE-NEXT: .cfi_def_cfa_offset 144 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: std r0, 160(r1) ; BE-NEXT: stxvw4x v2, 0, r3 ; BE-NEXT: lfs f1, 116(r1) ; BE-NEXT: bl llrintf @@ -3725,14 +3423,11 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -64(r1) -; CHECK-NEXT: std r0, 80(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 80(r1) ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v2 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop @@ -3744,7 +3439,7 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 48 ; CHECK-NEXT: xxmrghd v2, vs0, v31 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 64 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -3769,15 +3464,13 @@ define <2 x i64> @llrint_v2i64_v2f32(<2 x float> %x) { } declare <2 x i64> @llvm.llrint.v2i64.v2f32(<2 x float>) -define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { +define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) nounwind { ; BE-LABEL: llrint_v4i64_v4f32: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -160(r1) -; BE-NEXT: std r0, 176(r1) -; BE-NEXT: .cfi_def_cfa_offset 160 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: std r0, 176(r1) ; BE-NEXT: stxvw4x v2, 0, r3 ; BE-NEXT: lfs f1, 116(r1) ; BE-NEXT: bl llrintf @@ -3808,17 +3501,13 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -80(r1) -; CHECK-NEXT: std r0, 96(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 80 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 96(r1) ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v2 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop @@ -3841,9 +3530,9 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: vmr v2, v30 ; CHECK-NEXT: xxmrghd v3, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 80 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -3879,15 +3568,13 @@ define <4 x i64> @llrint_v4i64_v4f32(<4 x float> %x) { } declare <4 x i64> @llvm.llrint.v4i64.v4f32(<4 x float>) -define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { +define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) nounwind { ; BE-LABEL: llrint_v8i64_v8f32: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -208(r1) -; BE-NEXT: std r0, 224(r1) -; BE-NEXT: .cfi_def_cfa_offset 208 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: std r0, 224(r1) ; BE-NEXT: stxvw4x v2, 0, r3 ; BE-NEXT: addi r3, r1, 128 ; BE-NEXT: stxvw4x v3, 0, r3 @@ -3940,24 +3627,18 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -112(r1) -; CHECK-NEXT: std r0, 128(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 112 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v28, -64 -; CHECK-NEXT: .cfi_offset v29, -48 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 ; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: std r0, 128(r1) +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: vmr v30, v2 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v3 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop @@ -4003,13 +3684,13 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { ; CHECK-NEXT: vmr v2, v29 ; CHECK-NEXT: vmr v4, v28 ; CHECK-NEXT: xxmrghd v5, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 112 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -4067,15 +3748,13 @@ define <8 x i64> @llrint_v8i64_v8f32(<8 x float> %x) { } declare <8 x i64> @llvm.llrint.v8i64.v8f32(<8 x float>) -define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { +define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) nounwind { ; BE-LABEL: llrint_v16i64_v16f32: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -304(r1) -; BE-NEXT: std r0, 320(r1) -; BE-NEXT: .cfi_def_cfa_offset 304 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: std r0, 320(r1) ; BE-NEXT: stxvw4x v2, 0, r3 ; BE-NEXT: addi r3, r1, 128 ; BE-NEXT: stxvw4x v3, 0, r3 @@ -4172,38 +3851,28 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -176(r1) -; CHECK-NEXT: std r0, 192(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 176 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v24, -128 -; CHECK-NEXT: .cfi_offset v25, -112 -; CHECK-NEXT: .cfi_offset v26, -96 -; CHECK-NEXT: .cfi_offset v27, -80 -; CHECK-NEXT: .cfi_offset v28, -64 -; CHECK-NEXT: .cfi_offset v29, -48 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 192(r1) ; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: vmr v26, v3 -; CHECK-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 112 -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 128 ; CHECK-NEXT: vmr v28, v4 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 144 ; CHECK-NEXT: vmr v29, v2 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 160 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v5 ; CHECK-NEXT: bl llrintf ; CHECK-NEXT: nop @@ -4295,21 +3964,21 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { ; CHECK-NEXT: vmr v6, v25 ; CHECK-NEXT: vmr v8, v24 ; CHECK-NEXT: xxmrghd v9, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 144 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 128 -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 112 -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 96 -; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 176 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -4411,14 +4080,12 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { } declare <16 x i64> @llvm.llrint.v16i64.v16f32(<16 x float>) -define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { +define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) nounwind { ; BE-LABEL: llrint_v1i64_v1f64: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -112(r1) ; BE-NEXT: std r0, 128(r1) -; BE-NEXT: .cfi_def_cfa_offset 112 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: bl llrint ; BE-NEXT: nop ; BE-NEXT: addi r1, r1, 112 @@ -4431,8 +4098,6 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -32(r1) ; CHECK-NEXT: std r0, 48(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: bl llrint ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 @@ -4450,16 +4115,13 @@ define <1 x i64> @llrint_v1i64_v1f64(<1 x double> %x) { } declare <1 x i64> @llvm.llrint.v1i64.v1f64(<1 x double>) -define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { +define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) nounwind { ; BE-LABEL: llrint_v2i64_v2f64: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -160(r1) -; BE-NEXT: std r0, 176(r1) -; BE-NEXT: .cfi_def_cfa_offset 160 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset v31, -16 ; BE-NEXT: li r3, 144 +; BE-NEXT: std r0, 176(r1) ; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; BE-NEXT: vmr v31, v2 ; BE-NEXT: xxlor f1, v31, v31 @@ -4483,12 +4145,9 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -64(r1) -; CHECK-NEXT: std r0, 80(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: std r0, 80(r1) +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v2 ; CHECK-NEXT: xxlor f1, v31, v31 ; CHECK-NEXT: bl llrint @@ -4500,7 +4159,7 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 48 ; CHECK-NEXT: xxmrghd v2, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 64 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -4523,17 +4182,13 @@ define <2 x i64> @llrint_v2i64_v2f64(<2 x double> %x) { } declare <2 x i64> @llvm.llrint.v2i64.v2f64(<2 x double>) -define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { +define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) nounwind { ; BE-LABEL: llrint_v4i64_v4f64: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -192(r1) -; BE-NEXT: std r0, 208(r1) -; BE-NEXT: .cfi_def_cfa_offset 192 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset v30, -32 -; BE-NEXT: .cfi_offset v31, -16 ; BE-NEXT: li r3, 160 +; BE-NEXT: std r0, 208(r1) ; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; BE-NEXT: vmr v30, v2 ; BE-NEXT: li r3, 176 @@ -4572,17 +4227,13 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -80(r1) -; CHECK-NEXT: std r0, 96(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 80 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: std r0, 96(r1) +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v30, v2 ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: xxlor f1, v30, v30 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v3 ; CHECK-NEXT: bl llrint ; CHECK-NEXT: nop @@ -4603,9 +4254,9 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: vmr v2, v30 ; CHECK-NEXT: xxmrghd v3, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 80 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -4637,25 +4288,19 @@ define <4 x i64> @llrint_v4i64_v4f64(<4 x double> %x) { } declare <4 x i64> @llvm.llrint.v4i64.v4f64(<4 x double>) -define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { +define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) nounwind { ; BE-LABEL: llrint_v8i64_v8f64: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -256(r1) -; BE-NEXT: std r0, 272(r1) -; BE-NEXT: .cfi_def_cfa_offset 256 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset v28, -64 -; BE-NEXT: .cfi_offset v29, -48 -; BE-NEXT: .cfi_offset v30, -32 -; BE-NEXT: .cfi_offset v31, -16 ; BE-NEXT: li r3, 192 +; BE-NEXT: std r0, 272(r1) ; BE-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; BE-NEXT: li r3, 208 ; BE-NEXT: vmr v28, v2 -; BE-NEXT: xxlor f1, v28, v28 ; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; BE-NEXT: li r3, 224 +; BE-NEXT: xxlor f1, v28, v28 ; BE-NEXT: vmr v29, v3 ; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; BE-NEXT: li r3, 240 @@ -4718,25 +4363,19 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -112(r1) -; CHECK-NEXT: std r0, 128(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 112 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v28, -64 -; CHECK-NEXT: .cfi_offset v29, -48 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: std r0, 128(r1) +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: vmr v28, v2 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: xxlor f1, v28, v28 ; CHECK-NEXT: vmr v29, v3 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: vmr v30, v4 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v5 ; CHECK-NEXT: bl llrint ; CHECK-NEXT: nop @@ -4777,13 +4416,13 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { ; CHECK-NEXT: vmr v3, v29 ; CHECK-NEXT: vmr v2, v28 ; CHECK-NEXT: xxmrghd v5, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 112 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -4832,3 +4471,536 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { ret <8 x i64> %a } declare <8 x i64> @llvm.llrint.v8i64.v8f64(<8 x double>) + +define <1 x i64> @llrint_v1i64_v1f128(<1 x fp128> %x) nounwind { +; BE-LABEL: llrint_v1i64_v1f128: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: llrint_v1i64_v1f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: llrint_v1i64_v1f128: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: stdu r1, -32(r1) +; FAST-NEXT: std r0, 48(r1) +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: addi r1, r1, 32 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.llrint.v1i64.v1f128(<1 x fp128>) + +define <2 x i64> @llrint_v2i64_v2f128(<2 x fp128> %x) nounwind { +; BE-LABEL: llrint_v2i64_v2f128: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -160(r1) +; BE-NEXT: li r3, 144 +; BE-NEXT: std r0, 176(r1) +; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; BE-NEXT: vmr v31, v2 +; BE-NEXT: vmr v2, v3 +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v31 +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: li r3, 144 +; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; BE-NEXT: addi r1, r1, 160 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: llrint_v2i64_v2f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -80(r1) +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 96(r1) +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v3 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v31 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: xxmrghd v2, vs0, v30 +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 80 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: llrint_v2i64_v2f128: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: stdu r1, -80(r1) +; FAST-NEXT: li r3, 48 +; FAST-NEXT: std r0, 96(r1) +; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 64 +; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: vmr v31, v3 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v31 +; FAST-NEXT: mtvsrd v30, r3 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 64 +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 48 +; FAST-NEXT: xxmrghd v2, vs0, v30 +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 80 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.llrint.v2i64.v2f128(<2 x fp128>) + +define <4 x i64> @llrint_v4i64_v4f128(<4 x fp128> %x) nounwind { +; BE-LABEL: llrint_v4i64_v4f128: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -208(r1) +; BE-NEXT: li r3, 160 +; BE-NEXT: std r0, 224(r1) +; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 176 +; BE-NEXT: vmr v29, v2 +; BE-NEXT: vmr v2, v3 +; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 192 +; BE-NEXT: vmr v30, v4 +; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; BE-NEXT: vmr v31, v5 +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v29 +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v31 +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v30 +; BE-NEXT: std r3, 152(r1) +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 144(r1) +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 144 +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: li r3, 192 +; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 176 +; BE-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 160 +; BE-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; BE-NEXT: addi r1, r1, 208 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: llrint_v4i64_v4f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -112(r1) +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 128(r1) +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: vmr v29, v3 +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v30, v4 +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v5 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v29 +; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v30 +; CHECK-NEXT: xxmrghd v29, vs0, v28 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v31 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v2, v29 +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: xxmrghd v3, vs0, v30 +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 112 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: llrint_v4i64_v4f128: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: stdu r1, -112(r1) +; FAST-NEXT: li r3, 48 +; FAST-NEXT: std r0, 128(r1) +; FAST-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 64 +; FAST-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 80 +; FAST-NEXT: vmr v29, v3 +; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 96 +; FAST-NEXT: vmr v30, v4 +; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: vmr v31, v5 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v29 +; FAST-NEXT: mtvsrd v28, r3 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v30 +; FAST-NEXT: xxmrghd v29, vs0, v28 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v31 +; FAST-NEXT: mtvsrd v30, r3 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 96 +; FAST-NEXT: vmr v2, v29 +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 80 +; FAST-NEXT: xxmrghd v3, vs0, v30 +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 64 +; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 48 +; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 112 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.llrint.v4i64.v4f128(<4 x fp128>) + +define <8 x i64> @llrint_v8i64_v8f128(<8 x fp128> %x) nounwind { +; BE-LABEL: llrint_v8i64_v8f128: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -304(r1) +; BE-NEXT: li r3, 192 +; BE-NEXT: std r0, 320(r1) +; BE-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 208 +; BE-NEXT: vmr v25, v2 +; BE-NEXT: vmr v2, v3 +; BE-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 224 +; BE-NEXT: vmr v26, v4 +; BE-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 240 +; BE-NEXT: vmr v27, v5 +; BE-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 256 +; BE-NEXT: vmr v28, v6 +; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 272 +; BE-NEXT: vmr v29, v7 +; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 288 +; BE-NEXT: vmr v30, v8 +; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; BE-NEXT: vmr v31, v9 +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v25 +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v27 +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v26 +; BE-NEXT: std r3, 152(r1) +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v29 +; BE-NEXT: std r3, 144(r1) +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v28 +; BE-NEXT: std r3, 168(r1) +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v31 +; BE-NEXT: std r3, 160(r1) +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v30 +; BE-NEXT: std r3, 184(r1) +; BE-NEXT: bl llrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 176(r1) +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 144 +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: addi r3, r1, 160 +; BE-NEXT: lxvd2x v4, 0, r3 +; BE-NEXT: addi r3, r1, 176 +; BE-NEXT: lxvd2x v5, 0, r3 +; BE-NEXT: li r3, 288 +; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 272 +; BE-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 256 +; BE-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 240 +; BE-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 224 +; BE-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 208 +; BE-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 192 +; BE-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload +; BE-NEXT: addi r1, r1, 304 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: llrint_v8i64_v8f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -176(r1) +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 192(r1) +; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: vmr v25, v3 +; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v26, v4 +; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: vmr v27, v5 +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: vmr v28, v6 +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: vmr v29, v7 +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: vmr v30, v8 +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v9 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v25 +; CHECK-NEXT: mtvsrd v24, r3 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v26 +; CHECK-NEXT: xxmrghd v25, vs0, v24 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v27 +; CHECK-NEXT: mtvsrd v26, r3 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v28 +; CHECK-NEXT: xxmrghd v27, vs0, v26 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v29 +; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v30 +; CHECK-NEXT: xxmrghd v29, vs0, v28 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v31 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: bl llrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: vmr v4, v29 +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: vmr v3, v27 +; CHECK-NEXT: vmr v2, v25 +; CHECK-NEXT: xxmrghd v5, vs0, v30 +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 176 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: llrint_v8i64_v8f128: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: stdu r1, -176(r1) +; FAST-NEXT: li r3, 48 +; FAST-NEXT: std r0, 192(r1) +; FAST-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 64 +; FAST-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 80 +; FAST-NEXT: vmr v25, v3 +; FAST-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 96 +; FAST-NEXT: vmr v26, v4 +; FAST-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 112 +; FAST-NEXT: vmr v27, v5 +; FAST-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 128 +; FAST-NEXT: vmr v28, v6 +; FAST-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 144 +; FAST-NEXT: vmr v29, v7 +; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 160 +; FAST-NEXT: vmr v30, v8 +; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: vmr v31, v9 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v25 +; FAST-NEXT: mtvsrd v24, r3 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v26 +; FAST-NEXT: xxmrghd v25, vs0, v24 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v27 +; FAST-NEXT: mtvsrd v26, r3 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v28 +; FAST-NEXT: xxmrghd v27, vs0, v26 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v29 +; FAST-NEXT: mtvsrd v28, r3 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v30 +; FAST-NEXT: xxmrghd v29, vs0, v28 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v31 +; FAST-NEXT: mtvsrd v30, r3 +; FAST-NEXT: bl llrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 160 +; FAST-NEXT: vmr v4, v29 +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 144 +; FAST-NEXT: vmr v3, v27 +; FAST-NEXT: vmr v2, v25 +; FAST-NEXT: xxmrghd v5, vs0, v30 +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 128 +; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 112 +; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 96 +; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 80 +; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 64 +; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 48 +; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 176 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.llrint.v8i64.v8f128(<8 x fp128>) diff --git a/llvm/test/CodeGen/PowerPC/vector-lrint.ll b/llvm/test/CodeGen/PowerPC/vector-lrint.ll index c2576d4..f437536 100644 --- a/llvm/test/CodeGen/PowerPC/vector-lrint.ll +++ b/llvm/test/CodeGen/PowerPC/vector-lrint.ll @@ -1,4 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; FIXME: crash "Input type needs to be promoted!" +; SKIP: sed 's/iXLen/i32/g' %s | llc -ppc-asm-full-reg-names \ +; SKIP: -ppc-vsr-nums-as-vr -mtriple=powerpc-unknown-unknown \ +; SKIP: -verify-machineinstrs | FileCheck %s --check-prefixes=PPC32 ; RUN: sed 's/iXLen/i32/g' %s | llc -mcpu=pwr7 -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-unknown \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=BE @@ -9,6 +13,10 @@ ; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-unknown \ ; RUN: -verify-machineinstrs --enable-unsafe-fp-math | \ ; RUN: FileCheck %s --check-prefixes=FAST +; FIXME: crash "Input type needs to be promoted!" +; SKIP: sed 's/iXLen/i64/g' %s | llc -ppc-asm-full-reg-names \ +; SKIP: -ppc-vsr-nums-as-vr -mtriple=powerpc-unknown-unknown \ +; SKIP: -verify-machineinstrs | FileCheck %s --check-prefixes=PPC32 ; RUN: sed 's/iXLen/i64/g' %s | llc -mcpu=pwr7 -ppc-asm-full-reg-names \ ; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-unknown \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=BE @@ -20,14 +28,12 @@ ; RUN: -verify-machineinstrs --enable-unsafe-fp-math | \ ; RUN: FileCheck %s --check-prefixes=FAST -define <1 x i64> @lrint_v1f16(<1 x half> %x) { +define <1 x i64> @lrint_v1f16(<1 x half> %x) nounwind { ; BE-LABEL: lrint_v1f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -112(r1) ; BE-NEXT: std r0, 128(r1) -; BE-NEXT: .cfi_def_cfa_offset 112 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: bl __truncsfhf2 ; BE-NEXT: nop ; BE-NEXT: clrldi r3, r3, 48 @@ -45,8 +51,6 @@ define <1 x i64> @lrint_v1f16(<1 x half> %x) { ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -32(r1) ; CHECK-NEXT: std r0, 48(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop ; CHECK-NEXT: clrldi r3, r3, 48 @@ -64,8 +68,6 @@ define <1 x i64> @lrint_v1f16(<1 x half> %x) { ; FAST-NEXT: mflr r0 ; FAST-NEXT: stdu r1, -32(r1) ; FAST-NEXT: std r0, 48(r1) -; FAST-NEXT: .cfi_def_cfa_offset 32 -; FAST-NEXT: .cfi_offset lr, 16 ; FAST-NEXT: bl __truncsfhf2 ; FAST-NEXT: nop ; FAST-NEXT: clrldi r3, r3, 48 @@ -82,16 +84,12 @@ define <1 x i64> @lrint_v1f16(<1 x half> %x) { } declare <1 x i64> @llvm.lrint.v1i64.v1f16(<1 x half>) -define <2 x i64> @lrint_v2f16(<2 x half> %x) { +define <2 x i64> @lrint_v2f16(<2 x half> %x) nounwind { ; BE-LABEL: lrint_v2f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -160(r1) ; BE-NEXT: std r0, 176(r1) -; BE-NEXT: .cfi_def_cfa_offset 160 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset r30, -24 -; BE-NEXT: .cfi_offset f31, -8 ; BE-NEXT: stfd f31, 152(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f31, f1 ; BE-NEXT: fmr f1, f2 @@ -129,17 +127,12 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -96(r1) -; CHECK-NEXT: std r0, 112(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 96 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r30, -24 -; CHECK-NEXT: .cfi_offset f31, -8 -; CHECK-NEXT: .cfi_offset v31, -48 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 112(r1) ; CHECK-NEXT: std r30, 72(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f31, 88(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f31, f2 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop ; CHECK-NEXT: fmr f1, f31 @@ -164,7 +157,7 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) { ; CHECK-NEXT: lfd f31, 88(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r30, 72(r1) # 8-byte Folded Reload ; CHECK-NEXT: xxmrghd v2, vs0, v31 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 96 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -173,10 +166,6 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) { ; FAST-LABEL: lrint_v2f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: .cfi_def_cfa_offset 48 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset f30, -16 -; FAST-NEXT: .cfi_offset f31, -8 ; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill ; FAST-NEXT: stdu r1, -48(r1) @@ -213,20 +202,12 @@ define <2 x i64> @lrint_v2f16(<2 x half> %x) { } declare <2 x i64> @llvm.lrint.v2i64.v2f16(<2 x half>) -define <4 x i64> @lrint_v4f16(<4 x half> %x) { +define <4 x i64> @lrint_v4f16(<4 x half> %x) nounwind { ; BE-LABEL: lrint_v4f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -208(r1) ; BE-NEXT: std r0, 224(r1) -; BE-NEXT: .cfi_def_cfa_offset 208 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset r28, -56 -; BE-NEXT: .cfi_offset r29, -48 -; BE-NEXT: .cfi_offset r30, -40 -; BE-NEXT: .cfi_offset f29, -24 -; BE-NEXT: .cfi_offset f30, -16 -; BE-NEXT: .cfi_offset f31, -8 ; BE-NEXT: stfd f29, 184(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f29, f1 ; BE-NEXT: fmr f1, f2 @@ -300,18 +281,8 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -144(r1) -; CHECK-NEXT: std r0, 160(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 144 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r28, -56 -; CHECK-NEXT: .cfi_offset r29, -48 -; CHECK-NEXT: .cfi_offset r30, -40 -; CHECK-NEXT: .cfi_offset f29, -24 -; CHECK-NEXT: .cfi_offset f30, -16 -; CHECK-NEXT: .cfi_offset f31, -8 -; CHECK-NEXT: .cfi_offset v30, -96 -; CHECK-NEXT: .cfi_offset v31, -80 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 160(r1) ; CHECK-NEXT: std r28, 88(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, 96(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, 104(r1) # 8-byte Folded Spill @@ -319,11 +290,11 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) { ; CHECK-NEXT: fmr f29, f2 ; CHECK-NEXT: stfd f30, 128(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f30, f3 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: stfd f31, 136(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f31, f4 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop ; CHECK-NEXT: fmr f1, f29 @@ -376,11 +347,11 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) { ; CHECK-NEXT: lfd f29, 120(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r30, 104(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, 96(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 ; CHECK-NEXT: ld r28, 88(r1) # 8-byte Folded Reload ; CHECK-NEXT: xxmrghd v3, vs0, v30 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 144 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -389,12 +360,6 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) { ; FAST-LABEL: lrint_v4f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: .cfi_def_cfa_offset 64 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset f28, -32 -; FAST-NEXT: .cfi_offset f29, -24 -; FAST-NEXT: .cfi_offset f30, -16 -; FAST-NEXT: .cfi_offset f31, -8 ; FAST-NEXT: stfd f28, -32(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f29, -24(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill @@ -458,28 +423,12 @@ define <4 x i64> @lrint_v4f16(<4 x half> %x) { } declare <4 x i64> @llvm.lrint.v4i64.v4f16(<4 x half>) -define <8 x i64> @lrint_v8f16(<8 x half> %x) { +define <8 x i64> @lrint_v8f16(<8 x half> %x) nounwind { ; BE-LABEL: lrint_v8f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -304(r1) ; BE-NEXT: std r0, 320(r1) -; BE-NEXT: .cfi_def_cfa_offset 304 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset r24, -120 -; BE-NEXT: .cfi_offset r25, -112 -; BE-NEXT: .cfi_offset r26, -104 -; BE-NEXT: .cfi_offset r27, -96 -; BE-NEXT: .cfi_offset r28, -88 -; BE-NEXT: .cfi_offset r29, -80 -; BE-NEXT: .cfi_offset r30, -72 -; BE-NEXT: .cfi_offset f25, -56 -; BE-NEXT: .cfi_offset f26, -48 -; BE-NEXT: .cfi_offset f27, -40 -; BE-NEXT: .cfi_offset f28, -32 -; BE-NEXT: .cfi_offset f29, -24 -; BE-NEXT: .cfi_offset f30, -16 -; BE-NEXT: .cfi_offset f31, -8 ; BE-NEXT: stfd f25, 248(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f25, f1 ; BE-NEXT: fmr f1, f2 @@ -625,44 +574,24 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -240(r1) -; CHECK-NEXT: std r0, 256(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 240 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r24, -120 -; CHECK-NEXT: .cfi_offset r25, -112 -; CHECK-NEXT: .cfi_offset r26, -104 -; CHECK-NEXT: .cfi_offset r27, -96 -; CHECK-NEXT: .cfi_offset r28, -88 -; CHECK-NEXT: .cfi_offset r29, -80 -; CHECK-NEXT: .cfi_offset r30, -72 -; CHECK-NEXT: .cfi_offset f25, -56 -; CHECK-NEXT: .cfi_offset f26, -48 -; CHECK-NEXT: .cfi_offset f27, -40 -; CHECK-NEXT: .cfi_offset f28, -32 -; CHECK-NEXT: .cfi_offset f29, -24 -; CHECK-NEXT: .cfi_offset f30, -16 -; CHECK-NEXT: .cfi_offset f31, -8 -; CHECK-NEXT: .cfi_offset v28, -192 -; CHECK-NEXT: .cfi_offset v29, -176 -; CHECK-NEXT: .cfi_offset v30, -160 -; CHECK-NEXT: .cfi_offset v31, -144 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 256(r1) ; CHECK-NEXT: std r24, 120(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r25, 128(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r26, 136(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r27, 144(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r28, 152(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, 160(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, 168(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: std r30, 168(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f25, 184(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f25, f2 ; CHECK-NEXT: stfd f26, 192(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f26, f3 ; CHECK-NEXT: stfd f27, 200(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f27, f4 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: stfd f28, 208(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f28, f5 @@ -670,11 +599,11 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) { ; CHECK-NEXT: fmr f29, f6 ; CHECK-NEXT: stfd f30, 224(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f30, f7 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: stfd f31, 232(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f31, f8 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop ; CHECK-NEXT: fmr f1, f25 @@ -777,7 +706,7 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) { ; CHECK-NEXT: vmr v4, v29 ; CHECK-NEXT: lfd f30, 224(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f29, 216(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: lfd f28, 208(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f27, 200(r1) # 8-byte Folded Reload @@ -785,7 +714,7 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) { ; CHECK-NEXT: lfd f25, 184(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r30, 168(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, 160(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: ld r28, 152(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r27, 144(r1) # 8-byte Folded Reload @@ -793,9 +722,9 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) { ; CHECK-NEXT: ld r26, 136(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r25, 128(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r24, 120(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 240 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -804,16 +733,6 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) { ; FAST-LABEL: lrint_v8f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: .cfi_def_cfa_offset 96 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset f24, -64 -; FAST-NEXT: .cfi_offset f25, -56 -; FAST-NEXT: .cfi_offset f26, -48 -; FAST-NEXT: .cfi_offset f27, -40 -; FAST-NEXT: .cfi_offset f28, -32 -; FAST-NEXT: .cfi_offset f29, -24 -; FAST-NEXT: .cfi_offset f30, -16 -; FAST-NEXT: .cfi_offset f31, -8 ; FAST-NEXT: stfd f24, -64(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f25, -56(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f26, -48(r1) # 8-byte Folded Spill @@ -931,44 +850,12 @@ define <8 x i64> @lrint_v8f16(<8 x half> %x) { } declare <8 x i64> @llvm.lrint.v8i64.v8f16(<8 x half>) -define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { +define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) nounwind { ; BE-LABEL: lrint_v16i64_v16f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -496(r1) ; BE-NEXT: std r0, 512(r1) -; BE-NEXT: .cfi_def_cfa_offset 496 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset r16, -248 -; BE-NEXT: .cfi_offset r17, -240 -; BE-NEXT: .cfi_offset r18, -232 -; BE-NEXT: .cfi_offset r19, -224 -; BE-NEXT: .cfi_offset r20, -216 -; BE-NEXT: .cfi_offset r21, -208 -; BE-NEXT: .cfi_offset r22, -200 -; BE-NEXT: .cfi_offset r23, -192 -; BE-NEXT: .cfi_offset r24, -184 -; BE-NEXT: .cfi_offset r25, -176 -; BE-NEXT: .cfi_offset r26, -168 -; BE-NEXT: .cfi_offset r27, -160 -; BE-NEXT: .cfi_offset r28, -152 -; BE-NEXT: .cfi_offset r29, -144 -; BE-NEXT: .cfi_offset r30, -136 -; BE-NEXT: .cfi_offset f17, -120 -; BE-NEXT: .cfi_offset f18, -112 -; BE-NEXT: .cfi_offset f19, -104 -; BE-NEXT: .cfi_offset f20, -96 -; BE-NEXT: .cfi_offset f21, -88 -; BE-NEXT: .cfi_offset f22, -80 -; BE-NEXT: .cfi_offset f23, -72 -; BE-NEXT: .cfi_offset f24, -64 -; BE-NEXT: .cfi_offset f25, -56 -; BE-NEXT: .cfi_offset f26, -48 -; BE-NEXT: .cfi_offset f27, -40 -; BE-NEXT: .cfi_offset f28, -32 -; BE-NEXT: .cfi_offset f29, -24 -; BE-NEXT: .cfi_offset f30, -16 -; BE-NEXT: .cfi_offset f31, -8 ; BE-NEXT: stfd f20, 400(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f20, f1 ; BE-NEXT: fmr f1, f2 @@ -1255,105 +1142,65 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -432(r1) -; CHECK-NEXT: std r0, 448(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 432 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r16, -248 -; CHECK-NEXT: .cfi_offset r17, -240 -; CHECK-NEXT: .cfi_offset r18, -232 -; CHECK-NEXT: .cfi_offset r19, -224 -; CHECK-NEXT: .cfi_offset r20, -216 -; CHECK-NEXT: .cfi_offset r21, -208 -; CHECK-NEXT: .cfi_offset r22, -200 -; CHECK-NEXT: .cfi_offset r23, -192 -; CHECK-NEXT: .cfi_offset r24, -184 -; CHECK-NEXT: .cfi_offset r25, -176 -; CHECK-NEXT: .cfi_offset r26, -168 -; CHECK-NEXT: .cfi_offset r27, -160 -; CHECK-NEXT: .cfi_offset r28, -152 -; CHECK-NEXT: .cfi_offset r29, -144 -; CHECK-NEXT: .cfi_offset r30, -136 -; CHECK-NEXT: .cfi_offset f17, -120 -; CHECK-NEXT: .cfi_offset f18, -112 -; CHECK-NEXT: .cfi_offset f19, -104 -; CHECK-NEXT: .cfi_offset f20, -96 -; CHECK-NEXT: .cfi_offset f21, -88 -; CHECK-NEXT: .cfi_offset f22, -80 -; CHECK-NEXT: .cfi_offset f23, -72 -; CHECK-NEXT: .cfi_offset f24, -64 -; CHECK-NEXT: .cfi_offset f25, -56 -; CHECK-NEXT: .cfi_offset f26, -48 -; CHECK-NEXT: .cfi_offset f27, -40 -; CHECK-NEXT: .cfi_offset f28, -32 -; CHECK-NEXT: .cfi_offset f29, -24 -; CHECK-NEXT: .cfi_offset f30, -16 -; CHECK-NEXT: .cfi_offset f31, -8 -; CHECK-NEXT: .cfi_offset v24, -384 -; CHECK-NEXT: .cfi_offset v25, -368 -; CHECK-NEXT: .cfi_offset v26, -352 -; CHECK-NEXT: .cfi_offset v27, -336 -; CHECK-NEXT: .cfi_offset v28, -320 -; CHECK-NEXT: .cfi_offset v29, -304 -; CHECK-NEXT: .cfi_offset v30, -288 -; CHECK-NEXT: .cfi_offset v31, -272 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 448(r1) ; CHECK-NEXT: std r16, 184(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r17, 192(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r18, 200(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r19, 208(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r20, 216(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r21, 224(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r22, 232(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: std r22, 232(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r23, 240(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r24, 248(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r25, 256(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r26, 264(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r27, 272(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r28, 280(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: std r28, 280(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, 288(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, 296(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f17, 312(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f18, 320(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f19, 328(r1) # 8-byte Folded Spill +; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: stfd f20, 336(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f20, f2 -; CHECK-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: stfd f21, 344(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f21, f3 ; CHECK-NEXT: stfd f22, 352(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f22, f4 +; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 112 ; CHECK-NEXT: stfd f23, 360(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f23, f5 -; CHECK-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 112 ; CHECK-NEXT: stfd f24, 368(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f24, f6 ; CHECK-NEXT: stfd f25, 376(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f25, f7 +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 128 ; CHECK-NEXT: stfd f26, 384(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f26, f8 -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 128 ; CHECK-NEXT: stfd f27, 392(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f27, f9 ; CHECK-NEXT: stfd f28, 400(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f28, f10 +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 144 ; CHECK-NEXT: stfd f29, 408(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f29, f11 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill -; CHECK-NEXT: li r3, 144 ; CHECK-NEXT: stfd f30, 416(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f30, f12 ; CHECK-NEXT: stfd f31, 424(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f31, f13 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 160 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop ; CHECK-NEXT: fmr f1, f20 @@ -1556,7 +1403,7 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: vmr v4, v29 ; CHECK-NEXT: lfd f30, 416(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f29, 408(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 144 ; CHECK-NEXT: vmr v5, v28 ; CHECK-NEXT: vmr v6, v27 @@ -1564,7 +1411,7 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: vmr v8, v25 ; CHECK-NEXT: lfd f28, 400(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f27, 392(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 128 ; CHECK-NEXT: lfd f26, 384(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f25, 376(r1) # 8-byte Folded Reload @@ -1572,7 +1419,7 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: lfd f24, 368(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f23, 360(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f22, 352(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 112 ; CHECK-NEXT: lfd f21, 344(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r30, 296(r1) # 8-byte Folded Reload @@ -1580,7 +1427,7 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: lfd f19, 328(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, 288(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, 280(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: lfd f18, 320(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r27, 272(r1) # 8-byte Folded Reload @@ -1588,7 +1435,7 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: ld r26, 264(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r25, 256(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r24, 248(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: ld r23, 240(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r22, 232(r1) # 8-byte Folded Reload @@ -1596,13 +1443,13 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { ; CHECK-NEXT: ld r20, 216(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r19, 208(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r18, 200(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: ld r17, 192(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r16, 184(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 432 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -1611,24 +1458,6 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { ; FAST-LABEL: lrint_v16i64_v16f16: ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 -; FAST-NEXT: .cfi_def_cfa_offset 160 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset f16, -128 -; FAST-NEXT: .cfi_offset f17, -120 -; FAST-NEXT: .cfi_offset f18, -112 -; FAST-NEXT: .cfi_offset f19, -104 -; FAST-NEXT: .cfi_offset f20, -96 -; FAST-NEXT: .cfi_offset f21, -88 -; FAST-NEXT: .cfi_offset f22, -80 -; FAST-NEXT: .cfi_offset f23, -72 -; FAST-NEXT: .cfi_offset f24, -64 -; FAST-NEXT: .cfi_offset f25, -56 -; FAST-NEXT: .cfi_offset f26, -48 -; FAST-NEXT: .cfi_offset f27, -40 -; FAST-NEXT: .cfi_offset f28, -32 -; FAST-NEXT: .cfi_offset f29, -24 -; FAST-NEXT: .cfi_offset f30, -16 -; FAST-NEXT: .cfi_offset f31, -8 ; FAST-NEXT: stfd f16, -128(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f17, -120(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f18, -112(r1) # 8-byte Folded Spill @@ -1852,50 +1681,12 @@ define <16 x i64> @lrint_v16i64_v16f16(<16 x half> %x) { } declare <16 x i64> @llvm.lrint.v16i64.v16f16(<16 x half>) -define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { +define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) nounwind { ; BE-LABEL: lrint_v32i64_v32f16: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -864(r1) ; BE-NEXT: std r0, 880(r1) -; BE-NEXT: .cfi_def_cfa_offset 864 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset r14, -288 -; BE-NEXT: .cfi_offset r15, -280 -; BE-NEXT: .cfi_offset r16, -272 -; BE-NEXT: .cfi_offset r17, -264 -; BE-NEXT: .cfi_offset r18, -256 -; BE-NEXT: .cfi_offset r19, -248 -; BE-NEXT: .cfi_offset r20, -240 -; BE-NEXT: .cfi_offset r21, -232 -; BE-NEXT: .cfi_offset r22, -224 -; BE-NEXT: .cfi_offset r23, -216 -; BE-NEXT: .cfi_offset r24, -208 -; BE-NEXT: .cfi_offset r25, -200 -; BE-NEXT: .cfi_offset r26, -192 -; BE-NEXT: .cfi_offset r27, -184 -; BE-NEXT: .cfi_offset r28, -176 -; BE-NEXT: .cfi_offset r29, -168 -; BE-NEXT: .cfi_offset r30, -160 -; BE-NEXT: .cfi_offset r31, -152 -; BE-NEXT: .cfi_offset f14, -144 -; BE-NEXT: .cfi_offset f15, -136 -; BE-NEXT: .cfi_offset f16, -128 -; BE-NEXT: .cfi_offset f17, -120 -; BE-NEXT: .cfi_offset f18, -112 -; BE-NEXT: .cfi_offset f19, -104 -; BE-NEXT: .cfi_offset f20, -96 -; BE-NEXT: .cfi_offset f21, -88 -; BE-NEXT: .cfi_offset f22, -80 -; BE-NEXT: .cfi_offset f23, -72 -; BE-NEXT: .cfi_offset f24, -64 -; BE-NEXT: .cfi_offset f25, -56 -; BE-NEXT: .cfi_offset f26, -48 -; BE-NEXT: .cfi_offset f27, -40 -; BE-NEXT: .cfi_offset f28, -32 -; BE-NEXT: .cfi_offset f29, -24 -; BE-NEXT: .cfi_offset f30, -16 -; BE-NEXT: .cfi_offset f31, -8 ; BE-NEXT: stfd f20, 768(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f20, f1 ; BE-NEXT: fmr f1, f2 @@ -1935,6 +1726,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; BE-NEXT: stfd f30, 848(r1) # 8-byte Folded Spill ; BE-NEXT: stfd f31, 856(r1) # 8-byte Folded Spill ; BE-NEXT: fmr f31, f13 +; BE-NEXT: mr r30, r3 ; BE-NEXT: fmr f29, f12 ; BE-NEXT: fmr f30, f11 ; BE-NEXT: fmr f28, f10 @@ -1945,7 +1737,6 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; BE-NEXT: fmr f23, f5 ; BE-NEXT: fmr f22, f4 ; BE-NEXT: fmr f21, f3 -; BE-NEXT: mr r30, r3 ; BE-NEXT: bl __truncsfhf2 ; BE-NEXT: nop ; BE-NEXT: fmr f1, f20 @@ -2448,98 +2239,48 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -688(r1) -; CHECK-NEXT: std r0, 704(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 688 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r14, -288 -; CHECK-NEXT: .cfi_offset r15, -280 -; CHECK-NEXT: .cfi_offset r16, -272 -; CHECK-NEXT: .cfi_offset r17, -264 -; CHECK-NEXT: .cfi_offset r18, -256 -; CHECK-NEXT: .cfi_offset r19, -248 -; CHECK-NEXT: .cfi_offset r20, -240 -; CHECK-NEXT: .cfi_offset r21, -232 -; CHECK-NEXT: .cfi_offset r22, -224 -; CHECK-NEXT: .cfi_offset r23, -216 -; CHECK-NEXT: .cfi_offset r24, -208 -; CHECK-NEXT: .cfi_offset r25, -200 -; CHECK-NEXT: .cfi_offset r26, -192 -; CHECK-NEXT: .cfi_offset r27, -184 -; CHECK-NEXT: .cfi_offset r28, -176 -; CHECK-NEXT: .cfi_offset r29, -168 -; CHECK-NEXT: .cfi_offset r30, -160 -; CHECK-NEXT: .cfi_offset r31, -152 -; CHECK-NEXT: .cfi_offset f14, -144 -; CHECK-NEXT: .cfi_offset f15, -136 -; CHECK-NEXT: .cfi_offset f16, -128 -; CHECK-NEXT: .cfi_offset f17, -120 -; CHECK-NEXT: .cfi_offset f18, -112 -; CHECK-NEXT: .cfi_offset f19, -104 -; CHECK-NEXT: .cfi_offset f20, -96 -; CHECK-NEXT: .cfi_offset f21, -88 -; CHECK-NEXT: .cfi_offset f22, -80 -; CHECK-NEXT: .cfi_offset f23, -72 -; CHECK-NEXT: .cfi_offset f24, -64 -; CHECK-NEXT: .cfi_offset f25, -56 -; CHECK-NEXT: .cfi_offset f26, -48 -; CHECK-NEXT: .cfi_offset f27, -40 -; CHECK-NEXT: .cfi_offset f28, -32 -; CHECK-NEXT: .cfi_offset f29, -24 -; CHECK-NEXT: .cfi_offset f30, -16 -; CHECK-NEXT: .cfi_offset f31, -8 -; CHECK-NEXT: .cfi_offset v20, -480 -; CHECK-NEXT: .cfi_offset v21, -464 -; CHECK-NEXT: .cfi_offset v22, -448 -; CHECK-NEXT: .cfi_offset v23, -432 -; CHECK-NEXT: .cfi_offset v24, -416 -; CHECK-NEXT: .cfi_offset v25, -400 -; CHECK-NEXT: .cfi_offset v26, -384 -; CHECK-NEXT: .cfi_offset v27, -368 -; CHECK-NEXT: .cfi_offset v28, -352 -; CHECK-NEXT: .cfi_offset v29, -336 -; CHECK-NEXT: .cfi_offset v30, -320 -; CHECK-NEXT: .cfi_offset v31, -304 ; CHECK-NEXT: li r4, 208 +; CHECK-NEXT: std r0, 704(r1) ; CHECK-NEXT: std r14, 400(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r15, 408(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r16, 416(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r17, 424(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r18, 432(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r19, 440(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r20, 448(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v20, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 224 +; CHECK-NEXT: std r20, 448(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r21, 456(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r22, 464(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r23, 472(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r24, 480(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r25, 488(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r26, 496(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v21, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 240 +; CHECK-NEXT: std r26, 496(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r27, 504(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r28, 512(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, 520(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r30, 528(r1) # 8-byte Folded Spill ; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: std r31, 536(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v22, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 256 +; CHECK-NEXT: std r31, 536(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f14, 544(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f15, 552(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f16, 560(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f17, 568(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f18, 576(r1) # 8-byte Folded Spill -; CHECK-NEXT: stfd f19, 584(r1) # 8-byte Folded Spill -; CHECK-NEXT: stvx v23, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 272 +; CHECK-NEXT: stfd f19, 584(r1) # 8-byte Folded Spill ; CHECK-NEXT: stfd f20, 592(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f20, f2 ; CHECK-NEXT: stfd f21, 600(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f21, f3 ; CHECK-NEXT: stfd f22, 608(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f22, f4 -; CHECK-NEXT: stvx v24, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 288 ; CHECK-NEXT: stfd f23, 616(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f23, f5 @@ -2547,7 +2288,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: fmr f24, f6 ; CHECK-NEXT: stfd f25, 632(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f25, f7 -; CHECK-NEXT: stvx v25, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 304 ; CHECK-NEXT: stfd f26, 640(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f26, f8 @@ -2555,7 +2296,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: fmr f27, f9 ; CHECK-NEXT: stfd f28, 656(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f28, f10 -; CHECK-NEXT: stvx v26, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 320 ; CHECK-NEXT: stfd f29, 664(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f29, f11 @@ -2563,15 +2304,15 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: fmr f30, f12 ; CHECK-NEXT: stfd f31, 680(r1) # 8-byte Folded Spill ; CHECK-NEXT: fmr f31, f13 -; CHECK-NEXT: stvx v27, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 336 -; CHECK-NEXT: stvx v28, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 352 -; CHECK-NEXT: stvx v29, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 368 -; CHECK-NEXT: stvx v30, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: li r4, 384 -; CHECK-NEXT: stvx v31, r1, r4 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill ; CHECK-NEXT: bl __truncsfhf2 ; CHECK-NEXT: nop ; CHECK-NEXT: fmr f1, f20 @@ -3050,7 +2791,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: li r3, 384 ; CHECK-NEXT: xxswapd vs4, vs4 ; CHECK-NEXT: stxvd2x vs4, 0, r30 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 368 ; CHECK-NEXT: lfd f31, 680(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f30, 672(r1) # 8-byte Folded Reload @@ -3068,7 +2809,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: lfd f18, 576(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f17, 568(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f16, 560(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 352 ; CHECK-NEXT: lfd f15, 552(r1) # 8-byte Folded Reload ; CHECK-NEXT: lfd f14, 544(r1) # 8-byte Folded Reload @@ -3076,7 +2817,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: ld r30, 528(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, 520(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, 512(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 336 ; CHECK-NEXT: ld r27, 504(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r26, 496(r1) # 8-byte Folded Reload @@ -3084,7 +2825,7 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: ld r24, 480(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r23, 472(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r22, 464(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 320 ; CHECK-NEXT: ld r21, 456(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r20, 448(r1) # 8-byte Folded Reload @@ -3092,23 +2833,23 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; CHECK-NEXT: ld r18, 432(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r17, 424(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r16, 416(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 304 ; CHECK-NEXT: ld r15, 408(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r14, 400(r1) # 8-byte Folded Reload -; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 288 -; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 272 -; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 256 -; CHECK-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 240 -; CHECK-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 224 -; CHECK-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 208 -; CHECK-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 688 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -3118,95 +2859,62 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; FAST: # %bb.0: ; FAST-NEXT: mflr r0 ; FAST-NEXT: stdu r1, -480(r1) -; FAST-NEXT: std r0, 496(r1) -; FAST-NEXT: .cfi_def_cfa_offset 480 -; FAST-NEXT: .cfi_offset lr, 16 -; FAST-NEXT: .cfi_offset r30, -160 -; FAST-NEXT: .cfi_offset f14, -144 -; FAST-NEXT: .cfi_offset f15, -136 -; FAST-NEXT: .cfi_offset f16, -128 -; FAST-NEXT: .cfi_offset f17, -120 -; FAST-NEXT: .cfi_offset f18, -112 -; FAST-NEXT: .cfi_offset f19, -104 -; FAST-NEXT: .cfi_offset f20, -96 -; FAST-NEXT: .cfi_offset f21, -88 -; FAST-NEXT: .cfi_offset f22, -80 -; FAST-NEXT: .cfi_offset f23, -72 -; FAST-NEXT: .cfi_offset f24, -64 -; FAST-NEXT: .cfi_offset f25, -56 -; FAST-NEXT: .cfi_offset f26, -48 -; FAST-NEXT: .cfi_offset f27, -40 -; FAST-NEXT: .cfi_offset f28, -32 -; FAST-NEXT: .cfi_offset f29, -24 -; FAST-NEXT: .cfi_offset f30, -16 -; FAST-NEXT: .cfi_offset f31, -8 -; FAST-NEXT: .cfi_offset v20, -352 -; FAST-NEXT: .cfi_offset v21, -336 -; FAST-NEXT: .cfi_offset v22, -320 -; FAST-NEXT: .cfi_offset v23, -304 -; FAST-NEXT: .cfi_offset v24, -288 -; FAST-NEXT: .cfi_offset v25, -272 -; FAST-NEXT: .cfi_offset v26, -256 -; FAST-NEXT: .cfi_offset v27, -240 -; FAST-NEXT: .cfi_offset v28, -224 -; FAST-NEXT: .cfi_offset v29, -208 -; FAST-NEXT: .cfi_offset v30, -192 -; FAST-NEXT: .cfi_offset v31, -176 ; FAST-NEXT: li r4, 128 +; FAST-NEXT: std r0, 496(r1) ; FAST-NEXT: std r30, 320(r1) # 8-byte Folded Spill +; FAST-NEXT: mr r30, r3 ; FAST-NEXT: stfd f14, 336(r1) # 8-byte Folded Spill -; FAST-NEXT: fmr f14, f5 ; FAST-NEXT: stfd f15, 344(r1) # 8-byte Folded Spill +; FAST-NEXT: fmr f14, f5 ; FAST-NEXT: stfd f16, 352(r1) # 8-byte Folded Spill -; FAST-NEXT: fmr f16, f4 -; FAST-NEXT: mr r30, r3 -; FAST-NEXT: stvx v20, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v20, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 144 +; FAST-NEXT: fmr f16, f4 ; FAST-NEXT: stfd f17, 360(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f18, 368(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f19, 376(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f20, 384(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f21, 392(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f22, 400(r1) # 8-byte Folded Spill -; FAST-NEXT: stvx v21, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v21, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 160 +; FAST-NEXT: stfd f22, 400(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f23, 408(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f24, 416(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f25, 424(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f26, 432(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f27, 440(r1) # 8-byte Folded Spill -; FAST-NEXT: stfd f28, 448(r1) # 8-byte Folded Spill -; FAST-NEXT: stvx v22, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v22, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 176 ; FAST-NEXT: xxlor v22, f3, f3 +; FAST-NEXT: stfd f28, 448(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f29, 456(r1) # 8-byte Folded Spill ; FAST-NEXT: fmr f29, f9 ; FAST-NEXT: stfd f30, 464(r1) # 8-byte Folded Spill ; FAST-NEXT: stfd f31, 472(r1) # 8-byte Folded Spill -; FAST-NEXT: stvx v23, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v23, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 192 ; FAST-NEXT: xxlor v23, f2, f2 -; FAST-NEXT: stvx v24, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v24, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 208 -; FAST-NEXT: stvx v25, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v25, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 224 ; FAST-NEXT: xxlor v25, f13, f13 -; FAST-NEXT: stvx v26, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v26, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 240 ; FAST-NEXT: xxlor v26, f12, f12 -; FAST-NEXT: stvx v27, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v27, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 256 ; FAST-NEXT: xxlor v27, f11, f11 -; FAST-NEXT: stvx v28, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v28, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 272 ; FAST-NEXT: xxlor v28, f10, f10 -; FAST-NEXT: stvx v29, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v29, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 288 ; FAST-NEXT: xxlor v29, f8, f8 -; FAST-NEXT: stvx v30, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v30, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 304 ; FAST-NEXT: xxlor v30, f7, f7 -; FAST-NEXT: stvx v31, r1, r4 # 16-byte Folded Spill +; FAST-NEXT: stxvd2x v31, r1, r4 # 16-byte Folded Spill ; FAST-NEXT: li r4, 44 ; FAST-NEXT: xxlor v31, f6, f6 ; FAST-NEXT: stxsspx f1, r1, r4 # 4-byte Folded Spill @@ -3635,30 +3343,30 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { ; FAST-NEXT: lfd f16, 352(r1) # 8-byte Folded Reload ; FAST-NEXT: lfd f15, 344(r1) # 8-byte Folded Reload ; FAST-NEXT: lfd f14, 336(r1) # 8-byte Folded Reload -; FAST-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 288 ; FAST-NEXT: ld r30, 320(r1) # 8-byte Folded Reload -; FAST-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 272 -; FAST-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 256 -; FAST-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 240 -; FAST-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 224 -; FAST-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 208 -; FAST-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 192 -; FAST-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 176 -; FAST-NEXT: lvx v23, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 160 -; FAST-NEXT: lvx v22, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 144 -; FAST-NEXT: lvx v21, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: li r3, 128 -; FAST-NEXT: lvx v20, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload ; FAST-NEXT: addi r1, r1, 480 ; FAST-NEXT: ld r0, 16(r1) ; FAST-NEXT: mtlr r0 @@ -3668,14 +3376,12 @@ define <32 x i64> @lrint_v32i64_v32f16(<32 x half> %x) { } declare <32 x i64> @llvm.lrint.v32i64.v32f16(<32 x half>) -define <1 x i64> @lrint_v1f32(<1 x float> %x) { +define <1 x i64> @lrint_v1f32(<1 x float> %x) nounwind { ; BE-LABEL: lrint_v1f32: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -112(r1) ; BE-NEXT: std r0, 128(r1) -; BE-NEXT: .cfi_def_cfa_offset 112 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: bl lrintf ; BE-NEXT: nop ; BE-NEXT: addi r1, r1, 112 @@ -3688,8 +3394,6 @@ define <1 x i64> @lrint_v1f32(<1 x float> %x) { ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -32(r1) ; CHECK-NEXT: std r0, 48(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 @@ -3707,15 +3411,13 @@ define <1 x i64> @lrint_v1f32(<1 x float> %x) { } declare <1 x i64> @llvm.lrint.v1i64.v1f32(<1 x float>) -define <2 x i64> @lrint_v2f32(<2 x float> %x) { +define <2 x i64> @lrint_v2f32(<2 x float> %x) nounwind { ; BE-LABEL: lrint_v2f32: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -144(r1) -; BE-NEXT: std r0, 160(r1) -; BE-NEXT: .cfi_def_cfa_offset 144 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: std r0, 160(r1) ; BE-NEXT: stxvw4x v2, 0, r3 ; BE-NEXT: lfs f1, 116(r1) ; BE-NEXT: bl lrintf @@ -3736,14 +3438,11 @@ define <2 x i64> @lrint_v2f32(<2 x float> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -64(r1) -; CHECK-NEXT: std r0, 80(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 80(r1) ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v2 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop @@ -3755,7 +3454,7 @@ define <2 x i64> @lrint_v2f32(<2 x float> %x) { ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 48 ; CHECK-NEXT: xxmrghd v2, vs0, v31 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 64 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -3780,15 +3479,13 @@ define <2 x i64> @lrint_v2f32(<2 x float> %x) { } declare <2 x i64> @llvm.lrint.v2i64.v2f32(<2 x float>) -define <4 x i64> @lrint_v4f32(<4 x float> %x) { +define <4 x i64> @lrint_v4f32(<4 x float> %x) nounwind { ; BE-LABEL: lrint_v4f32: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -160(r1) -; BE-NEXT: std r0, 176(r1) -; BE-NEXT: .cfi_def_cfa_offset 160 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: std r0, 176(r1) ; BE-NEXT: stxvw4x v2, 0, r3 ; BE-NEXT: lfs f1, 116(r1) ; BE-NEXT: bl lrintf @@ -3819,17 +3516,13 @@ define <4 x i64> @lrint_v4f32(<4 x float> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -80(r1) -; CHECK-NEXT: std r0, 96(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 80 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 96(r1) ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v2 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop @@ -3852,9 +3545,9 @@ define <4 x i64> @lrint_v4f32(<4 x float> %x) { ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: vmr v2, v30 ; CHECK-NEXT: xxmrghd v3, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 80 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -3890,15 +3583,13 @@ define <4 x i64> @lrint_v4f32(<4 x float> %x) { } declare <4 x i64> @llvm.lrint.v4i64.v4f32(<4 x float>) -define <8 x i64> @lrint_v8f32(<8 x float> %x) { +define <8 x i64> @lrint_v8f32(<8 x float> %x) nounwind { ; BE-LABEL: lrint_v8f32: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -208(r1) -; BE-NEXT: std r0, 224(r1) -; BE-NEXT: .cfi_def_cfa_offset 208 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: std r0, 224(r1) ; BE-NEXT: stxvw4x v2, 0, r3 ; BE-NEXT: addi r3, r1, 128 ; BE-NEXT: stxvw4x v3, 0, r3 @@ -3951,24 +3642,18 @@ define <8 x i64> @lrint_v8f32(<8 x float> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -112(r1) -; CHECK-NEXT: std r0, 128(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 112 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v28, -64 -; CHECK-NEXT: .cfi_offset v29, -48 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 ; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: std r0, 128(r1) +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: vmr v30, v2 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v3 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop @@ -4014,13 +3699,13 @@ define <8 x i64> @lrint_v8f32(<8 x float> %x) { ; CHECK-NEXT: vmr v2, v29 ; CHECK-NEXT: vmr v4, v28 ; CHECK-NEXT: xxmrghd v5, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 112 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -4078,15 +3763,13 @@ define <8 x i64> @lrint_v8f32(<8 x float> %x) { } declare <8 x i64> @llvm.lrint.v8i64.v8f32(<8 x float>) -define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) { +define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) nounwind { ; BE-LABEL: lrint_v16i64_v16f32: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -304(r1) -; BE-NEXT: std r0, 320(r1) -; BE-NEXT: .cfi_def_cfa_offset 304 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: addi r3, r1, 112 +; BE-NEXT: std r0, 320(r1) ; BE-NEXT: stxvw4x v2, 0, r3 ; BE-NEXT: addi r3, r1, 128 ; BE-NEXT: stxvw4x v3, 0, r3 @@ -4183,38 +3866,28 @@ define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -176(r1) -; CHECK-NEXT: std r0, 192(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 176 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v24, -128 -; CHECK-NEXT: .cfi_offset v25, -112 -; CHECK-NEXT: .cfi_offset v26, -96 -; CHECK-NEXT: .cfi_offset v27, -80 -; CHECK-NEXT: .cfi_offset v28, -64 -; CHECK-NEXT: .cfi_offset v29, -48 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 192(r1) ; CHECK-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-NEXT: stvx v24, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: xscvspdpn f1, vs0 -; CHECK-NEXT: stvx v25, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: stvx v26, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: vmr v26, v3 -; CHECK-NEXT: stvx v27, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 112 -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 128 ; CHECK-NEXT: vmr v28, v4 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 144 ; CHECK-NEXT: vmr v29, v2 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 160 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v5 ; CHECK-NEXT: bl lrintf ; CHECK-NEXT: nop @@ -4306,21 +3979,21 @@ define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) { ; CHECK-NEXT: vmr v6, v25 ; CHECK-NEXT: vmr v8, v24 ; CHECK-NEXT: xxmrghd v9, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 144 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 128 -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 112 -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 96 -; CHECK-NEXT: lvx v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: lvx v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: lvx v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 176 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -4422,14 +4095,12 @@ define <16 x i64> @lrint_v16i64_v16f32(<16 x float> %x) { } declare <16 x i64> @llvm.lrint.v16i64.v16f32(<16 x float>) -define <1 x i64> @lrint_v1f64(<1 x double> %x) { +define <1 x i64> @lrint_v1f64(<1 x double> %x) nounwind { ; BE-LABEL: lrint_v1f64: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -112(r1) ; BE-NEXT: std r0, 128(r1) -; BE-NEXT: .cfi_def_cfa_offset 112 -; BE-NEXT: .cfi_offset lr, 16 ; BE-NEXT: bl lrint ; BE-NEXT: nop ; BE-NEXT: addi r1, r1, 112 @@ -4442,8 +4113,6 @@ define <1 x i64> @lrint_v1f64(<1 x double> %x) { ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -32(r1) ; CHECK-NEXT: std r0, 48(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset lr, 16 ; CHECK-NEXT: bl lrint ; CHECK-NEXT: nop ; CHECK-NEXT: addi r1, r1, 32 @@ -4461,16 +4130,13 @@ define <1 x i64> @lrint_v1f64(<1 x double> %x) { } declare <1 x i64> @llvm.lrint.v1i64.v1f64(<1 x double>) -define <2 x i64> @lrint_v2f64(<2 x double> %x) { +define <2 x i64> @lrint_v2f64(<2 x double> %x) nounwind { ; BE-LABEL: lrint_v2f64: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -160(r1) -; BE-NEXT: std r0, 176(r1) -; BE-NEXT: .cfi_def_cfa_offset 160 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset v31, -16 ; BE-NEXT: li r3, 144 +; BE-NEXT: std r0, 176(r1) ; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; BE-NEXT: vmr v31, v2 ; BE-NEXT: xxlor f1, v31, v31 @@ -4494,12 +4160,9 @@ define <2 x i64> @lrint_v2f64(<2 x double> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -64(r1) -; CHECK-NEXT: std r0, 80(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: std r0, 80(r1) +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v2 ; CHECK-NEXT: xxlor f1, v31, v31 ; CHECK-NEXT: bl lrint @@ -4511,7 +4174,7 @@ define <2 x i64> @lrint_v2f64(<2 x double> %x) { ; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 48 ; CHECK-NEXT: xxmrghd v2, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 64 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -4534,17 +4197,13 @@ define <2 x i64> @lrint_v2f64(<2 x double> %x) { } declare <2 x i64> @llvm.lrint.v2i64.v2f64(<2 x double>) -define <4 x i64> @lrint_v4f64(<4 x double> %x) { +define <4 x i64> @lrint_v4f64(<4 x double> %x) nounwind { ; BE-LABEL: lrint_v4f64: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -192(r1) -; BE-NEXT: std r0, 208(r1) -; BE-NEXT: .cfi_def_cfa_offset 192 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset v30, -32 -; BE-NEXT: .cfi_offset v31, -16 ; BE-NEXT: li r3, 160 +; BE-NEXT: std r0, 208(r1) ; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; BE-NEXT: vmr v30, v2 ; BE-NEXT: li r3, 176 @@ -4583,17 +4242,13 @@ define <4 x i64> @lrint_v4f64(<4 x double> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -80(r1) -; CHECK-NEXT: std r0, 96(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 80 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: std r0, 96(r1) +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v30, v2 ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: xxlor f1, v30, v30 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v3 ; CHECK-NEXT: bl lrint ; CHECK-NEXT: nop @@ -4614,9 +4269,9 @@ define <4 x i64> @lrint_v4f64(<4 x double> %x) { ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: vmr v2, v30 ; CHECK-NEXT: xxmrghd v3, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 80 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -4648,25 +4303,19 @@ define <4 x i64> @lrint_v4f64(<4 x double> %x) { } declare <4 x i64> @llvm.lrint.v4i64.v4f64(<4 x double>) -define <8 x i64> @lrint_v8f64(<8 x double> %x) { +define <8 x i64> @lrint_v8f64(<8 x double> %x) nounwind { ; BE-LABEL: lrint_v8f64: ; BE: # %bb.0: ; BE-NEXT: mflr r0 ; BE-NEXT: stdu r1, -256(r1) -; BE-NEXT: std r0, 272(r1) -; BE-NEXT: .cfi_def_cfa_offset 256 -; BE-NEXT: .cfi_offset lr, 16 -; BE-NEXT: .cfi_offset v28, -64 -; BE-NEXT: .cfi_offset v29, -48 -; BE-NEXT: .cfi_offset v30, -32 -; BE-NEXT: .cfi_offset v31, -16 ; BE-NEXT: li r3, 192 +; BE-NEXT: std r0, 272(r1) ; BE-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; BE-NEXT: li r3, 208 ; BE-NEXT: vmr v28, v2 -; BE-NEXT: xxlor f1, v28, v28 ; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; BE-NEXT: li r3, 224 +; BE-NEXT: xxlor f1, v28, v28 ; BE-NEXT: vmr v29, v3 ; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; BE-NEXT: li r3, 240 @@ -4729,25 +4378,19 @@ define <8 x i64> @lrint_v8f64(<8 x double> %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -112(r1) -; CHECK-NEXT: std r0, 128(r1) -; CHECK-NEXT: .cfi_def_cfa_offset 112 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset v28, -64 -; CHECK-NEXT: .cfi_offset v29, -48 -; CHECK-NEXT: .cfi_offset v30, -32 -; CHECK-NEXT: .cfi_offset v31, -16 ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: stvx v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: std r0, 128(r1) +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 64 ; CHECK-NEXT: vmr v28, v2 -; CHECK-NEXT: stvx v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 80 ; CHECK-NEXT: xxlor f1, v28, v28 ; CHECK-NEXT: vmr v29, v3 -; CHECK-NEXT: stvx v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: li r3, 96 ; CHECK-NEXT: vmr v30, v4 -; CHECK-NEXT: stvx v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; CHECK-NEXT: vmr v31, v5 ; CHECK-NEXT: bl lrint ; CHECK-NEXT: nop @@ -4788,13 +4431,13 @@ define <8 x i64> @lrint_v8f64(<8 x double> %x) { ; CHECK-NEXT: vmr v3, v29 ; CHECK-NEXT: vmr v2, v28 ; CHECK-NEXT: xxmrghd v5, v31, vs0 -; CHECK-NEXT: lvx v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 80 -; CHECK-NEXT: lvx v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 64 -; CHECK-NEXT: lvx v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: li r3, 48 -; CHECK-NEXT: lvx v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload ; CHECK-NEXT: addi r1, r1, 112 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -4843,3 +4486,1046 @@ define <8 x i64> @lrint_v8f64(<8 x double> %x) { ret <8 x i64> %a } declare <8 x i64> @llvm.lrint.v8i64.v8f64(<8 x double>) + +define <1 x i64> @lrint_v1f128(<1 x fp128> %x) nounwind { +; BE-LABEL: lrint_v1f128: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -112(r1) +; BE-NEXT: std r0, 128(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: addi r1, r1, 112 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v1f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r0, 48(r1) +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v1f128: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: stdu r1, -32(r1) +; FAST-NEXT: std r0, 48(r1) +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: addi r1, r1, 32 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <1 x i64> @llvm.lrint.v1i64.v1f128(<1 x fp128> %x) + ret <1 x i64> %a +} +declare <1 x i64> @llvm.lrint.v1i64.v1f128(<1 x fp128>) + +define <2 x i64> @lrint_v2f128(<2 x fp128> %x) nounwind { +; BE-LABEL: lrint_v2f128: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -160(r1) +; BE-NEXT: li r3, 144 +; BE-NEXT: std r0, 176(r1) +; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; BE-NEXT: vmr v31, v2 +; BE-NEXT: vmr v2, v3 +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v31 +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: li r3, 144 +; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; BE-NEXT: addi r1, r1, 160 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v2f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -80(r1) +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 96(r1) +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v31 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: xxmrghd v2, vs0, v30 +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 80 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v2f128: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: stdu r1, -80(r1) +; FAST-NEXT: li r3, 48 +; FAST-NEXT: std r0, 96(r1) +; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 64 +; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: vmr v31, v3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v31 +; FAST-NEXT: mtvsrd v30, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 64 +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 48 +; FAST-NEXT: xxmrghd v2, vs0, v30 +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 80 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <2 x i64> @llvm.lrint.v2i64.v2f128(<2 x fp128> %x) + ret <2 x i64> %a +} +declare <2 x i64> @llvm.lrint.v2i64.v2f128(<2 x fp128>) + +define <4 x i64> @lrint_v4f128(<4 x fp128> %x) nounwind { +; BE-LABEL: lrint_v4f128: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -208(r1) +; BE-NEXT: li r3, 160 +; BE-NEXT: std r0, 224(r1) +; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 176 +; BE-NEXT: vmr v29, v2 +; BE-NEXT: vmr v2, v3 +; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 192 +; BE-NEXT: vmr v30, v4 +; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; BE-NEXT: vmr v31, v5 +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v29 +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v31 +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v30 +; BE-NEXT: std r3, 152(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 144(r1) +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 144 +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: li r3, 192 +; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 176 +; BE-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 160 +; BE-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; BE-NEXT: addi r1, r1, 208 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v4f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -112(r1) +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 128(r1) +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: vmr v29, v3 +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v30, v4 +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v5 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v29 +; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v30 +; CHECK-NEXT: xxmrghd v29, vs0, v28 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v31 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v2, v29 +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: xxmrghd v3, vs0, v30 +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 112 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v4f128: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: stdu r1, -112(r1) +; FAST-NEXT: li r3, 48 +; FAST-NEXT: std r0, 128(r1) +; FAST-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 64 +; FAST-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 80 +; FAST-NEXT: vmr v29, v3 +; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 96 +; FAST-NEXT: vmr v30, v4 +; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: vmr v31, v5 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v29 +; FAST-NEXT: mtvsrd v28, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v30 +; FAST-NEXT: xxmrghd v29, vs0, v28 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v31 +; FAST-NEXT: mtvsrd v30, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 96 +; FAST-NEXT: vmr v2, v29 +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 80 +; FAST-NEXT: xxmrghd v3, vs0, v30 +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 64 +; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 48 +; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 112 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <4 x i64> @llvm.lrint.v4i64.v4f128(<4 x fp128> %x) + ret <4 x i64> %a +} +declare <4 x i64> @llvm.lrint.v4i64.v4f128(<4 x fp128>) + +define <8 x i64> @lrint_v8f128(<8 x fp128> %x) nounwind { +; BE-LABEL: lrint_v8f128: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -304(r1) +; BE-NEXT: li r3, 192 +; BE-NEXT: std r0, 320(r1) +; BE-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 208 +; BE-NEXT: vmr v25, v2 +; BE-NEXT: vmr v2, v3 +; BE-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 224 +; BE-NEXT: vmr v26, v4 +; BE-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 240 +; BE-NEXT: vmr v27, v5 +; BE-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 256 +; BE-NEXT: vmr v28, v6 +; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 272 +; BE-NEXT: vmr v29, v7 +; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 288 +; BE-NEXT: vmr v30, v8 +; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; BE-NEXT: vmr v31, v9 +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v25 +; BE-NEXT: std r3, 136(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v27 +; BE-NEXT: std r3, 128(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v26 +; BE-NEXT: std r3, 152(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v29 +; BE-NEXT: std r3, 144(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v28 +; BE-NEXT: std r3, 168(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v31 +; BE-NEXT: std r3, 160(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v30 +; BE-NEXT: std r3, 184(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 176(r1) +; BE-NEXT: addi r3, r1, 128 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 144 +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: addi r3, r1, 160 +; BE-NEXT: lxvd2x v4, 0, r3 +; BE-NEXT: addi r3, r1, 176 +; BE-NEXT: lxvd2x v5, 0, r3 +; BE-NEXT: li r3, 288 +; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 272 +; BE-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 256 +; BE-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 240 +; BE-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 224 +; BE-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 208 +; BE-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 192 +; BE-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload +; BE-NEXT: addi r1, r1, 304 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v8f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -176(r1) +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: std r0, 192(r1) +; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: vmr v25, v3 +; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: vmr v26, v4 +; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: vmr v27, v5 +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: vmr v28, v6 +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: vmr v29, v7 +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: vmr v30, v8 +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: vmr v31, v9 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v25 +; CHECK-NEXT: mtvsrd v24, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v26 +; CHECK-NEXT: xxmrghd v25, vs0, v24 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v27 +; CHECK-NEXT: mtvsrd v26, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v28 +; CHECK-NEXT: xxmrghd v27, vs0, v26 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v29 +; CHECK-NEXT: mtvsrd v28, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v30 +; CHECK-NEXT: xxmrghd v29, vs0, v28 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v31 +; CHECK-NEXT: mtvsrd v30, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: vmr v4, v29 +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: vmr v3, v27 +; CHECK-NEXT: vmr v2, v25 +; CHECK-NEXT: xxmrghd v5, vs0, v30 +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 176 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v8f128: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: stdu r1, -176(r1) +; FAST-NEXT: li r3, 48 +; FAST-NEXT: std r0, 192(r1) +; FAST-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 64 +; FAST-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 80 +; FAST-NEXT: vmr v25, v3 +; FAST-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 96 +; FAST-NEXT: vmr v26, v4 +; FAST-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 112 +; FAST-NEXT: vmr v27, v5 +; FAST-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 128 +; FAST-NEXT: vmr v28, v6 +; FAST-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 144 +; FAST-NEXT: vmr v29, v7 +; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 160 +; FAST-NEXT: vmr v30, v8 +; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: vmr v31, v9 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v25 +; FAST-NEXT: mtvsrd v24, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v26 +; FAST-NEXT: xxmrghd v25, vs0, v24 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v27 +; FAST-NEXT: mtvsrd v26, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v28 +; FAST-NEXT: xxmrghd v27, vs0, v26 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v29 +; FAST-NEXT: mtvsrd v28, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v30 +; FAST-NEXT: xxmrghd v29, vs0, v28 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v31 +; FAST-NEXT: mtvsrd v30, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 160 +; FAST-NEXT: vmr v4, v29 +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 144 +; FAST-NEXT: vmr v3, v27 +; FAST-NEXT: vmr v2, v25 +; FAST-NEXT: xxmrghd v5, vs0, v30 +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 128 +; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 112 +; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 96 +; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 80 +; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 64 +; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 48 +; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 176 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <8 x i64> @llvm.lrint.v8i64.v8f128(<8 x fp128> %x) + ret <8 x i64> %a +} +declare <8 x i64> @llvm.lrint.v8i64.v8f128(<8 x fp128>) + +define <16 x i64> @lrint_v16i64_v16f128(<16 x fp128> %x) nounwind { +; BE-LABEL: lrint_v16i64_v16f128: +; BE: # %bb.0: +; BE-NEXT: mflr r0 +; BE-NEXT: stdu r1, -496(r1) +; BE-NEXT: li r3, 304 +; BE-NEXT: std r0, 512(r1) +; BE-NEXT: stxvd2x v20, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 320 +; BE-NEXT: stxvd2x v21, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 336 +; BE-NEXT: vmr v21, v2 +; BE-NEXT: vmr v2, v3 +; BE-NEXT: stxvd2x v22, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 352 +; BE-NEXT: vmr v22, v4 +; BE-NEXT: stxvd2x v23, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 368 +; BE-NEXT: vmr v23, v5 +; BE-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 384 +; BE-NEXT: vmr v24, v6 +; BE-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 400 +; BE-NEXT: vmr v25, v7 +; BE-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 416 +; BE-NEXT: vmr v26, v8 +; BE-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 432 +; BE-NEXT: vmr v27, v9 +; BE-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 448 +; BE-NEXT: vmr v28, v11 +; BE-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 464 +; BE-NEXT: vmr v29, v10 +; BE-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 480 +; BE-NEXT: vmr v30, v13 +; BE-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; BE-NEXT: li r3, 128 +; BE-NEXT: stxvd2x v12, r1, r3 # 16-byte Folded Spill +; BE-NEXT: addi r3, r1, 768 +; BE-NEXT: lxvw4x vs0, 0, r3 +; BE-NEXT: li r3, 160 +; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; BE-NEXT: addi r3, r1, 784 +; BE-NEXT: lxvw4x vs0, 0, r3 +; BE-NEXT: li r3, 144 +; BE-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; BE-NEXT: addi r3, r1, 736 +; BE-NEXT: lxvw4x v20, 0, r3 +; BE-NEXT: addi r3, r1, 752 +; BE-NEXT: lxvw4x v31, 0, r3 +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v21 +; BE-NEXT: std r3, 184(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v23 +; BE-NEXT: std r3, 176(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v22 +; BE-NEXT: std r3, 200(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v25 +; BE-NEXT: std r3, 192(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v24 +; BE-NEXT: std r3, 216(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v27 +; BE-NEXT: std r3, 208(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v26 +; BE-NEXT: std r3, 232(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v28 +; BE-NEXT: std r3, 224(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v29 +; BE-NEXT: std r3, 248(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v30 +; BE-NEXT: std r3, 240(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 264(r1) +; BE-NEXT: li r3, 128 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v31 +; BE-NEXT: std r3, 256(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: vmr v2, v20 +; BE-NEXT: std r3, 280(r1) +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 272(r1) +; BE-NEXT: li r3, 144 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 296(r1) +; BE-NEXT: li r3, 160 +; BE-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; BE-NEXT: bl lrintf128 +; BE-NEXT: nop +; BE-NEXT: std r3, 288(r1) +; BE-NEXT: addi r3, r1, 176 +; BE-NEXT: lxvd2x v2, 0, r3 +; BE-NEXT: addi r3, r1, 192 +; BE-NEXT: lxvd2x v3, 0, r3 +; BE-NEXT: addi r3, r1, 208 +; BE-NEXT: lxvd2x v4, 0, r3 +; BE-NEXT: addi r3, r1, 224 +; BE-NEXT: lxvd2x v5, 0, r3 +; BE-NEXT: addi r3, r1, 240 +; BE-NEXT: lxvd2x v6, 0, r3 +; BE-NEXT: addi r3, r1, 256 +; BE-NEXT: lxvd2x v7, 0, r3 +; BE-NEXT: addi r3, r1, 272 +; BE-NEXT: lxvd2x v8, 0, r3 +; BE-NEXT: addi r3, r1, 288 +; BE-NEXT: lxvd2x v9, 0, r3 +; BE-NEXT: li r3, 480 +; BE-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 464 +; BE-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 448 +; BE-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 432 +; BE-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 416 +; BE-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 400 +; BE-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 384 +; BE-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 368 +; BE-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 352 +; BE-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 336 +; BE-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 320 +; BE-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload +; BE-NEXT: li r3, 304 +; BE-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload +; BE-NEXT: addi r1, r1, 496 +; BE-NEXT: ld r0, 16(r1) +; BE-NEXT: mtlr r0 +; BE-NEXT: blr +; +; CHECK-LABEL: lrint_v16i64_v16f128: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -304(r1) +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: std r0, 320(r1) +; CHECK-NEXT: stxvd2x v20, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: stxvd2x v21, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: vmr v21, v4 +; CHECK-NEXT: stxvd2x v22, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: vmr v22, v6 +; CHECK-NEXT: stxvd2x v23, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 176 +; CHECK-NEXT: vmr v23, v8 +; CHECK-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 192 +; CHECK-NEXT: vmr v24, v9 +; CHECK-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 208 +; CHECK-NEXT: vmr v25, v7 +; CHECK-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 224 +; CHECK-NEXT: vmr v26, v10 +; CHECK-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 240 +; CHECK-NEXT: vmr v27, v5 +; CHECK-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 256 +; CHECK-NEXT: vmr v28, v11 +; CHECK-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 272 +; CHECK-NEXT: vmr v29, v12 +; CHECK-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 288 +; CHECK-NEXT: vmr v30, v3 +; CHECK-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: stxvd2x v13, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: addi r3, r1, 576 +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: addi r3, r1, 560 +; CHECK-NEXT: lxvd2x vs1, 0, r3 +; CHECK-NEXT: addi r3, r1, 544 +; CHECK-NEXT: lxvd2x vs2, 0, r3 +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: xxswapd vs0, vs0 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: xxswapd vs0, vs1 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: xxswapd vs0, vs2 +; CHECK-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; CHECK-NEXT: addi r3, r1, 528 +; CHECK-NEXT: lxvd2x vs0, 0, r3 +; CHECK-NEXT: xxswapd v31, vs0 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v30 +; CHECK-NEXT: mtvsrd v20, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v21 +; CHECK-NEXT: xxmrghd v30, vs0, v20 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v27 +; CHECK-NEXT: mtvsrd v21, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v22 +; CHECK-NEXT: xxmrghd v27, vs0, v21 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v25 +; CHECK-NEXT: mtvsrd v22, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v23 +; CHECK-NEXT: xxmrghd v25, vs0, v22 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v24 +; CHECK-NEXT: mtvsrd v23, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v26 +; CHECK-NEXT: xxmrghd v24, vs0, v23 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: vmr v2, v28 +; CHECK-NEXT: mtvsrd v26, r3 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v29 +; CHECK-NEXT: xxmrghd v28, vs0, v26 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v29, r3 +; CHECK-NEXT: li r3, 64 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: vmr v2, v31 +; CHECK-NEXT: xxmrghd v29, vs0, v29 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v31, r3 +; CHECK-NEXT: li r3, 48 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 80 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: xxmrghd v31, vs0, v31 +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtvsrd v26, r3 +; CHECK-NEXT: li r3, 96 +; CHECK-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: bl lrintf128 +; CHECK-NEXT: nop +; CHECK-NEXT: mtfprd f0, r3 +; CHECK-NEXT: li r3, 288 +; CHECK-NEXT: vmr v8, v31 +; CHECK-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 272 +; CHECK-NEXT: vmr v2, v30 +; CHECK-NEXT: vmr v7, v29 +; CHECK-NEXT: vmr v6, v28 +; CHECK-NEXT: vmr v3, v27 +; CHECK-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 256 +; CHECK-NEXT: vmr v4, v25 +; CHECK-NEXT: vmr v5, v24 +; CHECK-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 240 +; CHECK-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 224 +; CHECK-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 208 +; CHECK-NEXT: xxmrghd v9, vs0, v26 +; CHECK-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 192 +; CHECK-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 176 +; CHECK-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 160 +; CHECK-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 144 +; CHECK-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 128 +; CHECK-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: li r3, 112 +; CHECK-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload +; CHECK-NEXT: addi r1, r1, 304 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +; +; FAST-LABEL: lrint_v16i64_v16f128: +; FAST: # %bb.0: +; FAST-NEXT: mflr r0 +; FAST-NEXT: stdu r1, -304(r1) +; FAST-NEXT: li r3, 112 +; FAST-NEXT: std r0, 320(r1) +; FAST-NEXT: stxvd2x v20, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 128 +; FAST-NEXT: stxvd2x v21, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 144 +; FAST-NEXT: vmr v21, v4 +; FAST-NEXT: stxvd2x v22, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 160 +; FAST-NEXT: vmr v22, v6 +; FAST-NEXT: stxvd2x v23, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 176 +; FAST-NEXT: vmr v23, v8 +; FAST-NEXT: stxvd2x v24, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 192 +; FAST-NEXT: vmr v24, v9 +; FAST-NEXT: stxvd2x v25, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 208 +; FAST-NEXT: vmr v25, v7 +; FAST-NEXT: stxvd2x v26, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 224 +; FAST-NEXT: vmr v26, v10 +; FAST-NEXT: stxvd2x v27, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 240 +; FAST-NEXT: vmr v27, v5 +; FAST-NEXT: stxvd2x v28, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 256 +; FAST-NEXT: vmr v28, v11 +; FAST-NEXT: stxvd2x v29, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 272 +; FAST-NEXT: vmr v29, v12 +; FAST-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 288 +; FAST-NEXT: vmr v30, v3 +; FAST-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 64 +; FAST-NEXT: stxvd2x v13, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: addi r3, r1, 576 +; FAST-NEXT: lxvd2x vs0, 0, r3 +; FAST-NEXT: addi r3, r1, 560 +; FAST-NEXT: lxvd2x vs1, 0, r3 +; FAST-NEXT: addi r3, r1, 544 +; FAST-NEXT: lxvd2x vs2, 0, r3 +; FAST-NEXT: li r3, 96 +; FAST-NEXT: xxswapd vs0, vs0 +; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 80 +; FAST-NEXT: xxswapd vs0, vs1 +; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: li r3, 48 +; FAST-NEXT: xxswapd vs0, vs2 +; FAST-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill +; FAST-NEXT: addi r3, r1, 528 +; FAST-NEXT: lxvd2x vs0, 0, r3 +; FAST-NEXT: xxswapd v31, vs0 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v30 +; FAST-NEXT: mtvsrd v20, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v21 +; FAST-NEXT: xxmrghd v30, vs0, v20 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v27 +; FAST-NEXT: mtvsrd v21, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v22 +; FAST-NEXT: xxmrghd v27, vs0, v21 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v25 +; FAST-NEXT: mtvsrd v22, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v23 +; FAST-NEXT: xxmrghd v25, vs0, v22 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v24 +; FAST-NEXT: mtvsrd v23, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v26 +; FAST-NEXT: xxmrghd v24, vs0, v23 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: vmr v2, v28 +; FAST-NEXT: mtvsrd v26, r3 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v29 +; FAST-NEXT: xxmrghd v28, vs0, v26 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtvsrd v29, r3 +; FAST-NEXT: li r3, 64 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: vmr v2, v31 +; FAST-NEXT: xxmrghd v29, vs0, v29 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtvsrd v31, r3 +; FAST-NEXT: li r3, 48 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 80 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: xxmrghd v31, vs0, v31 +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtvsrd v26, r3 +; FAST-NEXT: li r3, 96 +; FAST-NEXT: lxvd2x v2, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: bl lrintf128 +; FAST-NEXT: nop +; FAST-NEXT: mtfprd f0, r3 +; FAST-NEXT: li r3, 288 +; FAST-NEXT: vmr v8, v31 +; FAST-NEXT: lxvd2x v31, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 272 +; FAST-NEXT: vmr v2, v30 +; FAST-NEXT: vmr v7, v29 +; FAST-NEXT: vmr v6, v28 +; FAST-NEXT: vmr v3, v27 +; FAST-NEXT: lxvd2x v30, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 256 +; FAST-NEXT: vmr v4, v25 +; FAST-NEXT: vmr v5, v24 +; FAST-NEXT: lxvd2x v29, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 240 +; FAST-NEXT: lxvd2x v28, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 224 +; FAST-NEXT: lxvd2x v27, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 208 +; FAST-NEXT: xxmrghd v9, vs0, v26 +; FAST-NEXT: lxvd2x v26, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 192 +; FAST-NEXT: lxvd2x v25, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 176 +; FAST-NEXT: lxvd2x v24, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 160 +; FAST-NEXT: lxvd2x v23, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 144 +; FAST-NEXT: lxvd2x v22, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 128 +; FAST-NEXT: lxvd2x v21, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: li r3, 112 +; FAST-NEXT: lxvd2x v20, r1, r3 # 16-byte Folded Reload +; FAST-NEXT: addi r1, r1, 304 +; FAST-NEXT: ld r0, 16(r1) +; FAST-NEXT: mtlr r0 +; FAST-NEXT: blr + %a = call <16 x i64> @llvm.lrint.v16i64.v16f128(<16 x fp128> %x) + ret <16 x i64> %a +} +declare <16 x i64> @llvm.lrint.v16i64.v16f128(<16 x fp128>) |