diff options
author | Benjamin Maxwell <benjamin.maxwell@arm.com> | 2024-11-21 11:02:07 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-21 11:02:07 +0000 |
commit | 83c7784c35918ce037823f29d29918c5542cdf9c (patch) | |
tree | 94a4fe162260b916f61ef005e41fdb51e681d2b2 | |
parent | 5bdee35544eb21762857390014598748c64ad485 (diff) | |
download | llvm-83c7784c35918ce037823f29d29918c5542cdf9c.zip llvm-83c7784c35918ce037823f29d29918c5542cdf9c.tar.gz llvm-83c7784c35918ce037823f29d29918c5542cdf9c.tar.bz2 |
[AArch64] Don't emit Neon in streaming[-compatible] functions with -fzero-call-used-regs (#116995)
Previously, with `-fzero-call-used-regs` clang/LLVM would incorrectly
emit Neon instructions in streaming functions, and streaming-compatible
functions without SVE.
With this change:
* In streaming functions, Z/P registers will be zeroed
* In streaming-compatible functions without SVE, D registers will be zeroed
- (As Neon vector instructions, including `movi v..`, are illegal there)
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 11 | ||||
-rw-r--r-- | llvm/test/CodeGen/AArch64/zero-call-used-regs.ll | 542 |
3 files changed, 361 insertions, 194 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 2162449..d667396 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1010,7 +1010,7 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero, BitVector GPRsToZero(TRI.getNumRegs()); BitVector FPRsToZero(TRI.getNumRegs()); - bool HasSVE = STI.hasSVE(); + bool HasSVE = STI.isSVEorStreamingSVEAvailable(); for (MCRegister Reg : RegsToZero.set_bits()) { if (TRI.isGeneralPurposeRegister(MF, Reg)) { // For GPRs, we only care to clear out the 64-bit register. diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index a470c03..8a3ed10 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -9700,13 +9700,20 @@ void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB, if (TRI.isGeneralPurposeRegister(MF, Reg)) { BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0); - } else if (STI.hasSVE()) { + } else if (STI.isSVEorStreamingSVEAvailable()) { BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg) .addImm(0) .addImm(0); - } else { + } else if (STI.isNeonAvailable()) { BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg) .addImm(0); + } else { + // This is a streaming-compatible function without SVE. We don't have full + // Neon (just FPRs), so we can at most use the first 64-bit sub-register. + // So given `movi v..` would be illegal use `fmov d..` instead. 
+ assert(STI.hasNEON() && "Expected to have NEON."); + Register Reg64 = TRI.getSubReg(Reg, AArch64::dsub); + BuildMI(MBB, Iter, DL, get(AArch64::FMOVD0), Reg64); } } diff --git a/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll b/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll index 75a7c7f..4799ea3 100644 --- a/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll +++ b/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll @@ -1,6 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,DEFAULT -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown -mattr=+sve | FileCheck %s --check-prefixes=CHECK,SVE +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-unknown-unknown -mattr=+sve | FileCheck %s --check-prefixes=CHECK,SVE-OR-SME +; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SVE-OR-SME +; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,STREAMING-COMPAT + +target triple = "aarch64-unknown-linux-gnu" @result = dso_local global i32 0, align 4 @@ -156,32 +160,55 @@ define dso_local i32 @all_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) lo ; DEFAULT-NEXT: movi v7.2d, #0000000000000000 ; DEFAULT-NEXT: ret ; -; SVE-LABEL: all_arg: -; SVE: // %bb.0: // %entry -; SVE-NEXT: mul w8, w1, w0 -; SVE-NEXT: mov x1, #0 // =0x0 -; SVE-NEXT: mov x3, #0 // =0x0 -; SVE-NEXT: mov x4, #0 // =0x0 -; SVE-NEXT: mov x5, #0 // =0x0 -; SVE-NEXT: mov x6, #0 // =0x0 -; SVE-NEXT: mov x7, #0 // =0x0 -; SVE-NEXT: mov x18, #0 // =0x0 -; SVE-NEXT: mov z0.d, #0 // =0x0 -; SVE-NEXT: orr w0, w8, w2 -; SVE-NEXT: mov x2, #0 // =0x0 -; SVE-NEXT: mov x8, #0 // =0x0 -; SVE-NEXT: mov z1.d, #0 // =0x0 -; SVE-NEXT: mov z2.d, #0 // =0x0 -; SVE-NEXT: mov z3.d, #0 // =0x0 -; SVE-NEXT: mov z4.d, #0 // =0x0 -; SVE-NEXT: mov z5.d, #0 // =0x0 -; SVE-NEXT: mov z6.d, #0 // =0x0 -; SVE-NEXT: mov 
z7.d, #0 // =0x0 -; SVE-NEXT: pfalse p0.b -; SVE-NEXT: pfalse p1.b -; SVE-NEXT: pfalse p2.b -; SVE-NEXT: pfalse p3.b -; SVE-NEXT: ret +; SVE-OR-SME-LABEL: all_arg: +; SVE-OR-SME: // %bb.0: // %entry +; SVE-OR-SME-NEXT: mul w8, w1, w0 +; SVE-OR-SME-NEXT: mov x1, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x3, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x4, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x5, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x6, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x7, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x18, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z0.d, #0 // =0x0 +; SVE-OR-SME-NEXT: orr w0, w8, w2 +; SVE-OR-SME-NEXT: mov x2, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x8, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z1.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z2.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z3.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z4.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z5.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z6.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z7.d, #0 // =0x0 +; SVE-OR-SME-NEXT: pfalse p0.b +; SVE-OR-SME-NEXT: pfalse p1.b +; SVE-OR-SME-NEXT: pfalse p2.b +; SVE-OR-SME-NEXT: pfalse p3.b +; SVE-OR-SME-NEXT: ret +; +; STREAMING-COMPAT-LABEL: all_arg: +; STREAMING-COMPAT: // %bb.0: // %entry +; STREAMING-COMPAT-NEXT: mul w8, w1, w0 +; STREAMING-COMPAT-NEXT: mov x1, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x3, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x4, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x5, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x6, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x7, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x18, #0 // =0x0 +; STREAMING-COMPAT-NEXT: fmov d0, xzr +; STREAMING-COMPAT-NEXT: orr w0, w8, w2 +; STREAMING-COMPAT-NEXT: mov x2, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x8, #0 // =0x0 +; STREAMING-COMPAT-NEXT: fmov d1, xzr +; STREAMING-COMPAT-NEXT: fmov d2, xzr +; STREAMING-COMPAT-NEXT: fmov d3, xzr +; STREAMING-COMPAT-NEXT: fmov d4, xzr +; STREAMING-COMPAT-NEXT: fmov d5, xzr +; STREAMING-COMPAT-NEXT: fmov d6, xzr +; STREAMING-COMPAT-NEXT: fmov d7, xzr +; 
STREAMING-COMPAT-NEXT: ret entry: %mul = mul nsw i32 %b, %a @@ -238,69 +265,117 @@ define dso_local i32 @all(i32 noundef %a, i32 noundef %b, i32 noundef %c) local_ ; DEFAULT-NEXT: movi v31.2d, #0000000000000000 ; DEFAULT-NEXT: ret ; -; SVE-LABEL: all: -; SVE: // %bb.0: // %entry -; SVE-NEXT: mul w8, w1, w0 -; SVE-NEXT: mov x1, #0 // =0x0 -; SVE-NEXT: mov x3, #0 // =0x0 -; SVE-NEXT: mov x4, #0 // =0x0 -; SVE-NEXT: mov x5, #0 // =0x0 -; SVE-NEXT: mov x6, #0 // =0x0 -; SVE-NEXT: mov x7, #0 // =0x0 -; SVE-NEXT: mov x9, #0 // =0x0 -; SVE-NEXT: mov x10, #0 // =0x0 -; SVE-NEXT: orr w0, w8, w2 -; SVE-NEXT: mov x2, #0 // =0x0 -; SVE-NEXT: mov x8, #0 // =0x0 -; SVE-NEXT: mov x11, #0 // =0x0 -; SVE-NEXT: mov x12, #0 // =0x0 -; SVE-NEXT: mov x13, #0 // =0x0 -; SVE-NEXT: mov x14, #0 // =0x0 -; SVE-NEXT: mov x15, #0 // =0x0 -; SVE-NEXT: mov x16, #0 // =0x0 -; SVE-NEXT: mov x17, #0 // =0x0 -; SVE-NEXT: mov x18, #0 // =0x0 -; SVE-NEXT: mov z0.d, #0 // =0x0 -; SVE-NEXT: mov z1.d, #0 // =0x0 -; SVE-NEXT: mov z2.d, #0 // =0x0 -; SVE-NEXT: mov z3.d, #0 // =0x0 -; SVE-NEXT: mov z4.d, #0 // =0x0 -; SVE-NEXT: mov z5.d, #0 // =0x0 -; SVE-NEXT: mov z6.d, #0 // =0x0 -; SVE-NEXT: mov z7.d, #0 // =0x0 -; SVE-NEXT: mov z16.d, #0 // =0x0 -; SVE-NEXT: mov z17.d, #0 // =0x0 -; SVE-NEXT: mov z18.d, #0 // =0x0 -; SVE-NEXT: mov z19.d, #0 // =0x0 -; SVE-NEXT: mov z20.d, #0 // =0x0 -; SVE-NEXT: mov z21.d, #0 // =0x0 -; SVE-NEXT: mov z22.d, #0 // =0x0 -; SVE-NEXT: mov z23.d, #0 // =0x0 -; SVE-NEXT: mov z24.d, #0 // =0x0 -; SVE-NEXT: mov z25.d, #0 // =0x0 -; SVE-NEXT: mov z26.d, #0 // =0x0 -; SVE-NEXT: mov z27.d, #0 // =0x0 -; SVE-NEXT: mov z28.d, #0 // =0x0 -; SVE-NEXT: mov z29.d, #0 // =0x0 -; SVE-NEXT: mov z30.d, #0 // =0x0 -; SVE-NEXT: mov z31.d, #0 // =0x0 -; SVE-NEXT: pfalse p0.b -; SVE-NEXT: pfalse p1.b -; SVE-NEXT: pfalse p2.b -; SVE-NEXT: pfalse p3.b -; SVE-NEXT: pfalse p4.b -; SVE-NEXT: pfalse p5.b -; SVE-NEXT: pfalse p6.b -; SVE-NEXT: pfalse p7.b -; SVE-NEXT: pfalse p8.b -; SVE-NEXT: pfalse 
p9.b -; SVE-NEXT: pfalse p10.b -; SVE-NEXT: pfalse p11.b -; SVE-NEXT: pfalse p12.b -; SVE-NEXT: pfalse p13.b -; SVE-NEXT: pfalse p14.b -; SVE-NEXT: pfalse p15.b -; SVE-NEXT: ret +; SVE-OR-SME-LABEL: all: +; SVE-OR-SME: // %bb.0: // %entry +; SVE-OR-SME-NEXT: mul w8, w1, w0 +; SVE-OR-SME-NEXT: mov x1, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x3, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x4, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x5, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x6, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x7, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x9, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x10, #0 // =0x0 +; SVE-OR-SME-NEXT: orr w0, w8, w2 +; SVE-OR-SME-NEXT: mov x2, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x8, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x11, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x12, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x13, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x14, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x15, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x16, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x17, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x18, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z0.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z1.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z2.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z3.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z4.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z5.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z6.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z7.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z16.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z17.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z18.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z19.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z20.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z21.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z22.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z23.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z24.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z25.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z26.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z27.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z28.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z29.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z30.d, #0 // =0x0 +; SVE-OR-SME-NEXT: 
mov z31.d, #0 // =0x0 +; SVE-OR-SME-NEXT: pfalse p0.b +; SVE-OR-SME-NEXT: pfalse p1.b +; SVE-OR-SME-NEXT: pfalse p2.b +; SVE-OR-SME-NEXT: pfalse p3.b +; SVE-OR-SME-NEXT: pfalse p4.b +; SVE-OR-SME-NEXT: pfalse p5.b +; SVE-OR-SME-NEXT: pfalse p6.b +; SVE-OR-SME-NEXT: pfalse p7.b +; SVE-OR-SME-NEXT: pfalse p8.b +; SVE-OR-SME-NEXT: pfalse p9.b +; SVE-OR-SME-NEXT: pfalse p10.b +; SVE-OR-SME-NEXT: pfalse p11.b +; SVE-OR-SME-NEXT: pfalse p12.b +; SVE-OR-SME-NEXT: pfalse p13.b +; SVE-OR-SME-NEXT: pfalse p14.b +; SVE-OR-SME-NEXT: pfalse p15.b +; SVE-OR-SME-NEXT: ret +; +; STREAMING-COMPAT-LABEL: all: +; STREAMING-COMPAT: // %bb.0: // %entry +; STREAMING-COMPAT-NEXT: mul w8, w1, w0 +; STREAMING-COMPAT-NEXT: mov x1, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x3, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x4, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x5, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x6, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x7, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x9, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x10, #0 // =0x0 +; STREAMING-COMPAT-NEXT: orr w0, w8, w2 +; STREAMING-COMPAT-NEXT: mov x2, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x8, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x11, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x12, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x13, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x14, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x15, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x16, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x17, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x18, #0 // =0x0 +; STREAMING-COMPAT-NEXT: fmov d0, xzr +; STREAMING-COMPAT-NEXT: fmov d1, xzr +; STREAMING-COMPAT-NEXT: fmov d2, xzr +; STREAMING-COMPAT-NEXT: fmov d3, xzr +; STREAMING-COMPAT-NEXT: fmov d4, xzr +; STREAMING-COMPAT-NEXT: fmov d5, xzr +; STREAMING-COMPAT-NEXT: fmov d6, xzr +; STREAMING-COMPAT-NEXT: fmov d7, xzr +; STREAMING-COMPAT-NEXT: fmov d16, xzr +; STREAMING-COMPAT-NEXT: fmov d17, xzr +; STREAMING-COMPAT-NEXT: fmov d18, xzr +; STREAMING-COMPAT-NEXT: 
fmov d19, xzr +; STREAMING-COMPAT-NEXT: fmov d20, xzr +; STREAMING-COMPAT-NEXT: fmov d21, xzr +; STREAMING-COMPAT-NEXT: fmov d22, xzr +; STREAMING-COMPAT-NEXT: fmov d23, xzr +; STREAMING-COMPAT-NEXT: fmov d24, xzr +; STREAMING-COMPAT-NEXT: fmov d25, xzr +; STREAMING-COMPAT-NEXT: fmov d26, xzr +; STREAMING-COMPAT-NEXT: fmov d27, xzr +; STREAMING-COMPAT-NEXT: fmov d28, xzr +; STREAMING-COMPAT-NEXT: fmov d29, xzr +; STREAMING-COMPAT-NEXT: fmov d30, xzr +; STREAMING-COMPAT-NEXT: fmov d31, xzr +; STREAMING-COMPAT-NEXT: ret entry: %mul = mul nsw i32 %b, %a @@ -355,12 +430,19 @@ define dso_local double @used_arg_float(double noundef %a, float noundef %b) loc ; DEFAULT-NEXT: movi v1.2d, #0000000000000000 ; DEFAULT-NEXT: ret ; -; SVE-LABEL: used_arg_float: -; SVE: // %bb.0: // %entry -; SVE-NEXT: fcvt d1, s1 -; SVE-NEXT: fmul d0, d1, d0 -; SVE-NEXT: mov z1.d, #0 // =0x0 -; SVE-NEXT: ret +; SVE-OR-SME-LABEL: used_arg_float: +; SVE-OR-SME: // %bb.0: // %entry +; SVE-OR-SME-NEXT: fcvt d1, s1 +; SVE-OR-SME-NEXT: fmul d0, d1, d0 +; SVE-OR-SME-NEXT: mov z1.d, #0 // =0x0 +; SVE-OR-SME-NEXT: ret +; +; STREAMING-COMPAT-LABEL: used_arg_float: +; STREAMING-COMPAT: // %bb.0: // %entry +; STREAMING-COMPAT-NEXT: fcvt d1, s1 +; STREAMING-COMPAT-NEXT: fmul d0, d1, d0 +; STREAMING-COMPAT-NEXT: fmov d1, xzr +; STREAMING-COMPAT-NEXT: ret entry: %conv = fpext float %b to double @@ -376,12 +458,19 @@ define dso_local double @used_float(double noundef %a, float noundef %b) local_u ; DEFAULT-NEXT: movi v1.2d, #0000000000000000 ; DEFAULT-NEXT: ret ; -; SVE-LABEL: used_float: -; SVE: // %bb.0: // %entry -; SVE-NEXT: fcvt d1, s1 -; SVE-NEXT: fmul d0, d1, d0 -; SVE-NEXT: mov z1.d, #0 // =0x0 -; SVE-NEXT: ret +; SVE-OR-SME-LABEL: used_float: +; SVE-OR-SME: // %bb.0: // %entry +; SVE-OR-SME-NEXT: fcvt d1, s1 +; SVE-OR-SME-NEXT: fmul d0, d1, d0 +; SVE-OR-SME-NEXT: mov z1.d, #0 // =0x0 +; SVE-OR-SME-NEXT: ret +; +; STREAMING-COMPAT-LABEL: used_float: +; STREAMING-COMPAT: // %bb.0: // %entry +; 
STREAMING-COMPAT-NEXT: fcvt d1, s1 +; STREAMING-COMPAT-NEXT: fmul d0, d1, d0 +; STREAMING-COMPAT-NEXT: fmov d1, xzr +; STREAMING-COMPAT-NEXT: ret entry: %conv = fpext float %b to double @@ -468,32 +557,55 @@ define dso_local double @all_arg_float(double noundef %a, float noundef %b) loca ; DEFAULT-NEXT: movi v7.2d, #0000000000000000 ; DEFAULT-NEXT: ret ; -; SVE-LABEL: all_arg_float: -; SVE: // %bb.0: // %entry -; SVE-NEXT: fcvt d1, s1 -; SVE-NEXT: fmul d0, d1, d0 -; SVE-NEXT: mov x0, #0 // =0x0 -; SVE-NEXT: mov x1, #0 // =0x0 -; SVE-NEXT: mov x2, #0 // =0x0 -; SVE-NEXT: mov x3, #0 // =0x0 -; SVE-NEXT: mov x4, #0 // =0x0 -; SVE-NEXT: mov x5, #0 // =0x0 -; SVE-NEXT: mov x6, #0 // =0x0 -; SVE-NEXT: mov x7, #0 // =0x0 -; SVE-NEXT: mov x8, #0 // =0x0 -; SVE-NEXT: mov x18, #0 // =0x0 -; SVE-NEXT: mov z1.d, #0 // =0x0 -; SVE-NEXT: mov z2.d, #0 // =0x0 -; SVE-NEXT: mov z3.d, #0 // =0x0 -; SVE-NEXT: mov z4.d, #0 // =0x0 -; SVE-NEXT: mov z5.d, #0 // =0x0 -; SVE-NEXT: mov z6.d, #0 // =0x0 -; SVE-NEXT: mov z7.d, #0 // =0x0 -; SVE-NEXT: pfalse p0.b -; SVE-NEXT: pfalse p1.b -; SVE-NEXT: pfalse p2.b -; SVE-NEXT: pfalse p3.b -; SVE-NEXT: ret +; SVE-OR-SME-LABEL: all_arg_float: +; SVE-OR-SME: // %bb.0: // %entry +; SVE-OR-SME-NEXT: fcvt d1, s1 +; SVE-OR-SME-NEXT: fmul d0, d1, d0 +; SVE-OR-SME-NEXT: mov x0, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x1, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x2, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x3, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x4, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x5, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x6, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x7, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x8, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x18, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z1.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z2.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z3.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z4.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z5.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z6.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z7.d, #0 // =0x0 +; SVE-OR-SME-NEXT: pfalse p0.b +; 
SVE-OR-SME-NEXT: pfalse p1.b +; SVE-OR-SME-NEXT: pfalse p2.b +; SVE-OR-SME-NEXT: pfalse p3.b +; SVE-OR-SME-NEXT: ret +; +; STREAMING-COMPAT-LABEL: all_arg_float: +; STREAMING-COMPAT: // %bb.0: // %entry +; STREAMING-COMPAT-NEXT: fcvt d1, s1 +; STREAMING-COMPAT-NEXT: fmul d0, d1, d0 +; STREAMING-COMPAT-NEXT: mov x0, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x1, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x2, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x3, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x4, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x5, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x6, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x7, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x8, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x18, #0 // =0x0 +; STREAMING-COMPAT-NEXT: fmov d1, xzr +; STREAMING-COMPAT-NEXT: fmov d2, xzr +; STREAMING-COMPAT-NEXT: fmov d3, xzr +; STREAMING-COMPAT-NEXT: fmov d4, xzr +; STREAMING-COMPAT-NEXT: fmov d5, xzr +; STREAMING-COMPAT-NEXT: fmov d6, xzr +; STREAMING-COMPAT-NEXT: fmov d7, xzr +; STREAMING-COMPAT-NEXT: ret entry: %conv = fpext float %b to double @@ -550,69 +662,117 @@ define dso_local double @all_float(double noundef %a, float noundef %b) local_un ; DEFAULT-NEXT: movi v31.2d, #0000000000000000 ; DEFAULT-NEXT: ret ; -; SVE-LABEL: all_float: -; SVE: // %bb.0: // %entry -; SVE-NEXT: fcvt d1, s1 -; SVE-NEXT: fmul d0, d1, d0 -; SVE-NEXT: mov x0, #0 // =0x0 -; SVE-NEXT: mov x1, #0 // =0x0 -; SVE-NEXT: mov x2, #0 // =0x0 -; SVE-NEXT: mov x3, #0 // =0x0 -; SVE-NEXT: mov x4, #0 // =0x0 -; SVE-NEXT: mov x5, #0 // =0x0 -; SVE-NEXT: mov x6, #0 // =0x0 -; SVE-NEXT: mov x7, #0 // =0x0 -; SVE-NEXT: mov x8, #0 // =0x0 -; SVE-NEXT: mov x9, #0 // =0x0 -; SVE-NEXT: mov x10, #0 // =0x0 -; SVE-NEXT: mov x11, #0 // =0x0 -; SVE-NEXT: mov x12, #0 // =0x0 -; SVE-NEXT: mov x13, #0 // =0x0 -; SVE-NEXT: mov x14, #0 // =0x0 -; SVE-NEXT: mov x15, #0 // =0x0 -; SVE-NEXT: mov x16, #0 // =0x0 -; SVE-NEXT: mov x17, #0 // =0x0 -; SVE-NEXT: mov x18, #0 // =0x0 -; SVE-NEXT: mov z1.d, #0 
// =0x0 -; SVE-NEXT: mov z2.d, #0 // =0x0 -; SVE-NEXT: mov z3.d, #0 // =0x0 -; SVE-NEXT: mov z4.d, #0 // =0x0 -; SVE-NEXT: mov z5.d, #0 // =0x0 -; SVE-NEXT: mov z6.d, #0 // =0x0 -; SVE-NEXT: mov z7.d, #0 // =0x0 -; SVE-NEXT: mov z16.d, #0 // =0x0 -; SVE-NEXT: mov z17.d, #0 // =0x0 -; SVE-NEXT: mov z18.d, #0 // =0x0 -; SVE-NEXT: mov z19.d, #0 // =0x0 -; SVE-NEXT: mov z20.d, #0 // =0x0 -; SVE-NEXT: mov z21.d, #0 // =0x0 -; SVE-NEXT: mov z22.d, #0 // =0x0 -; SVE-NEXT: mov z23.d, #0 // =0x0 -; SVE-NEXT: mov z24.d, #0 // =0x0 -; SVE-NEXT: mov z25.d, #0 // =0x0 -; SVE-NEXT: mov z26.d, #0 // =0x0 -; SVE-NEXT: mov z27.d, #0 // =0x0 -; SVE-NEXT: mov z28.d, #0 // =0x0 -; SVE-NEXT: mov z29.d, #0 // =0x0 -; SVE-NEXT: mov z30.d, #0 // =0x0 -; SVE-NEXT: mov z31.d, #0 // =0x0 -; SVE-NEXT: pfalse p0.b -; SVE-NEXT: pfalse p1.b -; SVE-NEXT: pfalse p2.b -; SVE-NEXT: pfalse p3.b -; SVE-NEXT: pfalse p4.b -; SVE-NEXT: pfalse p5.b -; SVE-NEXT: pfalse p6.b -; SVE-NEXT: pfalse p7.b -; SVE-NEXT: pfalse p8.b -; SVE-NEXT: pfalse p9.b -; SVE-NEXT: pfalse p10.b -; SVE-NEXT: pfalse p11.b -; SVE-NEXT: pfalse p12.b -; SVE-NEXT: pfalse p13.b -; SVE-NEXT: pfalse p14.b -; SVE-NEXT: pfalse p15.b -; SVE-NEXT: ret +; SVE-OR-SME-LABEL: all_float: +; SVE-OR-SME: // %bb.0: // %entry +; SVE-OR-SME-NEXT: fcvt d1, s1 +; SVE-OR-SME-NEXT: fmul d0, d1, d0 +; SVE-OR-SME-NEXT: mov x0, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x1, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x2, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x3, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x4, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x5, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x6, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x7, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x8, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x9, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x10, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x11, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x12, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x13, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x14, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x15, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x16, #0 // =0x0 
+; SVE-OR-SME-NEXT: mov x17, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x18, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z1.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z2.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z3.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z4.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z5.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z6.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z7.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z16.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z17.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z18.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z19.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z20.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z21.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z22.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z23.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z24.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z25.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z26.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z27.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z28.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z29.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z30.d, #0 // =0x0 +; SVE-OR-SME-NEXT: mov z31.d, #0 // =0x0 +; SVE-OR-SME-NEXT: pfalse p0.b +; SVE-OR-SME-NEXT: pfalse p1.b +; SVE-OR-SME-NEXT: pfalse p2.b +; SVE-OR-SME-NEXT: pfalse p3.b +; SVE-OR-SME-NEXT: pfalse p4.b +; SVE-OR-SME-NEXT: pfalse p5.b +; SVE-OR-SME-NEXT: pfalse p6.b +; SVE-OR-SME-NEXT: pfalse p7.b +; SVE-OR-SME-NEXT: pfalse p8.b +; SVE-OR-SME-NEXT: pfalse p9.b +; SVE-OR-SME-NEXT: pfalse p10.b +; SVE-OR-SME-NEXT: pfalse p11.b +; SVE-OR-SME-NEXT: pfalse p12.b +; SVE-OR-SME-NEXT: pfalse p13.b +; SVE-OR-SME-NEXT: pfalse p14.b +; SVE-OR-SME-NEXT: pfalse p15.b +; SVE-OR-SME-NEXT: ret +; +; STREAMING-COMPAT-LABEL: all_float: +; STREAMING-COMPAT: // %bb.0: // %entry +; STREAMING-COMPAT-NEXT: fcvt d1, s1 +; STREAMING-COMPAT-NEXT: fmul d0, d1, d0 +; STREAMING-COMPAT-NEXT: mov x0, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x1, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x2, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x3, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x4, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov 
x5, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x6, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x7, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x8, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x9, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x10, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x11, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x12, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x13, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x14, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x15, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x16, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x17, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x18, #0 // =0x0 +; STREAMING-COMPAT-NEXT: fmov d1, xzr +; STREAMING-COMPAT-NEXT: fmov d2, xzr +; STREAMING-COMPAT-NEXT: fmov d3, xzr +; STREAMING-COMPAT-NEXT: fmov d4, xzr +; STREAMING-COMPAT-NEXT: fmov d5, xzr +; STREAMING-COMPAT-NEXT: fmov d6, xzr +; STREAMING-COMPAT-NEXT: fmov d7, xzr +; STREAMING-COMPAT-NEXT: fmov d16, xzr +; STREAMING-COMPAT-NEXT: fmov d17, xzr +; STREAMING-COMPAT-NEXT: fmov d18, xzr +; STREAMING-COMPAT-NEXT: fmov d19, xzr +; STREAMING-COMPAT-NEXT: fmov d20, xzr +; STREAMING-COMPAT-NEXT: fmov d21, xzr +; STREAMING-COMPAT-NEXT: fmov d22, xzr +; STREAMING-COMPAT-NEXT: fmov d23, xzr +; STREAMING-COMPAT-NEXT: fmov d24, xzr +; STREAMING-COMPAT-NEXT: fmov d25, xzr +; STREAMING-COMPAT-NEXT: fmov d26, xzr +; STREAMING-COMPAT-NEXT: fmov d27, xzr +; STREAMING-COMPAT-NEXT: fmov d28, xzr +; STREAMING-COMPAT-NEXT: fmov d29, xzr +; STREAMING-COMPAT-NEXT: fmov d30, xzr +; STREAMING-COMPAT-NEXT: fmov d31, xzr +; STREAMING-COMPAT-NEXT: ret entry: %conv = fpext float %b to double |