diff options
author | chuongg3 <chuong.goh@arm.com> | 2023-11-13 10:32:24 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-13 10:32:24 +0000 |
commit | a604c4b562e99470e397f050f1e8707f923ebed7 (patch) | |
tree | 9252db2607a58aa1a93cf4e45ab488e6935c347f | |
parent | de58aa83726d465aecca66763892e62d8e8b636e (diff) | |
download | llvm-a604c4b562e99470e397f050f1e8707f923ebed7.zip llvm-a604c4b562e99470e397f050f1e8707f923ebed7.tar.gz llvm-a604c4b562e99470e397f050f1e8707f923ebed7.tar.bz2 |
[AArch64][GlobalISel] TableGen Selection for G_VECREDUCE_ADD (#70785)
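Instruction selection for G_VECREDUCE_ADD now uses TableGen patterns: a GINodeEquiv maps G_VECREDUCE_ADD to vecreduce_add, new patterns in AArch64InstrInfo.td select ADDV/ADDP for each supported vector type, and the hand-written selectReduction in AArch64InstructionSelector.cpp is removed.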
-rw-r--r-- | llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td | 1 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64InstrInfo.td | 16 | ||||
-rw-r--r-- | llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp | 45 |
3 files changed, 17 insertions, 45 deletions
diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index 7adc154..f28c1ed 100644 --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -177,6 +177,7 @@ def : GINodeEquiv<G_VECREDUCE_UMIN, vecreduce_umin>; def : GINodeEquiv<G_VECREDUCE_UMAX, vecreduce_umax>; def : GINodeEquiv<G_VECREDUCE_SMIN, vecreduce_smin>; def : GINodeEquiv<G_VECREDUCE_SMAX, vecreduce_smax>; +def : GINodeEquiv<G_VECREDUCE_ADD, vecreduce_add>; def : GINodeEquiv<G_STRICT_FADD, strict_fadd>; def : GINodeEquiv<G_STRICT_FSUB, strict_fsub>; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index c01a3bd..290c79f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -6676,6 +6676,22 @@ def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))), ssub))>; } +// For vecreduce_add, used by GlobalISel not SDAG +def : Pat<(i8 (vecreduce_add (v8i8 V64:$Rn))), + (i8 (ADDVv8i8v V64:$Rn))>; +def : Pat<(i8 (vecreduce_add (v16i8 V128:$Rn))), + (i8 (ADDVv16i8v V128:$Rn))>; +def : Pat<(i16 (vecreduce_add (v4i16 V64:$Rn))), + (i16 (ADDVv4i16v V64:$Rn))>; +def : Pat<(i16 (vecreduce_add (v8i16 V128:$Rn))), + (i16 (ADDVv8i16v V128:$Rn))>; +def : Pat<(i32 (vecreduce_add (v2i32 V64:$Rn))), + (i32 (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub))>; +def : Pat<(i32 (vecreduce_add (v4i32 V128:$Rn))), + (i32 (ADDVv4i32v V128:$Rn))>; +def : Pat<(i64 (vecreduce_add (v2i64 V128:$Rn))), + (i64 (ADDPv2i64p V128:$Rn))>; + defm : SIMDAcrossLanesSignedIntrinsic<"ADDV", AArch64saddv>; // vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))), diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index 
3206844..bdaae4d 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -3558,8 +3558,6 @@ bool AArch64InstructionSelector::select(MachineInstr &I) { return selectConcatVectors(I, MRI); case TargetOpcode::G_JUMP_TABLE: return selectJumpTable(I, MRI); - case TargetOpcode::G_VECREDUCE_ADD: - return selectReduction(I, MRI); case TargetOpcode::G_MEMCPY: case TargetOpcode::G_MEMCPY_INLINE: case TargetOpcode::G_MEMMOVE: @@ -3578,49 +3576,6 @@ bool AArch64InstructionSelector::selectAndRestoreState(MachineInstr &I) { return Success; } -bool AArch64InstructionSelector::selectReduction(MachineInstr &I, - MachineRegisterInfo &MRI) { - Register VecReg = I.getOperand(1).getReg(); - LLT VecTy = MRI.getType(VecReg); - if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) { - // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit - // a subregister copy afterwards. - if (VecTy == LLT::fixed_vector(2, 32)) { - Register DstReg = I.getOperand(0).getReg(); - auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass}, - {VecReg, VecReg}); - auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) - .addReg(AddP.getReg(0), 0, AArch64::ssub) - .getReg(0); - RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI); - I.eraseFromParent(); - return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI); - } - - unsigned Opc = 0; - if (VecTy == LLT::fixed_vector(16, 8)) - Opc = AArch64::ADDVv16i8v; - else if (VecTy == LLT::fixed_vector(8, 8)) - Opc = AArch64::ADDVv8i8v; - else if (VecTy == LLT::fixed_vector(8, 16)) - Opc = AArch64::ADDVv8i16v; - else if (VecTy == LLT::fixed_vector(4, 16)) - Opc = AArch64::ADDVv4i16v; - else if (VecTy == LLT::fixed_vector(4, 32)) - Opc = AArch64::ADDVv4i32v; - else if (VecTy == LLT::fixed_vector(2, 64)) - Opc = AArch64::ADDPv2i64p; - else { - LLVM_DEBUG(dbgs() << "Unhandled type for add reduction"); - return false; - } - 
I.setDesc(TII.get(Opc)); - return constrainSelectedInstRegOperands(I, TII, TRI, RBI); - } - - return false; -} - bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI, MachineRegisterInfo &MRI) { unsigned Mopcode; |