diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2024-06-07 10:18:45 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-07 10:18:45 +0100 |
commit | c0b468523c9c5517e61a197e7c1fe6cb52f8999c (patch) | |
tree | 2a87f6ce768452718800c967ffb353a0bdf4c683 /llvm/lib | |
parent | 3453dedfaf565429bc06c6d58533926f793ad650 (diff) | |
download | llvm-c0b468523c9c5517e61a197e7c1fe6cb52f8999c.zip llvm-c0b468523c9c5517e61a197e7c1fe6cb52f8999c.tar.gz llvm-c0b468523c9c5517e61a197e7c1fe6cb52f8999c.tar.bz2 |
[ARM] Add NEON support for ISD::ABDS/ABDU nodes. (#94504)
As noted on #94466, NEON has signed/unsigned absolute-difference (VABD) instructions, but they are currently only selected via intrinsics, plus some custom VABDL patterns.
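This patch marks basic ISD::ABDS/ABDU as legal for the NEON integer vector types (other than v1i64/v2i64), lowers the integer vabds/vabdu intrinsics to those nodes, and updates the integer TableGen patterns to use abds/abdu instead of the intrinsics.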
Fixes #94466
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/ARM/ARMISelLowering.cpp | 16 | ||||
-rw-r--r-- | llvm/lib/Target/ARM/ARMInstrNEON.td | 24 |
2 files changed, 25 insertions, 15 deletions
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 5212d2c..78aaaca 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -205,9 +205,9 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) { setOperationAction(ISD::SDIVREM, VT, Expand); setOperationAction(ISD::UDIVREM, VT, Expand); - if (!VT.isFloatingPoint() && - VT != MVT::v2i64 && VT != MVT::v1i64) - for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) + if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64) + for (auto Opcode : {ISD::ABS, ISD::ABDS, ISD::ABDU, ISD::SMIN, ISD::SMAX, + ISD::UMIN, ISD::UMAX}) setOperationAction(Opcode, VT, Legal); if (!VT.isFloatingPoint()) for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT}) @@ -4174,7 +4174,15 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, } case Intrinsic::arm_neon_vabs: return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(), - Op.getOperand(1)); + Op.getOperand(1)); + case Intrinsic::arm_neon_vabds: + if (Op.getValueType().isInteger()) + return DAG.getNode(ISD::ABDS, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + return SDValue(); + case Intrinsic::arm_neon_vabdu: + return DAG.getNode(ISD::ABDU, SDLoc(Op), Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); case Intrinsic::arm_neon_vmulls: case Intrinsic::arm_neon_vmullu: { unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index 21a5817..c600478 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -5640,10 +5640,10 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, // VABD : Vector Absolute Difference defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vabd", "s", int_arm_neon_vabds, 1>; + "vabd", 
"s", abds, 1>; defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vabd", "u", int_arm_neon_vabdu, 1>; + "vabd", "u", abdu, 1>; def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND, "vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>; def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ, @@ -5657,20 +5657,22 @@ def VABDhq : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ, // VABDL : Vector Absolute Difference Long (Q = | D - D |) defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, - "vabdl", "s", int_arm_neon_vabds, zext, 1>; + "vabdl", "s", abds, zext, 1>; defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, - "vabdl", "u", int_arm_neon_vabdu, zext, 1>; + "vabdl", "u", abdu, zext, 1>; let Predicates = [HasNEON] in { -def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))), +def : Pat<(v8i16 (zext (abdu (v8i8 DPR:$opA), (v8i8 DPR:$opB)))), (VABDLuv8i16 DPR:$opA, DPR:$opB)>; -def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))), +def : Pat<(v4i32 (zext (abdu (v4i16 DPR:$opA), (v4i16 DPR:$opB)))), (VABDLuv4i32 DPR:$opA, DPR:$opB)>; +def : Pat<(v2i64 (zext (abdu (v2i32 DPR:$opA), (v2i32 DPR:$opB)))), + (VABDLuv2i64 DPR:$opA, DPR:$opB)>; } // ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the // shift/xor pattern for ABS. - +// TODO: Remove me. 
def abd_shr : PatFrag<(ops node:$in1, node:$in2, node:$shift), (ARMvshrsImm (sub (zext node:$in1), @@ -5686,15 +5688,15 @@ def : Pat<(xor (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)), // VABA : Vector Absolute Difference and Accumulate defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, - "vaba", "s", int_arm_neon_vabds, add>; + "vaba", "s", abds, add>; defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ, - "vaba", "u", int_arm_neon_vabdu, add>; + "vaba", "u", abdu, add>; // VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |) defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD, - "vabal", "s", int_arm_neon_vabds, zext, add>; + "vabal", "s", abds, zext, add>; defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD, - "vabal", "u", int_arm_neon_vabdu, zext, add>; + "vabal", "u", abdu, zext, add>; // Vector Maximum and Minimum. |