diff options
author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2024-06-14 10:22:13 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-14 10:22:13 +0100 |
commit | 4bccd25467ce591869dad41c8b7c550093c20f1b (patch) | |
tree | 39362e5ae8173dfb9d545eb785bac84bb2c7ad07 | |
parent | ab0d01a5f0f17f20b106b0f6cc6d1b7d13cf4d65 (diff) | |
download | llvm-4bccd25467ce591869dad41c8b7c550093c20f1b.zip llvm-4bccd25467ce591869dad41c8b7c550093c20f1b.tar.gz llvm-4bccd25467ce591869dad41c8b7c550093c20f1b.tar.bz2 |
[AArch64] LowerAVG - fallback to default expansion (#95416)
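The generic TargetLowering::expandAVG expansions now match or improve on the code produced by the AArch64-specific override, so the hand-written non-SVE2 expansion in LowerAVG is removed and the function returns an empty SDValue to fall back to the default expansion.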
-rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 46 |
-rw-r--r-- | llvm/test/CodeGen/AArch64/sve-hadd.ll | 192 |
2 files changed, 74 insertions, 164 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index af8b9d9..394b741 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -15007,55 +15007,13 @@ AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, return SDValue(); } -// When x and y are extended, lower: -// avgfloor(x, y) -> (x + y) >> 1 -// avgceil(x, y) -> (x + y + 1) >> 1 - -// Otherwise, lower to: -// avgfloor(x, y) -> (x >> 1) + (y >> 1) + (x & y & 1) -// avgceil(x, y) -> (x >> 1) + (y >> 1) + ((x || y) & 1) SDValue AArch64TargetLowering::LowerAVG(SDValue Op, SelectionDAG &DAG, unsigned NewOp) const { if (Subtarget->hasSVE2()) return LowerToPredicatedOp(Op, DAG, NewOp); - SDLoc dl(Op); - SDValue OpA = Op->getOperand(0); - SDValue OpB = Op->getOperand(1); - EVT VT = Op.getValueType(); - bool IsCeil = - (Op->getOpcode() == ISD::AVGCEILS || Op->getOpcode() == ISD::AVGCEILU); - bool IsSigned = - (Op->getOpcode() == ISD::AVGFLOORS || Op->getOpcode() == ISD::AVGCEILS); - unsigned ShiftOpc = IsSigned ? 
ISD::SRA : ISD::SRL; - - assert(VT.isScalableVector() && "Only expect to lower scalable vector op!"); - - auto IsZeroExtended = [&DAG](SDValue &Node) { - KnownBits Known = DAG.computeKnownBits(Node, 0); - return Known.Zero.isSignBitSet(); - }; - - auto IsSignExtended = [&DAG](SDValue &Node) { - return (DAG.ComputeNumSignBits(Node, 0) > 1); - }; - - SDValue ConstantOne = DAG.getConstant(1, dl, VT); - if ((!IsSigned && IsZeroExtended(OpA) && IsZeroExtended(OpB)) || - (IsSigned && IsSignExtended(OpA) && IsSignExtended(OpB))) { - SDValue Add = DAG.getNode(ISD::ADD, dl, VT, OpA, OpB); - if (IsCeil) - Add = DAG.getNode(ISD::ADD, dl, VT, Add, ConstantOne); - return DAG.getNode(ShiftOpc, dl, VT, Add, ConstantOne); - } - - SDValue ShiftOpA = DAG.getNode(ShiftOpc, dl, VT, OpA, ConstantOne); - SDValue ShiftOpB = DAG.getNode(ShiftOpc, dl, VT, OpB, ConstantOne); - - SDValue tmp = DAG.getNode(IsCeil ? ISD::OR : ISD::AND, dl, VT, OpA, OpB); - tmp = DAG.getNode(ISD::AND, dl, VT, tmp, ConstantOne); - SDValue Add = DAG.getNode(ISD::ADD, dl, VT, ShiftOpA, ShiftOpB); - return DAG.getNode(ISD::ADD, dl, VT, Add, tmp); + // Default to expand. 
+ return SDValue(); } SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op, diff --git a/llvm/test/CodeGen/AArch64/sve-hadd.ll b/llvm/test/CodeGen/AArch64/sve-hadd.ll index 3fead88..6017e13 100644 --- a/llvm/test/CodeGen/AArch64/sve-hadd.ll +++ b/llvm/test/CodeGen/AArch64/sve-hadd.ll @@ -5,12 +5,10 @@ define <vscale x 2 x i64> @hadds_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) { ; SVE-LABEL: hadds_v2i64: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.d, z1.d, #1 -; SVE-NEXT: asr z3.d, z0.d, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.d, z3.d, z2.d -; SVE-NEXT: and z0.d, z0.d, #0x1 -; SVE-NEXT: add z0.d, z1.d, z0.d +; SVE-NEXT: asr z1.d, z2.d, #1 +; SVE-NEXT: add z0.d, z0.d, z1.d ; SVE-NEXT: ret ; ; SVE2-LABEL: hadds_v2i64: @@ -30,12 +28,10 @@ entry: define <vscale x 2 x i64> @hadds_v2i64_lsh(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) { ; SVE-LABEL: hadds_v2i64_lsh: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.d, z1.d, #1 -; SVE-NEXT: asr z3.d, z0.d, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.d, z3.d, z2.d -; SVE-NEXT: and z0.d, z0.d, #0x1 -; SVE-NEXT: add z0.d, z1.d, z0.d +; SVE-NEXT: asr z1.d, z2.d, #1 +; SVE-NEXT: add z0.d, z0.d, z1.d ; SVE-NEXT: ret ; ; SVE2-LABEL: hadds_v2i64_lsh: @@ -55,12 +51,10 @@ entry: define <vscale x 2 x i64> @haddu_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) { ; SVE-LABEL: haddu_v2i64: ; SVE: // %bb.0: // %entry -; SVE-NEXT: lsr z2.d, z1.d, #1 -; SVE-NEXT: lsr z3.d, z0.d, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.d, z3.d, z2.d -; SVE-NEXT: and z0.d, z0.d, #0x1 -; SVE-NEXT: add z0.d, z1.d, z0.d +; SVE-NEXT: lsr z1.d, z2.d, #1 +; SVE-NEXT: add z0.d, z0.d, z1.d ; SVE-NEXT: ret ; ; SVE2-LABEL: haddu_v2i64: @@ -146,12 +140,10 @@ entry: define <vscale x 4 x i32> @hadds_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) { ; SVE-LABEL: hadds_v4i32: ; SVE: // %bb.0: // %entry 
-; SVE-NEXT: asr z2.s, z1.s, #1 -; SVE-NEXT: asr z3.s, z0.s, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.s, z3.s, z2.s -; SVE-NEXT: and z0.s, z0.s, #0x1 -; SVE-NEXT: add z0.s, z1.s, z0.s +; SVE-NEXT: asr z1.s, z2.s, #1 +; SVE-NEXT: add z0.s, z0.s, z1.s ; SVE-NEXT: ret ; ; SVE2-LABEL: hadds_v4i32: @@ -171,12 +163,10 @@ entry: define <vscale x 4 x i32> @hadds_v4i32_lsh(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) { ; SVE-LABEL: hadds_v4i32_lsh: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.s, z1.s, #1 -; SVE-NEXT: asr z3.s, z0.s, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.s, z3.s, z2.s -; SVE-NEXT: and z0.s, z0.s, #0x1 -; SVE-NEXT: add z0.s, z1.s, z0.s +; SVE-NEXT: asr z1.s, z2.s, #1 +; SVE-NEXT: add z0.s, z0.s, z1.s ; SVE-NEXT: ret ; ; SVE2-LABEL: hadds_v4i32_lsh: @@ -196,12 +186,10 @@ entry: define <vscale x 4 x i32> @haddu_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) { ; SVE-LABEL: haddu_v4i32: ; SVE: // %bb.0: // %entry -; SVE-NEXT: lsr z2.s, z1.s, #1 -; SVE-NEXT: lsr z3.s, z0.s, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.s, z3.s, z2.s -; SVE-NEXT: and z0.s, z0.s, #0x1 -; SVE-NEXT: add z0.s, z1.s, z0.s +; SVE-NEXT: lsr z1.s, z2.s, #1 +; SVE-NEXT: add z0.s, z0.s, z1.s ; SVE-NEXT: ret ; ; SVE2-LABEL: haddu_v4i32: @@ -360,12 +348,10 @@ entry: define <vscale x 8 x i16> @hadds_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) { ; SVE-LABEL: hadds_v8i16: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.h, z1.h, #1 -; SVE-NEXT: asr z3.h, z0.h, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.h, z3.h, z2.h -; SVE-NEXT: and z0.h, z0.h, #0x1 -; SVE-NEXT: add z0.h, z1.h, z0.h +; SVE-NEXT: asr z1.h, z2.h, #1 +; SVE-NEXT: add z0.h, z0.h, z1.h ; SVE-NEXT: ret ; ; SVE2-LABEL: hadds_v8i16: @@ -385,12 +371,10 @@ entry: define <vscale x 8 x i16> @hadds_v8i16_lsh(<vscale x 8 x i16> %s0, 
<vscale x 8 x i16> %s1) { ; SVE-LABEL: hadds_v8i16_lsh: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.h, z1.h, #1 -; SVE-NEXT: asr z3.h, z0.h, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.h, z3.h, z2.h -; SVE-NEXT: and z0.h, z0.h, #0x1 -; SVE-NEXT: add z0.h, z1.h, z0.h +; SVE-NEXT: asr z1.h, z2.h, #1 +; SVE-NEXT: add z0.h, z0.h, z1.h ; SVE-NEXT: ret ; ; SVE2-LABEL: hadds_v8i16_lsh: @@ -410,12 +394,10 @@ entry: define <vscale x 8 x i16> @haddu_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) { ; SVE-LABEL: haddu_v8i16: ; SVE: // %bb.0: // %entry -; SVE-NEXT: lsr z2.h, z1.h, #1 -; SVE-NEXT: lsr z3.h, z0.h, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.h, z3.h, z2.h -; SVE-NEXT: and z0.h, z0.h, #0x1 -; SVE-NEXT: add z0.h, z1.h, z0.h +; SVE-NEXT: lsr z1.h, z2.h, #1 +; SVE-NEXT: add z0.h, z0.h, z1.h ; SVE-NEXT: ret ; ; SVE2-LABEL: haddu_v8i16: @@ -574,12 +556,10 @@ entry: define <vscale x 16 x i8> @hadds_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) { ; SVE-LABEL: hadds_v16i8: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.b, z1.b, #1 -; SVE-NEXT: asr z3.b, z0.b, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.b, z3.b, z2.b -; SVE-NEXT: and z0.b, z0.b, #0x1 -; SVE-NEXT: add z0.b, z1.b, z0.b +; SVE-NEXT: asr z1.b, z2.b, #1 +; SVE-NEXT: add z0.b, z0.b, z1.b ; SVE-NEXT: ret ; ; SVE2-LABEL: hadds_v16i8: @@ -599,12 +579,10 @@ entry: define <vscale x 16 x i8> @hadds_v16i8_lsh(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) { ; SVE-LABEL: hadds_v16i8_lsh: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.b, z1.b, #1 -; SVE-NEXT: asr z3.b, z0.b, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.b, z3.b, z2.b -; SVE-NEXT: and z0.b, z0.b, #0x1 -; SVE-NEXT: add z0.b, z1.b, z0.b +; SVE-NEXT: asr z1.b, z2.b, #1 +; SVE-NEXT: add z0.b, z0.b, z1.b ; SVE-NEXT: ret ; ; SVE2-LABEL: hadds_v16i8_lsh: @@ -624,12 
+602,10 @@ entry: define <vscale x 16 x i8> @haddu_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) { ; SVE-LABEL: haddu_v16i8: ; SVE: // %bb.0: // %entry -; SVE-NEXT: lsr z2.b, z1.b, #1 -; SVE-NEXT: lsr z3.b, z0.b, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: and z0.d, z0.d, z1.d -; SVE-NEXT: add z1.b, z3.b, z2.b -; SVE-NEXT: and z0.b, z0.b, #0x1 -; SVE-NEXT: add z0.b, z1.b, z0.b +; SVE-NEXT: lsr z1.b, z2.b, #1 +; SVE-NEXT: add z0.b, z0.b, z1.b ; SVE-NEXT: ret ; ; SVE2-LABEL: haddu_v16i8: @@ -649,12 +625,10 @@ entry: define <vscale x 2 x i64> @rhadds_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) { ; SVE-LABEL: rhadds_v2i64: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.d, z1.d, #1 -; SVE-NEXT: asr z3.d, z0.d, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add z1.d, z3.d, z2.d -; SVE-NEXT: and z0.d, z0.d, #0x1 -; SVE-NEXT: add z0.d, z1.d, z0.d +; SVE-NEXT: asr z1.d, z2.d, #1 +; SVE-NEXT: sub z0.d, z0.d, z1.d ; SVE-NEXT: ret ; ; SVE2-LABEL: rhadds_v2i64: @@ -675,12 +649,10 @@ entry: define <vscale x 2 x i64> @rhadds_v2i64_lsh(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) { ; SVE-LABEL: rhadds_v2i64_lsh: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.d, z1.d, #1 -; SVE-NEXT: asr z3.d, z0.d, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add z1.d, z3.d, z2.d -; SVE-NEXT: and z0.d, z0.d, #0x1 -; SVE-NEXT: add z0.d, z1.d, z0.d +; SVE-NEXT: asr z1.d, z2.d, #1 +; SVE-NEXT: sub z0.d, z0.d, z1.d ; SVE-NEXT: ret ; ; SVE2-LABEL: rhadds_v2i64_lsh: @@ -701,12 +673,10 @@ entry: define <vscale x 2 x i64> @rhaddu_v2i64(<vscale x 2 x i64> %s0, <vscale x 2 x i64> %s1) { ; SVE-LABEL: rhaddu_v2i64: ; SVE: // %bb.0: // %entry -; SVE-NEXT: lsr z2.d, z1.d, #1 -; SVE-NEXT: lsr z3.d, z0.d, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add z1.d, z3.d, z2.d -; SVE-NEXT: and z0.d, z0.d, #0x1 -; SVE-NEXT: add z0.d, z1.d, z0.d +; SVE-NEXT: lsr z1.d, z2.d, #1 +; 
SVE-NEXT: sub z0.d, z0.d, z1.d ; SVE-NEXT: ret ; ; SVE2-LABEL: rhaddu_v2i64: @@ -805,12 +775,10 @@ entry: define <vscale x 4 x i32> @rhadds_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) { ; SVE-LABEL: rhadds_v4i32: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.s, z1.s, #1 -; SVE-NEXT: asr z3.s, z0.s, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add z1.s, z3.s, z2.s -; SVE-NEXT: and z0.s, z0.s, #0x1 -; SVE-NEXT: add z0.s, z1.s, z0.s +; SVE-NEXT: asr z1.s, z2.s, #1 +; SVE-NEXT: sub z0.s, z0.s, z1.s ; SVE-NEXT: ret ; ; SVE2-LABEL: rhadds_v4i32: @@ -831,12 +799,10 @@ entry: define <vscale x 4 x i32> @rhadds_v4i32_lsh(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) { ; SVE-LABEL: rhadds_v4i32_lsh: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.s, z1.s, #1 -; SVE-NEXT: asr z3.s, z0.s, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add z1.s, z3.s, z2.s -; SVE-NEXT: and z0.s, z0.s, #0x1 -; SVE-NEXT: add z0.s, z1.s, z0.s +; SVE-NEXT: asr z1.s, z2.s, #1 +; SVE-NEXT: sub z0.s, z0.s, z1.s ; SVE-NEXT: ret ; ; SVE2-LABEL: rhadds_v4i32_lsh: @@ -857,12 +823,10 @@ entry: define <vscale x 4 x i32> @rhaddu_v4i32(<vscale x 4 x i32> %s0, <vscale x 4 x i32> %s1) { ; SVE-LABEL: rhaddu_v4i32: ; SVE: // %bb.0: // %entry -; SVE-NEXT: lsr z2.s, z1.s, #1 -; SVE-NEXT: lsr z3.s, z0.s, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add z1.s, z3.s, z2.s -; SVE-NEXT: and z0.s, z0.s, #0x1 -; SVE-NEXT: add z0.s, z1.s, z0.s +; SVE-NEXT: lsr z1.s, z2.s, #1 +; SVE-NEXT: sub z0.s, z0.s, z1.s ; SVE-NEXT: ret ; ; SVE2-LABEL: rhaddu_v4i32: @@ -1040,12 +1004,10 @@ entry: define <vscale x 8 x i16> @rhadds_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) { ; SVE-LABEL: rhadds_v8i16: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.h, z1.h, #1 -; SVE-NEXT: asr z3.h, z0.h, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add z1.h, z3.h, z2.h -; SVE-NEXT: and 
z0.h, z0.h, #0x1 -; SVE-NEXT: add z0.h, z1.h, z0.h +; SVE-NEXT: asr z1.h, z2.h, #1 +; SVE-NEXT: sub z0.h, z0.h, z1.h ; SVE-NEXT: ret ; ; SVE2-LABEL: rhadds_v8i16: @@ -1066,12 +1028,10 @@ entry: define <vscale x 8 x i16> @rhadds_v8i16_lsh(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) { ; SVE-LABEL: rhadds_v8i16_lsh: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.h, z1.h, #1 -; SVE-NEXT: asr z3.h, z0.h, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add z1.h, z3.h, z2.h -; SVE-NEXT: and z0.h, z0.h, #0x1 -; SVE-NEXT: add z0.h, z1.h, z0.h +; SVE-NEXT: asr z1.h, z2.h, #1 +; SVE-NEXT: sub z0.h, z0.h, z1.h ; SVE-NEXT: ret ; ; SVE2-LABEL: rhadds_v8i16_lsh: @@ -1092,12 +1052,10 @@ entry: define <vscale x 8 x i16> @rhaddu_v8i16(<vscale x 8 x i16> %s0, <vscale x 8 x i16> %s1) { ; SVE-LABEL: rhaddu_v8i16: ; SVE: // %bb.0: // %entry -; SVE-NEXT: lsr z2.h, z1.h, #1 -; SVE-NEXT: lsr z3.h, z0.h, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add z1.h, z3.h, z2.h -; SVE-NEXT: and z0.h, z0.h, #0x1 -; SVE-NEXT: add z0.h, z1.h, z0.h +; SVE-NEXT: lsr z1.h, z2.h, #1 +; SVE-NEXT: sub z0.h, z0.h, z1.h ; SVE-NEXT: ret ; ; SVE2-LABEL: rhaddu_v8i16: @@ -1275,12 +1233,10 @@ entry: define <vscale x 16 x i8> @rhadds_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) { ; SVE-LABEL: rhadds_v16i8: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.b, z1.b, #1 -; SVE-NEXT: asr z3.b, z0.b, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add z1.b, z3.b, z2.b -; SVE-NEXT: and z0.b, z0.b, #0x1 -; SVE-NEXT: add z0.b, z1.b, z0.b +; SVE-NEXT: asr z1.b, z2.b, #1 +; SVE-NEXT: sub z0.b, z0.b, z1.b ; SVE-NEXT: ret ; ; SVE2-LABEL: rhadds_v16i8: @@ -1301,12 +1257,10 @@ entry: define <vscale x 16 x i8> @rhadds_v16i8_lsh(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) { ; SVE-LABEL: rhadds_v16i8_lsh: ; SVE: // %bb.0: // %entry -; SVE-NEXT: asr z2.b, z1.b, #1 -; SVE-NEXT: asr z3.b, z0.b, #1 +; SVE-NEXT: eor 
z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add z1.b, z3.b, z2.b -; SVE-NEXT: and z0.b, z0.b, #0x1 -; SVE-NEXT: add z0.b, z1.b, z0.b +; SVE-NEXT: asr z1.b, z2.b, #1 +; SVE-NEXT: sub z0.b, z0.b, z1.b ; SVE-NEXT: ret ; ; SVE2-LABEL: rhadds_v16i8_lsh: @@ -1327,12 +1281,10 @@ entry: define <vscale x 16 x i8> @rhaddu_v16i8(<vscale x 16 x i8> %s0, <vscale x 16 x i8> %s1) { ; SVE-LABEL: rhaddu_v16i8: ; SVE: // %bb.0: // %entry -; SVE-NEXT: lsr z2.b, z1.b, #1 -; SVE-NEXT: lsr z3.b, z0.b, #1 +; SVE-NEXT: eor z2.d, z0.d, z1.d ; SVE-NEXT: orr z0.d, z0.d, z1.d -; SVE-NEXT: add z1.b, z3.b, z2.b -; SVE-NEXT: and z0.b, z0.b, #0x1 -; SVE-NEXT: add z0.b, z1.b, z0.b +; SVE-NEXT: lsr z1.b, z2.b, #1 +; SVE-NEXT: sub z0.b, z0.b, z1.b ; SVE-NEXT: ret ; ; SVE2-LABEL: rhaddu_v16i8: |