aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPengfei Li <Pengfei.Li2@arm.com>2025-04-29 19:14:42 +0100
committerRichard Sandiford <richard.sandiford@arm.com>2025-04-29 19:14:42 +0100
commitd84fbc516ea57de7e88fce76ff6f342ee808c02e (patch)
tree583180a5b5238762dda3856b99a148ea6840d25d
parent69669180d29cc420b1b1ac86530a4f9573748d81 (diff)
downloadgcc-d84fbc516ea57de7e88fce76ff6f342ee808c02e.zip
gcc-d84fbc516ea57de7e88fce76ff6f342ee808c02e.tar.gz
gcc-d84fbc516ea57de7e88fce76ff6f342ee808c02e.tar.bz2
simplify-rtx: Combine bitwise operations in more cases
This patch transforms RTL expressions of the form (subreg (not X)) into
(not (subreg X)) if the subreg is an operand of another binary logical
operation. This transformation can expose opportunities to combine more
logical operations.

For example, it improves the codegen of the following AArch64 NEON
intrinsics:

    vandq_s64(vreinterpretq_s64_s32(vmvnq_s32(a)),
              vreinterpretq_s64_s32(b));

from:

    not v0.16b, v0.16b
    and v0.16b, v0.16b, v1.16b

to:

    bic v0.16b, v1.16b, v0.16b

Regression tested on x86_64-linux-gnu, arm-linux-gnueabihf and
aarch64-linux-gnu.

gcc/ChangeLog:

* simplify-rtx.cc (non_paradoxical_subreg_not_p): New function for
pattern match of (subreg (not X)).
(simplify_with_subreg_not): New function for simplification.
-rw-r--r-- gcc/simplify-rtx.cc | 50
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c | 17
2 files changed, 67 insertions, 0 deletions
diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index d9aa049..7c4d8e6 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -3032,6 +3032,44 @@ match_plus_neg_pattern (rtx op0, rtx op1, machine_mode mode)
return false;
}
+/* Return true if OP has the form (subreg (not X)) where the subreg is not
+   paradoxical.  Return false for every other rtx.  */
+
+static bool
+non_paradoxical_subreg_not_p (rtx op)
+{
+  /* Only a SUBREG can match; anything else is rejected before SUBREG_REG
+     is looked at.  */
+  if (GET_CODE (op) != SUBREG)
+    return false;
+
+  if (paradoxical_subreg_p (op))
+    return false;
+
+  return GET_CODE (SUBREG_REG (op)) == NOT;
+}
+
+/* Convert (binop (subreg (not X)) Y) into (binop (not (subreg X)) Y), or
+   (binop X (subreg (not Y))) into (binop X (not (subreg Y))) to expose
+   opportunities to combine another binary logical operation with NOT.
+
+   BINOP is the code of the binary operation, MODE is the mode of the
+   operation and OP0 and OP1 are its operands.  OP0 is checked first, so
+   if both operands match, only OP0 is rewritten.
+
+   Return the rewritten expression, or NULL_RTX if neither operand is a
+   non-paradoxical (subreg (not X)) or if the subreg cannot be applied
+   directly to the operand of the NOT.  */
+
+static rtx
+simplify_with_subreg_not (rtx_code binop, machine_mode mode, rtx op0, rtx op1)
+{
+  rtx opn = NULL_RTX;
+  if (non_paradoxical_subreg_not_p (op0))
+    opn = op0;
+  else if (non_paradoxical_subreg_not_p (op1))
+    opn = op1;
+
+  if (opn == NULL_RTX)
+    return NULL_RTX;
+
+  /* Re-apply the original subreg (same inner mode and byte offset) to the
+     operand of the NOT rather than to the NOT itself.  */
+  rtx new_subreg = simplify_gen_subreg (mode,
+                                        XEXP (SUBREG_REG (opn), 0),
+                                        GET_MODE (SUBREG_REG (opn)),
+                                        SUBREG_BYTE (opn));
+
+  /* simplify_gen_subreg returns NULL_RTX when it cannot form the subreg.
+     Building a NOT around a null operand would crash, so leave the
+     expression unchanged instead.  */
+  if (!new_subreg)
+    return NULL_RTX;
+
+  rtx new_not = simplify_gen_unary (NOT, mode, new_subreg, mode);
+  if (opn == op0)
+    return simplify_gen_binary (binop, mode, new_not, op1);
+  else
+    return simplify_gen_binary (binop, mode, op0, new_not);
+}
+
/* Subroutine of simplify_binary_operation. Simplify a binary operation
CODE with result mode MODE, operating on OP0 and OP1. If OP0 and/or
OP1 are constant pool references, TRUEOP0 and TRUEOP1 represent the
@@ -3749,6 +3787,10 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
&& rtx_equal_p (XEXP (XEXP (op0, 0), 0), op1))
return simplify_gen_binary (IOR, mode, XEXP (op0, 1), op1);
+ tem = simplify_with_subreg_not (code, mode, op0, op1);
+ if (tem)
+ return tem;
+
tem = simplify_byte_swapping_operation (code, mode, op0, op1);
if (tem)
return tem;
@@ -4017,6 +4059,10 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
&& rtx_equal_p (XEXP (XEXP (op0, 0), 0), op1))
return simplify_gen_binary (IOR, mode, XEXP (op0, 1), op1);
+ tem = simplify_with_subreg_not (code, mode, op0, op1);
+ if (tem)
+ return tem;
+
tem = simplify_byte_swapping_operation (code, mode, op0, op1);
if (tem)
return tem;
@@ -4285,6 +4331,10 @@ simplify_context::simplify_binary_operation_1 (rtx_code code,
return simplify_gen_binary (LSHIFTRT, mode, XEXP (op0, 0), XEXP (op0, 1));
}
+ tem = simplify_with_subreg_not (code, mode, op0, op1);
+ if (tem)
+ return tem;
+
tem = simplify_byte_swapping_operation (code, mode, op0, op1);
if (tem)
return tem;
diff --git a/gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c b/gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c
new file mode 100644
index 0000000..1c66f21
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/simd/bic_orn_1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#include <arm_neon.h>
+
+int64x2_t bic_16b (int32x4_t a, int32x4_t b) { /* AND of a reinterpreted NOT; the dg-final scan expects a BIC on .16b.  */
+ return vandq_s64 (vreinterpretq_s64_s32 (vmvnq_s32 (a)), /* NOT is taken on the s32 vector, then reinterpreted as s64.  */
+ vreinterpretq_s64_s32 (b)); /* Second AND operand is B reinterpreted as s64, without a NOT.  */
+}
+
+int16x4_t orn_8b (int32x2_t a, int32x2_t b) { /* OR with a reinterpreted NOT as the second operand; the dg-final scan expects an ORN on .8b.  */
+ return vorr_s16 (vreinterpret_s16_s32 (a), /* First OR operand is A reinterpreted as s16, without a NOT.  */
+ vreinterpret_s16_s32 (vmvn_s32 (b))); /* NOT is taken on the s32 vector, then reinterpreted as s16.  */
+}
+
+/* { dg-final { scan-assembler {\tbic\tv[0-9]+\.16b} } } */
+/* { dg-final { scan-assembler {\torn\tv[0-9]+\.8b} } } */