aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRoger Sayle <roger@nextmovesoftware.com>2022-06-07 07:49:40 +0100
committerRoger Sayle <roger@nextmovesoftware.com>2022-06-07 07:49:40 +0100
commitc4320bde42c6497b701e2e6b8f1c5069bed19818 (patch)
treea32c7f647ec3f9ef4a79c2e97812b4c89e12007f
parent63eab5d5775da6c656f3527ad974b7dc25f5ad5e (diff)
downloadgcc-c4320bde42c6497b701e2e6b8f1c5069bed19818.zip
gcc-c4320bde42c6497b701e2e6b8f1c5069bed19818.tar.gz
gcc-c4320bde42c6497b701e2e6b8f1c5069bed19818.tar.bz2
Recognize vpcmov in combine with -mxop on x86.
By way of an apology for causing PR target/105791, where I'd overlooked the need to support V1TImode in TARGET_XOP's vpcmov instruction, this patch further improves support for TARGET_XOP's vpcmov instruction, by recognizing it in combine. Currently, the test case: typedef int v4si __attribute__ ((vector_size (16))); v4si foo(v4si c, v4si t, v4si f) { return (c&t)|(~c&f); } on x86_64 with -O2 -mxop generates: vpxor %xmm2, %xmm1, %xmm1 vpand %xmm0, %xmm1, %xmm1 vpxor %xmm2, %xmm1, %xmm0 ret but with this patch now generates: vpcmov %xmm0, %xmm2, %xmm1, %xmm0 ret On its own, the new combine splitter works fine on TARGET_64BIT, but alas with -m32 combine incorrectly thinks the replacement instruction is more expensive, as IF_THEN_ELSE isn't currently/correctly handled in ix86_rtx_costs. So to avoid the need for a target selector in the new tescase, I've updated ix86_rtx_costs to report that AMD's vpcmov has a latency of two cycles [it's now an obsolete instruction set extension and there's unlikely to ever be a processor where this instruction has a different timing], and while there I also added rtx_costs for x86_64's integer conditional move instructions (which have single cycle latency). 2022-06-07 Roger Sayle <roger@nextmovesoftware.com> gcc/ChangeLog * config/i386/i386.cc (ix86_rtx_costs): Add a new case for IF_THEN_ELSE, and provide costs for TARGET_XOP's vpcmov and TARGET_CMOVE's (scalar integer) conditional moves. * config/i386/sse.md (define_split): Recognize XOP's vpcmov from its equivalent (canonical) pxor;pand;pxor sequence. gcc/testsuite/ChangeLog * gcc.target/i386/xop-pcmov3.c: New test case.
-rw-r--r--gcc/config/i386/i386.cc31
-rw-r--r--gcc/config/i386/sse.md24
-rw-r--r--gcc/testsuite/gcc.target/i386/xop-pcmov3.c10
3 files changed, 65 insertions, 0 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 11f4ddf..0823161 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -21009,6 +21009,37 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
}
return false;
+ case IF_THEN_ELSE:
+ if (TARGET_XOP
+ && VECTOR_MODE_P (mode)
+ && (GET_MODE_SIZE (mode) == 16 || GET_MODE_SIZE (mode) == 32))
+ {
+ /* vpcmov. */
+ *total = speed ? COSTS_N_INSNS (2) : COSTS_N_BYTES (6);
+ if (!REG_P (XEXP (x, 0)))
+ *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
+ if (!REG_P (XEXP (x, 1)))
+ *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
+ if (!REG_P (XEXP (x, 2)))
+ *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
+ return true;
+ }
+ else if (TARGET_CMOVE
+ && SCALAR_INT_MODE_P (mode)
+ && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
+ {
+ /* cmov. */
+ *total = COSTS_N_INSNS (1);
+ if (!REG_P (XEXP (x, 0)))
+ *total += rtx_cost (XEXP (x, 0), mode, code, 0, speed);
+ if (!REG_P (XEXP (x, 1)))
+ *total += rtx_cost (XEXP (x, 1), mode, code, 1, speed);
+ if (!REG_P (XEXP (x, 2)))
+ *total += rtx_cost (XEXP (x, 2), mode, code, 2, speed);
+ return true;
+ }
+ return false;
+
default:
return false;
}
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 62688f8..3ca89b9 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -23868,6 +23868,30 @@
"vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "sse4arg")])
+;; Recognize XOP's vpcmov from canonical (xor (and (xor t f) c) f)
+(define_split
+ [(set (match_operand:V_128_256 0 "register_operand")
+ (xor:V_128_256
+ (and:V_128_256
+ (xor:V_128_256 (match_operand:V_128_256 1 "register_operand")
+ (match_operand:V_128_256 2 "register_operand"))
+ (match_operand:V_128_256 3 "nonimmediate_operand"))
+ (match_operand:V_128_256 4 "register_operand")))]
+ "TARGET_XOP
+ && (REGNO (operands[4]) == REGNO (operands[1])
+ || REGNO (operands[4]) == REGNO (operands[2]))"
+ [(set (match_dup 0) (if_then_else:V_128_256 (match_dup 3)
+ (match_dup 5)
+ (match_dup 4)))]
+{
+ /* To handle the commutivity of XOR, operands[4] is either operands[1]
+ or operands[2], we need operands[5] to be the other one. */
+ if (REGNO (operands[4]) == REGNO (operands[1]))
+ operands[5] = operands[2];
+ else
+ operands[5] = operands[1];
+})
+
;; XOP horizontal add/subtract instructions
(define_insn "xop_phadd<u>bw"
[(set (match_operand:V8HI 0 "register_operand" "=x")
diff --git a/gcc/testsuite/gcc.target/i386/xop-pcmov3.c b/gcc/testsuite/gcc.target/i386/xop-pcmov3.c
new file mode 100644
index 0000000..6c40f33
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/xop-pcmov3.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mxop" } */
+typedef int v4si __attribute__ ((vector_size (16)));
+
+v4si foo(v4si c, v4si t, v4si f)
+{
+ return (c&t)|(~c&f);
+}
+
+/* { dg-final { scan-assembler "vpcmov" } } */