aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Segher Boessenkool <segher@kernel.crashing.org> 2016-12-09 20:31:06 +0100
committer Segher Boessenkool <segher@gcc.gnu.org> 2016-12-09 20:31:06 +0100
commit bb0f9c0249ee6a1b53e6ae8bdd9d3543991c7291 (patch)
tree 025154146b8b80c430204be6d22a894a97eea00f
parent 59ab1319cb6664c39d8cf7f05bcf1ec0a625ecdb (diff)
download gcc-bb0f9c0249ee6a1b53e6ae8bdd9d3543991c7291.zip
gcc-bb0f9c0249ee6a1b53e6ae8bdd9d3543991c7291.tar.gz
gcc-bb0f9c0249ee6a1b53e6ae8bdd9d3543991c7291.tar.bz2
rs6000: clz/ctz/ffs improvement (PR78683)
On CPUs that implement popcnt[wd] but not cnttz[wd] we can do better for
the ctz sequences than we do today.

C[LT]Z_DEFINED_VALUE_AT_ZERO can return 2, since we always return the same
fixed value (only dependent on TARGET_* options).

	PR target/78683
	* config/rs6000/rs6000.h (CLZ_DEFINED_VALUE_AT_ZERO): Use
	GET_MODE_BITSIZE.  Return 2.
	(CTZ_DEFINED_VALUE_AT_ZERO): Use GET_MODE_BITSIZE.  Return 2.  Handle
	TARGET_POPCNTD the same as TARGET_CTZ.
	* config/rs6000/rs6000.md (ctz<mode>2): Reimplement.
	(ffs<mode>2): Reimplement.

From-SVN: r243499
-rw-r--r-- gcc/ChangeLog 10
-rw-r--r-- gcc/config/rs6000/rs6000.h 11
-rw-r--r-- gcc/config/rs6000/rs6000.md 62
3 files changed, 48 insertions, 35 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 96853f2..269f785 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2016-12-09 Segher Boessenkool <segher@kernel.crashing.org>
+
+ PR target/78683
+ * config/rs6000/rs6000.h (CLZ_DEFINED_VALUE_AT_ZERO): Use
+ GET_MODE_BITSIZE. Return 2.
+ (CTZ_DEFINED_VALUE_AT_ZERO): Use GET_MODE_BITSIZE. Return 2. Handle
+ TARGET_POPCNTD the same as TARGET_CTZ.
+ * config/rs6000/rs6000.md (ctz<mode>2): Reimplement.
+ (ffs<mode>2): Reimplement.
+
2016-12-09 Andre Vieira <andre.simoesdiasvieira@arm.com>
PR rtl-optimization/78255
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 5d56927..fe314bf 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -2199,14 +2199,15 @@ do { \
/* The cntlzw and cntlzd instructions return 32 and 64 for input of zero. */
#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
- ((VALUE) = ((MODE) == SImode ? 32 : 64), 1)
+ ((VALUE) = GET_MODE_BITSIZE (MODE), 2)
/* The CTZ patterns that are implemented in terms of CLZ return -1 for input of
- zero. The hardware instructions added in Power9 return 32 or 64. */
+ zero. The hardware instructions added in Power9 and the sequences using
+ popcount return 32 or 64. */
#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
- ((!TARGET_CTZ) \
- ? ((VALUE) = -1, 1) \
- : ((VALUE) = ((MODE) == SImode ? 32 : 64), 1))
+ (TARGET_CTZ || TARGET_POPCNTD \
+ ? ((VALUE) = GET_MODE_BITSIZE (MODE), 2) \
+ : ((VALUE) = -1, 2))
/* Specify the machine mode that pointers have.
After generation of rtl, the compiler makes no further distinction
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4726d73..777b996 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -2220,17 +2220,8 @@
[(set_attr "type" "cntlz")])
(define_expand "ctz<mode>2"
- [(set (match_dup 2)
- (neg:GPR (match_operand:GPR 1 "gpc_reg_operand" "")))
- (set (match_dup 3)
- (and:GPR (match_dup 1)
- (match_dup 2)))
- (set (match_dup 4)
- (clz:GPR (match_dup 3)))
- (parallel [(set (match_operand:GPR 0 "gpc_reg_operand" "")
- (minus:GPR (match_dup 5)
- (match_dup 4)))
- (clobber (reg:GPR CA_REGNO))])]
+ [(set (match_operand:GPR 0 "gpc_reg_operand")
+ (ctz:GPR (match_operand:GPR 1 "gpc_reg_operand")))]
""
{
if (TARGET_CTZ)
@@ -2239,10 +2230,26 @@
DONE;
}
- operands[2] = gen_reg_rtx (<MODE>mode);
- operands[3] = gen_reg_rtx (<MODE>mode);
- operands[4] = gen_reg_rtx (<MODE>mode);
- operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1);
+ rtx tmp1 = gen_reg_rtx (<MODE>mode);
+ rtx tmp2 = gen_reg_rtx (<MODE>mode);
+ rtx tmp3 = gen_reg_rtx (<MODE>mode);
+
+ if (TARGET_POPCNTD)
+ {
+ emit_insn (gen_add<mode>3 (tmp1, operands[1], constm1_rtx));
+ emit_insn (gen_one_cmpl<mode>2 (tmp2, operands[1]));
+ emit_insn (gen_and<mode>3 (tmp3, tmp1, tmp2));
+ emit_insn (gen_popcntd<mode>2 (operands[0], tmp3));
+ }
+ else
+ {
+ emit_insn (gen_neg<mode>2 (tmp1, operands[1]));
+ emit_insn (gen_and<mode>3 (tmp2, operands[1], tmp1));
+ emit_insn (gen_clz<mode>2 (tmp3, tmp2));
+ emit_insn (gen_sub<mode>3 (operands[0], GEN_INT (<bits> - 1), tmp3));
+ }
+
+ DONE;
})
(define_insn "ctz<mode>2_hw"
@@ -2253,23 +2260,18 @@
[(set_attr "type" "cntlz")])
(define_expand "ffs<mode>2"
- [(set (match_dup 2)
- (neg:GPR (match_operand:GPR 1 "gpc_reg_operand" "")))
- (set (match_dup 3)
- (and:GPR (match_dup 1)
- (match_dup 2)))
- (set (match_dup 4)
- (clz:GPR (match_dup 3)))
- (parallel [(set (match_operand:GPR 0 "gpc_reg_operand" "")
- (minus:GPR (match_dup 5)
- (match_dup 4)))
- (clobber (reg:GPR CA_REGNO))])]
+ [(set (match_operand:GPR 0 "gpc_reg_operand")
+ (ffs:GPR (match_operand:GPR 1 "gpc_reg_operand")))]
""
{
- operands[2] = gen_reg_rtx (<MODE>mode);
- operands[3] = gen_reg_rtx (<MODE>mode);
- operands[4] = gen_reg_rtx (<MODE>mode);
- operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
+ rtx tmp1 = gen_reg_rtx (<MODE>mode);
+ rtx tmp2 = gen_reg_rtx (<MODE>mode);
+ rtx tmp3 = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_neg<mode>2 (tmp1, operands[1]));
+ emit_insn (gen_and<mode>3 (tmp2, operands[1], tmp1));
+ emit_insn (gen_clz<mode>2 (tmp3, tmp2));
+ emit_insn (gen_sub<mode>3 (operands[0], GEN_INT (<bits>), tmp3));
+ DONE;
})