author     Kyrylo Tkachov <kyrylo.tkachov@arm.com>     2017-06-29 09:21:57 +0000
committer  Michael Collison <collison@gcc.gnu.org>     2017-06-29 09:21:57 +0000
commit     7813b280435f2e19c53df9f8b04a3d28bb561aa8
tree       98563d3d1aa4ffabc13c0d3178f3f9ad5dfe9233 /gcc/config
parent     696bafe61327dbf2ce2dee7262af8f18b23fcfa1
re PR target/70119 (AArch64 should take advantage of implicit truncation of variable shift amount without defining SHIFT_COUNT_TRUNCATED)
2017-06-29  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
            Michael Collison  <michael.collison@arm.com>

        PR target/70119
        * config/aarch64/aarch64.md (*aarch64_<optab>_reg_<mode>3_mask1):
        New pattern.
        (*aarch64_reg_<mode>3_neg_mask2): New pattern.
        (*aarch64_reg_<mode>3_minus_mask): New pattern.
        (*aarch64_<optab>_reg_di3_mask2): New pattern.
        * config/aarch64/aarch64.c (aarch64_rtx_costs): Account for cost
        of shift when the shift amount is masked with constant equal to
        the size of the mode.
        * config/aarch64/predicates.md (subreg_lowpart_operator): New
        predicate.

2017-06-29  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
            Michael Collison  <michael.collison@arm.com>

        PR target/70119
        * gcc.target/aarch64/var_shift_mask_1.c: New test.
Co-Authored-By: Michael Collison <michael.collison@arm.com>
From-SVN: r249774
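For context, the redundant-masking idiom this patch lets GCC optimise away looks like the following C. This is an illustrative sketch only: the function name is invented here, and it is not necessarily the exact contents of the new var_shift_mask_1.c test.

/* Illustrative example, not taken from the testsuite.  */
unsigned int
shift_masked (unsigned int x, unsigned int n)
{
  /* LSL on W-registers already truncates the shift amount to its low
     5 bits, so the "& 31" is redundant on AArch64; with these patterns
     the expected output is a single variable shift with no AND.  */
  return x << (n & 31);
}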
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64.c     | 50
-rw-r--r--  gcc/config/aarch64/aarch64.md    | 91
-rw-r--r--  gcc/config/aarch64/predicates.md |  4
3 files changed, 131 insertions(+), 14 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 593263f..5cf41fc 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7541,17 +7541,26 @@ cost_plus:
         }
       else
         {
-          if (speed)
+          if (VECTOR_MODE_P (mode))
             {
-              if (VECTOR_MODE_P (mode))
-                {
-                  /* Vector shift (register).  */
-                  *cost += extra_cost->vect.alu;
-                }
-              else
+              if (speed)
+                /* Vector shift (register).  */
+                *cost += extra_cost->vect.alu;
+            }
+          else
+            {
+              if (speed)
+                /* LSLV.  */
+                *cost += extra_cost->alu.shift_reg;
+
+              if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
+                  && CONST_INT_P (XEXP (op1, 1))
+                  && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
                 {
-                  /* LSLV.  */
-                  *cost += extra_cost->alu.shift_reg;
+                  *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
+                  /* We already demanded XEXP (op1, 0) to be REG_P, so
+                     don't recurse into it.  */
+                  return true;
                 }
             }
           return false;  /* All arguments need to be in registers.  */
@@ -7580,14 +7589,27 @@ cost_plus:
         }
       else
         {
-
-          /* ASR (register) and friends.  */
-          if (speed)
+          if (VECTOR_MODE_P (mode))
             {
-              if (VECTOR_MODE_P (mode))
+              if (speed)
+                /* Vector shift (register).  */
                 *cost += extra_cost->vect.alu;
-              else
+            }
+          else
+            {
+              if (speed)
+                /* ASR (register) and friends.  */
                 *cost += extra_cost->alu.shift_reg;
+
+              if (GET_CODE (op1) == AND && REG_P (XEXP (op1, 0))
+                  && CONST_INT_P (XEXP (op1, 1))
+                  && INTVAL (XEXP (op1, 1)) == GET_MODE_BITSIZE (mode) - 1)
+                {
+                  *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
+                  /* We already demanded XEXP (op1, 0) to be REG_P, so
+                     don't recurse into it.  */
+                  return true;
+                }
             }
           return false;  /* All arguments need to be in registers.  */
         }
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 6bdbf65..e6e7e64 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3942,6 +3942,97 @@
   }
 )
 
+;; When the LSL, LSR, ASR, ROR instructions operate on all register arguments
+;; they truncate the shift/rotate amount by the size of the registers they
+;; operate on: 32 for W-regs, 64 for X-regs.  This allows us to optimise away
+;; such redundant masking instructions.  GCC can do that automatically when
+;; SHIFT_COUNT_TRUNCATED is true, but we can't enable it for TARGET_SIMD
+;; because some of the SISD shift alternatives don't perform this truncations.
+;; So this pattern exists to catch such cases.
+
+(define_insn "*aarch64_<optab>_reg_<mode>3_mask1"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+        (SHIFT:GPI
+          (match_operand:GPI 1 "register_operand" "r")
+          (match_operator 4 "subreg_lowpart_operator"
+           [(and:GPI (match_operand:GPI 2 "register_operand" "r")
+                     (match_operand 3 "const_int_operand" "n"))])))]
+  "(~INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1)) == 0"
+  "<shift>\t%<w>0, %<w>1, %<w>2"
+  [(set_attr "type" "shift_reg")]
+)
+
+(define_insn_and_split "*aarch64_reg_<mode>3_neg_mask2"
+  [(set (match_operand:GPI 0 "register_operand" "=&r")
+        (SHIFT:GPI
+          (match_operand:GPI 1 "register_operand" "r")
+          (match_operator 4 "subreg_lowpart_operator"
+          [(neg:SI (and:SI (match_operand:SI 2 "register_operand" "r")
+                           (match_operand 3 "const_int_operand" "n")))])))]
+  "((~INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode) - 1)) == 0)"
+  "#"
+  "&& true"
+  [(const_int 0)]
+  {
+    rtx tmp = (can_create_pseudo_p () ? gen_reg_rtx (SImode)
+               : operands[0]);
+    emit_insn (gen_negsi2 (tmp, operands[2]));
+
+    rtx and_op = gen_rtx_AND (SImode, tmp, operands[3]);
+    rtx subreg_tmp = gen_rtx_SUBREG (GET_MODE (operands[4]), and_op,
+                                     SUBREG_BYTE (operands[4]));
+    emit_insn (gen_<optab><mode>3 (operands[0], operands[1], subreg_tmp));
+    DONE;
+  }
+)
+
+(define_insn_and_split "*aarch64_reg_<mode>3_minus_mask"
+  [(set (match_operand:GPI 0 "register_operand" "=&r")
+        (ashift:GPI
+          (match_operand:GPI 1 "register_operand" "r")
+          (minus:QI (match_operand 2 "const_int_operand" "n")
+                    (match_operator 5 "subreg_lowpart_operator"
+                    [(and:SI (match_operand:SI 3 "register_operand" "r")
+                             (match_operand 4 "const_int_operand" "n"))]))))]
+  "((~INTVAL (operands[4]) & (GET_MODE_BITSIZE (<MODE>mode) - 1)) == 0)
+   && INTVAL (operands[2]) == GET_MODE_BITSIZE (<MODE>mode)"
+  "#"
+  "&& true"
+  [(const_int 0)]
+  {
+    rtx tmp = (can_create_pseudo_p () ? gen_reg_rtx (SImode)
+               : operands[0]);
+
+    emit_insn (gen_negsi2 (tmp, operands[3]));
+
+    rtx and_op = gen_rtx_AND (SImode, tmp, operands[4]);
+    rtx subreg_tmp = gen_rtx_SUBREG (GET_MODE (operands[5]), and_op,
+                                     SUBREG_BYTE (operands[5]));
+
+    emit_insn (gen_ashl<mode>3 (operands[0], operands[1], subreg_tmp));
+    DONE;
+  }
+)
+
+(define_insn "*aarch64_<optab>_reg_di3_mask2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (SHIFT:DI
+          (match_operand:DI 1 "register_operand" "r")
+          (match_operator 4 "subreg_lowpart_operator"
+           [(and:SI (match_operand:SI 2 "register_operand" "r")
+                    (match_operand 3 "aarch64_shift_imm_di" "Usd"))])))]
+  "((~INTVAL (operands[3]) & (GET_MODE_BITSIZE (DImode)-1)) == 0)"
+{
+  rtx xop[3];
+  xop[0] = operands[0];
+  xop[1] = operands[1];
+  xop[2] = gen_lowpart (GET_MODE (operands[4]), operands[2]);
+  output_asm_insn ("<shift>\t%x0, %x1, %x2", xop);
+  return "";
+}
+  [(set_attr "type" "shift_reg")]
+)
+
 ;; Logical left shift using SISD or Integer instruction
 (define_insn "*aarch64_ashl_sisd_or_int_<mode>3"
   [(set (match_operand:GPI 0 "register_operand" "=r,r,w,w")
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index cd7ded9..ad8a43c 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -35,6 +35,10 @@
   (and (match_code "const_int")
        (match_test "op == CONST0_RTX (mode)")))
 
+(define_special_predicate "subreg_lowpart_operator"
+  (and (match_code "subreg")
+       (match_test "subreg_lowpart_p (op)")))
+
 (define_predicate "aarch64_ccmp_immediate"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), -31, 31)")))
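The *aarch64_<optab>_reg_di3_mask2 pattern covers the 64-bit variant of the same idiom, where a 32-bit shift amount is masked with 63 and used, via a lowpart subreg, to shift an X-register. A hedged sketch of the kind of source that should now compile to a single shift (illustrative only, the function name is invented; it assumes combine forms the masked-subreg shift amount the pattern looks for):

/* Illustrative example, not taken from the testsuite.  */
unsigned long long
shift64_masked (unsigned long long x, int n)
{
  /* X-register shifts truncate the amount to its low 6 bits, so the
     "& 63" (and the widening of n) should fold away, leaving a single
     variable shift such as "lsl x0, x0, x1".  */
  return x << (n & 63);
}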