diff options
author | Hongyu Wang <hongyu.wang@intel.com> | 2019-12-17 01:50:35 +0000 |
---|---|---|
committer | Hongtao Liu <liuhongt@gcc.gnu.org> | 2019-12-17 01:50:35 +0000 |
commit | da86c5af207cb04869108aa79b2d0117752e291a (patch) | |
tree | 3f900a6d3da0a393f0158d04091da841c165eaa6 | |
parent | 62dd2904f05ee752bbeff43a196b90ecd7b94616 (diff) | |
download | gcc-da86c5af207cb04869108aa79b2d0117752e291a.zip gcc-da86c5af207cb04869108aa79b2d0117752e291a.tar.gz gcc-da86c5af207cb04869108aa79b2d0117752e291a.tar.bz2 |
Add abs pattern to handle {si,di} mode abs to avoid pmax/cmove conversion.
2019-12-17 Hongyu Wang <hongyu.wang@intel.com>
gcc/
PR target/92651
* config/i386/i386.h (TARGET_EXPAND_ABS): New macro.
* config/i386/x86-tune.def (X86_TUNE_EXPAND_ABS): New.
* config/i386/i386.md (abs<SWI48x>2): New define_expand.
gcc/testsuite
* gcc.target/i386/pr92651.c: New testcase.
From-SVN: r279452
-rw-r--r-- | gcc/ChangeLog | 7 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 2 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 39 | ||||
-rw-r--r-- | gcc/config/i386/x86-tune.def | 7 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 4 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr92651.c | 16 |
6 files changed, 75 insertions, 0 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 9a37d82..bd1a252 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,10 @@ +2019-12-17 Hongyu Wang <hongyu.wang@intel.com> + + PR target/92651 + * config/i386/i386.h (TARGET_EXPAND_ABS): New macro. + * config/i386/x86-tune.def (X86_TUNE_EXPAND_ABS): New. + * config/i386/i386.md (abs<SWI48x>2): New define_expand. + 2019-12-17 H.J. Lu <hjl.tools@gmail.com> PR target/92807 diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 2542cb3..65f6c76 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -596,6 +596,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE] #define TARGET_EMIT_VZEROUPPER \ ix86_tune_features[X86_TUNE_EMIT_VZEROUPPER] +#define TARGET_EXPAND_ABS \ + ix86_tune_features[X86_TUNE_EXPAND_ABS] /* Feature tests against the various architecture variations. */ enum ix86_arch_indices { diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 66a6f20..cf4a0cc 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -9669,6 +9669,45 @@ "#" [(set_attr "isa" "noavx,noavx,avx,avx")]) +;; Special expand pattern to handle integer mode abs + +(define_expand "abs<mode>2" + [(set (match_operand:SWI48x 0 "register_operand") + (abs:SWI48x + (match_operand:SWI48x 1 "register_operand")))] + "TARGET_EXPAND_ABS" + { + machine_mode mode = <MODE>mode; + + /* Generate rtx abs using abs (x) = (((signed) x >> (W-1)) ^ x) - + ((signed) x >> (W-1)) */ + rtx shift_amount = gen_int_shift_amount (mode, + GET_MODE_PRECISION (mode) + - 1); + shift_amount = convert_modes (E_QImode, GET_MODE (shift_amount), + shift_amount, 1); + rtx shift_dst = gen_reg_rtx (mode); + rtx shift_op = gen_rtx_SET (shift_dst, + gen_rtx_fmt_ee (ASHIFTRT, mode, + operands[1], shift_amount)); + rtx clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, + FLAGS_REG)); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, shift_op, + clobber))); 
+ + rtx xor_op = gen_rtx_SET (operands[0], + gen_rtx_fmt_ee (XOR, mode, shift_dst, + operands[1])); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, xor_op, clobber))); + + rtx minus_op = gen_rtx_SET (operands[0], + gen_rtx_fmt_ee (MINUS, mode, + operands[0], shift_dst)); + emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, minus_op, + clobber))); + DONE; + }) + (define_expand "<code><mode>2" [(set (match_operand:X87MODEF 0 "register_operand") (absneg:X87MODEF (match_operand:X87MODEF 1 "register_operand")))] diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index 328535d..58a81e5 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -317,6 +317,13 @@ DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn", DEF_TUNE (X86_TUNE_USE_XCHG_FOR_ATOMIC_STORE, "use_xchg_for_atomic_store", m_CORE_ALL | m_BDVER | m_ZNVER | m_GENERIC) +/* X86_TUNE_EXPAND_ABS: This enables a new abs pattern by + generating instructions for abs (x) = (((signed) x >> (W-1)) ^ x) - + ((signed) x >> (W-1)) instead of cmove or SSE max/abs instructions. */ +DEF_TUNE (X86_TUNE_EXPAND_ABS, "expand_abs", + m_CORE_ALL | m_SILVERMONT | m_KNL | m_KNM | m_GOLDMONT + | m_GOLDMONT_PLUS | m_TREMONT ) + /*****************************************************************************/ /* 387 instruction selection tuning */ /*****************************************************************************/ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 4142c50..5fff39e 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,7 @@ +2019-12-17 Hongyu Wang <hongyu.wang@intel.com> + + * gcc.target/i386/pr92651.c: New testcase. + 2019-12-17 H.J. 
Lu <hjl.tools@gmail.com> PR target/92807 diff --git a/gcc/testsuite/gcc.target/i386/pr92651.c b/gcc/testsuite/gcc.target/i386/pr92651.c new file mode 100644 index 0000000..3d0c3c7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr92651.c @@ -0,0 +1,16 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=corei7" } */ + +#include <stdlib.h> + +int foo(unsigned char a, unsigned char b) +{ + int isum=abs(a - b); + return isum; +} + +/* { dg-final { scan-assembler-not "cmov*" } } */ +/* { dg-final { scan-assembler "(cltd|cdq|shr)" } } */ +/* { dg-final { scan-assembler-times "xor" 1 } } */ +/* { dg-final { scan-assembler-times "sub" 2 } } */ + |