aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author H.J. Lu <hjl.tools@gmail.com> 2020-06-26 14:56:40 -0700
committer H.J. Lu <hjl.tools@gmail.com> 2020-08-25 14:30:38 -0700
commit 4f73bf20d9c0ad1cd143db39d73f70841dcc1471 (patch)
tree abb14f1c1e05a81e0137e53ad7448c7293605fa8 /gcc
parent 050fc8b27a852007f8bb667999e1c8cfd31f90e1 (diff)
downloadgcc-4f73bf20d9c0ad1cd143db39d73f70841dcc1471.zip
gcc-4f73bf20d9c0ad1cd143db39d73f70841dcc1471.tar.gz
gcc-4f73bf20d9c0ad1cd143db39d73f70841dcc1471.tar.bz2
x86: Change CTZ_DEFINED_VALUE_AT_ZERO to return 0/2
Change CLZ_DEFINED_VALUE_AT_ZERO/CTZ_DEFINED_VALUE_AT_ZERO to return 0/2 to enable table-based clz/ctz optimization: -- Macro: CLZ_DEFINED_VALUE_AT_ZERO (MODE, VALUE) -- Macro: CTZ_DEFINED_VALUE_AT_ZERO (MODE, VALUE) A C expression that indicates whether the architecture defines a value for 'clz' or 'ctz' with a zero operand. A result of '0' indicates the value is undefined. If the value is defined for only the RTL expression, the macro should evaluate to '1'; if the value applies also to the corresponding optab entry (which is normally the case if it expands directly into the corresponding RTL), then the macro should evaluate to '2'. In the cases where the value is defined, VALUE should be set to this value. gcc/ PR target/95863 * config/i386/i386.h (CTZ_DEFINED_VALUE_AT_ZERO): Return 0/2. (CLZ_DEFINED_VALUE_AT_ZERO): Likewise. gcc/testsuite/ PR target/95863 * gcc.target/i386/pr95863-1.c: New test. * gcc.target/i386/pr95863-2.c: Likewise.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/i386/i386.h 4
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr95863-1.c 47
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr95863-2.c 27
3 files changed, 76 insertions, 2 deletions
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 852dd01..92b7475 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2955,9 +2955,9 @@ extern void debug_dispatch_window (int);
/* The value at zero is only defined for the BMI instructions
LZCNT and TZCNT, not the BSR/BSF insns in the original isa. */
#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
- ((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_BMI ? 1 : 0)
+ ((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_BMI ? 2 : 0) /* VALUE is always set to the mode's bit size; 2 means it also holds for the optab entry, 0 means undefined at zero.  */
#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
- ((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_LZCNT ? 1 : 0)
+ ((VALUE) = GET_MODE_BITSIZE (MODE), TARGET_LZCNT ? 2 : 0) /* Same contract as the CTZ macro, but gated on TARGET_LZCNT rather than TARGET_BMI.  */
/* Flags returned by ix86_get_callcvt (). */
diff --git a/gcc/testsuite/gcc.target/i386/pr95863-1.c b/gcc/testsuite/gcc.target/i386/pr95863-1.c
new file mode 100644
index 0000000..f3918a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95863-1.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-O -mbmi" } */
+
+int ctz1 (unsigned x) /* Table-lookup count of trailing zeros in X; returns table[0] == 0 when X is 0.  */
+{
+ static const char table[32] =
+ {
+ 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
+ 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
+ };
+
+ return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27]; /* x & -x keeps only the lowest set bit; the shift leaves the top 5 bits of the product as the table index (assumes 32-bit unsigned).  */
+}
+
+int ctz2 (unsigned x) /* Table-lookup count of trailing zeros in X using a 64-entry table; returns table[0] == 32 when X is 0.  */
+{
+#define u 0 /* Filler value; presumably marks slots no single-bit input ever indexes -- TODO confirm.  */
+ static short table[64] =
+ {
+ 32, 0, 1,12, 2, 6, u,13, 3, u, 7, u, u, u, u,14,
+ 10, 4, u, u, 8, u, u,25, u, u, u, u, u,21,27,15,
+ 31,11, 5, u, u, u, u, u, 9, u, u,24, u, u,20,26,
+ 30, u, u, u, u,23, u,19,29, u,22,18,28,17,16, u
+ };
+
+ x = (x & -x) * 0x0450FBAF; /* Isolate the lowest set bit, then multiply by the hashing constant.  */
+ return table[x >> 26]; /* The shift leaves the top 6 bits as the index (assumes 32-bit unsigned).  */
+}
+
+int ctz3 (unsigned x) /* Table-lookup count of trailing zeros in X with an explicit zero check that returns 32.  */
+{
+ static int table[32] =
+ {
+ 0, 1, 2,24, 3,19, 6,25, 22, 4,20,10,16, 7,12,26,
+ 31,23,18, 5,21, 9,15,11,30,17, 8,14,29,13,28,27
+ };
+
+ if (x == 0) return 32; /* Zero is handled before the lookup, so table[0] is only reached for non-zero X.  */
+ x = (x & -x) * 0x04D7651F; /* Isolate the lowest set bit, then multiply by the hashing constant.  */
+ return table[x >> 27]; /* The shift leaves the top 5 bits as the index (assumes 32-bit unsigned).  */
+}
+
+/* { dg-final { scan-assembler-times "tzcntl\t" 3 } } */
+/* { dg-final { scan-assembler-times "andl\t" 1 } } */
+/* { dg-final { scan-assembler-not "neg" } } */
+/* { dg-final { scan-assembler-not "imul" } } */
+/* { dg-final { scan-assembler-not "shr" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr95863-2.c b/gcc/testsuite/gcc.target/i386/pr95863-2.c
new file mode 100644
index 0000000..cb56dfc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95863-2.c
@@ -0,0 +1,27 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O -mbmi" } */
+
+static const unsigned long long magic = 0x03f08c5392f756cdULL; /* Multiplier used by ctz4 to hash an isolated bit into a table index.  */
+
+static const char table[64] = { /* Lookup table indexed by the top 6 bits of lsb * magic in ctz4.  */
+ 0, 1, 12, 2, 13, 22, 17, 3,
+ 14, 33, 23, 36, 18, 58, 28, 4,
+ 62, 15, 34, 26, 24, 48, 50, 37,
+ 19, 55, 59, 52, 29, 44, 39, 5,
+ 63, 11, 21, 16, 32, 35, 57, 27,
+ 61, 25, 47, 49, 54, 51, 43, 38,
+ 10, 20, 31, 56, 60, 46, 53, 42,
+ 9, 30, 45, 41, 8, 40, 7, 6,
+};
+
+int ctz4 (unsigned long long x) /* 64-bit table-lookup count of trailing zeros in X; returns table[0] == 0 when X is 0.  */
+{
+ unsigned long long lsb = x & -x; /* Keeps only the lowest set bit of X (0 when X is 0).  */
+ return table[(lsb * magic) >> 58]; /* The shift leaves the top 6 bits of the product as the index (assumes 64-bit unsigned long long).  */
+}
+
+/* { dg-final { scan-assembler-times "tzcntq\t" 1 } } */
+/* { dg-final { scan-assembler-times "andl\t" 1 } } */
+/* { dg-final { scan-assembler-not "negq" } } */
+/* { dg-final { scan-assembler-not "imulq" } } */
+/* { dg-final { scan-assembler-not "shrq" } } */