path: root/gcc/match.pd
author     Roger Sayle <roger@nextmovesoftware.com>    2021-07-08 11:46:14 +0100
committer  Roger Sayle <roger@nextmovesoftware.com>    2021-07-08 11:47:55 +0100
commit     4c619132b3f14dc5e672a7f2f0e09cb784193559 (patch)
tree       63d472f471b0c12d4140ad40b0fc3f53630ae5ec /gcc/match.pd
parent     663a014e77709bfbd4145c605b178169eaf334fc (diff)
PR tree-optimization/40210: Fold (bswap(X)>>C1)&C2 to (X>>C3)&C2 in match.pd
All of the optimizations/transformations mentioned in bugzilla for
PR tree-optimization/40210 are already implemented in mainline GCC,
with one exception.  In comment #5, there's a suggestion that
(bswap64(x)>>56)&0xff can be implemented without the bswap as
(unsigned char)x, or equivalently x&0xff.

This patch implements the above optimization, and closely related
variants.  For any single bit, (bswap(X)>>C1)&1 can be simplified to
(X>>C2)&1, where bit position C2 is the appropriate permutation of C1.
Similarly, the bswap can be eliminated if the desired set of bits all
lie within the same byte, hence (bswap(x)>>8)&255 can always be
optimized, as can (bswap(x)>>8)&123.

Previously,

int foo(long long x) {
  return (__builtin_bswap64(x) >> 56) & 0xff;
}

compiled with -O2 to

foo:	movq	%rdi, %rax
	bswap	%rax
	shrq	$56, %rax
	ret

with this patch, it now compiles to

foo:	movzbl	%dil, %eax
	ret

2021-07-08  Roger Sayle  <roger@nextmovesoftware.com>
	    Richard Biener  <rguenther@suse.de>

gcc/ChangeLog
	PR tree-optimization/40210
	* match.pd (bswap optimizations): Simplify (bswap(x)>>C1)&C2 as
	(x>>C3)&C2 when possible.  Simplify bswap(x)>>C1 as ((T)x)>>C2
	when possible.  Simplify bswap(x)&C1 as (x>>C2)&C1 when
	0<=C1<=255.

gcc/testsuite/ChangeLog
	PR tree-optimization/40210
	* gcc.dg/builtin-bswap-13.c: New test.
	* gcc.dg/builtin-bswap-14.c: New test.
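For illustration only (these are not the new builtin-bswap-13.c/-14.c
tests; the function names are made up), a small C example of the three
idioms the new patterns target.  Compiling with gcc -O2 and inspecting
the assembly should show no bswap instruction for any of them:

/* Hypothetical examples of the three new simplifications.  */

/* (bswap(x) >> C1) & C2, with all requested bits in one byte:
   folds to (x >> C3) & C2; here simply x & 0xff.  */
unsigned int hi_byte_bits (unsigned long long x)
{
  return (__builtin_bswap64 (x) >> 56) & 0xff;
}

/* bswap(x) >> C1, where only the top byte survives the shift:
   folds to a narrowing conversion, (unsigned char) x.  */
unsigned int top_byte (unsigned long long x)
{
  return __builtin_bswap64 (x) >> 56;
}

/* bswap(x) & C1 with 0 <= C1 <= 255:
   folds to (x >> C2) & C1; here (x >> 56) & 0x0f.  */
unsigned int low_mask (unsigned long long x)
{
  return __builtin_bswap64 (x) & 0x0f;
}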
Diffstat (limited to 'gcc/match.pd')
-rw-r--r--  gcc/match.pd  68
1 file changed, 66 insertions, 2 deletions
diff --git a/gcc/match.pd b/gcc/match.pd
index 72860fb..334e8cc 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3610,7 +3610,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(complex (convert:itype @0) (negate (convert:itype @1)))))
/* BSWAP simplifications, transforms checked by gcc.dg/builtin-bswap-8.c. */
-(for bswap (BUILT_IN_BSWAP16 BUILT_IN_BSWAP32 BUILT_IN_BSWAP64)
+(for bswap (BUILT_IN_BSWAP16 BUILT_IN_BSWAP32
+ BUILT_IN_BSWAP64 BUILT_IN_BSWAP128)
(simplify
(bswap (bswap @0))
@0)
@@ -3620,7 +3621,70 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(for bitop (bit_xor bit_ior bit_and)
(simplify
(bswap (bitop:c (bswap @0) @1))
- (bitop @0 (bswap @1)))))
+ (bitop @0 (bswap @1))))
+ /* (bswap(x) >> C1) & C2 can sometimes be simplified to (x >> C3) & C2. */
+ (simplify
+ (bit_and (convert1? (rshift@0 (convert2? (bswap@4 @1)) INTEGER_CST@2))
+ INTEGER_CST@3)
+ (if (BITS_PER_UNIT == 8
+ && tree_fits_uhwi_p (@2)
+ && tree_fits_uhwi_p (@3))
+ (with
+ {
+ unsigned HOST_WIDE_INT prec = TYPE_PRECISION (TREE_TYPE (@4));
+ unsigned HOST_WIDE_INT bits = tree_to_uhwi (@2);
+ unsigned HOST_WIDE_INT mask = tree_to_uhwi (@3);
+ unsigned HOST_WIDE_INT lo = bits & 7;
+ unsigned HOST_WIDE_INT hi = bits - lo;
+ }
+ (if (bits < prec
+ && mask < (256u>>lo)
+ && bits < TYPE_PRECISION (TREE_TYPE(@0)))
+ (with { unsigned HOST_WIDE_INT ns = (prec - (hi + 8)) + lo; }
+ (if (ns == 0)
+ (bit_and (convert @1) @3)
+ (with
+ {
+ tree utype = unsigned_type_for (TREE_TYPE (@1));
+ tree nst = build_int_cst (integer_type_node, ns);
+ }
+ (bit_and (convert (rshift:utype (convert:utype @1) {nst;})) @3))))))))
+ /* bswap(x) >> C1 can sometimes be simplified to (T)x >> C2. */
+ (simplify
+ (rshift (convert? (bswap@2 @0)) INTEGER_CST@1)
+ (if (BITS_PER_UNIT == 8
+ && CHAR_TYPE_SIZE == 8
+ && tree_fits_uhwi_p (@1))
+ (with
+ {
+ unsigned HOST_WIDE_INT prec = TYPE_PRECISION (TREE_TYPE (@2));
+ unsigned HOST_WIDE_INT bits = tree_to_uhwi (@1);
+ }
+ (if (bits + 8 == prec)
+ (if (TYPE_UNSIGNED (type))
+ (convert (convert:unsigned_char_type_node @0))
+ (convert (convert:signed_char_type_node @0)))
+ (if (bits < prec && bits + 8 > prec)
+ (with
+ {
+ tree nst = build_int_cst (integer_type_node, bits & 7);
+ tree bt = TYPE_UNSIGNED (type) ? unsigned_char_type_node
+ : signed_char_type_node;
+ }
+ (convert (rshift:bt (convert:bt @0) {nst;}))))))))
+ /* bswap(x) & C1 can sometimes be simplified to (x >> C2) & C1. */
+ (simplify
+ (bit_and (convert? (bswap@2 @0)) INTEGER_CST@1)
+ (if (BITS_PER_UNIT == 8
+ && tree_fits_uhwi_p (@1)
+ && tree_to_uhwi (@1) < 256)
+ (with
+ {
+ unsigned HOST_WIDE_INT prec = TYPE_PRECISION (TREE_TYPE (@2));
+ tree utype = unsigned_type_for (TREE_TYPE (@0));
+ tree nst = build_int_cst (integer_type_node, prec - 8);
+ }
+ (bit_and (convert (rshift:utype (convert:utype @0) {nst;})) @1)))))
/* Combine COND_EXPRs and VEC_COND_EXPRs. */