author | Jakub Jelinek <jakub@redhat.com> | 2017-11-20 11:08:48 +0100
committer | Jakub Jelinek <jakub@gcc.gnu.org> | 2017-11-20 11:08:48 +0100
commit | dffec8ebdb449be77bf02fe0cf59237362be991a
tree | 39db051e345ba306c9d06d607abcfdcb79212369 /gcc/gimple-ssa-store-merging.c
parent | 12b8cb2e5b236faeb012fc544b27d32ce6cedde7
download | gcc-dffec8ebdb449be77bf02fe0cf59237362be991a.zip gcc-dffec8ebdb449be77bf02fe0cf59237362be991a.tar.gz gcc-dffec8ebdb449be77bf02fe0cf59237362be991a.tar.bz2
tree-ssa-math-opts.c (nop_stats, [...]): Moved to ...
* tree-ssa-math-opts.c (nop_stats, bswap_stats, struct symbolic_number,
BITS_PER_MARKER, MARKER_MASK, MARKER_BYTE_UNKNOWN, HEAD_MARKER, CMPNOP,
CMPXCHG, do_shift_rotate, verify_symbolic_number_p,
init_symbolic_number, find_bswap_or_nop_load, perform_symbolic_merge,
find_bswap_or_nop_1, find_bswap_or_nop, pass_data_optimize_bswap,
class pass_optimize_bswap, bswap_replace,
pass_optimize_bswap::execute): Moved to ...
* gimple-ssa-store-merging.c: ... this file.
Include optabs-tree.h.
(nop_stats, bswap_stats, do_shift_rotate, verify_symbolic_number_p,
init_symbolic_number, find_bswap_or_nop_load, perform_symbolic_merge,
find_bswap_or_nop_1, find_bswap_or_nop, bswap_replace): Put into
anonymous namespace, remove static keywords.
(pass_optimize_bswap::gate): Test BITS_PER_UNIT == 8 here...
(pass_optimize_bswap::execute): ... rather than here. Formatting fix.
From-SVN: r254947
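
For context on the gate/execute change noted in the ChangeLog, the BITS_PER_UNIT == 8 test now lives in the pass gate rather than at the top of execute. A condensed excerpt of the resulting shape (the full pass definition appears in the diff below):

    /* opt_pass methods: the BITS_PER_UNIT == 8 check moved here from
       pass_optimize_bswap::execute.  */
    virtual bool
    gate (function *)
    {
      return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
    }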
Diffstat (limited to 'gcc/gimple-ssa-store-merging.c')
-rw-r--r-- | gcc/gimple-ssa-store-merging.c | 1082 |
1 files changed, 1078 insertions, 4 deletions
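
The diff below moves GCC's detection of hand-written byte swaps into this file wholesale. As a reading aid only (not part of the commit), here is a minimal standalone C sketch of the byte-marker bookkeeping that the symbolic_number comments in the moved code describe: each byte of the tracked number records which source byte it currently holds (1 = least significant source byte, 0 = known zero), so the 32-bit identity pattern is 0x04030201 and the 32-bit byte-swap pattern is 0x01020304, matching the masked CMPNOP and shifted CMPXCHG values used in find_bswap_or_nop.

    /* Standalone illustration, not GCC code: marker arithmetic for a
       32-bit value, one 8-bit marker per byte (BITS_PER_MARKER == 8).  */
    #include <stdint.h>
    #include <stdio.h>

    #define BITS_PER_MARKER 8

    int
    main (void)
    {
      uint64_t n = 0x04030201;  /* 32-bit identity pattern (masked CMPNOP).  */

      /* Model "x << 8" on the source value: each marker moves up one byte
         position and the low byte becomes known zero, which mirrors what
         do_shift_rotate does for LSHIFT_EXPR in the diff below.  */
      n <<= BITS_PER_MARKER;
      n &= ((uint64_t) 1 << (4 * BITS_PER_MARKER)) - 1;  /* keep 4 markers  */

      printf ("after << 8: %#llx\n", (unsigned long long) n);  /* 0x3020100 */
      return 0;
    }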
diff --git a/gcc/gimple-ssa-store-merging.c b/gcc/gimple-ssa-store-merging.c index b8920d9..b84f892 100644 --- a/gcc/gimple-ssa-store-merging.c +++ b/gcc/gimple-ssa-store-merging.c @@ -1,5 +1,5 @@ -/* GIMPLE store merging pass. - Copyright (C) 2016-2017 Free Software Foundation, Inc. +/* GIMPLE store merging and byte swapping passes. + Copyright (C) 2009-2017 Free Software Foundation, Inc. Contributed by ARM Ltd. This file is part of GCC. @@ -18,8 +18,8 @@ along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ -/* The purpose of this pass is to combine multiple memory stores of - constant values, values loaded from memory or bitwise operations +/* The purpose of the store merging pass is to combine multiple memory + stores of constant values, values loaded from memory or bitwise operations on those to consecutive memory locations into fewer wider stores. For example, if we have a sequence peforming four byte stores to consecutive memory locations: @@ -157,6 +157,7 @@ #include "gimplify-me.h" #include "rtl.h" #include "expr.h" /* For get_bit_range. */ +#include "optabs-tree.h" #include "selftest.h" /* The maximum size (in bits) of the stores this pass should generate. */ @@ -169,6 +170,1079 @@ namespace { +struct +{ + /* Number of hand-written 16-bit nop / bswaps found. */ + int found_16bit; + + /* Number of hand-written 32-bit nop / bswaps found. */ + int found_32bit; + + /* Number of hand-written 64-bit nop / bswaps found. */ + int found_64bit; +} nop_stats, bswap_stats; + +/* A symbolic number structure is used to detect byte permutation and selection + patterns of a source. To achieve that, its field N contains an artificial + number consisting of BITS_PER_MARKER sized markers tracking where does each + byte come from in the source: + + 0 - target byte has the value 0 + FF - target byte has an unknown value (eg. due to sign extension) + 1..size - marker value is the byte index in the source (0 for lsb). + + To detect permutations on memory sources (arrays and structures), a symbolic + number is also associated: + - a base address BASE_ADDR and an OFFSET giving the address of the source; + - a range which gives the difference between the highest and lowest accessed + memory location to make such a symbolic number; + - the address SRC of the source element of lowest address as a convenience + to easily get BASE_ADDR + offset + lowest bytepos; + - number of expressions N_OPS bitwise ored together to represent + approximate cost of the computation. + + Note 1: the range is different from size as size reflects the size of the + type of the current expression. For instance, for an array char a[], + (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while + (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this + time a range of 1. + + Note 2: for non-memory sources, range holds the same value as size. + + Note 3: SRC points to the SSA_NAME in case of non-memory source. */ + +struct symbolic_number { + uint64_t n; + tree type; + tree base_addr; + tree offset; + HOST_WIDE_INT bytepos; + tree src; + tree alias_set; + tree vuse; + unsigned HOST_WIDE_INT range; + int n_ops; +}; + +#define BITS_PER_MARKER 8 +#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1) +#define MARKER_BYTE_UNKNOWN MARKER_MASK +#define HEAD_MARKER(n, size) \ + ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER))) + +/* The number which the find_bswap_or_nop_1 result should match in + order to have a nop. 
The number is masked according to the size of + the symbolic number before using it. */ +#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \ + (uint64_t)0x08070605 << 32 | 0x04030201) + +/* The number which the find_bswap_or_nop_1 result should match in + order to have a byte swap. The number is masked according to the + size of the symbolic number before using it. */ +#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \ + (uint64_t)0x01020304 << 32 | 0x05060708) + +/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic + number N. Return false if the requested operation is not permitted + on a symbolic number. */ + +inline bool +do_shift_rotate (enum tree_code code, + struct symbolic_number *n, + int count) +{ + int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; + unsigned head_marker; + + if (count % BITS_PER_UNIT != 0) + return false; + count = (count / BITS_PER_UNIT) * BITS_PER_MARKER; + + /* Zero out the extra bits of N in order to avoid them being shifted + into the significant bits. */ + if (size < 64 / BITS_PER_MARKER) + n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; + + switch (code) + { + case LSHIFT_EXPR: + n->n <<= count; + break; + case RSHIFT_EXPR: + head_marker = HEAD_MARKER (n->n, size); + n->n >>= count; + /* Arithmetic shift of signed type: result is dependent on the value. */ + if (!TYPE_UNSIGNED (n->type) && head_marker) + for (i = 0; i < count / BITS_PER_MARKER; i++) + n->n |= (uint64_t) MARKER_BYTE_UNKNOWN + << ((size - 1 - i) * BITS_PER_MARKER); + break; + case LROTATE_EXPR: + n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count)); + break; + case RROTATE_EXPR: + n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count)); + break; + default: + return false; + } + /* Zero unused bits for size. */ + if (size < 64 / BITS_PER_MARKER) + n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; + return true; +} + +/* Perform sanity checking for the symbolic number N and the gimple + statement STMT. */ + +inline bool +verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt) +{ + tree lhs_type; + + lhs_type = gimple_expr_type (stmt); + + if (TREE_CODE (lhs_type) != INTEGER_TYPE) + return false; + + if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type)) + return false; + + return true; +} + +/* Initialize the symbolic number N for the bswap pass from the base element + SRC manipulated by the bitwise OR expression. */ + +bool +init_symbolic_number (struct symbolic_number *n, tree src) +{ + int size; + + if (! INTEGRAL_TYPE_P (TREE_TYPE (src))) + return false; + + n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE; + n->src = src; + + /* Set up the symbolic number N by setting each byte to a value between 1 and + the byte size of rhs1. The highest order byte is set to n->size and the + lowest order byte to 1. */ + n->type = TREE_TYPE (src); + size = TYPE_PRECISION (n->type); + if (size % BITS_PER_UNIT != 0) + return false; + size /= BITS_PER_UNIT; + if (size > 64 / BITS_PER_MARKER) + return false; + n->range = size; + n->n = CMPNOP; + n->n_ops = 1; + + if (size < 64 / BITS_PER_MARKER) + n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1; + + return true; +} + +/* Check if STMT might be a byte swap or a nop from a memory source and returns + the answer. If so, REF is that memory source and the base of the memory area + accessed and the offset of the access from that base are recorded in N. 
*/ + +bool +find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n) +{ + /* Leaf node is an array or component ref. Memorize its base and + offset from base to compare to other such leaf node. */ + HOST_WIDE_INT bitsize, bitpos; + machine_mode mode; + int unsignedp, reversep, volatilep; + tree offset, base_addr; + + /* Not prepared to handle PDP endian. */ + if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) + return false; + + if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt)) + return false; + + base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode, + &unsignedp, &reversep, &volatilep); + + if (TREE_CODE (base_addr) == MEM_REF) + { + offset_int bit_offset = 0; + tree off = TREE_OPERAND (base_addr, 1); + + if (!integer_zerop (off)) + { + offset_int boff, coff = mem_ref_offset (base_addr); + boff = coff << LOG2_BITS_PER_UNIT; + bit_offset += boff; + } + + base_addr = TREE_OPERAND (base_addr, 0); + + /* Avoid returning a negative bitpos as this may wreak havoc later. */ + if (wi::neg_p (bit_offset)) + { + offset_int mask = wi::mask <offset_int> (LOG2_BITS_PER_UNIT, false); + offset_int tem = wi::bit_and_not (bit_offset, mask); + /* TEM is the bitpos rounded to BITS_PER_UNIT towards -Inf. + Subtract it to BIT_OFFSET and add it (scaled) to OFFSET. */ + bit_offset -= tem; + tem >>= LOG2_BITS_PER_UNIT; + if (offset) + offset = size_binop (PLUS_EXPR, offset, + wide_int_to_tree (sizetype, tem)); + else + offset = wide_int_to_tree (sizetype, tem); + } + + bitpos += bit_offset.to_shwi (); + } + + if (bitpos % BITS_PER_UNIT) + return false; + if (bitsize % BITS_PER_UNIT) + return false; + if (reversep) + return false; + + if (!init_symbolic_number (n, ref)) + return false; + n->base_addr = base_addr; + n->offset = offset; + n->bytepos = bitpos / BITS_PER_UNIT; + n->alias_set = reference_alias_ptr_type (ref); + n->vuse = gimple_vuse (stmt); + return true; +} + +/* Compute the symbolic number N representing the result of a bitwise OR on 2 + symbolic number N1 and N2 whose source statements are respectively + SOURCE_STMT1 and SOURCE_STMT2. */ + +gimple * +perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1, + gimple *source_stmt2, struct symbolic_number *n2, + struct symbolic_number *n) +{ + int i, size; + uint64_t mask; + gimple *source_stmt; + struct symbolic_number *n_start; + + tree rhs1 = gimple_assign_rhs1 (source_stmt1); + if (TREE_CODE (rhs1) == BIT_FIELD_REF + && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME) + rhs1 = TREE_OPERAND (rhs1, 0); + tree rhs2 = gimple_assign_rhs1 (source_stmt2); + if (TREE_CODE (rhs2) == BIT_FIELD_REF + && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME) + rhs2 = TREE_OPERAND (rhs2, 0); + + /* Sources are different, cancel bswap if they are not memory location with + the same base (array, structure, ...). 
*/ + if (rhs1 != rhs2) + { + uint64_t inc; + HOST_WIDE_INT start_sub, end_sub, end1, end2, end; + struct symbolic_number *toinc_n_ptr, *n_end; + basic_block bb1, bb2; + + if (!n1->base_addr || !n2->base_addr + || !operand_equal_p (n1->base_addr, n2->base_addr, 0)) + return NULL; + + if (!n1->offset != !n2->offset + || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0))) + return NULL; + + if (n1->bytepos < n2->bytepos) + { + n_start = n1; + start_sub = n2->bytepos - n1->bytepos; + } + else + { + n_start = n2; + start_sub = n1->bytepos - n2->bytepos; + } + + bb1 = gimple_bb (source_stmt1); + bb2 = gimple_bb (source_stmt2); + if (dominated_by_p (CDI_DOMINATORS, bb1, bb2)) + source_stmt = source_stmt1; + else + source_stmt = source_stmt2; + + /* Find the highest address at which a load is performed and + compute related info. */ + end1 = n1->bytepos + (n1->range - 1); + end2 = n2->bytepos + (n2->range - 1); + if (end1 < end2) + { + end = end2; + end_sub = end2 - end1; + } + else + { + end = end1; + end_sub = end1 - end2; + } + n_end = (end2 > end1) ? n2 : n1; + + /* Find symbolic number whose lsb is the most significant. */ + if (BYTES_BIG_ENDIAN) + toinc_n_ptr = (n_end == n1) ? n2 : n1; + else + toinc_n_ptr = (n_start == n1) ? n2 : n1; + + n->range = end - n_start->bytepos + 1; + + /* Check that the range of memory covered can be represented by + a symbolic number. */ + if (n->range > 64 / BITS_PER_MARKER) + return NULL; + + /* Reinterpret byte marks in symbolic number holding the value of + bigger weight according to target endianness. */ + inc = BYTES_BIG_ENDIAN ? end_sub : start_sub; + size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT; + for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER) + { + unsigned marker + = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK; + if (marker && marker != MARKER_BYTE_UNKNOWN) + toinc_n_ptr->n += inc; + } + } + else + { + n->range = n1->range; + n_start = n1; + source_stmt = source_stmt1; + } + + if (!n1->alias_set + || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set)) + n->alias_set = n1->alias_set; + else + n->alias_set = ptr_type_node; + n->vuse = n_start->vuse; + n->base_addr = n_start->base_addr; + n->offset = n_start->offset; + n->src = n_start->src; + n->bytepos = n_start->bytepos; + n->type = n_start->type; + size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; + + for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER) + { + uint64_t masked1, masked2; + + masked1 = n1->n & mask; + masked2 = n2->n & mask; + if (masked1 && masked2 && masked1 != masked2) + return NULL; + } + n->n = n1->n | n2->n; + n->n_ops = n1->n_ops + n2->n_ops; + + return source_stmt; +} + +/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform + the operation given by the rhs of STMT on the result. If the operation + could successfully be executed the function returns a gimple stmt whose + rhs's first tree is the expression of the source operand and NULL + otherwise. */ + +gimple * +find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit) +{ + enum tree_code code; + tree rhs1, rhs2 = NULL; + gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1; + enum gimple_rhs_class rhs_class; + + if (!limit || !is_gimple_assign (stmt)) + return NULL; + + rhs1 = gimple_assign_rhs1 (stmt); + + if (find_bswap_or_nop_load (stmt, rhs1, n)) + return stmt; + + /* Handle BIT_FIELD_REF. 
*/ + if (TREE_CODE (rhs1) == BIT_FIELD_REF + && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME) + { + unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1)); + unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2)); + if (bitpos % BITS_PER_UNIT == 0 + && bitsize % BITS_PER_UNIT == 0 + && init_symbolic_number (n, TREE_OPERAND (rhs1, 0))) + { + /* Handle big-endian bit numbering in BIT_FIELD_REF. */ + if (BYTES_BIG_ENDIAN) + bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize; + + /* Shift. */ + if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos)) + return NULL; + + /* Mask. */ + uint64_t mask = 0; + uint64_t tmp = (1 << BITS_PER_UNIT) - 1; + for (unsigned i = 0; i < bitsize / BITS_PER_UNIT; + i++, tmp <<= BITS_PER_UNIT) + mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER); + n->n &= mask; + + /* Convert. */ + n->type = TREE_TYPE (rhs1); + if (!n->base_addr) + n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT; + + return verify_symbolic_number_p (n, stmt) ? stmt : NULL; + } + + return NULL; + } + + if (TREE_CODE (rhs1) != SSA_NAME) + return NULL; + + code = gimple_assign_rhs_code (stmt); + rhs_class = gimple_assign_rhs_class (stmt); + rhs1_stmt = SSA_NAME_DEF_STMT (rhs1); + + if (rhs_class == GIMPLE_BINARY_RHS) + rhs2 = gimple_assign_rhs2 (stmt); + + /* Handle unary rhs and binary rhs with integer constants as second + operand. */ + + if (rhs_class == GIMPLE_UNARY_RHS + || (rhs_class == GIMPLE_BINARY_RHS + && TREE_CODE (rhs2) == INTEGER_CST)) + { + if (code != BIT_AND_EXPR + && code != LSHIFT_EXPR + && code != RSHIFT_EXPR + && code != LROTATE_EXPR + && code != RROTATE_EXPR + && !CONVERT_EXPR_CODE_P (code)) + return NULL; + + source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1); + + /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and + we have to initialize the symbolic number. */ + if (!source_stmt1) + { + if (gimple_assign_load_p (stmt) + || !init_symbolic_number (n, rhs1)) + return NULL; + source_stmt1 = stmt; + } + + switch (code) + { + case BIT_AND_EXPR: + { + int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; + uint64_t val = int_cst_value (rhs2), mask = 0; + uint64_t tmp = (1 << BITS_PER_UNIT) - 1; + + /* Only constants masking full bytes are allowed. */ + for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT) + if ((val & tmp) != 0 && (val & tmp) != tmp) + return NULL; + else if (val & tmp) + mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER); + + n->n &= mask; + } + break; + case LSHIFT_EXPR: + case RSHIFT_EXPR: + case LROTATE_EXPR: + case RROTATE_EXPR: + if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2))) + return NULL; + break; + CASE_CONVERT: + { + int i, type_size, old_type_size; + tree type; + + type = gimple_expr_type (stmt); + type_size = TYPE_PRECISION (type); + if (type_size % BITS_PER_UNIT != 0) + return NULL; + type_size /= BITS_PER_UNIT; + if (type_size > 64 / BITS_PER_MARKER) + return NULL; + + /* Sign extension: result is dependent on the value. */ + old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT; + if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size + && HEAD_MARKER (n->n, old_type_size)) + for (i = 0; i < type_size - old_type_size; i++) + n->n |= (uint64_t) MARKER_BYTE_UNKNOWN + << ((type_size - 1 - i) * BITS_PER_MARKER); + + if (type_size < 64 / BITS_PER_MARKER) + { + /* If STMT casts to a smaller type mask out the bits not + belonging to the target type. 
*/ + n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1; + } + n->type = type; + if (!n->base_addr) + n->range = type_size; + } + break; + default: + return NULL; + }; + return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL; + } + + /* Handle binary rhs. */ + + if (rhs_class == GIMPLE_BINARY_RHS) + { + struct symbolic_number n1, n2; + gimple *source_stmt, *source_stmt2; + + if (code != BIT_IOR_EXPR) + return NULL; + + if (TREE_CODE (rhs2) != SSA_NAME) + return NULL; + + rhs2_stmt = SSA_NAME_DEF_STMT (rhs2); + + switch (code) + { + case BIT_IOR_EXPR: + source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1); + + if (!source_stmt1) + return NULL; + + source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1); + + if (!source_stmt2) + return NULL; + + if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type)) + return NULL; + + if (!n1.vuse != !n2.vuse + || (n1.vuse && !operand_equal_p (n1.vuse, n2.vuse, 0))) + return NULL; + + source_stmt + = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n); + + if (!source_stmt) + return NULL; + + if (!verify_symbolic_number_p (n, stmt)) + return NULL; + + break; + default: + return NULL; + } + return source_stmt; + } + return NULL; +} + +/* Check if STMT completes a bswap implementation or a read in a given + endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP + accordingly. It also sets N to represent the kind of operations + performed: size of the resulting expression and whether it works on + a memory source, and if so alias-set and vuse. At last, the + function returns a stmt whose rhs's first tree is the source + expression. */ + +gimple * +find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap) +{ + unsigned rsize; + uint64_t tmpn, mask; +/* The number which the find_bswap_or_nop_1 result should match in order + to have a full byte swap. The number is shifted to the right + according to the size of the symbolic number before using it. */ + uint64_t cmpxchg = CMPXCHG; + uint64_t cmpnop = CMPNOP; + + gimple *ins_stmt; + int limit; + + /* The last parameter determines the depth search limit. It usually + correlates directly to the number n of bytes to be touched. We + increase that number by log2(n) + 1 here in order to also + cover signed -> unsigned conversions of the src operand as can be seen + in libgcc, and for initial shift/and operation of the src operand. */ + limit = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (gimple_expr_type (stmt))); + limit += 1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit); + ins_stmt = find_bswap_or_nop_1 (stmt, n, limit); + + if (!ins_stmt) + return NULL; + + /* Find real size of result (highest non-zero byte). */ + if (n->base_addr) + for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++); + else + rsize = n->range; + + /* Zero out the bits corresponding to untouched bytes in original gimple + expression. */ + if (n->range < (int) sizeof (int64_t)) + { + mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1; + cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER; + cmpnop &= mask; + } + + /* Zero out the bits corresponding to unused bytes in the result of the + gimple expression. 
*/ + if (rsize < n->range) + { + if (BYTES_BIG_ENDIAN) + { + mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1; + cmpxchg &= mask; + cmpnop >>= (n->range - rsize) * BITS_PER_MARKER; + } + else + { + mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1; + cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER; + cmpnop &= mask; + } + n->range = rsize; + } + + /* A complete byte swap should make the symbolic number to start with + the largest digit in the highest order byte. Unchanged symbolic + number indicates a read with same endianness as target architecture. */ + if (n->n == cmpnop) + *bswap = false; + else if (n->n == cmpxchg) + *bswap = true; + else + return NULL; + + /* Useless bit manipulation performed by code. */ + if (!n->base_addr && n->n == cmpnop && n->n_ops == 1) + return NULL; + + n->range *= BITS_PER_UNIT; + return ins_stmt; +} + +const pass_data pass_data_optimize_bswap = +{ + GIMPLE_PASS, /* type */ + "bswap", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + PROP_ssa, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_optimize_bswap : public gimple_opt_pass +{ +public: + pass_optimize_bswap (gcc::context *ctxt) + : gimple_opt_pass (pass_data_optimize_bswap, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8; + } + + virtual unsigned int execute (function *); + +}; // class pass_optimize_bswap + +/* Perform the bswap optimization: replace the expression computed in the rhs + of CUR_STMT by an equivalent bswap, load or load + bswap expression. + Which of these alternatives replace the rhs is given by N->base_addr (non + null if a load is needed) and BSWAP. The type, VUSE and set-alias of the + load to perform are also given in N while the builtin bswap invoke is given + in FNDEL. Finally, if a load is involved, SRC_STMT refers to one of the + load statements involved to construct the rhs in CUR_STMT and N->range gives + the size of the rhs expression for maintaining some statistics. + + Note that if the replacement involve a load, CUR_STMT is moved just after + SRC_STMT to do the load with the same VUSE which can lead to CUR_STMT + changing of basic block. */ + +bool +bswap_replace (gimple *cur_stmt, gimple *ins_stmt, tree fndecl, + tree bswap_type, tree load_type, struct symbolic_number *n, + bool bswap) +{ + gimple_stmt_iterator gsi; + tree src, tmp, tgt; + gimple *bswap_stmt; + + gsi = gsi_for_stmt (cur_stmt); + src = n->src; + tgt = gimple_assign_lhs (cur_stmt); + + /* Need to load the value from memory first. */ + if (n->base_addr) + { + gimple_stmt_iterator gsi_ins = gsi_for_stmt (ins_stmt); + tree addr_expr, addr_tmp, val_expr, val_tmp; + tree load_offset_ptr, aligned_load_type; + gimple *addr_stmt, *load_stmt; + unsigned align; + HOST_WIDE_INT load_offset = 0; + basic_block ins_bb, cur_bb; + + ins_bb = gimple_bb (ins_stmt); + cur_bb = gimple_bb (cur_stmt); + if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb)) + return false; + + align = get_object_alignment (src); + + /* Move cur_stmt just before one of the load of the original + to ensure it has the same VUSE. See PR61517 for what could + go wrong. 
*/ + if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt)) + reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt)); + gsi_move_before (&gsi, &gsi_ins); + gsi = gsi_for_stmt (cur_stmt); + + /* Compute address to load from and cast according to the size + of the load. */ + addr_expr = build_fold_addr_expr (unshare_expr (src)); + if (is_gimple_mem_ref_addr (addr_expr)) + addr_tmp = addr_expr; + else + { + addr_tmp = make_temp_ssa_name (TREE_TYPE (addr_expr), NULL, + "load_src"); + addr_stmt = gimple_build_assign (addr_tmp, addr_expr); + gsi_insert_before (&gsi, addr_stmt, GSI_SAME_STMT); + } + + /* Perform the load. */ + aligned_load_type = load_type; + if (align < TYPE_ALIGN (load_type)) + aligned_load_type = build_aligned_type (load_type, align); + load_offset_ptr = build_int_cst (n->alias_set, load_offset); + val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp, + load_offset_ptr); + + if (!bswap) + { + if (n->range == 16) + nop_stats.found_16bit++; + else if (n->range == 32) + nop_stats.found_32bit++; + else + { + gcc_assert (n->range == 64); + nop_stats.found_64bit++; + } + + /* Convert the result of load if necessary. */ + if (!useless_type_conversion_p (TREE_TYPE (tgt), load_type)) + { + val_tmp = make_temp_ssa_name (aligned_load_type, NULL, + "load_dst"); + load_stmt = gimple_build_assign (val_tmp, val_expr); + gimple_set_vuse (load_stmt, n->vuse); + gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT); + gimple_assign_set_rhs_with_ops (&gsi, NOP_EXPR, val_tmp); + } + else + { + gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr); + gimple_set_vuse (cur_stmt, n->vuse); + } + update_stmt (cur_stmt); + + if (dump_file) + { + fprintf (dump_file, + "%d bit load in target endianness found at: ", + (int) n->range); + print_gimple_stmt (dump_file, cur_stmt, 0); + } + return true; + } + else + { + val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst"); + load_stmt = gimple_build_assign (val_tmp, val_expr); + gimple_set_vuse (load_stmt, n->vuse); + gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT); + } + src = val_tmp; + } + else if (!bswap) + { + gimple *g; + if (!useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src))) + { + if (!is_gimple_val (src)) + return false; + g = gimple_build_assign (tgt, NOP_EXPR, src); + } + else + g = gimple_build_assign (tgt, src); + if (n->range == 16) + nop_stats.found_16bit++; + else if (n->range == 32) + nop_stats.found_32bit++; + else + { + gcc_assert (n->range == 64); + nop_stats.found_64bit++; + } + if (dump_file) + { + fprintf (dump_file, + "%d bit reshuffle in target endianness found at: ", + (int) n->range); + print_gimple_stmt (dump_file, cur_stmt, 0); + } + gsi_replace (&gsi, g, true); + return true; + } + else if (TREE_CODE (src) == BIT_FIELD_REF) + src = TREE_OPERAND (src, 0); + + if (n->range == 16) + bswap_stats.found_16bit++; + else if (n->range == 32) + bswap_stats.found_32bit++; + else + { + gcc_assert (n->range == 64); + bswap_stats.found_64bit++; + } + + tmp = src; + + /* Convert the src expression if necessary. */ + if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type)) + { + gimple *convert_stmt; + + tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc"); + convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src); + gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT); + } + + /* Canonical form for 16 bit bswap is a rotate expression. Only 16bit values + are considered as rotation of 2N bit values by N bits is generally not + equivalent to a bswap. 
Consider for instance 0x01020304 r>> 16 which + gives 0x03040102 while a bswap for that value is 0x04030201. */ + if (bswap && n->range == 16) + { + tree count = build_int_cst (NULL, BITS_PER_UNIT); + src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count); + bswap_stmt = gimple_build_assign (NULL, src); + } + else + bswap_stmt = gimple_build_call (fndecl, 1, tmp); + + tmp = tgt; + + /* Convert the result if necessary. */ + if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type)) + { + gimple *convert_stmt; + + tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst"); + convert_stmt = gimple_build_assign (tgt, NOP_EXPR, tmp); + gsi_insert_after (&gsi, convert_stmt, GSI_SAME_STMT); + } + + gimple_set_lhs (bswap_stmt, tmp); + + if (dump_file) + { + fprintf (dump_file, "%d bit bswap implementation found at: ", + (int) n->range); + print_gimple_stmt (dump_file, cur_stmt, 0); + } + + gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT); + gsi_remove (&gsi, true); + return true; +} + +/* Find manual byte swap implementations as well as load in a given + endianness. Byte swaps are turned into a bswap builtin invokation + while endian loads are converted to bswap builtin invokation or + simple load according to the target endianness. */ + +unsigned int +pass_optimize_bswap::execute (function *fun) +{ + basic_block bb; + bool bswap32_p, bswap64_p; + bool changed = false; + tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE; + + bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32) + && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing); + bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64) + && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing + || (bswap32_p && word_mode == SImode))); + + /* Determine the argument type of the builtins. The code later on + assumes that the return and argument type are the same. */ + if (bswap32_p) + { + tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32); + bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))); + } + + if (bswap64_p) + { + tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64); + bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl))); + } + + memset (&nop_stats, 0, sizeof (nop_stats)); + memset (&bswap_stats, 0, sizeof (bswap_stats)); + calculate_dominance_info (CDI_DOMINATORS); + + FOR_EACH_BB_FN (bb, fun) + { + gimple_stmt_iterator gsi; + + /* We do a reverse scan for bswap patterns to make sure we get the + widest match. As bswap pattern matching doesn't handle previously + inserted smaller bswap replacements as sub-patterns, the wider + variant wouldn't be detected. */ + for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);) + { + gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi); + tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type; + enum tree_code code; + struct symbolic_number n; + bool bswap; + + /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt + might be moved to a different basic block by bswap_replace and gsi + must not points to it if that's the case. Moving the gsi_prev + there make sure that gsi points to the statement previous to + cur_stmt while still making sure that all statements are + considered in this basic block. */ + gsi_prev (&gsi); + + if (!is_gimple_assign (cur_stmt)) + continue; + + code = gimple_assign_rhs_code (cur_stmt); + switch (code) + { + case LROTATE_EXPR: + case RROTATE_EXPR: + if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt)) + || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt)) + % BITS_PER_UNIT) + continue; + /* Fall through. 
*/ + case BIT_IOR_EXPR: + break; + default: + continue; + } + + ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap); + + if (!ins_stmt) + continue; + + switch (n.range) + { + case 16: + /* Already in canonical form, nothing to do. */ + if (code == LROTATE_EXPR || code == RROTATE_EXPR) + continue; + load_type = bswap_type = uint16_type_node; + break; + case 32: + load_type = uint32_type_node; + if (bswap32_p) + { + fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32); + bswap_type = bswap32_type; + } + break; + case 64: + load_type = uint64_type_node; + if (bswap64_p) + { + fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64); + bswap_type = bswap64_type; + } + break; + default: + continue; + } + + if (bswap && !fndecl && n.range != 16) + continue; + + if (bswap_replace (cur_stmt, ins_stmt, fndecl, bswap_type, load_type, + &n, bswap)) + changed = true; + } + } + + statistics_counter_event (fun, "16-bit nop implementations found", + nop_stats.found_16bit); + statistics_counter_event (fun, "32-bit nop implementations found", + nop_stats.found_32bit); + statistics_counter_event (fun, "64-bit nop implementations found", + nop_stats.found_64bit); + statistics_counter_event (fun, "16-bit bswap implementations found", + bswap_stats.found_16bit); + statistics_counter_event (fun, "32-bit bswap implementations found", + bswap_stats.found_32bit); + statistics_counter_event (fun, "64-bit bswap implementations found", + bswap_stats.found_64bit); + + return (changed ? TODO_update_ssa : 0); +} + +} // anon namespace + +gimple_opt_pass * +make_pass_optimize_bswap (gcc::context *ctxt) +{ + return new pass_optimize_bswap (ctxt); +} + +namespace { + /* Struct recording one operand for the store, which is either a constant, then VAL represents the constant and all the other fields are zero, or a memory load, then VAL represents the reference, BASE_ADDR is non-NULL |