diff options
-rw-r--r-- | gcc/config/riscv/bitmanip.md | 20 | ||||
-rw-r--r-- | gcc/config/riscv/riscv.cc | 67 |
2 files changed, 79 insertions, 8 deletions
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index bcd5302..fffd8c6 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -702,7 +702,19 @@ rtx t1 = force_reg (word_mode, operands[3]); a0 = force_reg (word_mode, gen_rtx_XOR (word_mode, a0, a1)); a0 = force_reg (word_mode, gen_rtx_CLMUL (word_mode, a0, q_reg)); + + /* XXX By adjusting Q we may be able to eliminate this shift. The size + of this shift seems to be dependent on the size of the CRC + output (aka N in N-bit CRC). */ a0 = force_reg (word_mode, gen_rtx_ASHIFT (word_mode, a0, GEN_INT (16))); + + /* CCC By adjusting operands[3] (which should be a constant) we may + be able to utilize CLMULH to get the bits in the right place and + avoid the shifts to extract the bitfield. If that is not possible + the shifts will still be needed and are dependent on input/output + sizes as well. Does adjusting the constant and shift counts + result in a constant that is more likely to bt synthesized in a + single instruction? */ a0 = force_reg (word_mode, gen_rtx_CLMUL (word_mode, a0, t1)); a0 = force_reg (word_mode, gen_rtx_LSHIFTRT (word_mode, a0, GEN_INT (24))); a0 = force_reg (word_mode, gen_rtx_ASHIFT (word_mode, a0, GEN_INT (24))); @@ -718,7 +730,13 @@ else { /* If we do not have the ZBC extension (ie, no clmul), then - use a table based algorithm to implement the CRC. */ + use a table based algorithm to implement the CRC. + + XXX What is the size of each element in this table and + how many entries are in the table? Does the element + size or number of elements vary based on the input or + output types for the CRC function? If so, do we need + to restrict it to only be used for certain modes? */ expand_crc_table_based (operands, QImode); } diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 842fe3f..3fc6b9c 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -6902,19 +6902,21 @@ riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask) return shamt == ctz_hwi (mask); } -/* Calculate the quotient of polynomial long division of x^2n by the polynomial +/* Return the quotient of polynomial long division of x^2n by POLYNOMIAL in GF (2^n). */ unsigned HOST_WIDE_INT gf2n_poly_long_div_quotient (unsigned HOST_WIDE_INT polynomial) { - vec<short> x2n, pol, q; + vec<bool>pol; + vec<short> x2n, q; + // Create vector of bits, for the polynomial. pol.create (sizeof (polynomial) * BITS_PER_UNIT + 1); size_t n = 1; for ( ; polynomial; n++) { - pol.quick_push (polynomial&1); + pol.quick_push (polynomial & 1); polynomial >>= 1; } pol.quick_push (1); @@ -6923,6 +6925,8 @@ gf2n_poly_long_div_quotient (unsigned HOST_WIDE_INT polynomial) x2n.create (2 * n - 1); for (size_t i = 0; i < 2 * (n - 1); i++) x2n.safe_push (0); + + /* Is this the implicit bit on at the top of the poly? */ x2n.safe_push (1); q.create (n); @@ -6952,6 +6956,9 @@ gf2n_poly_long_div_quotient (unsigned HOST_WIDE_INT polynomial) } /* Calculates reciprocal CRC for initial CRC and given polynomial. */ +/* XXX Is this needed? It's not referenced anywhere. + If it is needed, it needs to be generalized rather than only + working on uint16_t. */ static uint16_t generate_crc_reciprocal (uint16_t crc, uint16_t polynomial) @@ -6967,6 +6974,8 @@ generate_crc_reciprocal (uint16_t crc, } /* Calculates CRC for initial CRC and given polynomial. */ +/* XXX This needs to be generalized rather than only working + on uint16_t. */ static uint16_t generate_crc (uint16_t crc, uint16_t polynomial) @@ -6983,6 +6992,19 @@ generate_crc (uint16_t crc, } /* Generates 16-bit CRC table. */ +/* XXX This needs to be generalized rather than only working + on uint16_t. + + This looks like it tries to share tables which is good, but + don't we have to verify that the polynomial and sizes are the + same for sharing to be safe? Doesn't that in turn argue that + the polynomial and size should be encoded into the table + name? + + Presumably the table should be going into a readonly data + section. It doesn't look like we make any attempt to switch + sections. Mixing code and data is exceedingly bad on + modern processors. */ rtx generate_crc16_table (uint16_t polynom) { @@ -7016,27 +7038,43 @@ generate_crc16_table (uint16_t polynom) return lab; } -void reflect (rtx op1, machine_mode mode) +/* XXX Is this needed? It's not referenced anywhere. */ +void +reflect (rtx op1, machine_mode mode) { // Reflect the bits op1 = gen_rtx_BSWAP (mode, op1); -// Adjust the position of the reflected bits + // Adjust the position of the reflected bits + /* XXX I don't understand the comment. Under what + conditions does mode != Pmode? */ if (mode != Pmode) op1 = gen_rtx_SUBREG (Pmode, op1, 0); -// Shift the reflected bits to the least significant end + // Shift the reflected bits to the least significant end + /* XXX This seems to assume we're always dealing with + a 16bit quantity. */ rtx shift_amt = gen_rtx_CONST_INT (Pmode, 8); op1 = gen_rtx_LSHIFTRT (Pmode, op1, shift_amt); + + /* XXX This routine is going to have no impact if it was + ever used. Changing OP1 above isn't reflected into + the caller. */ } /* Generate table based CRC code. */ +/* XXX This doesn't seem to be used. Is it the case that we're + eventually going to need to distinguish between a bit-reflected + CRC and a normal CRC for table based variants? If so, doesn't + that need to be in the operands for the .CRC IFN? */ void -expand_crc_table_based_reflected (rtx *operands, machine_mode data_mode) +expand_crc_table_based_reflected (rtx *operands, machine_mode data_mode) { machine_mode mode = GET_MODE (operands[0]); rtx in = force_reg (mode, gen_rtx_XOR (mode, operands[1], operands[2])); rtx ix = gen_rtx_AND (mode, in, GEN_INT (GET_MODE_MASK (data_mode))); + /* XXX Under what conditions will mode != Pmode be true? Is this an + artifact of having the modes wrong for the crc expander? */ if (mode != Pmode) ix = gen_rtx_SUBREG (Pmode, ix, 0); ix = gen_rtx_ASHIFT (Pmode, ix, GEN_INT (exact_log2 (GET_MODE_SIZE (mode) @@ -7048,6 +7086,15 @@ expand_crc_table_based_reflected (rtx *operands, machine_mode data_mode) rtx high = gen_rtx_LSHIFTRT (mode, operands[1], GEN_INT (data_mode)); rtx crc = force_reg (mode, gen_rtx_XOR (mode, tab, high)); + + /* XXX In general we prefer to avoid SUBREGs, especially + paradoxical subregs (outer mode is wider than inner mode). + + It should be possible to replace a paradoxical subreg with + a sign or zero extension. + + If this is a narrowing subreg, then gen_lowpart might be + better. */ riscv_emit_move (operands[0], gen_rtx_SUBREG (mode, crc, 0)); } @@ -7060,6 +7107,8 @@ expand_crc_table_based (rtx *operands, machine_mode data_mode) GEN_INT (8)); rtx in = force_reg (mode, gen_rtx_XOR (mode, op1, operands[2])); rtx ix = gen_rtx_AND (mode, in, GEN_INT (GET_MODE_MASK (data_mode))); + /* XXX Under what conditions will mode != Pmode be true? Is this an + artifact of having the modes wrong for the crc expander? */ if (mode != Pmode) ix = gen_rtx_SUBREG (Pmode, ix, 0); ix = gen_rtx_ASHIFT (Pmode, ix, GEN_INT (exact_log2 (GET_MODE_SIZE (mode) @@ -7072,6 +7121,10 @@ expand_crc_table_based (rtx *operands, machine_mode data_mode) GEN_INT (8)); high = force_reg (mode, gen_rtx_AND (mode, high, GEN_INT (65535))); rtx crc = force_reg (mode, gen_rtx_XOR (mode, tab, high)); + + /* Why is this different than the reflected version above? Doesn't + it have the same potential concers WRT mismatched modes between + these two objects? */ riscv_emit_move (operands[0], crc); } |