aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--gcc/config/riscv/bitmanip.md20
-rw-r--r--gcc/config/riscv/riscv.cc67
2 files changed, 79 insertions, 8 deletions
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index bcd5302..fffd8c6 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -702,7 +702,19 @@
rtx t1 = force_reg (word_mode, operands[3]);
a0 = force_reg (word_mode, gen_rtx_XOR (word_mode, a0, a1));
a0 = force_reg (word_mode, gen_rtx_CLMUL (word_mode, a0, q_reg));
+
+ /* XXX By adjusting Q we may be able to eliminate this shift. The size
+ of this shift seems to be dependent on the size of the CRC
+ output (aka N in N-bit CRC). */
a0 = force_reg (word_mode, gen_rtx_ASHIFT (word_mode, a0, GEN_INT (16)));
+
+ /* CCC By adjusting operands[3] (which should be a constant) we may
+ be able to utilize CLMULH to get the bits in the right place and
+ avoid the shifts to extract the bitfield. If that is not possible
+ the shifts will still be needed and are dependent on input/output
+ sizes as well. Does adjusting the constant and shift counts
+ result in a constant that is more likely to bt synthesized in a
+ single instruction? */
a0 = force_reg (word_mode, gen_rtx_CLMUL (word_mode, a0, t1));
a0 = force_reg (word_mode, gen_rtx_LSHIFTRT (word_mode, a0, GEN_INT (24)));
a0 = force_reg (word_mode, gen_rtx_ASHIFT (word_mode, a0, GEN_INT (24)));
@@ -718,7 +730,13 @@
else
{
/* If we do not have the ZBC extension (ie, no clmul), then
- use a table based algorithm to implement the CRC. */
+ use a table based algorithm to implement the CRC.
+
+ XXX What is the size of each element in this table and
+ how many entries are in the table? Does the element
+ size or number of elements vary based on the input or
+ output types for the CRC function? If so, do we need
+ to restrict it to only be used for certain modes? */
expand_crc_table_based (operands, QImode);
}
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 842fe3f..3fc6b9c 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -6902,19 +6902,21 @@ riscv_shamt_matches_mask_p (int shamt, HOST_WIDE_INT mask)
return shamt == ctz_hwi (mask);
}
-/* Calculate the quotient of polynomial long division of x^2n by the polynomial
+/* Return the quotient of polynomial long division of x^2n by POLYNOMIAL
in GF (2^n). */
unsigned HOST_WIDE_INT
gf2n_poly_long_div_quotient (unsigned HOST_WIDE_INT polynomial)
{
- vec<short> x2n, pol, q;
+ vec<bool>pol;
+ vec<short> x2n, q;
+
// Create vector of bits, for the polynomial.
pol.create (sizeof (polynomial) * BITS_PER_UNIT + 1);
size_t n = 1;
for ( ; polynomial; n++)
{
- pol.quick_push (polynomial&1);
+ pol.quick_push (polynomial & 1);
polynomial >>= 1;
}
pol.quick_push (1);
@@ -6923,6 +6925,8 @@ gf2n_poly_long_div_quotient (unsigned HOST_WIDE_INT polynomial)
x2n.create (2 * n - 1);
for (size_t i = 0; i < 2 * (n - 1); i++)
x2n.safe_push (0);
+
+ /* Is this the implicit bit on at the top of the poly? */
x2n.safe_push (1);
q.create (n);
@@ -6952,6 +6956,9 @@ gf2n_poly_long_div_quotient (unsigned HOST_WIDE_INT polynomial)
}
/* Calculates reciprocal CRC for initial CRC and given polynomial. */
+/* XXX Is this needed? It's not referenced anywhere.
+ If it is needed, it needs to be generalized rather than only
+ working on uint16_t. */
static uint16_t
generate_crc_reciprocal (uint16_t crc,
uint16_t polynomial)
@@ -6967,6 +6974,8 @@ generate_crc_reciprocal (uint16_t crc,
}
/* Calculates CRC for initial CRC and given polynomial. */
+/* XXX This needs to be generalized rather than only working
+ on uint16_t. */
static uint16_t
generate_crc (uint16_t crc,
uint16_t polynomial)
@@ -6983,6 +6992,19 @@ generate_crc (uint16_t crc,
}
/* Generates 16-bit CRC table. */
+/* XXX This needs to be generalized rather than only working
+ on uint16_t.
+
+ This looks like it tries to share tables which is good, but
+ don't we have to verify that the polynomial and sizes are the
+ same for sharing to be safe? Doesn't that in turn argue that
+ the polynomial and size should be encoded into the table
+ name?
+
+ Presumably the table should be going into a readonly data
+ section. It doesn't look like we make any attempt to switch
+ sections. Mixing code and data is exceedingly bad on
+ modern processors. */
rtx
generate_crc16_table (uint16_t polynom)
{
@@ -7016,27 +7038,43 @@ generate_crc16_table (uint16_t polynom)
return lab;
}
-void reflect (rtx op1, machine_mode mode)
+/* XXX Is this needed? It's not referenced anywhere. */
+void
+reflect (rtx op1, machine_mode mode)
{
// Reflect the bits
op1 = gen_rtx_BSWAP (mode, op1);
-// Adjust the position of the reflected bits
+ // Adjust the position of the reflected bits
+ /* XXX I don't understand the comment. Under what
+ conditions does mode != Pmode? */
if (mode != Pmode)
op1 = gen_rtx_SUBREG (Pmode, op1, 0);
-// Shift the reflected bits to the least significant end
+ // Shift the reflected bits to the least significant end
+ /* XXX This seems to assume we're always dealing with
+ a 16bit quantity. */
rtx shift_amt = gen_rtx_CONST_INT (Pmode, 8);
op1 = gen_rtx_LSHIFTRT (Pmode, op1, shift_amt);
+
+ /* XXX This routine is going to have no impact if it was
+ ever used. Changing OP1 above isn't reflected into
+ the caller. */
}
/* Generate table based CRC code. */
+/* XXX This doesn't seem to be used. Is it the case that we're
+ eventually going to need to distinguish between a bit-reflected
+ CRC and a normal CRC for table based variants? If so, doesn't
+ that need to be in the operands for the .CRC IFN? */
void
-expand_crc_table_based_reflected (rtx *operands, machine_mode data_mode)
+expand_crc_table_based_reflected (rtx *operands, machine_mode data_mode)
{
machine_mode mode = GET_MODE (operands[0]);
rtx in = force_reg (mode, gen_rtx_XOR (mode, operands[1], operands[2]));
rtx ix = gen_rtx_AND (mode, in, GEN_INT (GET_MODE_MASK (data_mode)));
+ /* XXX Under what conditions will mode != Pmode be true? Is this an
+ artifact of having the modes wrong for the crc expander? */
if (mode != Pmode)
ix = gen_rtx_SUBREG (Pmode, ix, 0);
ix = gen_rtx_ASHIFT (Pmode, ix, GEN_INT (exact_log2 (GET_MODE_SIZE (mode)
@@ -7048,6 +7086,15 @@ expand_crc_table_based_reflected (rtx *operands, machine_mode data_mode)
rtx high = gen_rtx_LSHIFTRT (mode, operands[1],
GEN_INT (data_mode));
rtx crc = force_reg (mode, gen_rtx_XOR (mode, tab, high));
+
+ /* XXX In general we prefer to avoid SUBREGs, especially
+ paradoxical subregs (outer mode is wider than inner mode).
+
+ It should be possible to replace a paradoxical subreg with
+ a sign or zero extension.
+
+ If this is a narrowing subreg, then gen_lowpart might be
+ better. */
riscv_emit_move (operands[0], gen_rtx_SUBREG (mode, crc, 0));
}
@@ -7060,6 +7107,8 @@ expand_crc_table_based (rtx *operands, machine_mode data_mode)
GEN_INT (8));
rtx in = force_reg (mode, gen_rtx_XOR (mode, op1, operands[2]));
rtx ix = gen_rtx_AND (mode, in, GEN_INT (GET_MODE_MASK (data_mode)));
+ /* XXX Under what conditions will mode != Pmode be true? Is this an
+ artifact of having the modes wrong for the crc expander? */
if (mode != Pmode)
ix = gen_rtx_SUBREG (Pmode, ix, 0);
ix = gen_rtx_ASHIFT (Pmode, ix, GEN_INT (exact_log2 (GET_MODE_SIZE (mode)
@@ -7072,6 +7121,10 @@ expand_crc_table_based (rtx *operands, machine_mode data_mode)
GEN_INT (8));
high = force_reg (mode, gen_rtx_AND (mode, high, GEN_INT (65535)));
rtx crc = force_reg (mode, gen_rtx_XOR (mode, tab, high));
+
+ /* Why is this different than the reflected version above? Doesn't
+ it have the same potential concers WRT mismatched modes between
+ these two objects? */
riscv_emit_move (operands[0], crc);
}