[PATCH v6 01/12] Implement internal functions for efficient CRC computation.

Add two new internal functions (IFN_CRC, IFN_CRC_REV), to provide faster CRC generation. One performs bit-forward and the other bit-reversed CRC computation. If CRC optabs are supported, they are used for the CRC computation. Otherwise, table-based CRC is generated. The supported data and CRC sizes are 8, 16, 32, and 64 bits. The polynomial is without the leading 1. A table with 256 elements is used to store precomputed CRCs. For the reflection of inputs and the output, a simple algorithm involving SHIFT, AND, and OR operations is used. gcc/ * doc/md.texi (crc@var{m}@var{n}4, crc_rev@var{m}@var{n}4): Document. * expr.cc (calculate_crc): New function. (assemble_crc_table): Likewise. (generate_crc_table): Likewise. (calculate_table_based_CRC): Likewise. (expand_crc_table_based): Likewise. (gen_common_operation_to_reflect): Likewise. (reflect_64_bit_value): Likewise. (reflect_32_bit_value): Likewise. (reflect_16_bit_value): Likewise. (reflect_8_bit_value): Likewise. (generate_reflecting_code_standard): Likewise. (expand_reversed_crc_table_based): Likewise. * expr.h (generate_reflecting_code_standard): New function declaration. (expand_crc_table_based): Likewise. (expand_reversed_crc_table_based): Likewise. * internal-fn.cc: (crc_direct): Define. (direct_crc_optab_supported_p): Likewise. (expand_crc_optab_fn): New function * internal-fn.def (CRC, CRC_REV): New internal functions. * optabs.def (crc_optab, crc_rev_optab): New optabs. Signed-off-by: Mariam Arutunian <mariamarutunian@gmail.com> Co-authored-by: Joern Rennecke <joern.rennecke@embecosm.com> Co-authored-by: Jeff Law <jlaw@ventanamicro.com>
author: Mariam Arutunian <mariamarutunian@gmail.com> 2024-11-11 12:48:34 -0700
committer: Jeff Law <jlaw@ventanamicro.com> 2024-11-28 07:34:01 -0700
commit: bb46d05ad64e4e0acb3307e76bab340aa8587d3e (patch)
tree: 3f2bbf5efd898212a2dfc0fdd2a1ed4d7a0c9f71 /gcc/expr.cc
parent: bcb764ec7c063326a17eb6213313cc9c0fd348b3 (diff)
download: gcc-bb46d05ad64e4e0acb3307e76bab340aa8587d3e.zip
gcc-bb46d05ad64e4e0acb3307e76bab340aa8587d3e.tar.gz
gcc-bb46d05ad64e4e0acb3307e76bab340aa8587d3e.tar.bz2
1 files changed, 347 insertions, 0 deletions
diff --git a/gcc/expr.cc b/gcc/expr.cc
index f493914..de25437 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -14177,3 +14177,350 @@ int_expr_size (const_tree exp)
 
   return tree_to_shwi (size);
 }
+
+/* Calculate CRC for the initial CRC and given POLYNOMIAL.
+   CRC_BITS is CRC size.  */
+
+static unsigned HOST_WIDE_INT
+calculate_crc (unsigned HOST_WIDE_INT crc,
+	       unsigned HOST_WIDE_INT polynomial,
+	       unsigned short crc_bits)
+{
+  unsigned HOST_WIDE_INT msb = HOST_WIDE_INT_1U << (crc_bits - 1);
+  crc = crc << (crc_bits - 8);
+  for (short i = 8; i > 0; --i)
+    {
+      if (crc & msb)
+	crc = (crc << 1) ^ polynomial;
+      else
+	crc <<= 1;
+    }
+  /* Zero out bits in crc beyond the specified number of crc_bits.  */
+  if (crc_bits < sizeof (crc) * CHAR_BIT)
+    crc &= (HOST_WIDE_INT_1U << crc_bits) - 1;
+  return crc;
+}
+
+/* Assemble CRC table with 256 elements for the given POLYNOM and CRC_BITS with
+   given ID.
+   ID is the identifier of the table, the name of the table is unique,
+   contains CRC size and the polynomial.
+   POLYNOM is the polynomial used to calculate the CRC table's elements.
+   CRC_BITS is the size of CRC, may be 8, 16, ... . */
+
+rtx
+assemble_crc_table (tree id, unsigned HOST_WIDE_INT polynom,
+		    unsigned short crc_bits)
+{
+  unsigned table_el_n = 0x100;
+  tree ar = build_array_type (make_unsigned_type (crc_bits),
+			      build_index_type (size_int (table_el_n - 1)));
+  tree decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, id, ar);
+  SET_DECL_ASSEMBLER_NAME (decl, id);
+  DECL_ARTIFICIAL (decl) = 1;
+  rtx tab = gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (id));
+  TREE_ASM_WRITTEN (decl) = 0;
+
+  /* Initialize the table.  */
+  vec<tree, va_gc> *initial_values;
+  vec_alloc (initial_values, table_el_n);
+  for (size_t i = 0; i < table_el_n; ++i)
+    {
+      unsigned HOST_WIDE_INT crc = calculate_crc (i, polynom, crc_bits);
+      tree element = build_int_cstu (make_unsigned_type (crc_bits), crc);
+      vec_safe_push (initial_values, element);
+    }
+  DECL_INITIAL (decl) = build_constructor_from_vec (ar, initial_values);
+
+  TREE_READONLY (decl) = 1;
+  TREE_STATIC (decl) = 1;
+
+  if (TREE_PUBLIC (id))
+    {
+      TREE_PUBLIC (decl) = 1;
+      make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
+    }
+
+  mark_decl_referenced (decl);
+  varpool_node::finalize_decl (decl);
+
+  return tab;
+}
+
+/* Generate CRC lookup table by calculating CRC for all possible
+   8-bit data values.  The table is stored with a specific name in the read-only
+   static data section.
+   POLYNOM is the polynomial used to calculate the CRC table's elements.
+   CRC_BITS is the size of CRC, may be 8, 16, ... .  */
+
+rtx
+generate_crc_table (unsigned HOST_WIDE_INT polynom, unsigned short crc_bits)
+{
+  gcc_assert (crc_bits <= 64);
+
+  /* Buf size - 24 letters + 6 '_'
+     + 20 numbers (2 for crc bit size + 2 for 0x + 16 for 64-bit polynomial)
+     + 1 for \0.  */
+  char buf[51];
+  sprintf (buf, "crc_table_for_crc_%u_polynomial_" HOST_WIDE_INT_PRINT_HEX,
+	   crc_bits, polynom);
+
+  tree id = maybe_get_identifier (buf);
+  if (id)
+    return gen_rtx_SYMBOL_REF (Pmode, IDENTIFIER_POINTER (id));
+
+  id = get_identifier (buf);
+  return assemble_crc_table (id, polynom, crc_bits);
+}
+
+/* Generate table-based CRC code for the given CRC, INPUT_DATA and the
+   POLYNOMIAL (without leading 1).
+
+   First, using POLYNOMIAL's value generates CRC table of 256 elements,
+   then generates the assembly for the following code,
+   where crc_bit_size and data_bit_size may be 8, 16, 32, 64, depending on CRC:
+
+     for (int i = 0; i < data_bit_size / 8; i++)
+       crc = (crc << 8) ^ crc_table[(crc >> (crc_bit_size - 8))
+				    ^ (data >> (data_bit_size - (i + 1) * 8)
+				    & 0xFF))];
+
+   So to take values from the table, we need 8-bit data.
+   If input data size is not 8, then first we extract upper 8 bits,
+   then the other 8 bits, and so on.  */
+
+void
+calculate_table_based_CRC (rtx *crc, const rtx &input_data,
+			   const rtx &polynomial,
+			   machine_mode crc_mode, machine_mode data_mode)
+{
+  unsigned short crc_bit_size = GET_MODE_BITSIZE (crc_mode).to_constant ();
+  unsigned short data_size = GET_MODE_SIZE (data_mode).to_constant ();
+  machine_mode mode = GET_MODE (*crc);
+  rtx tab = generate_crc_table (UINTVAL (polynomial), crc_bit_size);
+
+  for (unsigned short i = 0; i < data_size; i++)
+    {
+      /* crc >> (crc_bit_size - 8).  */
+      *crc = force_reg (crc_mode, *crc);
+      rtx op1 = expand_shift (RSHIFT_EXPR, mode, *crc, crc_bit_size - 8,
+			      NULL_RTX, 1);
+
+      /* data >> (8 * (GET_MODE_SIZE (data_mode).to_constant () - i - 1)).  */
+      unsigned range_8 = 8 * (data_size - i - 1);
+      rtx data = force_reg (data_mode, input_data);
+      data = expand_shift (RSHIFT_EXPR, mode, data, range_8, NULL_RTX, 1);
+
+      /* data >> (8 * (GET_MODE_SIZE (data_mode)
+					.to_constant () - i - 1)) & 0xFF.  */
+      rtx data_final = expand_and (mode, data,
+				   gen_int_mode (255, data_mode), NULL_RTX);
+
+      /* (crc >> (crc_bit_size - 8)) ^ data_8bit.  */
+      rtx in = expand_binop (mode, xor_optab, op1, data_final,
+			     NULL_RTX, 1, OPTAB_WIDEN);
+
+      /* ((crc >> (crc_bit_size - 8)) ^ data_8bit) & 0xFF.  */
+      rtx index = expand_and (mode, in, gen_int_mode (255, mode),
+			      NULL_RTX);
+      int log_crc_size = exact_log2 (GET_MODE_SIZE (crc_mode).to_constant ());
+      index = expand_shift (LSHIFT_EXPR, mode, index,
+			    log_crc_size, NULL_RTX, 0);
+
+      rtx addr = gen_reg_rtx (Pmode);
+      convert_move (addr, index, 1);
+      addr = expand_binop (Pmode, add_optab, addr, tab, NULL_RTX,
+			    0, OPTAB_DIRECT);
+
+      /* crc_table[(crc >> (crc_bit_size - 8)) ^ data_8bit]  */
+      rtx tab_el = validize_mem (gen_rtx_MEM (crc_mode, addr));
+
+      /* (crc << 8) if CRC is larger than 8, otherwise crc = 0.  */
+      rtx high = NULL_RTX;
+      if (crc_bit_size != 8)
+	high = expand_shift (LSHIFT_EXPR, mode, *crc, 8, NULL_RTX, 0);
+      else
+	high = gen_int_mode (0, mode);
+
+      /* crc = (crc << 8)
+	       ^ crc_table[(crc >> (crc_bit_size - 8)) ^ data_8bit];  */
+      *crc = expand_binop (mode, xor_optab, tab_el, high, NULL_RTX, 1,
+			   OPTAB_WIDEN);
+    }
+}
+
+/* Generate table-based CRC code for the given CRC, INPUT_DATA and the
+   POLYNOMIAL (without leading 1).
+
+   CRC is OP1, data is OP2 and the polynomial is OP3.
+   This must generate a CRC table and an assembly for the following code,
+   where crc_bit_size and data_bit_size may be 8, 16, 32, 64:
+   uint_crc_bit_size_t
+   crc_crc_bit_size (uint_crc_bit_size_t crc_init,
+		     uint_data_bit_size_t data, size_t size)
+   {
+     uint_crc_bit_size_t crc = crc_init;
+     for (int i = 0; i < data_bit_size / 8; i++)
+       crc = (crc << 8) ^ crc_table[(crc >> (crc_bit_size - 8))
+				    ^ (data >> (data_bit_size - (i + 1) * 8)
+				    & 0xFF))];
+     return crc;
+   }  */
+
+void
+expand_crc_table_based (rtx op0, rtx op1, rtx op2, rtx op3,
+			machine_mode data_mode)
+{
+  gcc_assert (!CONST_INT_P (op0));
+  gcc_assert (CONST_INT_P (op3));
+  machine_mode crc_mode = GET_MODE (op0);
+  rtx crc = gen_reg_rtx (crc_mode);
+  convert_move (crc, op1, 0);
+  calculate_table_based_CRC (&crc, op2, op3, crc_mode, data_mode);
+  convert_move (op0, crc, 0);
+}
+
+/* Generate the common operation for reflecting values:
+   *OP = (*OP & AND1_VALUE) << SHIFT_VAL | (*OP & AND2_VALUE) >> SHIFT_VAL;  */
+
+void
+gen_common_operation_to_reflect (rtx *op,
+				 unsigned HOST_WIDE_INT and1_value,
+				 unsigned HOST_WIDE_INT and2_value,
+				 unsigned shift_val)
+{
+  rtx op1 = expand_and (GET_MODE (*op), *op,
+			gen_int_mode (and1_value, GET_MODE (*op)), NULL_RTX);
+  op1 = expand_shift (LSHIFT_EXPR, GET_MODE (*op), op1, shift_val, op1, 0);
+  rtx op2 = expand_and (GET_MODE (*op), *op,
+			gen_int_mode (and2_value, GET_MODE (*op)), NULL_RTX);
+  op2 = expand_shift (RSHIFT_EXPR, GET_MODE (*op), op2, shift_val, op2, 1);
+  *op = expand_binop (GET_MODE (*op), ior_optab, op1,
+		      op2, *op, 0, OPTAB_LIB_WIDEN);
+}
+
+/* Reflect 64-bit value for the 64-bit target.  */
+
+void
+reflect_64_bit_value (rtx *op)
+{
+  gen_common_operation_to_reflect (op, HOST_WIDE_INT_C (0x00000000FFFFFFFF),
+				   HOST_WIDE_INT_C (0xFFFFFFFF00000000), 32);
+  gen_common_operation_to_reflect (op, HOST_WIDE_INT_C (0x0000FFFF0000FFFF),
+				   HOST_WIDE_INT_C (0xFFFF0000FFFF0000), 16);
+  gen_common_operation_to_reflect (op, HOST_WIDE_INT_C (0x00FF00FF00FF00FF),
+				   HOST_WIDE_INT_C (0xFF00FF00FF00FF00), 8);
+  gen_common_operation_to_reflect (op, HOST_WIDE_INT_C (0x0F0F0F0F0F0F0F0F),
+				   HOST_WIDE_INT_C (0xF0F0F0F0F0F0F0F0), 4);
+  gen_common_operation_to_reflect (op, HOST_WIDE_INT_C (0x3333333333333333),
+				   HOST_WIDE_INT_C (0xCCCCCCCCCCCCCCCC), 2);
+  gen_common_operation_to_reflect (op, HOST_WIDE_INT_C (0x5555555555555555),
+				   HOST_WIDE_INT_C (0xAAAAAAAAAAAAAAAA), 1);
+}
+
+/* Reflect 32-bit value for the 32-bit target.  */
+
+void
+reflect_32_bit_value (rtx *op)
+{
+  gen_common_operation_to_reflect (op, HOST_WIDE_INT_C (0x0000FFFF),
+				  HOST_WIDE_INT_C (0xFFFF0000), 16);
+  gen_common_operation_to_reflect (op, HOST_WIDE_INT_C (0x00FF00FF),
+				  HOST_WIDE_INT_C (0xFF00FF00), 8);
+  gen_common_operation_to_reflect (op, HOST_WIDE_INT_C (0x0F0F0F0F),
+				   HOST_WIDE_INT_C (0xF0F0F0F0), 4);
+  gen_common_operation_to_reflect (op, HOST_WIDE_INT_C (0x33333333),
+				   HOST_WIDE_INT_C (0xCCCCCCCC), 2);
+  gen_common_operation_to_reflect (op, HOST_WIDE_INT_C (0x55555555),
+				   HOST_WIDE_INT_C (0xAAAAAAAA), 1);
+}
+
+/* Reflect 16-bit value for the 16-bit target.  */
+
+void
+reflect_16_bit_value (rtx *op)
+{
+  gen_common_operation_to_reflect (op, HOST_WIDE_INT_C (0x00FF),
+				   HOST_WIDE_INT_C (0xFF00), 8);
+  gen_common_operation_to_reflect (op, HOST_WIDE_INT_C (0x0F0F),
+				   HOST_WIDE_INT_C (0xF0F0), 4);
+  gen_common_operation_to_reflect (op, HOST_WIDE_INT_C (0x3333),
+				   HOST_WIDE_INT_C (0xCCCC), 2);
+  gen_common_operation_to_reflect (op, HOST_WIDE_INT_C (0x5555),
+				   HOST_WIDE_INT_C (0xAAAA), 1);
+}
+
+/* Reflect 8-bit value for the 8-bit target.  */
+
+void
+reflect_8_bit_value (rtx *op)
+{
+  gen_common_operation_to_reflect (op, HOST_WIDE_INT_C (0x0F),
+				   HOST_WIDE_INT_C (0xF0), 4);
+  gen_common_operation_to_reflect (op, HOST_WIDE_INT_C (0x33),
+				   HOST_WIDE_INT_C (0xCC), 2);
+  gen_common_operation_to_reflect (op, HOST_WIDE_INT_C (0x55),
+				   HOST_WIDE_INT_C (0xAA), 1);
+}
+
+/* Generate instruction sequence which reflects the value of the OP
+   using shift, and, or operations.  OP's mode may be less than word_mode.  */
+
+void
+generate_reflecting_code_standard (rtx *op)
+{
+  gcc_assert (GET_MODE_BITSIZE (GET_MODE (*op)).to_constant ()  >= 8
+	      && GET_MODE_BITSIZE (GET_MODE (*op)).to_constant () <= 64);
+
+  if (GET_MODE_BITSIZE (GET_MODE (*op)).to_constant () == 64)
+    reflect_64_bit_value (op);
+  else if (GET_MODE_BITSIZE (GET_MODE (*op)).to_constant () == 32)
+    reflect_32_bit_value (op);
+  else if (GET_MODE_BITSIZE (GET_MODE (*op)).to_constant () == 16)
+    reflect_16_bit_value (op);
+  else
+    reflect_8_bit_value (op);
+}
+
+/* Generate table-based reversed CRC code for the given CRC, INPUT_DATA and
+   the POLYNOMIAL (without leading 1).
+
+   CRC is OP1, data is OP2 and the polynomial is OP3.
+   This must generate CRC table and assembly for the following code,
+   where crc_bit_size and data_bit_size may be 8, 16, 32, 64:
+   uint_crc_bit_size_t
+   crc_crc_bit_size (uint_crc_bit_size_t crc_init,
+			   uint_data_bit_size_t data, size_t size)
+   {
+     reflect (crc_init)
+     uint_crc_bit_size_t crc = crc_init;
+     reflect (data);
+     for (int i = 0; i < data_bit_size / 8; i++)
+       crc = (crc << 8) ^ crc_table[(crc >> (crc_bit_size - 8))
+			  ^ (data >> (data_bit_size - (i + 1) * 8) & 0xFF))];
+     reflect (crc);
+     return crc;
+   }  */
+
+void
+expand_reversed_crc_table_based (rtx op0, rtx op1, rtx op2, rtx op3,
+				 machine_mode data_mode,
+				 void (*gen_reflecting_code) (rtx *op))
+{
+  gcc_assert (!CONST_INT_P (op0));
+  gcc_assert (CONST_INT_P (op3));
+  machine_mode crc_mode = GET_MODE (op0);
+
+  rtx crc = gen_reg_rtx (crc_mode);
+  convert_move (crc, op1, 0);
+  gen_reflecting_code (&crc);
+
+  rtx data = gen_reg_rtx (data_mode);
+  convert_move (data, op2, 0);
+  gen_reflecting_code (&data);
+
+  calculate_table_based_CRC (&crc, data, op3, crc_mode, data_mode);
+
+  gen_reflecting_code (&crc);
+  convert_move (op0, crc, 0);
+}
author	Mariam Arutunian <mariamarutunian@gmail.com>	2024-11-11 12:48:34 -0700
committer	Jeff Law <jlaw@ventanamicro.com>	2024-11-28 07:34:01 -0700
commit	bb46d05ad64e4e0acb3307e76bab340aa8587d3e (patch)
tree	3f2bbf5efd898212a2dfc0fdd2a1ed4d7a0c9f71 /gcc/expr.cc
parent	bcb764ec7c063326a17eb6213313cc9c0fd348b3 (diff)
download	gcc-bb46d05ad64e4e0acb3307e76bab340aa8587d3e.zip gcc-bb46d05ad64e4e0acb3307e76bab340aa8587d3e.tar.gz gcc-bb46d05ad64e4e0acb3307e76bab340aa8587d3e.tar.bz2