RISC-V: Add auto-vect pattern for vector rotate shift

This patch adds the vector rotate shift pattern for auto-vectorization. With this patch, a scalar rotate shift can be automatically vectorized into a vector rotate shift. gcc/ChangeLog: * config/riscv/autovec.md (v<bitmanip_optab><mode>3): Add new define_expand pattern for vector rotate shift. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/binop/vrolr-1.c: New test. * gcc.target/riscv/rvv/autovec/binop/vrolr-run.c: New test. * gcc.target/riscv/rvv/autovec/binop/vrolr-template.h: New test.
author: Feng Wang <wangfeng@eswincomputing.com> 2024-08-17 08:40:42 -0600
committer: Jeff Law <jlaw@ventanamicro.com> 2024-08-17 08:41:19 -0600
commit: 54b228d80c54d32ab49cee6148cfd1364b2bc817 (patch)
tree: 2acff179a83c8a2ff85839590920f0dc8d26bf44 /gcc
parent: e68ab0f16072af97f0898fa0a14e72fcda442775 (diff)
download: gcc-54b228d80c54d32ab49cee6148cfd1364b2bc817.zip
gcc-54b228d80c54d32ab49cee6148cfd1364b2bc817.tar.gz
gcc-54b228d80c54d32ab49cee6148cfd1364b2bc817.tar.bz2
4 files changed, 142 insertions, 0 deletions
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 0423d7b..decfe2b 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -2764,3 +2764,19 @@
     operands[2] = const0_rtx;
   }
 )
+
+;; -------------------------------------------------------------------------
+;; - vrol.vv vror.vv
+;; -------------------------------------------------------------------------
+(define_expand "v<bitmanip_optab><mode>3"   ;; Expander for the vector rotates named in the header above (vrol.vv / vror.vv).
+  [(set (match_operand:VI 0 "register_operand")   ;; Operand 0: destination, a VI-mode register.
+	(bitmanip_rotate:VI   ;; presumably iterates over the rotate-left / rotate-right rtx codes -- confirm against the iterator definition.
+	  (match_operand:VI 1 "register_operand")   ;; Operand 1: first rotate input, a VI-mode register.
+	  (match_operand:VI 2 "register_operand")))]   ;; Operand 2: second rotate input, also a VI-mode register (vector-vector form).
+  "TARGET_ZVBB || TARGET_ZVKB"   ;; Guard: the pattern exists only when TARGET_ZVBB or TARGET_ZVKB holds.
+  {
+    riscv_vector::emit_vlmax_insn (code_for_pred_v (<CODE>, <MODE>mode),   /* Insn code is looked up from this pattern's rtx code and mode.  */
+				   riscv_vector::BINARY_OP, operands);   /* operands[] is handed over unchanged, tagged as BINARY_OP.  */
+    DONE;   /* The insn emitted above is the whole expansion; the RTL template is not emitted as well.  */
+  }
+)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-1.c
new file mode 100644
index 0000000..55dac27
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-1.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvbb" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model" } */
+
+#include "vrolr-template.h"
+
+/* { dg-final { scan-assembler-times {\tvrol\.vv} 4 } } */
+/* { dg-final { scan-assembler-times {\tvror\.vv} 4 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-run.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-run.c
new file mode 100644
index 0000000..b659a08
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-run.c
@@ -0,0 +1,88 @@
+/* { dg-do run } */
+/* { dg-require-effective-target "riscv_zvbb_ok" } */
+/* { dg-add-options "riscv_v" } */
+/* { dg-add-options "riscv_zvbb" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+#include <assert.h>
+
+#include <stdio.h>
+#include <stdint.h>
+#include <assert.h>
+
+#define ARRAY_SIZE 512 /* Element count of every test array in this file; also the loop bound in compare_results*.  */
+
+#define CIRCULAR_LEFT_SHIFT_ARRAY(arr, shifts, bit_size, size) /* In place, for i in [0, size): arr[i] = arr[i] rotated left by shifts[i], written as a shift/or pair over bit_size bits.  */ \
+    for (int i = 0; i < size; i++) { \
+        (arr)[i] = (((arr)[i] << (shifts)[i]) | ((arr)[i] >> (bit_size - (shifts)[i]))); /* NOTE(review): shifts[i] == 0 gives a right-shift count equal to bit_size -- confirm this is acceptable for 32/64-bit elements.  */ \
+    }
+
+#define CIRCULAR_RIGHT_SHIFT_ARRAY(arr, shifts, bit_size, size) /* In place, for i in [0, size): arr[i] = arr[i] rotated right by shifts[i], written as a shift/or pair over bit_size bits.  */ \
+    for (int i = 0; i < size; i++) { \
+        (arr)[i] = (((arr)[i] >> (shifts)[i]) | ((arr)[i] << (bit_size - (shifts)[i]))); /* NOTE(review): shifts[i] == 0 gives a left-shift count equal to bit_size -- confirm this is acceptable for 32/64-bit elements.  */ \
+    }
+
+void __attribute__((optimize("no-tree-vectorize"))) compare_results8(
+    uint8_t *result_left, uint8_t *result_right, int bit_size, uint8_t *shift_values)
+{   /* Scalar (non-vectorized) reference check: entry i must equal (uint8_t) i rotated left / right by shift_values[i].  */
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+        const uint8_t v = (uint8_t) i, s = shift_values[i], r = (uint8_t) ((bit_size - s) % bit_size);   /* r: complementary count; the modulo maps s == 0 to r == 0.  */
+        assert(result_left[i] == (uint8_t) ((v << s) | (v >> r)));   /* Cast keeps the comparison at element width and makes `==` apply to the whole rotate.  */
+        assert(result_right[i] == (uint8_t) ((v >> s) | (v << r)));
+    }
+}
+
+void __attribute__((optimize("no-tree-vectorize"))) compare_results16(
+    uint16_t *result_left, uint16_t *result_right, int bit_size, uint16_t *shift_values)
+{   /* Scalar (non-vectorized) reference check: entry i must equal (uint16_t) i rotated left / right by shift_values[i].  */
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+        const uint16_t v = (uint16_t) i, s = shift_values[i], r = (uint16_t) ((bit_size - s) % bit_size);   /* r: complementary count; the modulo maps s == 0 to r == 0.  */
+        assert(result_left[i] == (uint16_t) ((v << s) | (v >> r)));   /* Cast keeps the comparison at element width and makes `==` apply to the whole rotate.  */
+        assert(result_right[i] == (uint16_t) ((v >> s) | (v << r)));
+    }
+}
+
+void __attribute__((optimize("no-tree-vectorize"))) compare_results32(
+    uint32_t *result_left, uint32_t *result_right, int bit_size, uint32_t *shift_values)
+{   /* Scalar (non-vectorized) reference check: entry i must equal (uint32_t) i rotated left / right by shift_values[i].  */
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+        const uint32_t v = (uint32_t) i, s = shift_values[i], r = (uint32_t) ((bit_size - s) % bit_size);   /* r: complementary count; the modulo maps s == 0 to r == 0, so no shift reaches 32.  */
+        assert(result_left[i] == (uint32_t) ((v << s) | (v >> r)));   /* Unsigned operands avoid signed-overflow in the shift; parentheses make `==` apply to the whole rotate.  */
+        assert(result_right[i] == (uint32_t) ((v >> s) | (v << r)));
+    }
+}
+
+void __attribute__((optimize("no-tree-vectorize"))) compare_results64(
+    uint64_t *result_left, uint64_t *result_right, int bit_size, uint64_t *shift_values)
+{   /* Scalar (non-vectorized) reference check: entry i must equal (uint64_t) i rotated left / right by shift_values[i].  */
+    for (int i = 0; i < ARRAY_SIZE; i++) {
+        const uint64_t v = (uint64_t) i, s = shift_values[i], r = (uint64_t) ((bit_size - s) % bit_size);   /* r: complementary count; the modulo maps s == 0 to r == 0, so no shift reaches 64.  */
+        assert(result_left[i] == (uint64_t) ((v << s) | (v >> r)));   /* Parentheses make `==` apply to the whole rotate rather than to its first half only.  */
+        assert(result_right[i] == (uint64_t) ((v >> s) | (v << r)));
+    }
+}
+
+#define TEST_SHIFT_OPERATIONS(TYPE, bit_size) /* Declares three TYPE arrays suffixed with bit_size, fills them, rotates them in place and checks them with compare_results<bit_size>.  */ \
+    TYPE shift_val##bit_size[ARRAY_SIZE];\
+    TYPE result_left##bit_size[ARRAY_SIZE];\
+    TYPE result_right##bit_size[ARRAY_SIZE]; /* The declarations sit outside the do/while, so the macro expands to several statements and needs block scope.  */\
+    do { \
+        for (int i = 0; i < ARRAY_SIZE; i++) { \
+	    result_left##bit_size[i] = i; /* Both result arrays start as the index, converted to TYPE on store.  */\
+	    result_right##bit_size[i] = i;\
+            shift_val##bit_size[i] = i % bit_size; /* Shift counts stay in [0, bit_size).  */ \
+        } \
+	CIRCULAR_LEFT_SHIFT_ARRAY(result_left##bit_size, shift_val##bit_size, bit_size, ARRAY_SIZE)\
+	CIRCULAR_RIGHT_SHIFT_ARRAY(result_right##bit_size, shift_val##bit_size, bit_size, ARRAY_SIZE)\
+        compare_results##bit_size(result_left##bit_size, result_right##bit_size, bit_size, shift_val##bit_size); /* Pastes to compare_results8/16/32/64.  */ \
+    } while(0)
+
+
+int main() { /* Runs the rotate check once for each of the 8-, 16-, 32- and 64-bit unsigned element types.  */
+    TEST_SHIFT_OPERATIONS(uint8_t, 8);
+    TEST_SHIFT_OPERATIONS(uint16_t, 16);
+    TEST_SHIFT_OPERATIONS(uint32_t, 32);
+    TEST_SHIFT_OPERATIONS(uint64_t, 64);
+    return 0; /* Reached only when none of the asserts in compare_results* fired.  */
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-template.h b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-template.h
new file mode 100644
index 0000000..3db0d86
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/binop/vrolr-template.h
@@ -0,0 +1,29 @@
+#include <stdint-gcc.h>
+
+#define VROL_VV(SEW, S, T) /* Defines autovect_vrol_vv_<S><SEW>: for i in [0, n), out[i] = op1[i] rotated left by op2[i] masked with SEW-1.  */ \
+__attribute__ ((noipa))\
+void autovect_vrol_vv_##S##SEW (T *out,  T *op1, T *op2, int n){\
+    for(int i=0; i<n; i++){ \
+        op2[i] = op2[i] & (SEW-1); /* The masked count is written back into op2.  */\
+        out[i]= (op1[i] << op2[i]) | (op1[i] >> (SEW - op2[i])); /* Shift/or form that vrolr-1.c expects to be emitted as vrol.vv.  */\
+    }\
+}
+
+#define VROR_VV(SEW, S, T) /* Defines autovect_vror_vv_<S><SEW>: for i in [0, n), out[i] = op1[i] rotated right by op2[i] masked with SEW-1.  */ \
+__attribute__ ((noipa))\
+void autovect_vror_vv_##S##SEW (T *out,  T *op1, T *op2, int n){\
+    for(int i=0; i<n; i++){ \
+        op2[i] = op2[i] & (SEW-1); /* The masked count is written back into op2.  */\
+        out[i]= (op1[i] >> op2[i]) | (op1[i] << (SEW - op2[i])); /* Shift/or form that vrolr-1.c expects to be emitted as vror.vv.  */\
+    }\
+}
+
+VROL_VV(8,  u, uint8_t)   /* defines autovect_vrol_vv_u8 */
+VROL_VV(16, u, uint16_t)   /* defines autovect_vrol_vv_u16 */
+VROL_VV(32, u, uint32_t)   /* defines autovect_vrol_vv_u32 */
+VROL_VV(64, u, uint64_t)   /* defines autovect_vrol_vv_u64 */
+
+VROR_VV(8,  u, uint8_t)   /* defines autovect_vror_vv_u8 */
+VROR_VV(16, u, uint16_t)   /* defines autovect_vror_vv_u16 */
+VROR_VV(32, u, uint32_t)   /* defines autovect_vror_vv_u32 */
+VROR_VV(64, u, uint64_t)   /* defines autovect_vror_vv_u64 */
author	Feng Wang <wangfeng@eswincomputing.com>	2024-08-17 08:40:42 -0600
committer	Jeff Law <jlaw@ventanamicro.com>	2024-08-17 08:41:19 -0600
commit	54b228d80c54d32ab49cee6148cfd1364b2bc817 (patch)
tree	2acff179a83c8a2ff85839590920f0dc8d26bf44 /gcc
parent	e68ab0f16072af97f0898fa0a14e72fcda442775 (diff)
download	gcc-54b228d80c54d32ab49cee6148cfd1364b2bc817.zip gcc-54b228d80c54d32ab49cee6148cfd1364b2bc817.tar.gz gcc-54b228d80c54d32ab49cee6148cfd1364b2bc817.tar.bz2