author     Lyut Nersisyan <lyut.nersisyan@gmail.com>   2024-05-28 09:17:50 -0600
committer  Jeff Law <jlaw@ventanamicro.com>            2024-05-28 09:22:28 -0600
commit     236116068151bbc72aaaf53d0f223fe06f7e3bac (patch)
tree       19b32ff6bb9f14fac7cb7b5645d5b16f1a85b670 /gcc
parent     a3aeff4ce95bd616a2108dc2363d9cbaba53b170 (diff)
[to-be-committed] [RISC-V] Some basic patterns for zbkb code generation
And here's Lyut's basic Zbkb support.  Essentially it's four new patterns for packh, packw, and pack, plus a bridge pattern needed for packh.

packw is a bit ugly as we need to match a sign extension in an inconvenient location.  We pull it out so that the extension is exposed in a convenient place for subsequent sign extension elimination.

We need a bridge pattern to get packh.  Thankfully the bridge pattern is a degenerate packh where one operand is x0, so it works as-is without splitting and provides the bridge to the more general form of packh.

This patch also refines the condition for the constant reassociation patch to avoid a few more cases that can be handled efficiently with other preexisting patterns, and includes one bugfix to avoid losing bits, particularly in the xor/ior case.

Lyut did the core work here.  I think I did some minor cleanups and added the bridge pattern to make this work with gcc-15 and beyond.

This is a prerequisite for using zbkb in constant synthesis.  It also stands on its own; I know we've seen it trigger in spec without the constant synthesis bits.

It's been through our internal CI and my tester.  I'll obviously wait for the upstream CI to finish before taking further action.

gcc/
	* config/riscv/crypto.md: Add new combiner patterns to generate
	pack, packh, packw instructions.
	* config/riscv/iterators.md (HX): New iterator for half X mode.
	* config/riscv/riscv.md (<optab>_shift_reverse<X:mode>): Tighten
	cases to avoid.  Do not lose bits for XOR/IOR.

gcc/testsuite/
	* gcc.target/riscv/pack32.c: New test.
	* gcc.target/riscv/pack64.c: New test.
	* gcc.target/riscv/packh32.c: New test.
	* gcc.target/riscv/packh64.c: New test.
	* gcc.target/riscv/packw.c: New test.

Co-authored-by: Jeffrey A Law <jlaw@ventanamicro.com>
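For reference, here is a rough C model of what the three instructions compute in their rv64 forms, written from the Zbkb description; the model_* helper names are my own and are not part of the patch.

#include <stdint.h>

/* pack: concatenate the low XLEN/2 bits of both sources.  */
uint64_t model_pack (uint64_t rs1, uint64_t rs2)
{
  return (rs2 << 32) | (rs1 & 0xffffffffULL);
}

/* packh: concatenate the low bytes, zero-extended to XLEN.  */
uint64_t model_packh (uint64_t rs1, uint64_t rs2)
{
  return ((rs2 & 0xff) << 8) | (rs1 & 0xff);
}

/* packw (rv64 only): concatenate the low 16-bit halves, sign-extend bit 31.  */
uint64_t model_packw (uint64_t rs1, uint64_t rs2)
{
  uint32_t w = (uint32_t) ((rs2 & 0xffff) << 16) | (uint32_t) (rs1 & 0xffff);
  return (uint64_t) (int64_t) (int32_t) w;
}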
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/config/riscv/crypto.md                |  63
-rw-r--r--  gcc/config/riscv/iterators.md             |   3
-rw-r--r--  gcc/config/riscv/riscv.md                 |   9
-rw-r--r--  gcc/testsuite/gcc.target/riscv/pack32.c   |  18
-rw-r--r--  gcc/testsuite/gcc.target/riscv/pack64.c   |  17
-rw-r--r--  gcc/testsuite/gcc.target/riscv/packh32.c  |  13
-rw-r--r--  gcc/testsuite/gcc.target/riscv/packh64.c  |   6
-rw-r--r--  gcc/testsuite/gcc.target/riscv/packw.c    |  13
8 files changed, 139 insertions, 3 deletions
diff --git a/gcc/config/riscv/crypto.md b/gcc/config/riscv/crypto.md
index dd2bc94..b632312 100644
--- a/gcc/config/riscv/crypto.md
+++ b/gcc/config/riscv/crypto.md
@@ -104,6 +104,19 @@
"pack\t%0,%1,%2"
[(set_attr "type" "crypto")])
+;; This is slightly more complex than the other pack patterns
+;; that fully expose the RTL as it needs to self-adjust to
+;; rv32 and rv64. But it's not that hard.
+(define_insn "*riscv_xpack_<X:mode>_2"
+ [(set (match_operand:X 0 "register_operand" "=r")
+ (ior:X (ashift:X (match_operand:X 1 "register_operand" "r")
+ (match_operand 2 "immediate_operand" "n"))
+ (zero_extend:X
+ (match_operand:HX 3 "register_operand" "r"))))]
+ "TARGET_ZBKB && INTVAL (operands[2]) == BITS_PER_WORD / 2"
+ "pack\t%0,%3,%1"
+ [(set_attr "type" "crypto")])
+
(define_insn "riscv_packh_<mode>"
[(set (match_operand:X 0 "register_operand" "=r")
(unspec:X [(match_operand:QI 1 "register_operand" "r")
@@ -113,6 +126,29 @@
"packh\t%0,%1,%2"
[(set_attr "type" "crypto")])
+;; So this is both a useful pattern unto itself and a bridge to the
+;; general packh pattern below.
+(define_insn "*riscv_packh_<mode>_2"
+ [(set (match_operand:X 0 "register_operand" "=r")
+ (and:X (ashift:X (match_operand:X 1 "register_operand" "r")
+ (const_int 8))
+ (const_int 65280)))]
+ "TARGET_ZBKB"
+ "packh\t%0,x0,%1"
+ [(set_attr "type" "crypto")])
+
+;; While the two operands of the IOR could be swapped, this appears
+;; to be the canonical form. The other form doesn't seem to trigger.
+(define_insn "*riscv_packh_<mode>_3"
+ [(set (match_operand:X 0 "register_operand" "=r")
+ (ior:X (and:X (ashift:X (match_operand:X 1 "register_operand" "r")
+ (const_int 8))
+ (const_int 65280))
+ (zero_extend:X (match_operand:QI 2 "register_operand" "r"))))]
+ "TARGET_ZBKB"
+ "packh\t%0,%2,%1"
+ [(set_attr "type" "crypto")])
+
(define_insn "riscv_packw"
[(set (match_operand:DI 0 "register_operand" "=r")
(unspec:DI [(match_operand:HI 1 "register_operand" "r")
@@ -122,6 +158,33 @@
"packw\t%0,%1,%2"
[(set_attr "type" "crypto")])
+;; Implemented as a splitter for initial recognition. It generates
+;; new RTL with the extension moved to the outer position. This
+;; allows later code to eliminate subsequent explicit sign extensions.
+(define_split
+ [(set (match_operand:DI 0 "register_operand")
+ (ior:DI (ashift:DI
+ (sign_extend:DI (match_operand:HI 1 "register_operand"))
+ (const_int 16))
+ (zero_extend:DI (match_operand:HI 2 "register_operand"))))]
+ "TARGET_ZBKB && TARGET_64BIT"
+ [(set (match_dup 0)
+ (sign_extend:DI (ior:SI (ashift:SI (match_dup 1) (const_int 16))
+ (zero_extend:SI (match_dup 2)))))]
+ "operands[1] = gen_lowpart (SImode, operands[1]);")
+
+;; And this matches the result of the splitter above.
+(define_insn "*riscv_packw_2"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (sign_extend:DI
+ (ior:SI
+ (ashift:SI (match_operand:SI 1 "register_operand" "r")
+ (const_int 16))
+ (zero_extend:SI (match_operand:HI 2 "register_operand" "r")))))]
+ "TARGET_ZBKB && TARGET_64BIT"
+ "packw\t%0,%2,%1"
+ [(set_attr "type" "crypto")])
+
;; ZBKX extension
(define_insn "riscv_xperm4_<mode>"
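The bridge pattern above leans on the identity packh rd,x0,rs == (rs & 0xff) << 8 == (rs << 8) & 0xff00, which is exactly the and/ashift RTL it matches.  An illustrative standalone example (not one of the committed tests, and whether it combines this way depends on surrounding code) of source with that shape before any zero-extended low byte is ORed in; once the low byte appears, the more general *riscv_packh_<mode>_3 form should match instead:

/* Illustrative only: a lone high-byte insert has the bridge shape
   (and (ashift x 8) 65280) and can be emitted as packh against x0
   under TARGET_ZBKB.  */
unsigned int high_byte_only (unsigned int rs)
{
  return (rs << 8) & 0xff00;
}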
diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 8a9d198..3c139bc 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -37,6 +37,9 @@
;; Likewise, but for XLEN-sized quantities.
(define_mode_iterator X [(SI "!TARGET_64BIT") (DI "TARGET_64BIT")])
+;; Likewise, but for XLEN/2 -sized quantities.
+(define_mode_iterator HX [(HI "!TARGET_64BIT") (SI "TARGET_64BIT")])
+
;; Branches operate on XLEN-sized quantities, but for RV64 we accept
;; QImode values so we can force zero-extension.
(define_mode_iterator BR [(QI "TARGET_64BIT") SI (DI "TARGET_64BIT")])
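Concretely, the X/HX pairing instantiates the new pack pattern with X=SI, HX=HI and a shift of 16 on rv32, and with X=DI, HX=SI and a shift of 32 on rv64.  In C terms (my illustration, not one of the committed tests), both of these should be candidates for a single pack:

#include <stdint.h>

/* rv64 flavour: X = DI, HX = SI, shift = BITS_PER_WORD / 2 = 32.  */
uint64_t pack_rv64 (uint64_t hi, uint32_t lo)
{
  return (hi << 32) | lo;   /* ior (ashift:DI .. 32) (zero_extend:DI SI) */
}

/* rv32 flavour: X = SI, HX = HI, shift = BITS_PER_WORD / 2 = 16.  */
uint32_t pack_rv32 (uint32_t hi, uint16_t lo)
{
  return (hi << 16) | lo;   /* ior (ashift:SI .. 16) (zero_extend:SI HI) */
}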
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index fe74b8d..25d341e 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -2844,9 +2844,12 @@
(match_operand 2 "immediate_operand" "n"))
(match_operand 3 "immediate_operand" "n")))]
"(!SMALL_OPERAND (INTVAL (operands[3]))
- && SMALL_OPERAND (INTVAL (operands[3]) >> INTVAL (operands[2]))
- && (popcount_hwi (INTVAL (operands[3]))
- <= popcount_hwi (INTVAL (operands[3]) >> INTVAL (operands[2]))))"
+ && SMALL_OPERAND (INTVAL (operands[3]) >> INTVAL (operands[2]))
+ && popcount_hwi (INTVAL (operands[3])) > 1
+ && (!TARGET_64BIT
+ || (exact_log2 ((INTVAL (operands[3]) >> INTVAL (operands[2])) + 1)
+ == -1))
+ && (INTVAL (operands[3]) & ((1ULL << INTVAL (operands[2])) - 1)) == 0)"
"#"
"&& 1"
[(set (match_dup 0) (any_bitwise:X (match_dup 1) (match_dup 3)))
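The new popcount/exact_log2 clauses keep the split away from constants that preexisting patterns already handle efficiently; the final clause is the bugfix mentioned in the commit message.  The split rewrites (x << s) op C as (x op (C >> s)) << s, which for IOR/XOR silently drops any set bits of C below the shift count, so the new condition requires those low bits to be zero.  A small standalone illustration (illustrative only, not one of the committed tests):

#include <stdint.h>
#include <assert.h>

int main (void)
{
  uint64_t x = 0x4, s = 16, c = 0x10001;   /* bit 0 of c sits below the shift */
  uint64_t wanted  = (x << s) | c;         /* 0x50001 */
  uint64_t rewrite = (x | (c >> s)) << s;  /* 0x50000: bit 0 of c is lost */
  assert (wanted != rewrite);              /* hence the low-bits-must-be-zero test */
  return 0;
}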
diff --git a/gcc/testsuite/gcc.target/riscv/pack32.c b/gcc/testsuite/gcc.target/riscv/pack32.c
new file mode 100644
index 0000000..24304d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pack32.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv32gc_zbkb -mabi=ilp32" } */
+/* { dg-skip-if "" { *-*-* } { "-g" "-flto" "-O0" } } */
+
+#include <stdint-gcc.h>
+
+uint32_t foo1(uint32_t rs1, uint32_t rs2)
+{
+ return (rs1 << 16) | ((rs2 << 16) >> 16);
+}
+
+uint32_t foo2(uint32_t rs1, uint32_t rs2)
+{
+ return (rs1 << 16) | (rs2 & 65535);
+}
+
+/* { dg-final { scan-assembler-times "\\spack\\s" 2 } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/pack64.c b/gcc/testsuite/gcc.target/riscv/pack64.c
new file mode 100644
index 0000000..7f54baa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pack64.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbkb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-g" "-flto" "-O0" } } */
+#include <stdint-gcc.h>
+
+uint64_t foo1(uint64_t rs1, uint64_t rs2)
+{
+ return (rs1 << 32) | ((rs2 << 32) >> 32);
+}
+
+uint64_t foo2(uint64_t rs1, uint64_t rs2)
+{
+ return (rs1 << 32) | (rs2 & 4294967295);
+}
+
+/* { dg-final { scan-assembler-times "\\spack\\s" 2 } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/packh32.c b/gcc/testsuite/gcc.target/riscv/packh32.c
new file mode 100644
index 0000000..8032241
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/packh32.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv32gc_zbkb -mabi=ilp32" } */
+/* { dg-skip-if "" { *-*-* } { "-g" "-flto" "-O0" } } */
+
+#include <stdint-gcc.h>
+
+int32_t foo1(int32_t rs1, int32_t rs2)
+{
+ return (rs1 & 255) | ((rs2 & 255) << 8);
+}
+
+/* { dg-final { scan-assembler-times "\\spackh\\s" 1 } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/packh64.c b/gcc/testsuite/gcc.target/riscv/packh64.c
new file mode 100644
index 0000000..b91d401
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/packh64.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbkb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-g" "-flto" "-O0" } } */
+#include "packh32.c"
+/* { dg-final { scan-assembler-times "\\spackh\\s" 1 } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/packw.c b/gcc/testsuite/gcc.target/riscv/packw.c
new file mode 100644
index 0000000..c178738
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/packw.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gc_zbkb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-g" "-flto" "-O0" } } */
+#include <stdint-gcc.h>
+
+uint32_t foo1(uint32_t rs1, uint32_t rs2)
+{
+ return (rs1 << 16) | ((rs2 << 16) >> 16);
+}
+
+/* { dg-final { scan-assembler-times "\\spackw\\s" 1 } } */
+/* { dg-final { scan-assembler-not "\\ssext\\s" } } */
+