aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorJeff Law <jlaw@ventanamicro.com>2024-06-18 06:40:40 -0600
committerJeff Law <jlaw@ventanamicro.com>2024-06-18 06:42:51 -0600
commita78e2c3a00d8b147b44416f7a843c9df61f04531 (patch)
tree131cff440687a5e9a69092c0756ca31ee3f4850a /gcc
parent89c26a99102d2cc00455333795d81d6426be7057 (diff)
downloadgcc-a78e2c3a00d8b147b44416f7a843c9df61f04531.zip
gcc-a78e2c3a00d8b147b44416f7a843c9df61f04531.tar.gz
gcc-a78e2c3a00d8b147b44416f7a843c9df61f04531.tar.bz2
[to-be-committed,RISC-V] Improve bset generation when bit position is limited
So more work in the ongoing effort to make better use of the Zbs extension. This time we're trying to exploit knowledge of the shift count/bit position to allow us to use a bset instruction. Consider this expression in SImode: (1 << (pos & 0xf)). None of the resulting values will have bit 31 set. So if there's an explicit zero or sign extension to DI we can drop that explicit extension and generate a simple bset with x0 as the input value. Or another example (which I think came from spec at some point and IIRC was the primary motivation for this patch): (1 << (7-(pos) % 8)). Before this change they'd generate something like this respectively: [li a5,1; andi a0,a0,15; sllw a0,a5,a0] and [li a5,7; andn a0,a5,a0; li a5,1; sllw a0,a5,a0]. After this change they generate: [andi a0,a0,15 # 9 [c=4 l=4] *anddi3/1; bset a0,x0,a0 # 17 [c=8 l=4] *bsetdi_2] and [li a5,7 # 27 [c=4 l=4] *movdi_64bit/1; andn a0,a5,a0 # 28 [c=4 l=4] and_notdi3; bset a0,x0,a0 # 19 [c=8 l=4] *bsetdi_2]. We achieve this with simple define_splits which target the bsetdi_2 pattern I recently added. Much better than the original implementation I did a few months back :-) I've got a bclr/binv variant from a few months back as well, but it needs to be updated to the simpler implementation found here. Just ran this through my tester. Will wait for the precommit CI to render its verdict before moving forward. gcc/ * config/riscv/bitmanip.md (bset splitters): New patterns for generating bset when bit position is limited.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/riscv/bitmanip.md36
-rw-r--r--gcc/testsuite/gcc.target/riscv/zbs-ext-2.c24
2 files changed, 60 insertions, 0 deletions
diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 094bc2a..ae5e7e5 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -609,6 +609,42 @@
"bset\t%0,x0,%1"
[(set_attr "type" "bitmanip")])
+;; These two splitters take advantage of the limited range of the
+;; shift count: operand 2 is a mask whose low five bits are not all
+;; ones, so the count is never 31 and the SImode sign bit is never set.
+;; Sign and zero extension then agree, so we generate bsetdi_2.
+(define_split
+ [(set (match_operand:DI 0 "register_operand")
+ (any_extend:DI
+ (ashift:SI (const_int 1)
+ (subreg:QI
+ (and:DI (not:DI (match_operand:DI 1 "register_operand")) ; and-not form; presumably why ZBB/ZBKB is required below
+ (match_operand 2 "const_int_operand")) 0)))) ; shift count = QImode subreg (byte 0) of (~op1 & op2)
+ (clobber (match_operand:DI 3 "register_operand"))] ; NOTE(review): operand 3 is never referenced in the replacement insns
+ "TARGET_64BIT
+ && TARGET_ZBS
+ && (TARGET_ZBB || TARGET_ZBKB)
+ && (INTVAL (operands[2]) & 0x1f) != 0x1f" ; low five mask bits not all set, so the count cannot be 31
+ [(set (match_dup 0) (and:DI (not:DI (match_dup 1)) (match_dup 2))) ; first insn: op0 = ~op1 & op2
+ (set (match_dup 0) (zero_extend:DI (ashift:SI ; second insn: op0 = zero_extend (1 << op0)
+ (const_int 1)
+ (subreg:QI (match_dup 0) 0))))]) ; count is read back from op0, which is then overwritten
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand")
+ (any_extend:DI ; matches either sign or zero extension of the SImode shift
+ (ashift:SI (const_int 1)
+ (subreg:QI
+ (and:DI (match_operand:DI 1 "register_operand")
+ (match_operand 2 "const_int_operand")) 0))))] ; shift count = QImode subreg (byte 0) of (op1 & op2)
+ "TARGET_64BIT
+ && TARGET_ZBS
+ && (INTVAL (operands[2]) & 0x1f) != 0x1f" ; low five mask bits not all set, so the count cannot be 31
+ [(set (match_dup 0) (and:DI (match_dup 1) (match_dup 2))) ; first insn: op0 = op1 & op2
+ (set (match_dup 0) (zero_extend:DI (ashift:SI ; second insn: op0 = zero_extend (1 << op0)
+ (const_int 1)
+ (subreg:QI (match_dup 0) 0))))]) ; count is read back from op0, which is then overwritten
+
(define_insn "*bset<mode>_1_mask"
[(set (match_operand:X 0 "register_operand" "=r")
(ashift:X (const_int 1)
diff --git a/gcc/testsuite/gcc.target/riscv/zbs-ext-2.c b/gcc/testsuite/gcc.target/riscv/zbs-ext-2.c
new file mode 100644
index 0000000..301bc9d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbs-ext-2.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbb_zbs -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" } } */
+
+
+typedef unsigned int uint32_t;
+uint32_t foo(uint32_t pos)
+{
+ return (1 << (7-(pos) % 8)); /* Shift count is 7 - (pos % 8), so it is always in the range 0..7.  */
+}
+
+typedef unsigned int uint32_t;
+uint32_t foo2(uint32_t pos)
+{
+ return (1 << (pos & 0xf)); /* Shift count is pos & 0xf, so it is always in the range 0..15.  */
+}
+
+/* { dg-final { scan-assembler-not "sll\t" } } */
+/* { dg-final { scan-assembler-times "bset\t" 2 } } */
+/* { dg-final { scan-assembler-times "andi\t" 1 } } */
+/* { dg-final { scan-assembler-times "andn\t" 1 } } */
+/* { dg-final { scan-assembler-times "li\t" 1 } } */
+/* { dg-final { scan-assembler-times "ret" 2 } } */
+