diff options
author | Raphael Moreira Zinsly <rzinsly@ventanamicro.com> | 2024-09-05 21:50:54 -0600 |
---|---|---|
committer | Jeff Law <jlaw@ventanamicro.com> | 2024-09-05 21:50:54 -0600 |
commit | ecdb9f59d0915f154a4c8fa56e11d81479f535eb (patch) | |
tree | 7ffb050f6d92c2d6d11391b4fda907d18209354a /gcc | |
parent | a2e28b105cea4c44c3903d8d979c7a4afa1193f0 (diff) | |
download | gcc-ecdb9f59d0915f154a4c8fa56e11d81479f535eb.zip gcc-ecdb9f59d0915f154a4c8fa56e11d81479f535eb.tar.gz gcc-ecdb9f59d0915f154a4c8fa56e11d81479f535eb.tar.bz2 |
[PATCH 1/2 v2] RISC-V: Additional large constant synthesis improvements
Changes since v1:
- Fix bit31.
- Remove negative shift checks.
- Fix synthesis-7.c expected output.
-- >8 --
Improve handling of large constants in riscv_build_integer, generate
better code for constants where the high half can be constructed
by shifting/shiftNadding the low half or if the halves differ by less
than 2k.
gcc/ChangeLog:
* config/riscv/riscv.cc (riscv_build_integer): Detect new case
of constants that can be improved.
(riscv_move_integer): Add synthesys for concatening constants
without Zbkb.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/synthesis-7.c: Adjust expected output.
* gcc.target/riscv/synthesis-12.c: New test.
* gcc.target/riscv/synthesis-13.c: New test.
* gcc.target/riscv/synthesis-14.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/riscv/riscv.cc | 138 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/synthesis-12.c | 26 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/synthesis-13.c | 26 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/synthesis-14.c | 28 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/synthesis-7.c | 2 |
5 files changed, 213 insertions, 7 deletions
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc index 39489c4..064ffa4 100644 --- a/gcc/config/riscv/riscv.cc +++ b/gcc/config/riscv/riscv.cc @@ -1231,6 +1231,122 @@ riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value, } } + else if (cost > 4 && TARGET_64BIT && can_create_pseudo_p () + && allow_new_pseudos) + { + struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS]; + int alt_cost; + + unsigned HOST_WIDE_INT loval = value & 0xffffffff; + unsigned HOST_WIDE_INT hival = (value & ~loval) >> 32; + bool bit31 = (loval & 0x80000000) != 0; + int trailing_shift = ctz_hwi (loval) - ctz_hwi (hival); + int leading_shift = clz_hwi (loval) - clz_hwi (hival); + int shiftval = 0; + + /* Adjust the shift into the high half accordingly. */ + if ((trailing_shift > 0 && hival == (loval >> trailing_shift))) + shiftval = 32 - trailing_shift; + else if ((leading_shift > 0 && hival == (loval << leading_shift))) + shiftval = 32 + leading_shift; + + if (shiftval && !bit31) + alt_cost = 2 + riscv_build_integer_1 (alt_codes, sext_hwi (loval, 32), + mode); + + /* For constants where the upper half is a shift of the lower half we + can do a shift followed by an or. */ + if (shiftval && !bit31 && alt_cost < cost) + { + /* We need to save the first constant we build. */ + alt_codes[alt_cost - 3].save_temporary = true; + + /* Now we want to shift the previously generated constant into the + high half. */ + alt_codes[alt_cost - 2].code = ASHIFT; + alt_codes[alt_cost - 2].value = shiftval; + alt_codes[alt_cost - 2].use_uw = false; + alt_codes[alt_cost - 2].save_temporary = false; + + /* And the final step, IOR the two halves together. Since this uses + the saved temporary, use CONCAT similar to what we do for Zbkb. */ + alt_codes[alt_cost - 1].code = CONCAT; + alt_codes[alt_cost - 1].value = 0; + alt_codes[alt_cost - 1].use_uw = false; + alt_codes[alt_cost - 1].save_temporary = false; + + memcpy (codes, alt_codes, sizeof (alt_codes)); + cost = alt_cost; + } + + if (cost > 4 && !bit31 && TARGET_ZBA) + { + int value = 0; + + /* Check for a shNadd. */ + if (hival == loval * 3) + value = 3; + else if (hival == loval * 5) + value = 5; + else if (hival == loval * 9) + value = 9; + + if (value) + alt_cost = 2 + riscv_build_integer_1 (alt_codes, + sext_hwi (loval, 32), mode); + + /* For constants where the upper half is a shNadd of the lower half + we can do a similar transformation. */ + if (value && alt_cost < cost) + { + alt_codes[alt_cost - 3].save_temporary = true; + alt_codes[alt_cost - 2].code = FMA; + alt_codes[alt_cost - 2].value = value; + alt_codes[alt_cost - 2].use_uw = false; + alt_codes[alt_cost - 2].save_temporary = false; + alt_codes[alt_cost - 1].code = CONCAT; + alt_codes[alt_cost - 1].value = 0; + alt_codes[alt_cost - 1].use_uw = false; + alt_codes[alt_cost - 1].save_temporary = false; + + memcpy (codes, alt_codes, sizeof (alt_codes)); + cost = alt_cost; + } + } + + if (cost > 4 && !bit31) + { + int value = hival - loval; + + /* For constants were the halves differ by less than 2048 we can + generate the upper half by using an addi on the lower half then + using a shift 32 followed by an or. */ + if (IN_RANGE (value, -2048, 2047)) + { + alt_cost = 3 + riscv_build_integer_1 (alt_codes, + sext_hwi (loval, 32), mode); + if (alt_cost < cost) + { + alt_codes[alt_cost - 4].save_temporary = true; + alt_codes[alt_cost - 3].code = PLUS; + alt_codes[alt_cost - 3].value = value; + alt_codes[alt_cost - 3].use_uw = false; + alt_codes[alt_cost - 3].save_temporary = false; + alt_codes[alt_cost - 2].code = ASHIFT; + alt_codes[alt_cost - 2].value = 32; + alt_codes[alt_cost - 2].use_uw = false; + alt_codes[alt_cost - 2].save_temporary = false; + alt_codes[alt_cost - 1].code = CONCAT; + alt_codes[alt_cost - 1].value = 0; + alt_codes[alt_cost - 1].use_uw = false; + alt_codes[alt_cost - 1].save_temporary = false; + + memcpy (codes, alt_codes, sizeof (alt_codes)); + cost = alt_cost; + } + } + } + } return cost; } @@ -2864,12 +2980,22 @@ riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT value, } else if (codes[i].code == CONCAT || codes[i].code == VEC_MERGE) { - rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp; - rtx t2 = codes[i].code == VEC_MERGE ? old_value : x; - gcc_assert (t2); - t2 = gen_lowpart (SImode, t2); - emit_insn (gen_riscv_xpack_di_si_2 (t, x, GEN_INT (32), t2)); - x = t; + if (codes[i].code == CONCAT && !TARGET_ZBKB) + { + /* The two values should have no bits in common, so we can + use PLUS instead of IOR which has a higher chance of + using a compressed instruction. */ + x = gen_rtx_PLUS (mode, x, old_value); + } + else + { + rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp; + rtx t2 = codes[i].code == VEC_MERGE ? old_value : x; + gcc_assert (t2); + t2 = gen_lowpart (SImode, t2); + emit_insn (gen_riscv_xpack_di_si_2 (t, x, GEN_INT (32), t2)); + x = t; + } } else x = gen_rtx_fmt_ee (codes[i].code, mode, diff --git a/gcc/testsuite/gcc.target/riscv/synthesis-12.c b/gcc/testsuite/gcc.target/riscv/synthesis-12.c new file mode 100644 index 0000000..bf2f890 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/synthesis-12.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target rv64 } */ +/* We aggressively skip as we really just need to test the basic synthesis + which shouldn't vary based on the optimization level. -O1 seems to work + and eliminates the usual sources of extraneous dead code that would throw + off the counts. */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } */ +/* { dg-options "-march=rv64gc" } */ + +/* Rather than test for a specific synthesis of all these constants or + having thousands of tests each testing one variant, we just test the + total number of instructions. + + This isn't expected to change much and any change is worthy of a look. */ +/* { dg-final { scan-assembler-times "\\t(add|addi|bseti|li|pack|ret|sh1add|sh2add|sh3add|slli|srli|xori|or)" 45 } } */ + + +unsigned long foo_0x7857f2de7857f2de(void) { return 0x7857f2de7857f2deUL; } +unsigned long foo_0x7fffdffe3fffefff(void) { return 0x7fffdffe3fffefffUL; } +unsigned long foo_0x1ffff7fe3fffeffc(void) { return 0x1ffff7fe3fffeffcUL; } +unsigned long foo_0x0a3fdbf0028ff6fc(void) { return 0x0a3fdbf0028ff6fcUL; } +unsigned long foo_0x014067e805019fa0(void) { return 0x014067e805019fa0UL; } +unsigned long foo_0x09d87e90009d87e9(void) { return 0x09d87e90009d87e9UL; } +unsigned long foo_0x2302320000118119(void) { return 0x2302320000118119UL; } +unsigned long foo_0x000711eb00e23d60(void) { return 0x000711eb00e23d60UL; } +unsigned long foo_0x5983800001660e00(void) { return 0x5983800001660e00UL; } diff --git a/gcc/testsuite/gcc.target/riscv/synthesis-13.c b/gcc/testsuite/gcc.target/riscv/synthesis-13.c new file mode 100644 index 0000000..957410a --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/synthesis-13.c @@ -0,0 +1,26 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target rv64 } */ +/* We aggressively skip as we really just need to test the basic synthesis + which shouldn't vary based on the optimization level. -O1 seems to work + and eliminates the usual sources of extraneous dead code that would throw + off the counts. */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } */ +/* { dg-options "-march=rv64gc_zba" } */ + +/* Rather than test for a specific synthesis of all these constants or + having thousands of tests each testing one variant, we just test the + total number of instructions. + + This isn't expected to change much and any change is worthy of a look. */ +/* { dg-final { scan-assembler-times "\\t(add|addi|bseti|li|pack|ret|sh1add|sh2add|sh3add|slli|srli|xori|or)" 45 } } */ + + +unsigned long foo_0x7907d89a2857f2de(void) { return 0x7907d89a2857f2deUL; } +unsigned long foo_0x4fffaffb0fffefff(void) { return 0x4fffaffb0fffefffUL; } +unsigned long foo_0x23ff6fdc03ffeffc(void) { return 0x23ff6fdc03ffeffcUL; } +unsigned long foo_0x170faedc028ff6fc(void) { return 0x170faedc028ff6fcUL; } +unsigned long foo_0x5704dee01d019fa0(void) { return 0x5704dee01d019fa0UL; } +unsigned long foo_0x0589c731009d87e9(void) { return 0x0589c731009d87e9UL; } +unsigned long foo_0x0057857d00118119(void) { return 0x0057857d00118119UL; } +unsigned long foo_0x546b32e010e23d60(void) { return 0x546b32e010e23d60UL; } +unsigned long foo_0x64322a0021660e00(void) { return 0x64322a0021660e00UL; } diff --git a/gcc/testsuite/gcc.target/riscv/synthesis-14.c b/gcc/testsuite/gcc.target/riscv/synthesis-14.c new file mode 100644 index 0000000..bd4e4af --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/synthesis-14.c @@ -0,0 +1,28 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target rv64 } */ +/* We aggressively skip as we really just need to test the basic synthesis + which shouldn't vary based on the optimization level. -O1 seems to work + and eliminates the usual sources of extraneous dead code that would throw + off the counts. */ +/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } */ +/* { dg-options "-march=rv64gc" } */ + +/* Rather than test for a specific synthesis of all these constants or + having thousands of tests each testing one variant, we just test the + total number of instructions. + + This isn't expected to change much and any change is worthy of a look. */ +/* { dg-final { scan-assembler-times "\\t(add|addi|bseti|li|pack|ret|sh1add|sh2add|sh3add|slli|srli|xori|or)" 65 } } */ + + +unsigned long foo_0x7857faae7857f2de(void) { return 0x7857faae7857f2deUL; } +unsigned long foo_0x0ffff7fe0fffefff(void) { return 0x0ffff7fe0fffefffUL; } +unsigned long foo_0x7857f2de7857faae(void) { return 0x7857f2de7857faaeUL; } +unsigned long foo_0x7857f2af7857faae(void) { return 0x7857f2af7857faaeUL; } +unsigned long foo_0x5fbfffff5fbffae5(void) { return 0x5fbfffff5fbffae5UL; } +unsigned long foo_0x3d3079db3d3079ac(void) { return 0x3d3079db3d3079acUL; } +unsigned long foo_0x046075fe046078a8(void) { return 0x046075fe046078a8UL; } +unsigned long foo_0x2411811a24118119(void) { return 0x2411811a24118119UL; } +unsigned long foo_0x70e23d6a70e23d6b(void) { return 0x70e23d6a70e23d6bUL; } +unsigned long foo_0x0c01df8c0c01df7d(void) { return 0x0c01df8c0c01df7dUL; } +unsigned long foo_0x7fff07d07fff0000(void) { return 0x7fff07d07fff0000UL; } diff --git a/gcc/testsuite/gcc.target/riscv/synthesis-7.c b/gcc/testsuite/gcc.target/riscv/synthesis-7.c index 5a69d2e..c71c3cd 100644 --- a/gcc/testsuite/gcc.target/riscv/synthesis-7.c +++ b/gcc/testsuite/gcc.target/riscv/synthesis-7.c @@ -12,7 +12,7 @@ total number of instructions. This isn't expected to change much and any change is worthy of a look. */ -/* { dg-final { scan-assembler-times "\\t(add|addi|bseti|li|ret|sh1add|sh2add|sh3add|slli)" 7571 } } */ +/* { dg-final { scan-assembler-times "\\t(add|addi|bseti|li|ret|sh1add|sh2add|sh3add|slli|bclri)" 7575 } } */ unsigned long foo_0xfffff7fe7ffff7ff(void) { return 0xfffff7fe7ffff7ffUL; } unsigned long foo_0xffffeffe7ffff7ff(void) { return 0xffffeffe7ffff7ffUL; } |