aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorPan Li <pan2.li@intel.com>2023-06-13 23:19:14 +0800
committerPan Li <pan2.li@intel.com>2023-06-15 09:05:18 +0800
commit8a3a4fb273f9c69b8f9f6c303508610d34b3ebf1 (patch)
treed6e3c489a2def4a83d466df9829b786d58956cef /gcc
parentc1c5edef33e3307296af244f50f9256472c69c09 (diff)
downloadgcc-8a3a4fb273f9c69b8f9f6c303508610d34b3ebf1.zip
gcc-8a3a4fb273f9c69b8f9f6c303508610d34b3ebf1.tar.gz
gcc-8a3a4fb273f9c69b8f9f6c303508610d34b3ebf1.tar.bz2
RISC-V: Bugfix for vec_init repeating auto vectorization in RV32
When constructing a vector mask from individual elements we wrongly assumed that we can broadcast BITS_PER_WORD (i.e. XLEN). The maximum is actually the vector element length (i.e. ELEN). This patch fixes this. After this patch, below failures on RV32 will be fixed. FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-2.c execution test FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-2.c execution test FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-2.c execution test FAIL: gcc.target/riscv/rvv/autovec/partial/multiple_rgroup_run-2.c execution test FAIL: gcc.target/riscv/rvv/autovec/vls-vlmax/repeat_run-3.c -std=c99 -O3 -ftree-vectorize --param riscv-autovec-preference=fixed-vlmax execution test Signed-off-by: Pan Li <pan2.li@intel.com> gcc/ChangeLog: * config/riscv/riscv-v.cc (rvv_builder::get_merge_scalar_mask): Take elen instead of scalar BITS_PER_WORD. (expand_vector_init_merge_repeating_sequence): Use inner_bits_size instead of scaler BITS_PER_WORD.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/riscv/riscv-v.cc16
1 files changed, 12 insertions, 4 deletions
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index e07d5c2..01f647b 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -399,10 +399,17 @@ rvv_builder::get_merge_scalar_mask (unsigned int index_in_pattern) const
{
unsigned HOST_WIDE_INT mask = 0;
unsigned HOST_WIDE_INT base_mask = (1ULL << index_in_pattern);
+ /* Here we construct a mask pattern that will later be broadcast
+ to a vector register. The maximum broadcast size for vmv.v.x/vmv.s.x
+ is determined by the length of a vector element (ELEN) and not by
+ XLEN so make sure we do not exceed it. One example is -march=zve32*
+ which mandates ELEN == 32 but can be combined with -march=rv64
+ with XLEN == 64. */
+ unsigned int elen = TARGET_VECTOR_ELEN_64 ? 64 : 32;
- gcc_assert (BITS_PER_WORD % npatterns () == 0);
+ gcc_assert (elen % npatterns () == 0);
- int limit = BITS_PER_WORD / npatterns ();
+ int limit = elen / npatterns ();
for (int i = 0; i < limit; i++)
mask |= base_mask << (i * npatterns ());
@@ -1928,7 +1935,7 @@ expand_vector_init_merge_repeating_sequence (rtx target,
rtx mask = gen_reg_rtx (mask_mode);
rtx dup = gen_reg_rtx (dup_mode);
- if (full_nelts <= BITS_PER_WORD) /* vmv.s.x. */
+ if (full_nelts <= builder.inner_bits_size ()) /* vmv.s.x. */
{
rtx ops[] = {dup, gen_scalar_move_mask (dup_mask_mode),
RVV_VUNDEF (dup_mode), merge_mask};
@@ -1938,7 +1945,8 @@ expand_vector_init_merge_repeating_sequence (rtx target,
else /* vmv.v.x. */
{
rtx ops[] = {dup, force_reg (GET_MODE_INNER (dup_mode), merge_mask)};
- rtx vl = gen_int_mode (CEIL (full_nelts, BITS_PER_WORD), Pmode);
+ rtx vl = gen_int_mode (CEIL (full_nelts, builder.inner_bits_size ()),
+ Pmode);
emit_nonvlmax_integer_move_insn (code_for_pred_broadcast (dup_mode),
ops, vl);
}