aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorRobin Dapp <rdapp@ventanamicro.com>2025-09-01 11:41:34 +0200
committerRobin Dapp <rdapp@ventanamicro.com>2025-09-02 17:24:34 +0200
commitf957d352bd6f240829226405e9be7960071d1b9c (patch)
tree707b3d3335b3794febaaf062e49f35d998bf4d40 /gcc
parent6e0590ba45e8df543133028b0ccf692a21cf2a4d (diff)
downloadgcc-f957d352bd6f240829226405e9be7960071d1b9c.zip
gcc-f957d352bd6f240829226405e9be7960071d1b9c.tar.gz
gcc-f957d352bd6f240829226405e9be7960071d1b9c.tar.bz2
RISC-V: Handle overlap in expand_vec_perm PR121742.
In a two-source gather we unconditionally overwrite target with the first gather's result. If op1 == target this clobbers the source operand of the second gather before it is read. This patch uses a temporary in that case and copies it back to target afterwards. PR target/121742 gcc/ChangeLog: * config/riscv/riscv-v.cc (expand_vec_perm): Use temporary if op1 and target overlap. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/pr121742.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/riscv/riscv-v.cc11
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121742.c30
2 files changed, 38 insertions, 3 deletions
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index edfb4ff..9cbd480 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -3313,15 +3313,17 @@ expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
mask_mode = get_mask_mode (data_mode);
rtx mask = gen_reg_rtx (mask_mode);
rtx max_sel = gen_const_vector_dup (sel_mode, nunits);
+ bool overlap = reg_overlap_mentioned_p (target, op1);
+ rtx tmp_target = overlap ? gen_reg_rtx (data_mode) : target;
/* Step 1: generate a mask that should select everything >= nunits into the
* mask. */
expand_vec_cmp (mask, GEU, sel_mod, max_sel);
- /* Step2: gather every op0 values indexed by sel into target,
+ /* Step2: gather every op0 values indexed by sel into TMP_TARGET,
we don't need to care about the result of the element
whose index >= nunits. */
- emit_vlmax_gather_insn (target, op0, sel_mod);
+ emit_vlmax_gather_insn (tmp_target, op0, sel_mod);
/* Step3: shift the range from (nunits, max_of_mode] to
[0, max_of_mode - nunits]. */
@@ -3331,7 +3333,10 @@ expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
/* Step4: gather those into the previously masked-out elements
of target. */
- emit_vlmax_masked_gather_mu_insn (target, op1, tmp, mask);
+ emit_vlmax_masked_gather_mu_insn (tmp_target, op1, tmp, mask);
+
+ if (overlap) /* The gathers wrote into a temporary; copy the result into TARGET.  */
+ emit_move_insn (target, tmp_target);
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST for RVV. */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121742.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121742.c
new file mode 100644
index 0000000..08491f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr121742.c
@@ -0,0 +1,30 @@
+/* { dg-do run } */
+/* { dg-require-effective-target riscv_v_ok } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d -O2" } */
+
+typedef unsigned long uint64_t;
+typedef unsigned int uint32_t;
+typedef unsigned char uint8_t;
+typedef uint8_t a __attribute__((vector_size(4)));
+int b, c;
+
+uint64_t d() { /* Reduced reproducer for PR121742: folds e[3] + 9 into b on every iteration.  */
+ a e = {5, 9, 1, 5};
+ a bla = {0, 0, 0, 0};
+ int *f = &b; /* Stores through f update the global b that main checks.  */
+ uint32_t g = 0;
+ int i = 0;
+ for (; i < 2; i++) /* 2 outer x 3 inner iterations = 6 XORs into b.  */
+ for (c = 0; c <= 2; c++) {
+ *f ^= e[3] + 9;
+ e = __builtin_shufflevector( /* Two-source shuffle whose result is assigned back to e, which is also its second operand.  */
+ ~__builtin_shufflevector(bla, e, 1, 4, 3, 4), e, 0, 1, 1, 7);
+ }
+ return g; /* g is never modified, so this always returns 0.  */
+}
+
+int main() {
+ int j = d ();
+ if (b != 0) /* Fail the test if d () left the global b nonzero.  */
+ __builtin_abort ();
+}