diff options
author | Lehua Ding <lehua.ding@rivai.ai> | 2023-08-29 09:54:22 +0800 |
---|---|---|
committer | Lehua Ding <lehua.ding@rivai.ai> | 2023-08-29 10:33:38 +0800 |
commit | 973eb0deb467c79cc21f265a710a81054cfd3e8c (patch) | |
tree | f709f9691b996999d195b287c01c24d7c836c5b9 | |
parent | ebffc840f50112196d0d55a384a2c29b08964319 (diff) | |
download | gcc-973eb0deb467c79cc21f265a710a81054cfd3e8c.zip gcc-973eb0deb467c79cc21f265a710a81054cfd3e8c.tar.gz gcc-973eb0deb467c79cc21f265a710a81054cfd3e8c.tar.bz2 |
RISC-V: Fix error combine of pred_mov pattern
This patch fix PR110943 which will produce some error code. This is because
the error combine of some pred_mov pattern. Consider this code:
```
void foo9 (void *base, void *out, size_t vl)
{
int64_t scalar = *(int64_t*)(base + 100);
vint64m2_t v = __riscv_vmv_v_x_i64m2 (0, 1);
*(vint64m2_t*)out = v;
}
```
RTL before combine pass:
```
(insn 11 10 12 2 (set (reg/v:RVVM2DI 134 [ v ])
(if_then_else:RVVM2DI (unspec:RVVMF32BI [
(const_vector:RVVMF32BI repeat [
(const_int 1 [0x1])
])
(const_int 1 [0x1])
(const_int 2 [0x2]) repeated x2
(const_int 0 [0])
(reg:SI 66 vl)
(reg:SI 67 vtype)
] UNSPEC_VPREDICATE)
(const_vector:RVVM2DI repeat [
(const_int 0 [0])
])
(unspec:RVVM2DI [
(reg:SI 0 zero)
] UNSPEC_VUNDEF))) "/app/example.c":6:20 1089 {pred_movrvvm2di})
(insn 14 13 0 2 (set (mem:RVVM2DI (reg/v/f:DI 136 [ out ]) [1 MEM[(vint64m2_t *)out_4(D)]+0 S[32, 32] A128])
(reg/v:RVVM2DI 134 [ v ])) "/app/example.c":7:23 717 {*movrvvm2di_whole})
```
RTL after combine pass:
```
(insn 14 13 0 2 (set (mem:RVVM2DI (reg:DI 138) [1 MEM[(vint64m2_t *)out_4(D)]+0 S[32, 32] A128])
(if_then_else:RVVM2DI (unspec:RVVMF32BI [
(const_vector:RVVMF32BI repeat [
(const_int 1 [0x1])
])
(const_int 1 [0x1])
(const_int 2 [0x2]) repeated x2
(const_int 0 [0])
(reg:SI 66 vl)
(reg:SI 67 vtype)
] UNSPEC_VPREDICATE)
(const_vector:RVVM2DI repeat [
(const_int 0 [0])
])
(unspec:RVVM2DI [
(reg:SI 0 zero)
] UNSPEC_VUNDEF))) "/app/example.c":7:23 1089 {pred_movrvvm2di})
```
This combine change the semantics of insn 14. I split @pred_mov pattern and
restrict the conditon of @pred_mov.
PR target/110943
gcc/ChangeLog:
* config/riscv/predicates.md (vector_const_int_or_double_0_operand):
New predicate.
* config/riscv/riscv-vector-builtins.cc (function_expander::function_expander):
force_reg mem target operand.
* config/riscv/vector.md (@pred_mov<mode>): Wrapper.
(*pred_mov<mode>): Remove imm -> reg pattern.
(*pred_broadcast<mode>_imm): Add imm -> reg pattern.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c: Adjust.
* gcc.target/riscv/rvv/base/pr110943.c: New test.
-rw-r--r-- | gcc/config/riscv/predicates.md | 5 | ||||
-rw-r--r-- | gcc/config/riscv/riscv-vector-builtins.cc | 9 | ||||
-rw-r--r-- | gcc/config/riscv/vector.md | 98 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/base/pr110943.c | 33 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c | 10 |
5 files changed, 106 insertions, 49 deletions
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md index 618ad60..51cf7eb 100644 --- a/gcc/config/riscv/predicates.md +++ b/gcc/config/riscv/predicates.md @@ -296,6 +296,11 @@ (and (match_code "const_vector") (match_test "satisfies_constraint_Wc0 (op)"))) +(define_predicate "vector_const_int_or_double_0_operand" + (and (match_code "const_vector") + (match_test "satisfies_constraint_vi (op) + || satisfies_constraint_Wc0 (op)"))) + (define_predicate "vector_move_operand" (ior (match_operand 0 "nonimmediate_operand") (and (match_code "const_vector") diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc index ad4a909..4a7eb47 100644 --- a/gcc/config/riscv/riscv-vector-builtins.cc +++ b/gcc/config/riscv/riscv-vector-builtins.cc @@ -3471,7 +3471,14 @@ function_expander::function_expander (const function_instance &instance, exp (exp_in), target (target_in), opno (0) { if (!function_returns_void_p ()) - create_output_operand (&m_ops[opno++], target, TYPE_MODE (TREE_TYPE (exp))); + { + if (target != NULL_RTX && MEM_P (target)) + /* Since there is no intrinsic where target is a mem operand, it + should be converted to reg if it is a mem operand. */ + target = force_reg (GET_MODE (target), target); + create_output_operand (&m_ops[opno++], target, + TYPE_MODE (TREE_TYPE (exp))); + } } /* Take argument ARGNO from EXP's argument list and convert it into diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index a442e0f..d6bfbe8 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -1450,69 +1450,61 @@ ;; - 15.1 Vector Mask-Register Logical Instructions ;; ------------------------------------------------------------------------------- -;; vle.v/vse.v/vmv.v.v/vmv.v.x/vmv.v.i/vfmv.v.f. -;; For vle.v/vmv.v.v/vmv.v.x/vmv.v.i/vfmv.v.f, we may need merge and mask operand. +;; vle.v/vse.v/vmv.v.v. +;; For vle.v/vmv.v.v, we may need merge and mask operand. ;; For vse.v, we don't need merge operand, so it should always match "vu". ;; constraint alternative 0 ~ 1 match vle.v. ;; constraint alternative 2 match vse.v. ;; constraint alternative 3 match vmv.v.v. -;; constraint alternative 4 match vmv.v.i. -;; For vmv.v.i, we allow 2 following cases: -;; 1. (const_vector:RVVMF8QI repeat [ -;; (const_int:QI N)]), -15 <= N < 16. -;; 2. (const_vector:RVVMF2SF repeat [ -;; (const_double:SF 0.0 [0x0.0p+0])]). - -;; We add "MEM_P (operands[0]) || MEM_P (operands[3]) || CONST_VECTOR_P (operands[1])" here to -;; make sure we don't want CSE to generate the following pattern: -;; (insn 17 8 19 2 (set (reg:RVVMF4HI 134 [ _1 ]) -;; (if_then_else:RVVMF4HI (unspec:RVVM1BI [ -;; (reg/v:RVVM1BI 137 [ mask ]) -;; (reg:DI 151) -;; (const_int 0 [0]) repeated x3 -;; (reg:SI 66 vl) -;; (reg:SI 67 vtype) -;; ] UNSPEC_VPREDICATE) -;; (const_vector:RVVMF4HI repeat [ -;; (const_int 0 [0]) -;; ]) -;; (reg/v:RVVMF4HI 140 [ merge ]))) "rvv.c":8:12 608 {pred_movvnx1hi} -;; (expr_list:REG_DEAD (reg:DI 151) -;; (expr_list:REG_DEAD (reg/v:RVVMF4HI 140 [ merge ]) -;; (expr_list:REG_DEAD (reg/v:RVVM1BI 137 [ mask ]) -;; (nil))))) -;; Since both vmv.v.v and vmv.v.i doesn't have mask operand. -(define_insn_and_split "@pred_mov<mode>" - [(set (match_operand:V_VLS 0 "nonimmediate_operand" "=vr, vr, vd, m, vr, vr, vr, vr") + +;; If operand 3 is a const_vector, then it is left to pred_braordcast patterns. +(define_expand "@pred_mov<mode>" + [(set (match_operand:V_VLS 0 "nonimmediate_operand") (if_then_else:V_VLS (unspec:<VM> - [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1, Wc1, vm, vmWc1, Wc1, Wc1, Wc1, Wc1") - (match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK, rK, rK, rK") - (match_operand 5 "const_int_operand" " i, i, i, i, i, i, i, i") - (match_operand 6 "const_int_operand" " i, i, i, i, i, i, i, i") - (match_operand 7 "const_int_operand" " i, i, i, i, i, i, i, i") + [(match_operand:<VM> 1 "vector_mask_operand") + (match_operand 4 "vector_length_operand") + (match_operand 5 "const_int_operand") + (match_operand 6 "const_int_operand") + (match_operand 7 "const_int_operand") (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) - (match_operand:V_VLS 3 "vector_move_operand" " m, m, m, vr, vr, vr, viWc0, viWc0") - (match_operand:V_VLS 2 "vector_merge_operand" " 0, vu, vu, vu, vu, 0, vu, 0")))] - "TARGET_VECTOR && (MEM_P (operands[0]) || MEM_P (operands[3]) - || CONST_VECTOR_P (operands[1]))" + (match_operand:V_VLS 3 "vector_move_operand") + (match_operand:V_VLS 2 "vector_merge_operand")))] + "TARGET_VECTOR" + {}) + +;; vle.v/vse.v,vmv.v.v +(define_insn_and_split "*pred_mov<mode>" + [(set (match_operand:V_VLS 0 "nonimmediate_operand" "=vr, vr, vd, m, vr, vr") + (if_then_else:V_VLS + (unspec:<VM> + [(match_operand:<VM> 1 "vector_mask_operand" "vmWc1, Wc1, vm, vmWc1, Wc1, Wc1") + (match_operand 4 "vector_length_operand" " rK, rK, rK, rK, rK, rK") + (match_operand 5 "const_int_operand" " i, i, i, i, i, i") + (match_operand 6 "const_int_operand" " i, i, i, i, i, i") + (match_operand 7 "const_int_operand" " i, i, i, i, i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (match_operand:V_VLS 3 "reg_or_mem_operand" " m, m, m, vr, vr, vr") + (match_operand:V_VLS 2 "vector_merge_operand" " 0, vu, vu, vu, vu, 0")))] + "(TARGET_VECTOR + && (register_operand (operands[0], <MODE>mode) + || register_operand (operands[3], <MODE>mode)))" "@ vle<sew>.v\t%0,%3%p1 vle<sew>.v\t%0,%3 vle<sew>.v\t%0,%3,%1.t vse<sew>.v\t%3,%0%p1 vmv.v.v\t%0,%3 - vmv.v.v\t%0,%3 - vmv.v.i\t%0,%v3 - vmv.v.i\t%0,%v3" + vmv.v.v\t%0,%3" "&& register_operand (operands[0], <MODE>mode) && register_operand (operands[3], <MODE>mode) && satisfies_constraint_vu (operands[2]) && INTVAL (operands[7]) == riscv_vector::VLMAX" [(set (match_dup 0) (match_dup 3))] "" - [(set_attr "type" "vlde,vlde,vlde,vste,vimov,vimov,vimov,vimov") + [(set_attr "type" "vlde,vlde,vlde,vste,vimov,vimov") (set_attr "mode" "<MODE>")]) ;; Dedicated pattern for vse.v instruction since we can't reuse pred_mov pattern to include @@ -1897,6 +1889,26 @@ [(set_attr "type" "vimovxv,vimovxv") (set_attr "mode" "<MODE>")]) +;; Because (vec_duplicate imm) will be converted to (const_vector imm), +;; This pattern is used to handle this case. +(define_insn "*pred_broadcast<mode>_imm" + [(set (match_operand:V_VLS 0 "register_operand" "=vr, vr") + (if_then_else:V_VLS + (unspec:<VM> + [(match_operand:<VM> 1 "vector_all_trues_mask_operand" " Wc1, Wc1") + (match_operand 4 "vector_length_operand" " rK, rK") + (match_operand 5 "const_int_operand" " i, i") + (match_operand 6 "const_int_operand" " i, i") + (match_operand 7 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (match_operand:V_VLS 3 "vector_const_int_or_double_0_operand" "viWc0, viWc0") + (match_operand:V_VLS 2 "vector_merge_operand" " vu, 0")))] + "TARGET_VECTOR" + "vmv.v.i\t%0,%v3" + [(set_attr "type" "vimov,vimov") + (set_attr "mode" "<MODE>")]) + ;; ------------------------------------------------------------------------------- ;; ---- Predicated Strided loads/stores ;; ------------------------------------------------------------------------------- diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/pr110943.c b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110943.c new file mode 100644 index 0000000..8a6c00f --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/pr110943.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -march=rv64gcv -mabi=lp64d" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +#include <riscv_vector.h> + +/* +** foo9: +** vsetivli\tzero,1,e64,m2,t[au],m[au] +** ... +** vs2r.v\tv[0-9]+,0\([a-x0-9]+\) +** ret +*/ +void foo9 (void *base, void *out, size_t vl) +{ + int64_t scalar = *(int64_t*)(base + 100); + vint64m2_t v = __riscv_vmv_v_x_i64m2 (0, 1); + *(vint64m2_t*)out = v; +} + +/* +** foo10: +** vsetivli\tzero,1,e64,m2,t[au],m[au] +** ... +** vs2r.v\tv[0-9]+,0\([a-x0-9]+\) +** ret +*/ +void foo10 (void *base, void *out, size_t vl) +{ + int64_t scalar = *(int64_t*)(base + 100); + vint64m2_t v = __riscv_vmv_s_x_i64m2 (0, 1); + *(vint64m2_t*)out = v; +} diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c b/gcc/testsuite/gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c index fc70c54..500748b 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/zvfhmin-intrinsic.c @@ -194,12 +194,12 @@ vfloat16m4_t test_vget_v_f16m8_f16m4(vfloat16m8_t src, size_t index) { /* { dg-final { scan-assembler-times {vsetvli\s+[a-x0-9]+,\s*zero,\s*e16,\s*m8,\s*t[au],\s*m[au]} 5 } } */ /* { dg-final { scan-assembler-times {vfwcvt\.f\.f\.v\s+v[0-9]+,\s*v[0-9]+} 5 } } */ /* { dg-final { scan-assembler-times {vfncvt\.f\.f\.w\s+v[0-9]+,\s*v[0-9]+} 5 } } */ -/* { dg-final { scan-assembler-times {vle16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 20 } } */ +/* { dg-final { scan-assembler-times {vle16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 15 } } */ /* { dg-final { scan-assembler-times {vse16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 15 } } */ -/* { dg-final { scan-assembler-times {vl1re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 5 } } */ -/* { dg-final { scan-assembler-times {vl2re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 4 } } */ -/* { dg-final { scan-assembler-times {vl8re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 5 } } */ -/* { dg-final { scan-assembler-times {vl4re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 5 } } */ +/* { dg-final { scan-assembler-times {vl1re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 7 } } */ +/* { dg-final { scan-assembler-times {vl2re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 5 } } */ +/* { dg-final { scan-assembler-times {vl8re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 6 } } */ +/* { dg-final { scan-assembler-times {vl4re16\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 6 } } */ /* { dg-final { scan-assembler-times {vs1r\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 5 } } */ /* { dg-final { scan-assembler-times {vs2r\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 5 } } */ /* { dg-final { scan-assembler-times {vs4r\.v\s+v[0-9]+,\s*0\([a-x][0-9]+\)} 5 } } */ |