aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJuzhe-Zhong <juzhe.zhong@rivai.ai>2023-11-06 22:12:48 +0800
committerPan Li <pan2.li@intel.com>2023-11-06 22:20:42 +0800
commit418bd6427e3b7e06d031fef037bbc36d529be23f (patch)
treebd0296bfe4d935c1f212436b88c23def4c078508
parentbf72d50d16f0c1fa8102ca2544d2a05772f8c273 (diff)
downloadgcc-418bd6427e3b7e06d031fef037bbc36d529be23f.zip
gcc-418bd6427e3b7e06d031fef037bbc36d529be23f.tar.gz
gcc-418bd6427e3b7e06d031fef037bbc36d529be23f.tar.bz2
RISC-V: Early expand DImode vec_duplicate in RV32 system
An ICE was discovered in the recent rounding autovec support: config/riscv/riscv-v.cc:4314 65 | } | ^ 0x1fa5223 riscv_vector::validate_change_or_fail(rtx_def*, rtx_def**, rtx_def*, bool) /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-v.cc:4314 0x1fb1aa2 pre_vsetvl::remove_avl_operand() /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3342 0x1fb18c1 pre_vsetvl::cleaup() /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3308 0x1fb216d pass_vsetvl::lazy_vsetvl() /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3480 0x1fb2214 pass_vsetvl::execute(function*) /home/pli/repos/gcc/222/riscv-gnu-toolchain/gcc/__RISC-V_BUILD/../gcc/config/riscv/riscv-vsetvl.cc:3504 The root cause is that RA reload generates (set (reg) (vec_duplicate:DI)). However, this is not valid on RV32 systems, since RV32 has no single instruction that broadcasts a DI scalar. We should expand it early on RV32 systems. gcc/ChangeLog: * config/riscv/predicates.md: Adapt predicate. * config/riscv/riscv-protos.h (can_be_broadcasted_p): New function. * config/riscv/riscv-v.cc (can_be_broadcasted_p): Ditto. * config/riscv/vector.md (vec_duplicate<mode>): New pattern. (*vec_duplicate<mode>): Adapt vec_duplicate insn pattern. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/autovec/unop/sew64-rv32.c: New test.
-rw-r--r--gcc/config/riscv/predicates.md9
-rw-r--r--gcc/config/riscv/riscv-protos.h1
-rw-r--r--gcc/config/riscv/riscv-v.cc20
-rw-r--r--gcc/config/riscv/vector.md20
-rw-r--r--gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/sew64-rv32.c29
5 files changed, 70 insertions, 9 deletions
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index a37d035..90567a8 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -547,14 +547,7 @@
;; The scalar operand can be directly broadcast by RVV instructions.
(define_predicate "direct_broadcast_operand"
- (and (match_test "!(reload_completed && !FLOAT_MODE_P (GET_MODE (op))
- && (register_operand (op, GET_MODE (op)) || CONST_INT_P (op)
- || rtx_equal_p (op, CONST0_RTX (GET_MODE (op))))
- && maybe_gt (GET_MODE_BITSIZE (GET_MODE (op)), GET_MODE_BITSIZE (Pmode)))")
- (ior (match_test "rtx_equal_p (op, CONST0_RTX (GET_MODE (op)))")
- (ior (match_code "const_int,const_poly_int")
- (ior (match_operand 0 "register_operand")
- (match_test "satisfies_constraint_Wdm (op)"))))))
+ (match_test "riscv_vector::can_be_broadcasted_p (op)"))
;; A CONST_INT operand that has exactly two bits cleared.
(define_predicate "const_nottwobits_operand"
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 52d2a2c..85d4f6e 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -597,6 +597,7 @@ uint8_t get_sew (rtx_insn *);
enum vlmul_type get_vlmul (rtx_insn *);
int count_regno_occurrences (rtx_insn *, unsigned int);
bool imm_avl_p (machine_mode);
+bool can_be_broadcasted_p (rtx);
}
/* We classify builtin types into two classes:
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index eeefda64..0dc2ac4 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -4362,4 +4362,24 @@ count_regno_occurrences (rtx_insn *rinsn, unsigned int regno)
return count;
}
+/* Return true if the scalar operand OP can be directly broadcasted, i.e. is acceptable as the source of a vec_duplicate. */
+bool
+can_be_broadcasted_p (rtx op)
+{
+ machine_mode mode = GET_MODE (op);
+ /* We don't allow RA (register allocation) reload to generate
+ (vec_duplicate:DI reg) on RV32 systems, whereas we do allow
+ (vec_duplicate:DI mem) on RV32 systems. */
+ if (!can_create_pseudo_p () && !FLOAT_MODE_P (mode)
+ && maybe_gt (GET_MODE_SIZE (mode), GET_MODE_SIZE (Pmode))
+ && !satisfies_constraint_Wdm (op))
+ return false;
+
+ if (satisfies_constraint_K (op) || register_operand (op, mode)
+ || satisfies_constraint_Wdm (op) || rtx_equal_p (op, CONST0_RTX (mode)))
+ return true;
+
+ return can_create_pseudo_p () && nonmemory_operand (op, mode);
+}
+
} // namespace riscv_vector
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 8509c4f..e23f649 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -1370,11 +1370,29 @@
;; ---- Duplicate Operations
;; -----------------------------------------------------------------
+(define_expand "vec_duplicate<mode>"
+ [(set (match_operand:V_VLS 0 "register_operand")
+ (vec_duplicate:V_VLS
+ (match_operand:<VEL> 1 "direct_broadcast_operand")))]
+ "TARGET_VECTOR"
+ {
+ /* Expand the broadcast early when the element mode is wider than Pmode
+ (e.g. DImode on RV32), so that RA reload never has to generate (set (reg) (vec_duplicate:DI)). */
+ if (maybe_gt (GET_MODE_SIZE (<VEL>mode), GET_MODE_SIZE (Pmode)))
+ {
+ riscv_vector::emit_vlmax_insn (code_for_pred_broadcast (<MODE>mode),
+ riscv_vector::UNARY_OP, operands);
+ DONE;
+ }
+ /* Otherwise, let it fall through to the general vec_duplicate pattern,
+ which allows the vv->vx combine optimization in a later pass. */
+ })
+
;; According to GCC internal:
;; This pattern only handles duplicates of non-constant inputs.
;; Constant vectors go through the movm pattern instead.
;; So "direct_broadcast_operand" can only be mem or reg, no CONSTANT.
-(define_insn_and_split "vec_duplicate<mode>"
+(define_insn_and_split "*vec_duplicate<mode>"
[(set (match_operand:V_VLS 0 "register_operand")
(vec_duplicate:V_VLS
(match_operand:<VEL> 1 "direct_broadcast_operand")))]
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/sew64-rv32.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/sew64-rv32.c
new file mode 100644
index 0000000..8f813690
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/unop/sew64-rv32.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gcv -mabi=ilp32d -O3 -ftree-vectorize -fno-vect-cost-model -ffast-math" } */
+
+#include <stdint-gcc.h>
+#include "test-math.h"
+
+#define ARRAY_SIZE 128
+
+float in[ARRAY_SIZE];
+int64_t out[ARRAY_SIZE];
+int64_t ref[ARRAY_SIZE];
+
+TEST_UNARY_CALL_CVT (float, int64_t, __builtin_llrintf)
+TEST_ASSERT (int64_t)
+
+/* Cases 19, 20 and 21 use +Inf, -Inf and NaN inputs respectively; the macros come from test-math.h. */
+TEST_INIT_CVT (float, __builtin_inf (), int64_t, __builtin_llrintf (__builtin_inff ()), 19)
+TEST_INIT_CVT (float, -__builtin_inf (), int64_t, __builtin_llrintf (-__builtin_inff ()), 20)
+TEST_INIT_CVT (float, __builtin_nanf (""), int64_t, 0x7fffffffffffffff, 21)
+
+int
+main ()
+{
+ RUN_TEST_CVT (float, int64_t, 19, __builtin_llrintf, in, out, ref, ARRAY_SIZE);
+ RUN_TEST_CVT (float, int64_t, 20, __builtin_llrintf, in, out, ref, ARRAY_SIZE);
+ RUN_TEST_CVT (float, int64_t, 21, __builtin_llrintf, in, out, ref, ARRAY_SIZE);
+
+ return 0;
+}