diff options
author | Xi Ruoyao <xry111@xry111.site> | 2025-01-24 08:32:18 +0800 |
---|---|---|
committer | Xi Ruoyao <xry111@xry111.site> | 2025-02-19 14:34:44 +0800 |
commit | ed9794546db62279199a1cd84b8cdacd14ceab42 (patch) | |
tree | c66e80e6eaaa32b10de090fa181b7da19d2f203a /gcc | |
parent | ea3ebe48150d2109845f2c4622ebff182f618d97 (diff) | |
download | gcc-ed9794546db62279199a1cd84b8cdacd14ceab42.zip gcc-ed9794546db62279199a1cd84b8cdacd14ceab42.tar.gz gcc-ed9794546db62279199a1cd84b8cdacd14ceab42.tar.bz2 |
LoongArch: Try harder using vrepli instructions to materialize const vectors
For
a = (v4si){0xdddddddd, 0xdddddddd, 0xdddddddd, 0xdddddddd}
we just want
vrepli.b $vr0, 0xdd
but the compiler actually produces a load:
la.local $r14,.LC0
vld $vr0,$r14,0
This is because we only tried vrepli.d, which does not work here. Try all
vrepli instructions (element widths b, h, w and d) when materializing a const int vector to fix it.
gcc/ChangeLog:
* config/loongarch/loongarch-protos.h
(loongarch_const_vector_vrepli): New function prototype.
* config/loongarch/loongarch.cc (loongarch_const_vector_vrepli):
Implement.
(loongarch_const_insns): Call loongarch_const_vector_vrepli
instead of loongarch_const_vector_same_int_p.
(loongarch_split_vector_move_p): Likewise.
(loongarch_output_move): Use loongarch_const_vector_vrepli to
pun operands[1] into a better mode if it's a const int vector,
and decide the suffix of [x]vrepli with the new mode.
* config/loongarch/constraints.md (YI): Call
loongarch_const_vector_vrepli instead of
loongarch_const_vector_same_int_p.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/vrepli.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/loongarch/constraints.md | 2 | ||||
-rw-r--r-- | gcc/config/loongarch/loongarch-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/loongarch/loongarch.cc | 34 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/loongarch/vrepli.c | 15 |
4 files changed, 46 insertions, 6 deletions
diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md index a7c31c2..97a4e4e 100644 --- a/gcc/config/loongarch/constraints.md +++ b/gcc/config/loongarch/constraints.md @@ -301,7 +301,7 @@ A replicated vector const in which the replicated value is in the range [-512,511]." (and (match_code "const_vector") - (match_test "loongarch_const_vector_same_int_p (op, mode, -512, 511)"))) + (match_test "loongarch_const_vector_vrepli (op, mode)"))) (define_constraint "YC" "@internal diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h index e7b3181..6d10759 100644 --- a/gcc/config/loongarch/loongarch-protos.h +++ b/gcc/config/loongarch/loongarch-protos.h @@ -121,6 +121,7 @@ extern bool loongarch_const_vector_same_int_p (rtx, machine_mode, extern bool loongarch_const_vector_shuffle_set_p (rtx, machine_mode); extern bool loongarch_const_vector_bitimm_set_p (rtx, machine_mode); extern bool loongarch_const_vector_bitimm_clr_p (rtx, machine_mode); +extern rtx loongarch_const_vector_vrepli (rtx, machine_mode); extern rtx loongarch_lsx_vec_parallel_const_half (machine_mode, bool); extern rtx loongarch_gen_const_int_vector (machine_mode, HOST_WIDE_INT); extern enum reg_class loongarch_secondary_reload_class (enum reg_class, diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 495b623..404deed 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -1846,6 +1846,28 @@ loongarch_const_vector_shuffle_set_p (rtx op, machine_mode mode) return true; } +rtx +loongarch_const_vector_vrepli (rtx x, machine_mode mode) +{ + int size = GET_MODE_SIZE (mode); + + if (GET_CODE (x) != CONST_VECTOR + || GET_MODE_CLASS (mode) != MODE_VECTOR_INT) + return NULL_RTX; + + for (scalar_int_mode elem_mode: {QImode, HImode, SImode, DImode}) + { + machine_mode new_mode = + mode_for_vector (elem_mode, size / GET_MODE_SIZE (elem_mode)) + .require (); + rtx op = 
lowpart_subreg (new_mode, x, mode); + if (loongarch_const_vector_same_int_p (op, new_mode, -512, 511)) + return op; + } + + return NULL_RTX; +} + /* Return true if rtx constants of mode MODE should be put into a small data section. */ @@ -2501,7 +2523,7 @@ loongarch_const_insns (rtx x) case CONST_VECTOR: if ((LSX_SUPPORTED_MODE_P (GET_MODE (x)) || LASX_SUPPORTED_MODE_P (GET_MODE (x))) - && loongarch_const_vector_same_int_p (x, GET_MODE (x), -512, 511)) + && loongarch_const_vector_vrepli (x, GET_MODE (x))) return 1; /* Fall through. */ case CONST_DOUBLE: @@ -4656,7 +4678,7 @@ loongarch_split_vector_move_p (rtx dest, rtx src) /* Check for vector set to an immediate const vector with valid replicated element. */ if (FP_REG_RTX_P (dest) - && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511)) + && loongarch_const_vector_vrepli (src, GET_MODE (src))) return false; /* Check for vector load zero immediate. */ @@ -4792,13 +4814,15 @@ loongarch_output_move (rtx *operands) && src_code == CONST_VECTOR && CONST_INT_P (CONST_VECTOR_ELT (src, 0))) { - gcc_assert (loongarch_const_vector_same_int_p (src, mode, -512, 511)); + operands[1] = loongarch_const_vector_vrepli (src, mode); + gcc_assert (operands[1]); + switch (GET_MODE_SIZE (mode)) { case 16: - return "vrepli.%v0\t%w0,%E1"; + return "vrepli.%v1\t%w0,%E1"; case 32: - return "xvrepli.%v0\t%u0,%E1"; + return "xvrepli.%v1\t%u0,%E1"; default: gcc_unreachable (); } } diff --git a/gcc/testsuite/gcc.target/loongarch/vrepli.c b/gcc/testsuite/gcc.target/loongarch/vrepli.c new file mode 100644 index 0000000..8deeb47 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vrepli.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -mlasx" } */ +/* { dg-final { scan-assembler "\tvrepli\\.b\t\\\$vr\[0-9\]+,-35" } } */ +/* { dg-final { scan-assembler "\txvrepli\\.b\t\\\$xr\[0-9\]+,-35" } } */ + +int f __attribute__((vector_size (16))); +int g __attribute__((vector_size (32))); + +void +test (void) +{ + 
constexpr int x = (int) 0xdddddddd; + f = (typeof(f)){x, x, x, x}; + g = (typeof(g)){x, x, x, x, x, x, x, x}; +} |