diff options
author | Xi Ruoyao <xry111@xry111.site> | 2025-03-02 19:02:50 +0800 |
---|---|---|
committer | Xi Ruoyao <xry111@xry111.site> | 2025-03-05 10:53:58 +0800 |
commit | 4856292f7a680ec478e7607f1b71781996d7d542 (patch) | |
tree | 69d33aca8e6231ce4ecc899ff49f42e530546428 | |
parent | c49ef76dc78e4fd185f046d038c9692781d3b3e4 (diff) | |
download | gcc-4856292f7a680ec478e7607f1b71781996d7d542.zip gcc-4856292f7a680ec478e7607f1b71781996d7d542.tar.gz gcc-4856292f7a680ec478e7607f1b71781996d7d542.tar.bz2 |
LoongArch: Fix incorrect reorder of __lsx_vldx and __lasx_xvldx [PR119084]
They could be incorrectly reordered with store instructions like st.b
because the RTL expression does not have a memory_operand or a (mem)
expression. The incorrect reordering has been observed in an openh264 LTO
build.
Expand them to a (mem) expression instead of unspec to fix the issue.
Then we need to make loongarch_address_insns return 1 for
ADDRESS_REG_REG because the constraint "R" expects this behavior;
otherwise the vldx instruction will be considered invalid by the
register allocation pass and turned into add.d + vld. Apply the
ADDRESS_REG_REG penalty in loongarch_address_cost instead;
loongarch_rtx_costs should then also call loongarch_address_cost
instead of loongarch_address_insns.
Closes: https://github.com/cisco/openh264/issues/3857
gcc/ChangeLog:
PR target/119084
* config/loongarch/lasx.md (UNSPEC_LASX_XVLDX): Remove.
(lasx_xvldx): Remove.
* config/loongarch/lsx.md (UNSPEC_LSX_VLDX): Remove.
(lsx_vldx): Remove.
* config/loongarch/simd.md (QIVEC): New define_mode_iterator.
(<simd_isa>_<x>vldx): New define_expand.
* config/loongarch/loongarch.cc (loongarch_address_insns_1): New
static function with most logic factored out from ...
(loongarch_address_insns): ... here. Call
loongarch_address_insns_1 with reg_reg_cost = 1.
(loongarch_address_cost): Call loongarch_address_insns_1 with
reg_reg_cost = la_addr_reg_reg_cost.
gcc/testsuite/ChangeLog:
PR target/119084
* gcc.target/loongarch/pr119084.c: New test.
-rw-r--r-- | gcc/config/loongarch/lasx.md | 13 | ||||
-rw-r--r-- | gcc/config/loongarch/loongarch.cc | 48 | ||||
-rw-r--r-- | gcc/config/loongarch/lsx.md | 13 | ||||
-rw-r--r-- | gcc/config/loongarch/simd.md | 9 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/loongarch/pr119084.c | 24 |
5 files changed, 61 insertions, 46 deletions
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md index e4505c1..43e3ab0 100644 --- a/gcc/config/loongarch/lasx.md +++ b/gcc/config/loongarch/lasx.md @@ -119,7 +119,6 @@ UNSPEC_LASX_XVSSRLRN UNSPEC_LASX_XVEXTL_QU_DU UNSPEC_LASX_XVLDI - UNSPEC_LASX_XVLDX UNSPEC_LASX_XVSTX UNSPEC_LASX_VECINIT_MERGE UNSPEC_LASX_VEC_SET_INTERNAL @@ -3579,18 +3578,6 @@ [(set_attr "type" "simd_load") (set_attr "mode" "V4DI")]) -(define_insn "lasx_xvldx" - [(set (match_operand:V32QI 0 "register_operand" "=f") - (unspec:V32QI [(match_operand:DI 1 "register_operand" "r") - (match_operand:DI 2 "reg_or_0_operand" "rJ")] - UNSPEC_LASX_XVLDX))] - "ISA_HAS_LASX" -{ - return "xvldx\t%u0,%1,%z2"; -} - [(set_attr "type" "simd_load") - (set_attr "mode" "V32QI")]) - (define_insn "lasx_xvstx" [(set (mem:V32QI (plus:DI (match_operand:DI 1 "register_operand" "r") (match_operand:DI 2 "reg_or_0_operand" "rJ"))) diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index f2177f8..68f5d85 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -2363,14 +2363,9 @@ loongarch_index_address_p (rtx addr, machine_mode mode ATTRIBUTE_UNUSED) return true; } -/* Return the number of instructions needed to load or store a value - of mode MODE at address X. Return 0 if X isn't valid for MODE. - Assume that multiword moves may need to be split into word moves - if MIGHT_SPLIT_P, otherwise assume that a single load or store is - enough. */ - -int -loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p) +static int +loongarch_address_insns_1 (rtx x, machine_mode mode, bool might_split_p, + int reg_reg_cost) { struct loongarch_address_info addr; int factor; @@ -2405,7 +2400,7 @@ loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p) return factor; case ADDRESS_REG_REG: - return factor * la_addr_reg_reg_cost; + return factor * reg_reg_cost; case ADDRESS_CONST_INT: return lsx_p ? 
0 : factor; @@ -2420,6 +2415,18 @@ loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p) return 0; } +/* Return the number of instructions needed to load or store a value + of mode MODE at address X. Return 0 if X isn't valid for MODE. + Assume that multiword moves may need to be split into word moves + if MIGHT_SPLIT_P, otherwise assume that a single load or store is + enough. */ + +int +loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p) +{ + return loongarch_address_insns_1 (x, mode, might_split_p, 1); +} + /* Return true if X fits within an unsigned field of BITS bits that is shifted left SHIFT bits before being used. */ @@ -3746,6 +3753,17 @@ loongarch_set_reg_reg_cost (machine_mode mode) } } +/* Implement TARGET_ADDRESS_COST. */ + +static int +loongarch_address_cost (rtx addr, machine_mode mode, + addr_space_t as ATTRIBUTE_UNUSED, + bool speed ATTRIBUTE_UNUSED) +{ + return loongarch_address_insns_1 (addr, mode, false, + la_addr_reg_reg_cost); +} + /* Implement TARGET_RTX_COSTS. */ static bool @@ -3814,7 +3832,7 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, *total = COSTS_N_INSNS (2); return true; } - cost = loongarch_address_insns (addr, mode, true); + cost = loongarch_address_cost (addr, mode, true, speed); if (cost > 0) { *total = COSTS_N_INSNS (cost + 1); @@ -4401,16 +4419,6 @@ loongarch_vector_costs::finish_cost (const vector_costs *scalar_costs) vector_costs::finish_cost (scalar_costs); } -/* Implement TARGET_ADDRESS_COST. */ - -static int -loongarch_address_cost (rtx addr, machine_mode mode, - addr_space_t as ATTRIBUTE_UNUSED, - bool speed ATTRIBUTE_UNUSED) -{ - return loongarch_address_insns (addr, mode, false); -} - /* Implement TARGET_INSN_COST. 
*/ static int diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md index c35826f..407c868 100644 --- a/gcc/config/loongarch/lsx.md +++ b/gcc/config/loongarch/lsx.md @@ -87,7 +87,6 @@ UNSPEC_LSX_VSSRLRN UNSPEC_LSX_VLDI UNSPEC_LSX_VSHUF_B - UNSPEC_LSX_VLDX UNSPEC_LSX_VSTX UNSPEC_LSX_VEXTL_QU_DU UNSPEC_LSX_VSETEQZ_V @@ -2746,18 +2745,6 @@ [(set_attr "type" "simd_shf") (set_attr "mode" "V16QI")]) -(define_insn "lsx_vldx" - [(set (match_operand:V16QI 0 "register_operand" "=f") - (unspec:V16QI [(match_operand:DI 1 "register_operand" "r") - (match_operand:DI 2 "reg_or_0_operand" "rJ")] - UNSPEC_LSX_VLDX))] - "ISA_HAS_LSX" -{ - return "vldx\t%w0,%1,%z2"; -} - [(set_attr "type" "simd_load") - (set_attr "mode" "V16QI")]) - (define_insn "lsx_vstx" [(set (mem:V16QI (plus:DI (match_operand:DI 1 "register_operand" "r") (match_operand:DI 2 "reg_or_0_operand" "rJ"))) diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md index 5e7bd49..8f7e912 100644 --- a/gcc/config/loongarch/simd.md +++ b/gcc/config/loongarch/simd.md @@ -217,6 +217,15 @@ (set_attr "mode" "<MODE>")]) +;; REG + REG load + +(define_mode_iterator QIVEC [(V16QI "ISA_HAS_LSX") (V32QI "ISA_HAS_LASX")]) +(define_expand "<simd_isa>_<x>vldx" + [(set (match_operand:QIVEC 0 "register_operand" "=f") + (mem:QIVEC (plus:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "register_operand"))))] + "TARGET_64BIT") + ;; ;; FP vector rounding instructions ;; diff --git a/gcc/testsuite/gcc.target/loongarch/pr119084.c b/gcc/testsuite/gcc.target/loongarch/pr119084.c new file mode 100644 index 0000000..b594330 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/pr119084.c @@ -0,0 +1,24 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mlsx" } */ +/* { dg-require-effective-target loongarch_sx_hw } */ + +typedef signed char V16QI __attribute__ ((vector_size (16))); +static char x[128]; + +__attribute__ ((noipa)) int +noopt (int x) +{ + return x; +} + +int +main (void) +{ + int t = noopt 
(32); + + x[32] = 1; + + V16QI y = __builtin_lsx_vldx (x, t); + if (y[0] != 1) + __builtin_trap (); +} |