aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorXi Ruoyao <xry111@xry111.site>2025-03-02 19:02:50 +0800
committerXi Ruoyao <xry111@xry111.site>2025-03-05 10:53:58 +0800
commit4856292f7a680ec478e7607f1b71781996d7d542 (patch)
tree69d33aca8e6231ce4ecc899ff49f42e530546428
parentc49ef76dc78e4fd185f046d038c9692781d3b3e4 (diff)
downloadgcc-4856292f7a680ec478e7607f1b71781996d7d542.zip
gcc-4856292f7a680ec478e7607f1b71781996d7d542.tar.gz
gcc-4856292f7a680ec478e7607f1b71781996d7d542.tar.bz2
LoongArch: Fix incorrect reorder of __lsx_vldx and __lasx_xvldx [PR119084]
They could be incorrectly reordered with store instructions like st.b because the RTL expression does not have a memory_operand or a (mem) expression. The incorrect reordering has been observed in an openh264 LTO build. Expand them to a (mem) expression instead of an unspec to fix the issue. Then we need to make loongarch_address_insns return 1 for ADDRESS_REG_REG because the constraint "R" expects this behavior; otherwise the vldx instruction will be considered invalid by the register allocation pass and turned into add.d + vld. Apply the ADDRESS_REG_REG penalty in loongarch_address_cost instead; loongarch_rtx_costs should then also call loongarch_address_cost instead of loongarch_address_insns. Closes: https://github.com/cisco/openh264/issues/3857 gcc/ChangeLog: PR target/119084 * config/loongarch/lasx.md (UNSPEC_LASX_XVLDX): Remove. (lasx_xvldx): Remove. * config/loongarch/lsx.md (UNSPEC_LSX_VLDX): Remove. (lsx_vldx): Remove. * config/loongarch/simd.md (QIVEC): New define_mode_iterator. (<simd_isa>_<x>vldx): New define_expand. * config/loongarch/loongarch.cc (loongarch_address_insns_1): New static function with most logic factored out from ... (loongarch_address_insns): ... here. Call loongarch_address_insns_1 with reg_reg_cost = 1. (loongarch_address_cost): Call loongarch_address_insns_1 with reg_reg_cost = la_addr_reg_reg_cost. gcc/testsuite/ChangeLog: PR target/119084 * gcc.target/loongarch/pr119084.c: New test.
-rw-r--r--gcc/config/loongarch/lasx.md13
-rw-r--r--gcc/config/loongarch/loongarch.cc48
-rw-r--r--gcc/config/loongarch/lsx.md13
-rw-r--r--gcc/config/loongarch/simd.md9
-rw-r--r--gcc/testsuite/gcc.target/loongarch/pr119084.c24
5 files changed, 61 insertions, 46 deletions
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index e4505c1..43e3ab0 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -119,7 +119,6 @@
UNSPEC_LASX_XVSSRLRN
UNSPEC_LASX_XVEXTL_QU_DU
UNSPEC_LASX_XVLDI
- UNSPEC_LASX_XVLDX
UNSPEC_LASX_XVSTX
UNSPEC_LASX_VECINIT_MERGE
UNSPEC_LASX_VEC_SET_INTERNAL
@@ -3579,18 +3578,6 @@
[(set_attr "type" "simd_load")
(set_attr "mode" "V4DI")])
-(define_insn "lasx_xvldx"
- [(set (match_operand:V32QI 0 "register_operand" "=f")
- (unspec:V32QI [(match_operand:DI 1 "register_operand" "r")
- (match_operand:DI 2 "reg_or_0_operand" "rJ")]
- UNSPEC_LASX_XVLDX))]
- "ISA_HAS_LASX"
-{
- return "xvldx\t%u0,%1,%z2";
-}
- [(set_attr "type" "simd_load")
- (set_attr "mode" "V32QI")])
-
(define_insn "lasx_xvstx"
[(set (mem:V32QI (plus:DI (match_operand:DI 1 "register_operand" "r")
(match_operand:DI 2 "reg_or_0_operand" "rJ")))
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index f2177f8..68f5d85 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -2363,14 +2363,9 @@ loongarch_index_address_p (rtx addr, machine_mode mode ATTRIBUTE_UNUSED)
return true;
}
-/* Return the number of instructions needed to load or store a value
- of mode MODE at address X. Return 0 if X isn't valid for MODE.
- Assume that multiword moves may need to be split into word moves
- if MIGHT_SPLIT_P, otherwise assume that a single load or store is
- enough. */
-
-int
-loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p)
+static int
+loongarch_address_insns_1 (rtx x, machine_mode mode, bool might_split_p,
+ int reg_reg_cost)
{
struct loongarch_address_info addr;
int factor;
@@ -2405,7 +2400,7 @@ loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p)
return factor;
case ADDRESS_REG_REG:
- return factor * la_addr_reg_reg_cost;
+ return factor * reg_reg_cost;
case ADDRESS_CONST_INT:
return lsx_p ? 0 : factor;
@@ -2420,6 +2415,18 @@ loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p)
return 0;
}
+/* Return the number of instructions needed to load or store a value
+ of mode MODE at address X. Return 0 if X isn't valid for MODE.
+ Assume that multiword moves may need to be split into word moves
+ if MIGHT_SPLIT_P, otherwise assume that a single load or store is
+ enough. */
+
+int
+loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p)
+{
+ return loongarch_address_insns_1 (x, mode, might_split_p, 1);
+}
+
/* Return true if X fits within an unsigned field of BITS bits that is
shifted left SHIFT bits before being used. */
@@ -3746,6 +3753,17 @@ loongarch_set_reg_reg_cost (machine_mode mode)
}
}
+/* Implement TARGET_ADDRESS_COST. */
+
+static int
+loongarch_address_cost (rtx addr, machine_mode mode,
+ addr_space_t as ATTRIBUTE_UNUSED,
+ bool speed ATTRIBUTE_UNUSED)
+{
+ return loongarch_address_insns_1 (addr, mode, false,
+ la_addr_reg_reg_cost);
+}
+
/* Implement TARGET_RTX_COSTS. */
static bool
@@ -3814,7 +3832,7 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
*total = COSTS_N_INSNS (2);
return true;
}
- cost = loongarch_address_insns (addr, mode, true);
+ cost = loongarch_address_cost (addr, mode, true, speed);
if (cost > 0)
{
*total = COSTS_N_INSNS (cost + 1);
@@ -4401,16 +4419,6 @@ loongarch_vector_costs::finish_cost (const vector_costs *scalar_costs)
vector_costs::finish_cost (scalar_costs);
}
-/* Implement TARGET_ADDRESS_COST. */
-
-static int
-loongarch_address_cost (rtx addr, machine_mode mode,
- addr_space_t as ATTRIBUTE_UNUSED,
- bool speed ATTRIBUTE_UNUSED)
-{
- return loongarch_address_insns (addr, mode, false);
-}
-
/* Implement TARGET_INSN_COST. */
static int
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
index c35826f..407c868 100644
--- a/gcc/config/loongarch/lsx.md
+++ b/gcc/config/loongarch/lsx.md
@@ -87,7 +87,6 @@
UNSPEC_LSX_VSSRLRN
UNSPEC_LSX_VLDI
UNSPEC_LSX_VSHUF_B
- UNSPEC_LSX_VLDX
UNSPEC_LSX_VSTX
UNSPEC_LSX_VEXTL_QU_DU
UNSPEC_LSX_VSETEQZ_V
@@ -2746,18 +2745,6 @@
[(set_attr "type" "simd_shf")
(set_attr "mode" "V16QI")])
-(define_insn "lsx_vldx"
- [(set (match_operand:V16QI 0 "register_operand" "=f")
- (unspec:V16QI [(match_operand:DI 1 "register_operand" "r")
- (match_operand:DI 2 "reg_or_0_operand" "rJ")]
- UNSPEC_LSX_VLDX))]
- "ISA_HAS_LSX"
-{
- return "vldx\t%w0,%1,%z2";
-}
- [(set_attr "type" "simd_load")
- (set_attr "mode" "V16QI")])
-
(define_insn "lsx_vstx"
[(set (mem:V16QI (plus:DI (match_operand:DI 1 "register_operand" "r")
(match_operand:DI 2 "reg_or_0_operand" "rJ")))
diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
index 5e7bd49..8f7e912 100644
--- a/gcc/config/loongarch/simd.md
+++ b/gcc/config/loongarch/simd.md
@@ -217,6 +217,15 @@
(set_attr "mode" "<MODE>")])
+;; REG + REG load
+
+(define_mode_iterator QIVEC [(V16QI "ISA_HAS_LSX") (V32QI "ISA_HAS_LASX")])
+(define_expand "<simd_isa>_<x>vldx"
+ [(set (match_operand:QIVEC 0 "register_operand" "=f")
+ (mem:QIVEC (plus:DI (match_operand:DI 1 "register_operand")
+ (match_operand:DI 2 "register_operand"))))]
+ "TARGET_64BIT")
+
;;
;; FP vector rounding instructions
;;
diff --git a/gcc/testsuite/gcc.target/loongarch/pr119084.c b/gcc/testsuite/gcc.target/loongarch/pr119084.c
new file mode 100644
index 0000000..b594330
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/pr119084.c
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mlsx" } */
+/* { dg-require-effective-target loongarch_sx_hw } */
+
+typedef signed char V16QI __attribute__ ((vector_size (16)));
+static char x[128];
+
+__attribute__ ((noipa)) int
+noopt (int x)
+{
+ return x;
+}
+
+int
+main (void)
+{
+ int t = noopt (32);
+
+ x[32] = 1;
+
+ V16QI y = __builtin_lsx_vldx (x, t);
+ if (y[0] != 1)
+ __builtin_trap ();
+}