aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author H.J. Lu <hjl.tools@gmail.com> 2026-04-29 19:50:38 +0800
committer H.J. Lu <hjl.tools@gmail.com> 2026-04-30 16:21:45 +0800
commit b81218009e091e92145a3fc54971fbbdb82d7298 (patch)
tree 88844d435130dc0f59576de64b6734acd08c7ae7
parent 86a3af821a82fdfca8755495021685f005fa5565 (diff)
download gcc-master.tar.gz
gcc-master.tar.bz2
gcc-master.zip
x86_cse: Convert CONST_VECTOR load to constant integer load [HEAD, trunk, master]
Convert CONST_VECTOR load no larger than integer register:

(set (reg:V2SI 106)
     (const_vector:V2SI [(const_int 1 [1]) repeated x2]))

to constant integer load:

(set (subreg:DI (reg:V2SI 106 [ _20 ]) 0)
     (const_int 4294967297 [0x100000001]))

and keep redundant constant integer load.  Generate zero CONST_VECTOR
load which works for both MMX and XMM registers.

Tested on Linux/x86-64 and Linux/i686.

gcc/

	PR target/125026
	PR target/125032
	* config/i386/i386-features.cc (ix86_place_single_vector_set):
	Don't check CONST_VECTOR load size.
	(replace_vector_const): Handle constant integer load.
	(x86_cse::x86_cse): Convert CONST_VECTOR load no larger than
	integer to constant integer load and keep redundant constant
	integer load.  Generate zero CONST_VECTOR load.

gcc/testsuite/

	PR target/125026
	PR target/125032
	* gcc.target/i386/pr125026.c: New test.
	* gcc.target/i386/pr125032-1.c: Likewise.
	* gcc.target/i386/pr125032-2.c: Likewise.

Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
-rw-r--r-- gcc/config/i386/i386-features.cc 97
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr125026.c 18
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr125032-1.c 30
-rw-r--r-- gcc/testsuite/gcc.target/i386/pr125032-2.c 15
4 files changed, 135 insertions, 25 deletions
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 63f9dcc9f93f..ce5f0e9c1789 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -3321,16 +3321,10 @@ ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs,
}
}
- /* CONST_VECTOR load no larger than integer register
-
- (set (reg:V2QI 294)
- (const_vector:V2QI [(const_int 0 [0]) repeated x2]))
-
- can use integer load. */
+ /* NB: CONST_VECTOR load is generated and handled in x86_cse. */
if (load
- && load->kind == X86_CSE_VEC_DUP
- && (!CONST_VECTOR_P (src)
- || GET_MODE_SIZE (GET_MODE (dest)) > UNITS_PER_WORD))
+ && !CONST_VECTOR_P (src)
+ && load->kind == X86_CSE_VEC_DUP)
{
/* Get the source from LOAD as (reg:SI 99) in
@@ -3644,7 +3638,9 @@ replace_vector_const (machine_mode vector_mode, rtx vector_const,
rtx replace;
/* Replace the source operand with VECTOR_CONST. */
- if (SUBREG_P (src) || mode == vector_mode)
+ if (SUBREG_P (src)
+ || mode == vector_mode
+ || CONST_INT_P (vector_const))
replace = vector_const;
else
{
@@ -3686,6 +3682,11 @@ replace_vector_const (machine_mode vector_mode, rtx vector_const,
print_rtl_single (dump_file, insn);
}
SET_SRC (set) = replace;
+ if (CONST_INT_P (replace))
+ {
+ dest = gen_rtx_SUBREG (scalar_mode, dest, 0);
+ SET_DEST (set) = dest;
+ }
/* Drop possible dead definitions. */
PATTERN (insn) = set;
INSN_CODE (insn) = -1;
@@ -4701,7 +4702,8 @@ pass_x86_cse::x86_cse (void)
if (load->count >= load->threshold)
{
machine_mode mode;
- rtx reg, broadcast_source, broadcast_reg;
+ rtx reg, broadcast_reg;
+ rtx broadcast_source = nullptr;
replaced = true;
switch (load->kind)
{
@@ -4716,9 +4718,61 @@ pass_x86_cse::x86_cse (void)
load->broadcast_reg = broadcast_reg;
break;
+ case X86_CSE_VEC_DUP:
+ if (CONST_INT_P (load->val)
+ && (load->val == CONST0_RTX (load->mode)
+ || load->size <= UNITS_PER_WORD))
+ {
+ /* Generate CONST_VECTOR load. */
+ mode = ix86_get_vector_cse_mode (load->size,
+ load->mode);
+
+ if (load->val == CONST0_RTX (load->mode))
+ broadcast_source = CONST0_RTX (mode);
+ else if (load->val == CONSTM1_RTX (load->mode))
+ broadcast_source = CONSTM1_RTX (mode);
+ else
+ {
+ int nunits = GET_MODE_NUNITS (mode);
+ rtvec v = rtvec_alloc (nunits);
+ for (int j = 0; j < nunits ; j++)
+ RTVEC_ELT (v, j) = load->val;
+ broadcast_source = gen_rtx_CONST_VECTOR (mode, v);
+ }
+
+ /* NB: Zero CONST_VECTOR load works for MMX and XMM
+ registers. */
+ if (load->size <= UNITS_PER_WORD)
+ {
+ /* Convert CONST_VECTOR load no larger than integer
+ register:
+
+ (set (reg:V2SI 106)
+ (const_vector:V2SI [(const_int 1 [1]) repeated x2]))
+
+ to constant integer load:
+
+ (set (subreg:DI (reg:V2SI 106 [ _20 ]) 0)
+ (const_int 4294967297 [0x100000001]))
+ */
+ machine_mode int_mode
+ = int_mode_for_mode (mode).require ();
+ broadcast_source = simplify_subreg (int_mode,
+ broadcast_source,
+ mode, 0);
+ gcc_assert (broadcast_source != nullptr);
+ replace_vector_const (mode, broadcast_source,
+ load->insns, int_mode);
+ /* Keep redundant constant integer load. */
+ load->broadcast_source = nullptr;
+ load->broadcast_reg = nullptr;
+ break;
+ }
+ }
+ /* FALLTHRU */
+
case X86_CSE_CONST0_VECTOR:
case X86_CSE_CONSTM1_VECTOR:
- case X86_CSE_VEC_DUP:
mode = ix86_get_vector_cse_mode (load->size, load->mode);
broadcast_reg = gen_reg_rtx (mode);
if (load->def_insn)
@@ -4743,18 +4797,7 @@ pass_x86_cse::x86_cse (void)
broadcast_source = CONSTM1_RTX (mode);
break;
case X86_CSE_VEC_DUP:
- if (CONST_INT_P (load->val)
- && GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
- {
- /* CONST_VECTOR load no larger than integer
- register size can use integer load. */
- int nunits = GET_MODE_NUNITS (mode);
- rtvec v = rtvec_alloc (nunits);
- for (int j = 0; j < nunits ; j++)
- RTVEC_ELT (v, j) = load->val;
- broadcast_source = gen_rtx_CONST_VECTOR (mode, v);
- }
- else
+ if (!broadcast_source)
{
reg = gen_reg_rtx (load->mode);
broadcast_source = gen_rtx_VEC_DUPLICATE (mode,
@@ -4844,9 +4887,13 @@ pass_x86_cse::x86_cse (void)
updated_gnu_tls_insns,
updated_gnu2_tls_insns);
break;
+ case X86_CSE_VEC_DUP:
+ /* Keep redundant constant integer load. */
+ if (!load->broadcast_reg)
+ break;
+ /* FALLTHRU */
case X86_CSE_CONST0_VECTOR:
case X86_CSE_CONSTM1_VECTOR:
- case X86_CSE_VEC_DUP:
ix86_place_single_vector_set (load->broadcast_reg,
load->broadcast_source,
load->bbs,
diff --git a/gcc/testsuite/gcc.target/i386/pr125026.c b/gcc/testsuite/gcc.target/i386/pr125026.c
new file mode 100644
index 000000000000..96ac6a9ef207
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr125026.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64" } */
+
+extern void a(int[]);
+int b;
+int d(int e, volatile int f) {
+ b = f - e;
+ int t[2] = {b, b};
+ a(t);
+}
+void g(int h[1]) {
+ if (d(0, 1))
+ h[0] = 0;
+ d(0, 1);
+}
+
+/* { dg-final { scan-assembler-times "movabsq\[ \\t\]+\\\$4294967297, %r\[a-z0-9\]+" 2 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "xmm" { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr125032-1.c b/gcc/testsuite/gcc.target/i386/pr125032-1.c
new file mode 100644
index 000000000000..7c54bab332aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr125032-1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile { target { ia32 && pie } } } */
+/* { dg-options "-O2 -march=i686 -mmmx -fPIE" } */
+
+typedef int __m64 __attribute__((__vector_size__(8)));
+typedef short __v4hi __attribute__((__vector_size__(8)));
+typedef char __v8qi __attribute__((__vector_size__(8)));
+int mmx_composite_over_n_8_0565_info_0, mmx_composite_over_n_8_0565_w;
+long long mmx_composite_over_n_8_0565_m3;
+__m64 mmx_composite_over_n_8_0565_v2, mmx_composite_over_n_8_0565_v3;
+__m64 in_over(__m64 src)
+{
+ __m64 __m1 = src, __m2;
+ return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
+}
+__m64 load8888()
+{
+ __m64 __m2, __m1;
+ return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
+}
+void mmx_composite_over_n_8_0565()
+{
+ __m64 vsrc = load8888();
+ mmx_composite_over_n_8_0565_w = mmx_composite_over_n_8_0565_info_0;
+ while (mmx_composite_over_n_8_0565_info_0)
+ if (mmx_composite_over_n_8_0565_m3)
+ {
+ mmx_composite_over_n_8_0565_v2 = in_over(vsrc);
+ mmx_composite_over_n_8_0565_v3 = in_over(vsrc);
+ }
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr125032-2.c b/gcc/testsuite/gcc.target/i386/pr125032-2.c
new file mode 100644
index 000000000000..71b072a7bfe2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr125032-2.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target fpic } } */
+/* { dg-options "-O2 -fPIC" } */
+
+long _HMAC_SHA256_Init_Klen;
+char _crypt_HMAC_SHA256_Init_pad[64];
+char _crypt_HMAC_SHA256_Init_pad_0, _crypt_HMAC_SHA256_Init_K_0;
+void _crypt_HMAC_SHA256_Init_i() {
+ if (_HMAC_SHA256_Init_Klen)
+ _HMAC_SHA256_Init_Klen = 2;
+ long __trans_tmp_1 =
+ __builtin_dynamic_object_size(_crypt_HMAC_SHA256_Init_pad, 0);
+ __builtin___memset_chk(_crypt_HMAC_SHA256_Init_pad, 2, 64, __trans_tmp_1);
+ for (; _HMAC_SHA256_Init_Klen;)
+ _crypt_HMAC_SHA256_Init_pad_0 ^= _crypt_HMAC_SHA256_Init_K_0;
+}