diff options
| author | H.J. Lu <hjl.tools@gmail.com> | 2026-04-29 19:50:38 +0800 |
|---|---|---|
| committer | H.J. Lu <hjl.tools@gmail.com> | 2026-04-30 16:21:45 +0800 |
| commit | b81218009e091e92145a3fc54971fbbdb82d7298 (patch) | |
| tree | 88844d435130dc0f59576de64b6734acd08c7ae7 | |
| parent | 86a3af821a82fdfca8755495021685f005fa5565 (diff) | |
| download | gcc-master.tar.gz gcc-master.tar.bz2 gcc-master.zip | |
Convert a CONST_VECTOR load that is no larger than an integer register:
(set (reg:V2SI 106)
(const_vector:V2SI [(const_int 1 [1]) repeated x2]))
to a constant integer load:
(set (subreg:DI (reg:V2SI 106 [ _20 ]) 0)
(const_int 4294967297 [0x100000001]))
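and keep the redundant constant integer loads in place, i.e. each such load
is rewritten individually and no shared broadcast register is created for
them. Also generate a zero CONST_VECTOR load, which works for both MMX and
XMM registers.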
Tested on Linux/x86-64 and Linux/i686.
gcc/
PR target/125026
PR target/125032
* config/i386/i386-features.cc (ix86_place_single_vector_set):
Don't check CONST_VECTOR load size.
(replace_vector_const): Handle constant integer load.
(x86_cse::x86_cse): Convert CONST_VECTOR load no larger than
integer to constant integer load and keep redundant constant
integer load. Generate zero CONST_VECTOR load.
gcc/testsuite/
PR target/125026
PR target/125032
* gcc.target/i386/pr125026.c: New test.
* gcc.target/i386/pr125032-1.c: Likewise.
* gcc.target/i386/pr125032-2.c: Likewise.
Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
| -rw-r--r-- | gcc/config/i386/i386-features.cc | 97 | ||||
| -rw-r--r-- | gcc/testsuite/gcc.target/i386/pr125026.c | 18 | ||||
| -rw-r--r-- | gcc/testsuite/gcc.target/i386/pr125032-1.c | 30 | ||||
| -rw-r--r-- | gcc/testsuite/gcc.target/i386/pr125032-2.c | 15 |
4 files changed, 135 insertions, 25 deletions
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index 63f9dcc9f93f..ce5f0e9c1789 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -3321,16 +3321,10 @@ ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs, } } - /* CONST_VECTOR load no larger than integer register - - (set (reg:V2QI 294) - (const_vector:V2QI [(const_int 0 [0]) repeated x2])) - - can use integer load. */ + /* NB: CONST_VECTOR load is generated and handled in x86_cse. */ if (load - && load->kind == X86_CSE_VEC_DUP - && (!CONST_VECTOR_P (src) - || GET_MODE_SIZE (GET_MODE (dest)) > UNITS_PER_WORD)) + && !CONST_VECTOR_P (src) + && load->kind == X86_CSE_VEC_DUP) { /* Get the source from LOAD as (reg:SI 99) in @@ -3644,7 +3638,9 @@ replace_vector_const (machine_mode vector_mode, rtx vector_const, rtx replace; /* Replace the source operand with VECTOR_CONST. */ - if (SUBREG_P (src) || mode == vector_mode) + if (SUBREG_P (src) + || mode == vector_mode + || CONST_INT_P (vector_const)) replace = vector_const; else { @@ -3686,6 +3682,11 @@ replace_vector_const (machine_mode vector_mode, rtx vector_const, print_rtl_single (dump_file, insn); } SET_SRC (set) = replace; + if (CONST_INT_P (replace)) + { + dest = gen_rtx_SUBREG (scalar_mode, dest, 0); + SET_DEST (set) = dest; + } /* Drop possible dead definitions. */ PATTERN (insn) = set; INSN_CODE (insn) = -1; @@ -4701,7 +4702,8 @@ pass_x86_cse::x86_cse (void) if (load->count >= load->threshold) { machine_mode mode; - rtx reg, broadcast_source, broadcast_reg; + rtx reg, broadcast_reg; + rtx broadcast_source = nullptr; replaced = true; switch (load->kind) { @@ -4716,9 +4718,61 @@ pass_x86_cse::x86_cse (void) load->broadcast_reg = broadcast_reg; break; + case X86_CSE_VEC_DUP: + if (CONST_INT_P (load->val) + && (load->val == CONST0_RTX (load->mode) + || load->size <= UNITS_PER_WORD)) + { + /* Generate CONST_VECTOR load. 
*/ + mode = ix86_get_vector_cse_mode (load->size, + load->mode); + + if (load->val == CONST0_RTX (load->mode)) + broadcast_source = CONST0_RTX (mode); + else if (load->val == CONSTM1_RTX (load->mode)) + broadcast_source = CONSTM1_RTX (mode); + else + { + int nunits = GET_MODE_NUNITS (mode); + rtvec v = rtvec_alloc (nunits); + for (int j = 0; j < nunits ; j++) + RTVEC_ELT (v, j) = load->val; + broadcast_source = gen_rtx_CONST_VECTOR (mode, v); + } + + /* NB: Zero CONST_VECTOR load works for MMX and XMM + registers. */ + if (load->size <= UNITS_PER_WORD) + { + /* Convert CONST_VECTOR load no larger than integer + register: + + (set (reg:V2SI 106) + (const_vector:V2SI [(const_int 1 [1]) repeated x2])) + + to constant integer load: + + (set (subreg:DI (reg:V2SI 106 [ _20 ]) 0) + (const_int 4294967297 [0x100000001])) + */ + machine_mode int_mode + = int_mode_for_mode (mode).require (); + broadcast_source = simplify_subreg (int_mode, + broadcast_source, + mode, 0); + gcc_assert (broadcast_source != nullptr); + replace_vector_const (mode, broadcast_source, + load->insns, int_mode); + /* Keep redundant constant integer load. */ + load->broadcast_source = nullptr; + load->broadcast_reg = nullptr; + break; + } + } + /* FALLTHRU */ + case X86_CSE_CONST0_VECTOR: case X86_CSE_CONSTM1_VECTOR: - case X86_CSE_VEC_DUP: mode = ix86_get_vector_cse_mode (load->size, load->mode); broadcast_reg = gen_reg_rtx (mode); if (load->def_insn) @@ -4743,18 +4797,7 @@ pass_x86_cse::x86_cse (void) broadcast_source = CONSTM1_RTX (mode); break; case X86_CSE_VEC_DUP: - if (CONST_INT_P (load->val) - && GET_MODE_SIZE (mode) <= UNITS_PER_WORD) - { - /* CONST_VECTOR load no larger than integer - register size can use integer load. 
*/ - int nunits = GET_MODE_NUNITS (mode); - rtvec v = rtvec_alloc (nunits); - for (int j = 0; j < nunits ; j++) - RTVEC_ELT (v, j) = load->val; - broadcast_source = gen_rtx_CONST_VECTOR (mode, v); - } - else + if (!broadcast_source) { reg = gen_reg_rtx (load->mode); broadcast_source = gen_rtx_VEC_DUPLICATE (mode, @@ -4844,9 +4887,13 @@ pass_x86_cse::x86_cse (void) updated_gnu_tls_insns, updated_gnu2_tls_insns); break; + case X86_CSE_VEC_DUP: + /* Keep redundant constant integer load. */ + if (!load->broadcast_reg) + break; + /* FALLTHRU */ case X86_CSE_CONST0_VECTOR: case X86_CSE_CONSTM1_VECTOR: - case X86_CSE_VEC_DUP: ix86_place_single_vector_set (load->broadcast_reg, load->broadcast_source, load->bbs, diff --git a/gcc/testsuite/gcc.target/i386/pr125026.c b/gcc/testsuite/gcc.target/i386/pr125026.c new file mode 100644 index 000000000000..96ac6a9ef207 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr125026.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -march=x86-64" } */ + +extern void a(int[]); +int b; +int d(int e, volatile int f) { + b = f - e; + int t[2] = {b, b}; + a(t); +} +void g(int h[1]) { + if (d(0, 1)) + h[0] = 0; + d(0, 1); +} + +/* { dg-final { scan-assembler-times "movabsq\[ \\t\]+\\\$4294967297, %r\[a-z0-9\]+" 2 { target { ! ia32 } } } } */ +/* { dg-final { scan-assembler-not "xmm" { target { ! 
ia32 } } } } */ diff --git a/gcc/testsuite/gcc.target/i386/pr125032-1.c b/gcc/testsuite/gcc.target/i386/pr125032-1.c new file mode 100644 index 000000000000..7c54bab332aa --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr125032-1.c @@ -0,0 +1,30 @@ +/* { dg-do compile { target { ia32 && pie } } } */ +/* { dg-options "-O2 -march=i686 -mmmx -fPIE" } */ + +typedef int __m64 __attribute__((__vector_size__(8))); +typedef short __v4hi __attribute__((__vector_size__(8))); +typedef char __v8qi __attribute__((__vector_size__(8))); +int mmx_composite_over_n_8_0565_info_0, mmx_composite_over_n_8_0565_w; +long long mmx_composite_over_n_8_0565_m3; +__m64 mmx_composite_over_n_8_0565_v2, mmx_composite_over_n_8_0565_v3; +__m64 in_over(__m64 src) +{ + __m64 __m1 = src, __m2; + return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2); +} +__m64 load8888() +{ + __m64 __m2, __m1; + return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2); +} +void mmx_composite_over_n_8_0565() +{ + __m64 vsrc = load8888(); + mmx_composite_over_n_8_0565_w = mmx_composite_over_n_8_0565_info_0; + while (mmx_composite_over_n_8_0565_info_0) + if (mmx_composite_over_n_8_0565_m3) + { + mmx_composite_over_n_8_0565_v2 = in_over(vsrc); + mmx_composite_over_n_8_0565_v3 = in_over(vsrc); + } +} diff --git a/gcc/testsuite/gcc.target/i386/pr125032-2.c b/gcc/testsuite/gcc.target/i386/pr125032-2.c new file mode 100644 index 000000000000..71b072a7bfe2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr125032-2.c @@ -0,0 +1,15 @@ +/* { dg-do compile { target fpic } } */ +/* { dg-options "-O2 -fPIC" } */ + +long _HMAC_SHA256_Init_Klen; +char _crypt_HMAC_SHA256_Init_pad[64]; +char _crypt_HMAC_SHA256_Init_pad_0, _crypt_HMAC_SHA256_Init_K_0; +void _crypt_HMAC_SHA256_Init_i() { + if (_HMAC_SHA256_Init_Klen) + _HMAC_SHA256_Init_Klen = 2; + long __trans_tmp_1 = + __builtin_dynamic_object_size(_crypt_HMAC_SHA256_Init_pad, 0); + __builtin___memset_chk(_crypt_HMAC_SHA256_Init_pad, 2, 64, __trans_tmp_1); + for 
(; _HMAC_SHA256_Init_Klen;) + _crypt_HMAC_SHA256_Init_pad_0 ^= _crypt_HMAC_SHA256_Init_K_0; +} |
