diff options
author | liuhongt <hongtao.liu@intel.com> | 2023-07-18 16:32:54 +0800 |
---|---|---|
committer | liuhongt <hongtao.liu@intel.com> | 2023-08-02 14:47:17 +0800 |
commit | 1b446a9760942bfcfbde042290452f0c8f298276 (patch) | |
tree | 91e494592fa43a182f868b3bcc85c631a3fe7cb4 /gcc | |
parent | 5b501863ac7da57858fdd464dfb7a776143f22a2 (diff) | |
download | gcc-1b446a9760942bfcfbde042290452f0c8f298276.zip gcc-1b446a9760942bfcfbde042290452f0c8f298276.tar.gz gcc-1b446a9760942bfcfbde042290452f0c8f298276.tar.bz2 |
Optimize vlddqu + vinserti128 to vbroadcasti128
Compared to vbroadcasti128, vlddqu + vinserti128 uses a shuffle port in
addition to the load port. From a latency perspective, vbroadcasti128 is
no worse than vlddqu + vinserti128.
gcc/ChangeLog:
* config/i386/sse.md (*avx2_lddqu_inserti_to_bcasti): New
pre_reload define_insn_and_split.
gcc/testsuite/ChangeLog:
* gcc.target/i386/vlddqu_vinserti128.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/i386/sse.md | 18 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/vlddqu_vinserti128.c | 11 |
2 files changed, 29 insertions, 0 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 51961bb..8dea057 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -26609,6 +26609,24 @@ (set_attr "prefix" "vex,evex,evex") (set_attr "mode" "OI")]) +;; optimize vlddqu + vinserti128 to vbroadcasti128, the former will use +;; extra shuffle port in addition to load port than the latter. +;; For latency perspective,vbroadcasti is no worse. +(define_insn_and_split "avx2_lddqu_inserti_to_bcasti" + [(set (match_operand:V4DI 0 "register_operand" "=x,v,v") + (vec_concat:V4DI + (subreg:V2DI + (unspec:V16QI [(match_operand:V16QI 1 "memory_operand")] + UNSPEC_LDDQU) 0) + (subreg:V2DI (unspec:V16QI [(match_dup 1)] + UNSPEC_LDDQU) 0)))] + "TARGET_AVX2 && ix86_pre_reload_split ()" + "#" + "&& 1" + [(set (match_dup 0) + (vec_concat:V4DI (match_dup 1) (match_dup 1)))] + "operands[1] = adjust_address_nv (operands[1], V2DImode, 0);") + ;; Modes handled by AVX vec_dup patterns. (define_mode_iterator AVX_VEC_DUP_MODE [V8SI V8SF V4DI V4DF]) diff --git a/gcc/testsuite/gcc.target/i386/vlddqu_vinserti128.c b/gcc/testsuite/gcc.target/i386/vlddqu_vinserti128.c new file mode 100644 index 0000000..29699a5 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/vlddqu_vinserti128.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx2 -O2" } */ +/* { dg-final { scan-assembler-times "vbroadcasti128" 1 } } */ +/* { dg-final { scan-assembler-not {(?n)vlddqu.*xmm} } } */ + +#include <immintrin.h> +__m256i foo(void *data) { + __m128i X1 = _mm_lddqu_si128((__m128i*)data); + __m256i V1 = _mm256_broadcastsi128_si256 (X1); + return V1; +} |