aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorliuhongt <hongtao.liu@intel.com>2023-07-18 16:32:54 +0800
committerliuhongt <hongtao.liu@intel.com>2023-08-02 14:47:17 +0800
commit1b446a9760942bfcfbde042290452f0c8f298276 (patch)
tree91e494592fa43a182f868b3bcc85c631a3fe7cb4 /gcc
parent5b501863ac7da57858fdd464dfb7a776143f22a2 (diff)
downloadgcc-1b446a9760942bfcfbde042290452f0c8f298276.zip
gcc-1b446a9760942bfcfbde042290452f0c8f298276.tar.gz
gcc-1b446a9760942bfcfbde042290452f0c8f298276.tar.bz2
Optimize vlddqu + inserti128 to vbroadcasti128
vlddqu + vinserti128 uses a shuffle port in addition to the load port when compared with vbroadcasti128. From a latency perspective, vbroadcasti128 is no worse than vlddqu + vinserti128. gcc/ChangeLog: * config/i386/sse.md (*avx2_lddqu_inserti_to_bcasti): New pre_reload define_insn_and_split. gcc/testsuite/ChangeLog: * gcc.target/i386/vlddqu_vinserti128.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/i386/sse.md18
-rw-r--r--gcc/testsuite/gcc.target/i386/vlddqu_vinserti128.c11
2 files changed, 29 insertions, 0 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 51961bb..8dea057 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -26609,6 +26609,24 @@
(set_attr "prefix" "vex,evex,evex")
(set_attr "mode" "OI")])
+;; Pre-reload split: vlddqu + vinserti128 of one address -> vbroadcasti128.
+;; The former uses an extra shuffle port in addition to the load port;
+;; from a latency perspective vbroadcasti128 is no worse.
+(define_insn_and_split "*avx2_lddqu_inserti_to_bcasti"
+ [(set (match_operand:V4DI 0 "register_operand" "=x,v,v")
+ (vec_concat:V4DI
+ (subreg:V2DI
+ (unspec:V16QI [(match_operand:V16QI 1 "memory_operand")]
+ UNSPEC_LDDQU) 0)
+ (subreg:V2DI (unspec:V16QI [(match_dup 1)]
+ UNSPEC_LDDQU) 0)))]
+ "TARGET_AVX2 && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (vec_concat:V4DI (match_dup 1) (match_dup 1)))]
+ "operands[1] = adjust_address (operands[1], V2DImode, 0);")
+
;; Modes handled by AVX vec_dup patterns.
(define_mode_iterator AVX_VEC_DUP_MODE
[V8SI V8SF V4DI V4DF])
diff --git a/gcc/testsuite/gcc.target/i386/vlddqu_vinserti128.c b/gcc/testsuite/gcc.target/i386/vlddqu_vinserti128.c
new file mode 100644
index 0000000..29699a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vlddqu_vinserti128.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx2 -O2" } */
+/* { dg-final { scan-assembler-times "vbroadcasti128" 1 } } */
+/* { dg-final { scan-assembler-not {(?n)vlddqu.*xmm} } } */
+
+#include <immintrin.h>
+__m256i foo(void *data) {
+ /* Unaligned 128-bit load whose result is broadcast to both 128-bit lanes.  */
+ __m128i lane = _mm_lddqu_si128((__m128i*)data);
+ return _mm256_broadcastsi128_si256 (lane);
+}