aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorliuhongt <hongtao.liu@intel.com>2023-06-20 15:41:00 +0800
committerliuhongt <hongtao.liu@intel.com>2023-06-25 12:06:13 +0800
commitc79476da46728e2ab17e0e546262d2f6377081aa (patch)
tree8359ff82323c28eee0d8650571d696e1dedb4327 /gcc
parentec1f1d3f7abd0fe52e38cf8ad5c1e5074d67cdf5 (diff)
downloadgcc-c79476da46728e2ab17e0e546262d2f6377081aa.zip
gcc-c79476da46728e2ab17e0e546262d2f6377081aa.tar.gz
gcc-c79476da46728e2ab17e0e546262d2f6377081aa.tar.bz2
Refine maskloadmn pattern with UNSPEC_MASKLOAD.
If mem_addr points to a memory region with less than whole vector size bytes of accessible memory and k is a mask that would prevent reading the inaccessible bytes from mem_addr, add UNSPEC_MASKLOAD to prevent it to be transformed to vpblendd. gcc/ChangeLog: PR target/110309 * config/i386/sse.md (maskload<mode><avx512fmaskmodelower>): Refine pattern with UNSPEC_MASKLOAD. (maskload<mode><avx512fmaskmodelower>): Ditto. (*<avx512>_load<mode>_mask): Extend mode iterator to VI12HFBF_AVX512VL. (*<avx512>_load<mode>): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/pr110309.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/i386/sse.md32
-rw-r--r--gcc/testsuite/gcc.target/i386/pr110309.c10
2 files changed, 28 insertions, 14 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index a847a2b..3b50c71 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1465,12 +1465,12 @@
})
(define_insn "*<avx512>_load<mode>_mask"
- [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
- (vec_merge:VI12_AVX512VL
- (unspec:VI12_AVX512VL
- [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
+ [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand" "=v")
+ (vec_merge:VI12HFBF_AVX512VL
+ (unspec:VI12HFBF_AVX512VL
+ [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand" "m")]
UNSPEC_MASKLOAD)
- (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C")
+ (match_operand:VI12HFBF_AVX512VL 2 "nonimm_or_0_operand" "0C")
(match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))]
"TARGET_AVX512BW"
"vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}"
@@ -1479,9 +1479,9 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn_and_split "*<avx512>_load<mode>"
- [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v")
- (unspec:VI12_AVX512VL
- [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")]
+ [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand" "=v")
+ (unspec:VI12HFBF_AVX512VL
+ [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand" "m")]
UNSPEC_MASKLOAD))]
"TARGET_AVX512BW"
"#"
@@ -26979,17 +26979,21 @@
"TARGET_AVX")
(define_expand "maskload<mode><avx512fmaskmodelower>"
- [(set (match_operand:V48H_AVX512VL 0 "register_operand")
- (vec_merge:V48H_AVX512VL
- (match_operand:V48H_AVX512VL 1 "memory_operand")
+ [(set (match_operand:V48_AVX512VL 0 "register_operand")
+ (vec_merge:V48_AVX512VL
+ (unspec:V48_AVX512VL
+ [(match_operand:V48_AVX512VL 1 "memory_operand")]
+ UNSPEC_MASKLOAD)
(match_dup 0)
(match_operand:<avx512fmaskmode> 2 "register_operand")))]
"TARGET_AVX512F")
(define_expand "maskload<mode><avx512fmaskmodelower>"
- [(set (match_operand:VI12_AVX512VL 0 "register_operand")
- (vec_merge:VI12_AVX512VL
- (match_operand:VI12_AVX512VL 1 "memory_operand")
+ [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand")
+ (vec_merge:VI12HFBF_AVX512VL
+ (unspec:VI12HFBF_AVX512VL
+ [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand")]
+ UNSPEC_MASKLOAD)
(match_dup 0)
(match_operand:<avx512fmaskmode> 2 "register_operand")))]
"TARGET_AVX512BW")
diff --git a/gcc/testsuite/gcc.target/i386/pr110309.c b/gcc/testsuite/gcc.target/i386/pr110309.c
new file mode 100644
index 0000000..f6e9e9c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110309.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 --param vect-partial-vector-usage=1 -march=znver4 -mprefer-vector-width=256" } */
+/* { dg-final { scan-assembler-not {(?n)vpblendd.*ymm} } } */
+
+
+void foo (int * __restrict a, int *b)
+{
+ for (int i = 0; i < 6; ++i)
+ a[i] = b[i] + 42;
+}