diff options
author | liuhongt <hongtao.liu@intel.com> | 2023-06-20 15:41:00 +0800 |
---|---|---|
committer | liuhongt <hongtao.liu@intel.com> | 2023-06-25 12:06:13 +0800 |
commit | c79476da46728e2ab17e0e546262d2f6377081aa (patch) | |
tree | 8359ff82323c28eee0d8650571d696e1dedb4327 | |
parent | ec1f1d3f7abd0fe52e38cf8ad5c1e5074d67cdf5 (diff) | |
download | gcc-c79476da46728e2ab17e0e546262d2f6377081aa.zip gcc-c79476da46728e2ab17e0e546262d2f6377081aa.tar.gz gcc-c79476da46728e2ab17e0e546262d2f6377081aa.tar.bz2 |
Refine maskloadmn pattern with UNSPEC_MASKLOAD.
If mem_addr points to a memory region with less than whole vector size
bytes of accessible memory and k is a mask that would prevent reading
the inaccessible bytes from mem_addr, add UNSPEC_MASKLOAD to prevent
it to be transformed to vpblendd.
gcc/ChangeLog:
PR target/110309
* config/i386/sse.md (maskload<mode><avx512fmaskmodelower>):
Refine pattern with UNSPEC_MASKLOAD.
(maskload<mode><avx512fmaskmodelower>): Ditto.
(*<avx512>_load<mode>_mask): Extend mode iterator to
VI12HFBF_AVX512VL.
(*<avx512>_load<mode>): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr110309.c: New test.
-rw-r--r-- | gcc/config/i386/sse.md | 32 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/pr110309.c | 10 |
2 files changed, 28 insertions, 14 deletions
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index a847a2b..3b50c71 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1465,12 +1465,12 @@ }) (define_insn "*<avx512>_load<mode>_mask" - [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") - (vec_merge:VI12_AVX512VL - (unspec:VI12_AVX512VL - [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")] + [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand" "=v") + (vec_merge:VI12HFBF_AVX512VL + (unspec:VI12HFBF_AVX512VL + [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand" "m")] UNSPEC_MASKLOAD) - (match_operand:VI12_AVX512VL 2 "nonimm_or_0_operand" "0C") + (match_operand:VI12HFBF_AVX512VL 2 "nonimm_or_0_operand" "0C") (match_operand:<avx512fmaskmode> 3 "register_operand" "Yk")))] "TARGET_AVX512BW" "vmovdqu<ssescalarsize>\t{%1, %0%{%3%}%N2|%0%{%3%}%N2, %1}" @@ -1479,9 +1479,9 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn_and_split "*<avx512>_load<mode>" - [(set (match_operand:VI12_AVX512VL 0 "register_operand" "=v") - (unspec:VI12_AVX512VL - [(match_operand:VI12_AVX512VL 1 "memory_operand" "m")] + [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand" "=v") + (unspec:VI12HFBF_AVX512VL + [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand" "m")] UNSPEC_MASKLOAD))] "TARGET_AVX512BW" "#" @@ -26979,17 +26979,21 @@ "TARGET_AVX") (define_expand "maskload<mode><avx512fmaskmodelower>" - [(set (match_operand:V48H_AVX512VL 0 "register_operand") - (vec_merge:V48H_AVX512VL - (match_operand:V48H_AVX512VL 1 "memory_operand") + [(set (match_operand:V48_AVX512VL 0 "register_operand") + (vec_merge:V48_AVX512VL + (unspec:V48_AVX512VL + [(match_operand:V48_AVX512VL 1 "memory_operand")] + UNSPEC_MASKLOAD) (match_dup 0) (match_operand:<avx512fmaskmode> 2 "register_operand")))] "TARGET_AVX512F") (define_expand "maskload<mode><avx512fmaskmodelower>" - [(set (match_operand:VI12_AVX512VL 0 "register_operand") - (vec_merge:VI12_AVX512VL - (match_operand:VI12_AVX512VL 1 "memory_operand") + [(set (match_operand:VI12HFBF_AVX512VL 0 "register_operand") + (vec_merge:VI12HFBF_AVX512VL + (unspec:VI12HFBF_AVX512VL + [(match_operand:VI12HFBF_AVX512VL 1 "memory_operand")] + UNSPEC_MASKLOAD) (match_dup 0) (match_operand:<avx512fmaskmode> 2 "register_operand")))] "TARGET_AVX512BW") diff --git a/gcc/testsuite/gcc.target/i386/pr110309.c b/gcc/testsuite/gcc.target/i386/pr110309.c new file mode 100644 index 0000000..f6e9e9c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr110309.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 --param vect-partial-vector-usage=1 -march=znver4 -mprefer-vector-width=256" } */ +/* { dg-final { scan-assembler-not {(?n)vpblendd.*ymm} } } */ + + +void foo (int * __restrict a, int *b) +{ + for (int i = 0; i < 6; ++i) + a[i] = b[i] + 42; +} |