diff options
author | Hu, Lin1 <lin1.hu@intel.com> | 2022-09-13 16:28:54 +0800 |
---|---|---|
committer | liuhongt <hongtao.liu@intel.com> | 2022-09-23 13:28:12 +0800 |
commit | a282f086ef26d90e9785e992cd09a0d118b24695 (patch) | |
tree | e43e7408f14a7fb49518ab2031ef81edbbca40ad /gcc/config | |
parent | 8b449dcd84334068c769a2f427812dadb95e61de (diff) | |
download | gcc-a282f086ef26d90e9785e992cd09a0d118b24695.zip gcc-a282f086ef26d90e9785e992cd09a0d118b24695.tar.gz gcc-a282f086ef26d90e9785e992cd09a0d118b24695.tar.bz2 |
i386: Optimize code generation of __mm256_zextsi128_si256(__mm_set1_epi8(-1))
gcc/ChangeLog:
PR target/94962
* config/i386/constraints.md (BH): New define_constraint.
* config/i386/i386.cc (standard_sse_constant_p): Add return
3/4 when operand matches new predicate.
(standard_sse_constant_opcode): Add new alternative branch to
return "vpcmpeqd".
* config/i386/predicates.md
(vector_all_ones_zero_extend_half_operand): New define_predicate.
(vector_all_ones_zero_extend_quarter_operand): Ditto.
* config/i386/sse.md: Add constraint to insn "mov<mode>_internal".
gcc/testsuite/ChangeLog:
PR target/94962
* gcc.target/i386/avx256-unaligned-load-1.c: Modify test.
* gcc.target/i386/avx256-unaligned-store-1.c: Ditto.
* gcc.target/i386/avx256-unaligned-store-2.c: Ditto.
* gcc.target/i386/avx256-unaligned-store-3.c: Ditto.
* gcc.target/i386/pr94962-1.c: New test.
* gcc.target/i386/pr94962-2.c: Ditto.
* gcc.target/i386/pr94962-3.c: Ditto.
* gcc.target/i386/pr94962-4.c: Ditto.
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/i386/constraints.md | 8 | ||||
-rw-r--r-- | gcc/config/i386/i386.cc | 26 | ||||
-rw-r--r-- | gcc/config/i386/predicates.md | 49 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 8 |
4 files changed, 86 insertions, 5 deletions
diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md index 7361687..95b2b14 100644 --- a/gcc/config/i386/constraints.md +++ b/gcc/config/i386/constraints.md @@ -168,6 +168,9 @@ ;; z Constant call address operand. ;; C Integer SSE constant with all bits set operand. ;; F Floating-point SSE constant with all bits set operand. +;; H Integer SSE constant that is 128/256bit all ones +;; and zero-extand to 256/512bit, or 128bit all ones +;; and zero-extend to 512bit. ;; M x86-64 memory operand. (define_constraint "Bf" @@ -233,6 +236,11 @@ (and (match_test "TARGET_SSE") (match_operand 0 "float_vector_all_ones_operand"))) +(define_constraint "BH" + "@internal integer constant with last half/quarter bits set operand." + (ior (match_operand 0 "vector_all_ones_zero_extend_half_operand") + (match_operand 0 "vector_all_ones_zero_extend_quarter_operand"))) + ;; NB: Similar to 'm', but don't use define_memory_constraint on x86-64 ;; to prevent LRA from converting the operand to the form '(mem (reg X))' ;; where X is a base register. diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index dadf453..ca799da 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -5186,7 +5186,8 @@ standard_80387_constant_rtx (int idx) XFmode); } -/* Return 1 if X is all bits 0 and 2 if X is all bits 1 +/* Return 1 if X is all bits 0, 2 if X is all bits 1 + and 3 if X is all bits 1 with zero extend in supported SSE/AVX vector mode. */ int @@ -5234,6 +5235,10 @@ standard_sse_constant_p (rtx x, machine_mode pred_mode) } } + if (vector_all_ones_zero_extend_half_operand (x, mode) + || vector_all_ones_zero_extend_quarter_operand (x, mode)) + return 3; + return 0; } @@ -5341,6 +5346,25 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands) gcc_unreachable (); } } + else if (vector_all_ones_zero_extend_half_operand (x, mode)) + { + if (GET_MODE_SIZE (mode) == 64) + { + gcc_assert (TARGET_AVX512F); + return "vpcmpeqd \t %t0, %t0, %t0"; + } + else if (GET_MODE_SIZE (mode) == 32) + { + gcc_assert (TARGET_AVX); + return "vpcmpeqd \t %x0, %x0, %x0"; + } + gcc_unreachable (); + } + else if (vector_all_ones_zero_extend_quarter_operand (x, mode)) + { + gcc_assert (TARGET_AVX512F); + return "vpcmpeqd \t %x0, %x0, %x0"; + } gcc_unreachable (); } diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 4f16bb7..655eabf 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1159,6 +1159,55 @@ (match_test "INTEGRAL_MODE_P (GET_MODE (op))") (match_test "op == CONSTM1_RTX (GET_MODE (op))"))) +/* Return true if operand is an 128/256bit all ones vector + that zero-extends to 256/512bit. */ +(define_predicate "vector_all_ones_zero_extend_half_operand" + (match_code "const_vector") +{ + mode = GET_MODE (op); + if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT + || (GET_MODE_SIZE (mode) != 32 + && GET_MODE_SIZE (mode) != 64)) + return false; + + int nelts = CONST_VECTOR_NUNITS (op); + for (int i = 0; i != nelts; i++) + { + rtx elt = CONST_VECTOR_ELT (op, i); + if (i < nelts / 2 + && elt != CONSTM1_RTX (GET_MODE_INNER (mode))) + return false; + if (i >= nelts / 2 + && elt != CONST0_RTX (GET_MODE_INNER (mode))) + return false; + } + return true; +}) + +/* Return true if operand is an 128bit all ones vector + that zero extends to 512bit. */ +(define_predicate "vector_all_ones_zero_extend_quarter_operand" + (match_code "const_vector") +{ + mode = GET_MODE (op); + if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT + || GET_MODE_SIZE (mode) != 64) + return false; + + int nelts = CONST_VECTOR_NUNITS (op); + for (int i = 0; i != nelts; i++) + { + rtx elt = CONST_VECTOR_ELT (op, i); + if (i < nelts / 4 + && elt != CONSTM1_RTX (GET_MODE_INNER (mode))) + return false; + if (i >= nelts / 4 + && elt != CONST0_RTX (GET_MODE_INNER (mode))) + return false; + } + return true; +}) + ; Return true when OP is operand acceptable for vector memory operand. ; Only AVX can have misaligned memory operand. (define_predicate "vector_memory_operand" diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b60c0d3..5c18963 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1323,9 +1323,9 @@ (define_insn "mov<mode>_internal" [(set (match_operand:VMOVE 0 "nonimmediate_operand" - "=v,v ,v ,m") + "=v,v ,v,v ,m") (match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand" - " C,<sseconstm1>,vm,v"))] + " C,<sseconstm1>,BH,vm,v"))] "TARGET_SSE && (register_operand (operands[0], <MODE>mode) || register_operand (operands[1], <MODE>mode)) @@ -1343,7 +1343,7 @@ gcc_unreachable (); } } - [(set_attr "type" "sselog1,sselog1,ssemov,ssemov") + [(set_attr "type" "sselog1,sselog1,sselog1,ssemov,ssemov") (set_attr "prefix" "maybe_vex") (set (attr "mode") (cond [(match_test "TARGET_AVX") @@ -1354,7 +1354,7 @@ (and (match_test "<MODE>mode == V2DFmode") (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")) (const_string "V4SF") - (and (eq_attr "alternative" "3") + (and (eq_attr "alternative" "4") (match_test "TARGET_SSE_TYPELESS_STORES")) (const_string "V4SF") (and (eq_attr "alternative" "0") |