From 831017d5e72173f2c58e5475b7fcd35ee07a601f Mon Sep 17 00:00:00 2001 From: liuhongt Date: Fri, 4 Aug 2023 15:35:54 +0800 Subject: i386: Clear upper bits of XMM register for V4HFmode/V2HFmode operations [PR110762] Similar to r14-2786-gade30fad6669e5, the patch is for V4HF/V2HFmode. gcc/ChangeLog: PR target/110762 * config/i386/mmx.md (<insn><mode>3): Changed from define_insn to define_expand and break into .. (<insn>v4hf3): .. this. (divv4hf3): .. this. (<insn>v2hf3): .. this. (divv2hf3): .. this. (movd_v2hf_to_sse): New define_expand. (movq_<mode>_to_sse): Extend to V4HFmode. (mmxdoublevecmode): Ditto. (V2FI_V4HF): New mode iterator. * config/i386/sse.md (*vec_concatv4sf): Extend to handle V8HF by using mode iterator V4SF_V8HF, renamed to .. (*vec_concat<mode>): .. this. (*vec_concatv4sf_0): Extend to handle V8HF by using mode iterator V4SF_V8HF, renamed to .. (*vec_concat<mode>_0): .. this. (*vec_concatv8hf_movss): New define_insn. (V4SF_V8HF): New mode iterator. gcc/testsuite/ChangeLog: * gcc.target/i386/pr110762-v4hf.c: New test. 
--- gcc/config/i386/mmx.md | 109 +++++++++++++++++++++----- gcc/config/i386/sse.md | 40 +++++++--- gcc/testsuite/gcc.target/i386/pr110762-v4hf.c | 57 ++++++++++++++ 3 files changed, 177 insertions(+), 29 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr110762-v4hf.c diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md index f80dd6f..b49554e 100644 --- a/gcc/config/i386/mmx.md +++ b/gcc/config/i386/mmx.md @@ -79,9 +79,7 @@ ;; V2S* modes (define_mode_iterator V2FI [V2SF V2SI]) -;; 4-byte and 8-byte float16 vector modes -(define_mode_iterator VHF_32_64 [V4HF V2HF]) - +(define_mode_iterator V2FI_V4HF [V2SF V2SI V4HF]) ;; Mapping from integer vector mode to mnemonic suffix (define_mode_attr mmxvecsize [(V8QI "b") (V4QI "b") (V2QI "b") @@ -108,7 +106,7 @@ ;; Mapping of vector modes to a vector mode of double size (define_mode_attr mmxdoublevecmode - [(V2SF "V4SF") (V2SI "V4SI")]) + [(V2SF "V4SF") (V2SI "V4SI") (V4HF "V8HF")]) ;; Mapping of vector modes back to the scalar modes (define_mode_attr mmxscalarmode @@ -594,7 +592,7 @@ (define_expand "movq__to_sse" [(set (match_operand: 0 "register_operand") (vec_concat: - (match_operand:V2FI 1 "nonimmediate_operand") + (match_operand:V2FI_V4HF 1 "nonimmediate_operand") (match_dup 2)))] "TARGET_SSE2" "operands[2] = CONST0_RTX (mode);") @@ -1927,21 +1925,94 @@ ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(define_insn "3" - [(set (match_operand:VHF_32_64 0 "register_operand" "=v") - (plusminusmultdiv:VHF_32_64 - (match_operand:VHF_32_64 1 "register_operand" "v") - (match_operand:VHF_32_64 2 "register_operand" "v")))] +(define_expand "v4hf3" + [(set (match_operand:V4HF 0 "register_operand") + (plusminusmult:V4HF + (match_operand:V4HF 1 "nonimmediate_operand") + (match_operand:V4HF 2 "nonimmediate_operand")))] "TARGET_AVX512FP16 && TARGET_AVX512VL" - "vph\t{%2, %1, %0|%0, %1, %2}" - [(set (attr "type") - (cond [(match_test " == MULT") - (const_string "ssemul") - (match_test " == 
DIV") - (const_string "ssediv")] - (const_string "sseadd"))) - (set_attr "prefix" "evex") - (set_attr "mode" "V8HF")]) +{ + rtx op2 = gen_reg_rtx (V8HFmode); + rtx op1 = gen_reg_rtx (V8HFmode); + rtx op0 = gen_reg_rtx (V8HFmode); + + emit_insn (gen_movq_v4hf_to_sse (op2, operands[2])); + emit_insn (gen_movq_v4hf_to_sse (op1, operands[1])); + + emit_insn (gen_v8hf3 (op0, op1, op2)); + + emit_move_insn (operands[0], lowpart_subreg (V4HFmode, op0, V8HFmode)); + DONE; +}) + +(define_expand "divv4hf3" + [(set (match_operand:V4HF 0 "register_operand") + (div:V4HF + (match_operand:V4HF 1 "nonimmediate_operand") + (match_operand:V4HF 2 "nonimmediate_operand")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" +{ + rtx op2 = gen_reg_rtx (V8HFmode); + rtx op1 = gen_reg_rtx (V8HFmode); + rtx op0 = gen_reg_rtx (V8HFmode); + + emit_insn (gen_movq_v4hf_to_sse (op1, operands[1])); + rtx tmp = gen_rtx_VEC_CONCAT (V8HFmode, operands[2], + force_reg (V4HFmode, CONST1_RTX (V4HFmode))); + emit_insn (gen_rtx_SET (op2, tmp)); + emit_insn (gen_divv8hf3 (op0, op1, op2)); + emit_move_insn (operands[0], lowpart_subreg (V4HFmode, op0, V8HFmode)); + DONE; +}) + +(define_expand "movd_v2hf_to_sse" + [(set (match_operand:V8HF 0 "register_operand") + (vec_merge:V8HF + (vec_duplicate:V8HF + (match_operand:V2HF 1 "nonimmediate_operand")) + (match_operand:V8HF 2 "reg_or_0_operand") + (const_int 3)))] + "TARGET_SSE") + +(define_expand "v2hf3" + [(set (match_operand:V2HF 0 "register_operand") + (plusminusmult:V2HF + (match_operand:V2HF 1 "nonimmediate_operand") + (match_operand:V2HF 2 "nonimmediate_operand")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" +{ + rtx op2 = gen_reg_rtx (V8HFmode); + rtx op1 = gen_reg_rtx (V8HFmode); + rtx op0 = gen_reg_rtx (V8HFmode); + + emit_insn (gen_movd_v2hf_to_sse (op2, operands[2], CONST0_RTX (V8HFmode))); + emit_insn (gen_movd_v2hf_to_sse (op1, operands[1], CONST0_RTX (V8HFmode))); + emit_insn (gen_v8hf3 (op0, op1, op2)); + + emit_move_insn (operands[0], lowpart_subreg 
(V2HFmode, op0, V8HFmode)); + DONE; +}) + +(define_expand "divv2hf3" + [(set (match_operand:V2HF 0 "register_operand") + (div:V2HF + (match_operand:V2HF 1 "nonimmediate_operand") + (match_operand:V2HF 2 "nonimmediate_operand")))] + "TARGET_AVX512FP16 && TARGET_AVX512VL" +{ + rtx op2 = gen_reg_rtx (V8HFmode); + rtx op1 = gen_reg_rtx (V8HFmode); + rtx op0 = gen_reg_rtx (V8HFmode); + + emit_insn (gen_movd_v2hf_to_sse (op2, operands[2], + force_reg (V8HFmode, CONST1_RTX (V8HFmode)))); + emit_insn (gen_movd_v2hf_to_sse (op1, operands[1], CONST0_RTX (V8HFmode))); + emit_insn (gen_divv8hf3 (op0, op1, op2)); + + emit_move_insn (operands[0], lowpart_subreg (V2HFmode, op0, V8HFmode)); + DONE; +}) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b076140..48378be 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -430,6 +430,9 @@ (define_mode_iterator VFB_512 [V32HF V16SF V8DF]) +(define_mode_iterator V4SF_V8HF + [V4SF V8HF]) + (define_mode_iterator VI48_AVX512VL [V16SI (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL") V8DI (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")]) @@ -10917,11 +10920,11 @@ (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov") (set_attr "mode" "V4SF,SF,DI,DI")]) -(define_insn "*vec_concatv4sf" - [(set (match_operand:V4SF 0 "register_operand" "=x,v,x,v") - (vec_concat:V4SF - (match_operand:V2SF 1 "register_operand" " 0,v,0,v") - (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))] +(define_insn "*vec_concat" + [(set (match_operand:V4SF_V8HF 0 "register_operand" "=x,v,x,v") + (vec_concat:V4SF_V8HF + (match_operand: 1 "register_operand" " 0,v,0,v") + (match_operand: 2 "nonimmediate_operand" " x,v,m,m")))] "TARGET_SSE" "@ movlhps\t{%2, %0|%0, %2} @@ -10933,17 +10936,34 @@ (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex") (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")]) -(define_insn "*vec_concatv4sf_0" - [(set (match_operand:V4SF 0 
"register_operand" "=v") - (vec_concat:V4SF - (match_operand:V2SF 1 "nonimmediate_operand" "vm") - (match_operand:V2SF 2 "const0_operand")))] +(define_insn "*vec_concat_0" + [(set (match_operand:V4SF_V8HF 0 "register_operand" "=v") + (vec_concat:V4SF_V8HF + (match_operand: 1 "nonimmediate_operand" "vm") + (match_operand: 2 "const0_operand")))] "TARGET_SSE2" "%vmovq\t{%1, %0|%0, %1}" [(set_attr "type" "ssemov") (set_attr "prefix" "maybe_vex") (set_attr "mode" "DF")]) +(define_insn "*vec_concatv8hf_movss" + [(set (match_operand:V8HF 0 "register_operand" "=x,v,v") + (vec_merge:V8HF + (vec_duplicate:V8HF + (match_operand:V2HF 2 "nonimmediate_operand" "x,m,v")) + (match_operand:V8HF 1 "reg_or_0_operand" "0,C,v" ) + (const_int 3)))] + "TARGET_SSE" + "@ + movss\t{%2, %0|%0, %2} + %vmovss\t{%2, %0|%0, %2} + vmovss\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,*,avx") + (set_attr "type" "ssemov") + (set_attr "prefix" "orig,maybe_vex,maybe_vex") + (set_attr "mode" "SF")]) + ;; Avoid combining registers from different units in a single alternative, ;; see comment above inline_secondary_memory_needed function in i386.cc (define_insn "vec_set_0" diff --git a/gcc/testsuite/gcc.target/i386/pr110762-v4hf.c b/gcc/testsuite/gcc.target/i386/pr110762-v4hf.c new file mode 100644 index 0000000..332784a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr110762-v4hf.c @@ -0,0 +1,57 @@ +/* PR target/110762 */ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mavx512fp16 -mavx512vl -dp" } */ + +typedef _Float16 v4hf __attribute__((vector_size(8))); +typedef _Float16 v2hf __attribute__((vector_size(4))); + +v4hf +foo (v4hf a, v4hf b) +{ + return a + b; +} + +v4hf +foo2 (v4hf a, v4hf b) +{ + return a - b; +} + +v4hf +foo3 (v4hf a, v4hf b) +{ + return a * b; +} + +v4hf +foo1 (v4hf a, v4hf b) +{ + return a / b; +} + +v2hf +foo4 (v2hf a, v2hf b) +{ + return a + b; +} + +v2hf +foo5 (v2hf a, v2hf b) +{ + return a - b; +} + +v2hf +foo6 (v2hf a, v2hf b) +{ + return a * b; +} + +v2hf +foo7 (v2hf a, v2hf b) +{ + return a / b; +} + +/* { dg-final { scan-assembler-times "\\*vec_concatv8hf_0" 7 } } */ +/* { dg-final { scan-assembler-times "\\*vec_concatv8hf_movss" 8 } } */ -- cgit v1.1