From 831017d5e72173f2c58e5475b7fcd35ee07a601f Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Fri, 4 Aug 2023 15:35:54 +0800
Subject: i386: Clear upper bits of XMM register for V4HFmode/V2HFmode
 operations [PR110762]

Similar to r14-2786-gade30fad6669e5, this patch is for V4HFmode/V2HFmode.

gcc/ChangeLog:

	PR target/110762
	* config/i386/mmx.md (<insn><mode>3): Changed from define_insn
	to define_expand and break into ..
	(<insn>v4hf3): .. this.
	(divv4hf3): .. this.
	(<insn>v2hf3): .. this.
	(divv2hf3): .. this.
	(movd_v2hf_to_sse): New define_expand.
	(movq_<mode>_to_sse): Extend to V4HFmode.
	(mmxdoublevecmode): Ditto.
	(V2FI_V4HF): New mode iterator.
	* config/i386/sse.md (*vec_concatv4sf): Extend to handle V8HF
	by using mode iterator V4SF_V8HF, renamed to ..
	(*vec_concat<mode>): .. this.
	(*vec_concatv4sf_0): Extend to handle V8HF by using mode
	iterator V4SF_V8HF, renamed to ..
	(*vec_concat<mode>_0): .. this.
	(*vec_concatv8hf_movss): New define_insn.
	(V4SF_V8HF): New mode iterator.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr110762-v4hf.c: New test.
---
 gcc/config/i386/mmx.md                        | 109 +++++++++++++++++++++-----
 gcc/config/i386/sse.md                        |  40 +++++++---
 gcc/testsuite/gcc.target/i386/pr110762-v4hf.c |  57 ++++++++++++++
 3 files changed, 177 insertions(+), 29 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr110762-v4hf.c
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index f80dd6f..b49554e 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -79,9 +79,7 @@
 ;; V2S* modes
 (define_mode_iterator V2FI [V2SF V2SI])
 
-;; 4-byte and 8-byte float16 vector modes
-(define_mode_iterator VHF_32_64 [V4HF V2HF])
-
+(define_mode_iterator V2FI_V4HF [V2SF V2SI V4HF]) ; V2FI modes plus V4HF
 ;; Mapping from integer vector mode to mnemonic suffix
 (define_mode_attr mmxvecsize
   [(V8QI "b") (V4QI "b") (V2QI "b")
@@ -108,7 +106,7 @@
 
 ;; Mapping of vector modes to a vector mode of double size
 (define_mode_attr mmxdoublevecmode
-  [(V2SF "V4SF") (V2SI "V4SI")])
+  [(V2SF "V4SF") (V2SI "V4SI") (V4HF "V8HF")])
 
 ;; Mapping of vector modes back to the scalar modes
 (define_mode_attr mmxscalarmode
@@ -594,7 +592,7 @@
 (define_expand "movq_<mode>_to_sse"
   [(set (match_operand:<mmxdoublevecmode> 0 "register_operand")
 	(vec_concat:<mmxdoublevecmode>
-	  (match_operand:V2FI 1 "nonimmediate_operand")
+	  (match_operand:V2FI_V4HF 1 "nonimmediate_operand") ; low half, zero above
 	  (match_dup 2)))]
   "TARGET_SSE2"
   "operands[2] = CONST0_RTX (<MODE>mode);")
@@ -1927,21 +1925,94 @@
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(define_insn "<insn><mode>3"
-  [(set (match_operand:VHF_32_64 0 "register_operand" "=v")
-	(plusminusmultdiv:VHF_32_64
-	  (match_operand:VHF_32_64 1 "register_operand" "<comm>v")
-	  (match_operand:VHF_32_64 2 "register_operand" "v")))]
+(define_expand "<insn>v4hf3" ; V4HF plusminusmult done as a V8HF operation
+  [(set (match_operand:V4HF 0 "register_operand")
+	(plusminusmult:V4HF
+	  (match_operand:V4HF 1 "nonimmediate_operand")
+	  (match_operand:V4HF 2 "nonimmediate_operand")))]
   "TARGET_AVX512FP16 && TARGET_AVX512VL"
-  "v<insn>ph\t{%2, %1, %0|%0, %1, %2}"
-  [(set (attr "type")
-      (cond [(match_test "<CODE> == MULT")
-		(const_string "ssemul")
-	     (match_test "<CODE> == DIV")
-		(const_string "ssediv")]
-	     (const_string "sseadd")))
-   (set_attr "prefix" "evex")
-   (set_attr "mode" "V8HF")])
+{
+  rtx op2 = gen_reg_rtx (V8HFmode);
+  rtx op1 = gen_reg_rtx (V8HFmode);
+  rtx op0 = gen_reg_rtx (V8HFmode);
+  /* Widen both inputs to V8HF; movq_v4hf_to_sse zeroes the upper half.  */
+  emit_insn (gen_movq_v4hf_to_sse (op2, operands[2]));
+  emit_insn (gen_movq_v4hf_to_sse (op1, operands[1]));
+  /* Operate on all eight lanes; the four padding lanes see zero inputs.  */
+  emit_insn (gen_<insn>v8hf3 (op0, op1, op2));
+  /* The V4HF result is the low half of the V8HF result.  */
+  emit_move_insn (operands[0], lowpart_subreg (V4HFmode, op0, V8HFmode));
+  DONE;
+})
+
+(define_expand "divv4hf3" ; V4HF division done as a V8HF division
+  [(set (match_operand:V4HF 0 "register_operand")
+	(div:V4HF
+	  (match_operand:V4HF 1 "nonimmediate_operand")
+	  (match_operand:V4HF 2 "register_operand")))] ; fed to vec_concat as is
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+{
+  rtx op2 = gen_reg_rtx (V8HFmode);
+  rtx op1 = gen_reg_rtx (V8HFmode);
+  rtx op0 = gen_reg_rtx (V8HFmode);
+  /* Pad dividend with 0.0 and divisor with 1.0: padding lanes compute 0/1.  */
+  emit_insn (gen_movq_v4hf_to_sse (op1, operands[1]));
+  rtx tmp = gen_rtx_VEC_CONCAT (V8HFmode, operands[2],
+				force_reg (V4HFmode, CONST1_RTX (V4HFmode)));
+  emit_insn (gen_rtx_SET (op2, tmp));
+  emit_insn (gen_divv8hf3 (op0, op1, op2));
+  emit_move_insn (operands[0], lowpart_subreg (V4HFmode, op0, V8HFmode));
+  DONE;
+})
+
+(define_expand "movd_v2hf_to_sse" ; put V2HF operand 1 in the low 32 bits
+  [(set (match_operand:V8HF 0 "register_operand")
+	(vec_merge:V8HF
+	  (vec_duplicate:V8HF
+	    (match_operand:V2HF 1 "nonimmediate_operand"))
+	  (match_operand:V8HF 2 "reg_or_0_operand") ; supplies elements 2-7
+	  (const_int 3)))] ; mask 0b11: elements 0-1 come from operand 1
+  "TARGET_SSE")
+
+(define_expand "<insn>v2hf3" ; V2HF plusminusmult done as a V8HF operation
+  [(set (match_operand:V2HF 0 "register_operand")
+	(plusminusmult:V2HF
+	  (match_operand:V2HF 1 "nonimmediate_operand")
+	  (match_operand:V2HF 2 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+{
+  rtx op2 = gen_reg_rtx (V8HFmode);
+  rtx op1 = gen_reg_rtx (V8HFmode);
+  rtx op0 = gen_reg_rtx (V8HFmode);
+  /* Widen both inputs to V8HF; elements 2-7 are taken from a zero vector.  */
+  emit_insn (gen_movd_v2hf_to_sse (op2, operands[2], CONST0_RTX (V8HFmode)));
+  emit_insn (gen_movd_v2hf_to_sse (op1, operands[1], CONST0_RTX (V8HFmode)));
+  emit_insn (gen_<insn>v8hf3 (op0, op1, op2));
+  /* The V2HF result is the low 32 bits of the V8HF result.  */
+  emit_move_insn (operands[0], lowpart_subreg (V2HFmode, op0, V8HFmode));
+  DONE;
+})
+
+(define_expand "divv2hf3" ; V2HF division done as a V8HF division
+  [(set (match_operand:V2HF 0 "register_operand")
+	(div:V2HF
+	  (match_operand:V2HF 1 "nonimmediate_operand")
+	  (match_operand:V2HF 2 "nonimmediate_operand")))]
+  "TARGET_AVX512FP16 && TARGET_AVX512VL"
+{
+  rtx op2 = gen_reg_rtx (V8HFmode);
+  rtx op1 = gen_reg_rtx (V8HFmode);
+  rtx op0 = gen_reg_rtx (V8HFmode);
+  /* Divisor padding is 1.0, dividend padding is 0.0: those lanes are 0/1.  */
+  emit_insn (gen_movd_v2hf_to_sse (op2, operands[2],
+				  force_reg (V8HFmode, CONST1_RTX (V8HFmode))));
+  emit_insn (gen_movd_v2hf_to_sse (op1, operands[1], CONST0_RTX (V8HFmode)));
+  emit_insn (gen_divv8hf3 (op0, op1, op2));
+  /* The V2HF result is the low 32 bits of the V8HF result.  */
+  emit_move_insn (operands[0], lowpart_subreg (V2HFmode, op0, V8HFmode));
+  DONE;
+})
+
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index b076140..48378be 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -430,6 +430,9 @@
 (define_mode_iterator VFB_512
   [V32HF V16SF V8DF])
 
+(define_mode_iterator V4SF_V8HF
+  [V4SF V8HF]) ; result modes of the *vec_concat patterns that iterate on it
+
 (define_mode_iterator VI48_AVX512VL
   [V16SI (V8SI  "TARGET_AVX512VL") (V4SI  "TARGET_AVX512VL")
    V8DI  (V4DI  "TARGET_AVX512VL") (V2DI  "TARGET_AVX512VL")])
@@ -10917,11 +10920,11 @@
    (set_attr "type" "sselog,ssemov,mmxcvt,mmxmov")
    (set_attr "mode" "V4SF,SF,DI,DI")])
 
-(define_insn "*vec_concatv4sf"
-  [(set (match_operand:V4SF 0 "register_operand"       "=x,v,x,v")
-	(vec_concat:V4SF
-	  (match_operand:V2SF 1 "register_operand"     " 0,v,0,v")
-	  (match_operand:V2SF 2 "nonimmediate_operand" " x,v,m,m")))]
+(define_insn "*vec_concat<mode>"
+  [(set (match_operand:V4SF_V8HF 0 "register_operand"       "=x,v,x,v")
+	(vec_concat:V4SF_V8HF
+	  (match_operand:<ssehalfvecmode> 1 "register_operand"     " 0,v,0,v")
+	  (match_operand:<ssehalfvecmode> 2 "nonimmediate_operand" " x,v,m,m")))]
   "TARGET_SSE"
   "@
    movlhps\t{%2, %0|%0, %2}
@@ -10933,17 +10936,34 @@
    (set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
    (set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
 
-(define_insn "*vec_concatv4sf_0"
-  [(set (match_operand:V4SF 0 "register_operand"       "=v")
-	(vec_concat:V4SF
-	  (match_operand:V2SF 1 "nonimmediate_operand" "vm")
-	  (match_operand:V2SF 2 "const0_operand")))]
+(define_insn "*vec_concat<mode>_0" ; operand 1 in the low half, zero above
+  [(set (match_operand:V4SF_V8HF 0 "register_operand"       "=v")
+	(vec_concat:V4SF_V8HF
+	  (match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "vm")
+	  (match_operand:<ssehalfvecmode> 2 "const0_operand")))]
   "TARGET_SSE2"
   "%vmovq\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "DF")])
 
+(define_insn "*vec_concatv8hf_movss" ; V2HF into the low 32 bits of a V8HF
+  [(set (match_operand:V8HF 0 "register_operand"       "=x,v,v")
+	(vec_merge:V8HF
+	  (vec_duplicate:V8HF
+	    (match_operand:V2HF 2 "nonimmediate_operand" "x,m,v"))
+	  (match_operand:V8HF 1 "reg_or_0_operand"	 "0,C,v" )
+	  (const_int 3)))] ; mask 0b11: elements 0-1 come from operand 2
+  "TARGET_SSE"
+  "@
+   movss\t{%2, %0|%0, %2}
+   %vmovss\t{%2, %0|%0, %2}
+   vmovss\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "noavx,*,avx") ; alt 0: non-AVX, 1: memory source, 2: AVX
+   (set_attr "type" "ssemov")
+   (set_attr "prefix" "orig,maybe_vex,maybe_vex")
+   (set_attr "mode" "SF")])
+
 ;; Avoid combining registers from different units in a single alternative,
 ;; see comment above inline_secondary_memory_needed function in i386.cc
 (define_insn "vec_set<mode>_0"
diff --git a/gcc/testsuite/gcc.target/i386/pr110762-v4hf.c b/gcc/testsuite/gcc.target/i386/pr110762-v4hf.c
new file mode 100644
index 0000000..332784a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110762-v4hf.c
@@ -0,0 +1,57 @@
+/* PR target/110762 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -dp" } */
+/* -dp annotates the asm with insn pattern names; the scans below count them.  */
+typedef _Float16 v4hf __attribute__((vector_size(8)));
+typedef _Float16 v2hf __attribute__((vector_size(4)));
+
+v4hf
+foo (v4hf a, v4hf b)
+{
+  return a + b;
+}
+
+v4hf
+foo2 (v4hf a, v4hf b)
+{
+  return a - b;
+}
+
+v4hf
+foo3 (v4hf a, v4hf b)
+{
+  return a * b;
+}
+
+v4hf
+foo1 (v4hf a, v4hf b)
+{
+  return a / b;
+}
+
+v2hf
+foo4 (v2hf a, v2hf b)
+{
+  return a + b;
+}
+
+v2hf
+foo5 (v2hf a, v2hf b)
+{
+  return a - b;
+}
+
+v2hf
+foo6 (v2hf a, v2hf b)
+{
+  return a * b;
+}
+
+v2hf
+foo7 (v2hf a, v2hf b)
+{
+  return a / b;
+}
+/* Expected: 7 = 2 per V4HF add/sub/mul + 1 for div; 8 = 2 per V2HF function.  */
+/* { dg-final { scan-assembler-times "\\*vec_concatv8hf_0" 7 } } */
+/* { dg-final { scan-assembler-times "\\*vec_concatv8hf_movss" 8 } } */
-- 
cgit v1.1