aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorZhang, Jun <jun.zhang@intel.com>2024-08-26 10:53:52 +0800
committerHaochen Jiang <haochen.jiang@intel.com>2024-08-26 13:21:52 +0800
commitf6fe2962daf7b8d8532c768c3b9eab00f99cce5b (patch)
tree86ec5df725f93fbcbaf002298816ec2248f33b6a /gcc
parent889f6dd0d8c7317f62578c900c0f662e919786a2 (diff)
downloadgcc-f6fe2962daf7b8d8532c768c3b9eab00f99cce5b.zip
gcc-f6fe2962daf7b8d8532c768c3b9eab00f99cce5b.tar.gz
gcc-f6fe2962daf7b8d8532c768c3b9eab00f99cce5b.tar.bz2
AVX10.2: Support vector copy instructions
gcc/ChangeLog: * config.gcc: Add avx10_2copyintrin.h. * config/i386/i386.md (avx10_2): New isa attribute. * config/i386/immintrin.h: Include avx10_2copyintrin.h. * config/i386/sse.md (sse_movss_<mode>): Add new constraints to handle AVX10.2. (vec_set<mode>_0): Ditto. (@vec_set<mode>_0): Ditto. (vec_set<mode>_0): Ditto. (avx512fp16_mov<mode>): Ditto. (*vec_set<mode>_0_1): New split. * config/i386/avx10_2copyintrin.h: New file. gcc/testsuite/ChangeLog: * gcc.target/i386/avx10_2-vmovd-1.c: New test. * gcc.target/i386/avx10_2-vmovd-2.c: Ditto. * gcc.target/i386/avx10_2-vmovw-1.c: Ditto. * gcc.target/i386/avx10_2-vmovw-2.c: Ditto.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config.gcc3
-rw-r--r--gcc/config/i386/avx10_2copyintrin.h38
-rw-r--r--gcc/config/i386/i386.md3
-rw-r--r--gcc/config/i386/immintrin.h2
-rw-r--r--gcc/config/i386/sse.md138
-rw-r--r--gcc/testsuite/gcc.target/i386/avx10_2-vmovd-1.c48
-rw-r--r--gcc/testsuite/gcc.target/i386/avx10_2-vmovd-2.c44
-rw-r--r--gcc/testsuite/gcc.target/i386/avx10_2-vmovw-1.c69
-rw-r--r--gcc/testsuite/gcc.target/i386/avx10_2-vmovw-2.c64
9 files changed, 356 insertions, 53 deletions
diff --git a/gcc/config.gcc b/gcc/config.gcc
index cd8a34b..e887c9c 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -457,7 +457,8 @@ i[34567]86-*-* | x86_64-*-*)
avx10_2convertintrin.h avx10_2-512convertintrin.h
avx10_2bf16intrin.h avx10_2-512bf16intrin.h
avx10_2satcvtintrin.h avx10_2-512satcvtintrin.h
- avx10_2minmaxintrin.h avx10_2-512minmaxintrin.h"
+ avx10_2minmaxintrin.h avx10_2-512minmaxintrin.h
+ avx10_2copyintrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h
diff --git a/gcc/config/i386/avx10_2copyintrin.h b/gcc/config/i386/avx10_2copyintrin.h
new file mode 100644
index 0000000..f1150c7
--- /dev/null
+++ b/gcc/config/i386/avx10_2copyintrin.h
@@ -0,0 +1,38 @@
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of GCC.
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <avx10_2copyintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AVX10_2COPYINTRIN_H_INCLUDED
+#define _AVX10_2COPYINTRIN_H_INCLUDED
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_move_epi32 (__m128i __A) /* Build a vector from 32-bit element 0 of __A.  */
+{
+ return _mm_set_epi32 (0, 0, 0, ((__v4si) __A)[0]); /* Remaining three elements are literal zeros.  */
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_move_epi16 (__m128i __A) /* Build a vector from 16-bit element 0 of __A.  */
+{
+ return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, ((__v8hi) __A)[0]); /* Remaining seven elements are literal zeros.  */
+}
+
+#endif /* _AVX10_2COPYINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 36108e5..34f9214 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -579,7 +579,7 @@
noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni,
avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert,
avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl,
- vaes_avx512vl,noapx_nf"
+ vaes_avx512vl,noapx_nf,avx10_2"
(const_string "base"))
;; The (bounding maximum) length of an instruction immediate.
@@ -976,6 +976,7 @@
(symbol_ref "TARGET_APX_NDD && Pmode == DImode")
(eq_attr "isa" "vaes_avx512vl")
(symbol_ref "TARGET_VAES && TARGET_AVX512VL")
+ (eq_attr "isa" "avx10_2") (symbol_ref "TARGET_AVX10_2_256")
(eq_attr "mmx_isa" "native")
(symbol_ref "!TARGET_MMX_WITH_SSE")
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index 0d5af15..6b8035e 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -160,4 +160,6 @@
#include <avx10_2-512minmaxintrin.h>
+#include <avx10_2copyintrin.h>
+
#endif /* _IMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index f5956f1..a6d844d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -11505,19 +11505,20 @@
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
(define_insn "sse_movss_<mode>"
- [(set (match_operand:VI4F_128 0 "register_operand" "=x,v")
+ [(set (match_operand:VI4F_128 0 "register_operand" "=x,v,v")
(vec_merge:VI4F_128
- (match_operand:VI4F_128 2 "register_operand" " x,v")
- (match_operand:VI4F_128 1 "register_operand" " 0,v")
+ (match_operand:VI4F_128 2 "register_operand" " x,v,v")
+ (match_operand:VI4F_128 1 "reg_or_0_operand" " 0,v,C")
(const_int 1)))]
"TARGET_SSE"
"@
movss\t{%2, %0|%0, %2}
- vmovss\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ vmovss\t{%2, %1, %0|%0, %1, %2}
+ vmovd\t{%2, %0|%0, %2}"
+ [(set_attr "isa" "noavx,avx,avx10_2")
(set_attr "type" "ssemov")
- (set_attr "prefix" "orig,maybe_evex")
- (set_attr "mode" "SF")])
+ (set_attr "prefix" "orig,maybe_evex,evex")
+ (set_attr "mode" "SF,SF,SI")])
(define_insn "avx2_vec_dup<mode>"
[(set (match_operand:VF1_128_256 0 "register_operand" "=v")
@@ -11687,18 +11688,19 @@
;; see comment above inline_secondary_memory_needed function in i386.cc
(define_insn "vec_set<mode>_0"
[(set (match_operand:VI4F_128 0 "nonimmediate_operand"
- "=Yr,*x,v,v,v,x,x,v,Yr ,?x ,x ,m ,m ,m")
+ "=Yr,*x,v,v,v,v,x,x,v,Yr ,?x ,x ,m ,m ,m")
(vec_merge:VI4F_128
(vec_duplicate:VI4F_128
(match_operand:<ssescalarmode> 2 "general_operand"
- " Yr,*x,v,m,r ,m,x,v,?jrjm,?jrjm,?rm,!x,?re,!*fF"))
+ " Yr,*x,v,v,m,r ,m,x,v,?jrjm,?jrjm,?rm,!x,?re,!*fF"))
(match_operand:VI4F_128 1 "nonimm_or_0_operand"
- " C , C,C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
+ " C , C,C,C,C,C ,C,0,v,0 ,0 ,x ,0 ,0 ,0")
(const_int 1)))]
"TARGET_SSE"
"@
insertps\t{$0xe, %2, %0|%0, %2, 0xe}
insertps\t{$0xe, %2, %0|%0, %2, 0xe}
+ vmovd\t{%2, %0|%0, %2}
vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
%vmov<ssescalarmodesuffix>\t{%2, %0|%0, %2}
%vmovd\t{%2, %0|%0, %2}
@@ -11712,22 +11714,24 @@
#
#"
[(set (attr "isa")
- (cond [(eq_attr "alternative" "0,1,8,9")
+ (cond [(eq_attr "alternative" "0,1,9,10")
(const_string "sse4_noavx")
- (eq_attr "alternative" "2,7,10")
+ (eq_attr "alternative" "2")
+ (const_string "avx10_2")
+ (eq_attr "alternative" "3,8,11")
(const_string "avx")
- (eq_attr "alternative" "3,4")
+ (eq_attr "alternative" "4,5")
(const_string "sse2")
- (eq_attr "alternative" "5,6")
+ (eq_attr "alternative" "6,7")
(const_string "noavx")
]
(const_string "*")))
(set (attr "type")
- (cond [(eq_attr "alternative" "0,1,2,8,9,10")
+ (cond [(eq_attr "alternative" "0,1,3,9,10,11")
(const_string "sselog")
- (eq_attr "alternative" "12")
- (const_string "imov")
(eq_attr "alternative" "13")
+ (const_string "imov")
+ (eq_attr "alternative" "14")
(const_string "fmov")
]
(const_string "ssemov")))
@@ -11736,45 +11740,46 @@
(const_string "gpr16")
(const_string "*")))
(set (attr "prefix_extra")
- (if_then_else (eq_attr "alternative" "8,9,10")
+ (if_then_else (eq_attr "alternative" "9,10,11")
(const_string "1")
(const_string "*")))
(set (attr "length_immediate")
- (if_then_else (eq_attr "alternative" "8,9,10")
+ (if_then_else (eq_attr "alternative" "9,10,11")
(const_string "1")
(const_string "*")))
(set (attr "prefix")
- (cond [(eq_attr "alternative" "0,1,5,6,8,9")
+ (cond [(eq_attr "alternative" "0,1,6,7,9,10")
(const_string "orig")
- (eq_attr "alternative" "2")
+ (eq_attr "alternative" "2,3")
(const_string "maybe_evex")
- (eq_attr "alternative" "3,4")
+ (eq_attr "alternative" "4,5")
(const_string "maybe_vex")
- (eq_attr "alternative" "7,10")
+ (eq_attr "alternative" "8,11")
(const_string "vex")
]
(const_string "*")))
- (set_attr "mode" "SF,SF,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")
+ (set_attr "mode" "SF,SF,SI,SF,<ssescalarmode>,SI,SF,SF,SF,TI,TI,TI,*,*,*")
(set (attr "preferred_for_speed")
- (cond [(eq_attr "alternative" "4")
+ (cond [(eq_attr "alternative" "5")
(symbol_ref "TARGET_INTER_UNIT_MOVES_TO_VEC")
]
(symbol_ref "true")))])
(define_insn "@vec_set<mode>_0"
[(set (match_operand:V8_128 0 "register_operand"
- "=v,v,v,x,x,Yr,*x,x,x,x,v,v")
+ "=v,v,v,v,x,x,Yr,*x,x,x,x,v,v")
(vec_merge:V8_128
(vec_duplicate:V8_128
(match_operand:<ssescalarmode> 2 "nonimmediate_operand"
- " r,m,v,r,m,Yr,*x,r,m,x,r,m"))
+ " r,m,v,v,r,m,Yr,*x,r,m,x,r,m"))
(match_operand:V8_128 1 "reg_or_0_operand"
- " C,C,v,0,0,0 ,0 ,x,x,x,v,v")
+ " C,C,C,v,0,0,0 ,0 ,x,x,x,v,v")
(const_int 1)))]
"TARGET_SSE2"
"@
vmovw\t{%k2, %0|%0, %k2}
vmovw\t{%2, %0|%0, %2}
+ vmovw\t{%2, %0|%0, %2}
vmovsh\t{%2, %1, %0|%0, %1, %2}
pinsrw\t{$0, %k2, %0|%0, %k2, 0}
pinsrw\t{$0, %2, %0|%0, %2, 0}
@@ -11786,65 +11791,92 @@
vpinsrw\t{$0, %k2, %1, %0|%0, %1, %k2, 0}
vpinsrw\t{$0, %2, %1, %0|%0, %1, %2, 0}"
[(set (attr "isa")
- (cond [(eq_attr "alternative" "0,1,2")
+ (cond [(eq_attr "alternative" "0,1,3")
(const_string "avx512fp16")
- (eq_attr "alternative" "3,4")
+ (eq_attr "alternative" "2")
+ (const_string "avx10_2")
+ (eq_attr "alternative" "4,5")
(const_string "noavx")
- (eq_attr "alternative" "5,6")
+ (eq_attr "alternative" "6,7")
(const_string "sse4_noavx")
- (eq_attr "alternative" "7,8,9")
+ (eq_attr "alternative" "8,9,10")
(const_string "avx")
- (eq_attr "alternative" "10,11")
+ (eq_attr "alternative" "11,12")
(const_string "avx512bw")
]
(const_string "*")))
(set (attr "type")
- (if_then_else (eq_attr "alternative" "0,1,2,5,6,9")
+ (if_then_else (eq_attr "alternative" "0,1,2,3,6,7,10")
(const_string "ssemov")
(const_string "sselog")))
(set (attr "prefix_data16")
- (if_then_else (eq_attr "alternative" "3,4")
+ (if_then_else (eq_attr "alternative" "4,5")
(const_string "1")
(const_string "*")))
(set (attr "prefix_extra")
- (if_then_else (eq_attr "alternative" "5,6,9")
+ (if_then_else (eq_attr "alternative" "6,7,10")
(const_string "1")
(const_string "*")))
(set (attr "length_immediate")
- (if_then_else (eq_attr "alternative" "0,1,2")
+ (if_then_else (eq_attr "alternative" "0,1,2,3")
(const_string "*")
(const_string "1")))
(set (attr "prefix")
- (cond [(eq_attr "alternative" "0,1,2,10,11")
+ (cond [(eq_attr "alternative" "0,1,2,3,11,12")
(const_string "evex")
- (eq_attr "alternative" "7,8,9")
+ (eq_attr "alternative" "8,9,10")
(const_string "vex")
]
(const_string "orig")))
(set (attr "mode")
- (if_then_else (eq_attr "alternative" "0,1,2")
+ (if_then_else (eq_attr "alternative" "0,1,2,3")
(const_string "HF")
(const_string "TI")))
(set (attr "enabled")
(cond [(and (not (match_test "<MODE>mode == V8HFmode || <MODE>mode == V8BFmode"))
- (eq_attr "alternative" "2"))
+ (eq_attr "alternative" "3"))
(symbol_ref "false")
]
(const_string "*")))])
+(define_insn_and_split "*vec_set<mode>_0_1" ;; Pre-reload split: element 0 selected from vector operand 2 is re-expressed as its scalar low part.
+ [(set (match_operand:V8_128 0 "register_operand")
+ (vec_merge:V8_128
+ (vec_duplicate:V8_128
+ (vec_select:<ssescalarmode>
+ (match_operand:V8_128 2 "nonimmediate_operand")
+ (parallel [(const_int 0)])))
+ (match_operand:V8_128 1 "reg_or_0_operand")
+ (const_int 1)))]
+ "TARGET_SSE2 && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0)
+ (vec_merge:V8_128
+ (vec_duplicate:V8_128 (match_dup 2))
+ (match_dup 1)
+ (const_int 1)))] ;; NOTE(review): the preparation code calls force_reg only when operand 2 already satisfies register_operand; a memory operand goes straight to gen_lowpart -- confirm this is intended.
+{
+ if (register_operand (operands[2], <MODE>mode))
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ operands[2] = gen_lowpart (<ssescalarmode>mode, operands[2]);
+})
+
;; vmovw clears also the higer bits
(define_insn "vec_set<mode>_0"
- [(set (match_operand:VI2F_256_512 0 "register_operand" "=v,v")
+ [(set (match_operand:VI2F_256_512 0 "register_operand" "=v,v,v")
(vec_merge:VI2F_256_512
(vec_duplicate:VI2F_256_512
- (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m"))
+ (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "r,m,v"))
(match_operand:VI2F_256_512 1 "const0_operand")
(const_int 1)))]
"TARGET_AVX512FP16"
"@
vmovw\t{%k2, %x0|%x0, %k2}
+ vmovw\t{%2, %x0|%x0, %2}
vmovw\t{%2, %x0|%x0, %2}"
- [(set_attr "type" "ssemov")
+ [(set_attr "isa" "*,*,avx10_2")
+ (set_attr "type" "ssemov")
(set_attr "prefix" "evex")
(set_attr "mode" "HF")])
@@ -11889,16 +11921,20 @@
})
(define_insn "avx512fp16_mov<mode>"
- [(set (match_operand:V8_128 0 "register_operand" "=v")
+ [(set (match_operand:V8_128 0 "register_operand" "=v,v")
(vec_merge:V8_128
- (match_operand:V8_128 2 "register_operand" "v")
- (match_operand:V8_128 1 "register_operand" "v")
+ (match_operand:V8_128 2 "register_operand" "v,v")
+ (match_operand:V8_128 1 "reg_or_0_operand" "v,C")
(const_int 1)))]
- "TARGET_AVX512FP16"
- "vmovsh\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemov")
+ "TARGET_AVX512FP16
+ || (TARGET_AVX10_2_256 && const0_operand (operands[1], <MODE>mode))" ;; The AVX10.2-only path requires operand 1 to be zero.
+ "@
+ vmovsh\t{%2, %1, %0|%0, %1, %2}
+ vmovw\t{%2, %0|%0, %2}"
+ [(set_attr "isa" "*,avx10_2") ;; Templates above are {att|intel}; the Intel half lists the destination (%0) first.
+ (set_attr "type" "ssemov")
(set_attr "prefix" "evex")
- (set_attr "mode" "HF")])
+ (set_attr "mode" "HF,HI")])
;; A subset is vec_setv4sf.
(define_insn "*vec_setv4sf_sse4_1"
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-1.c
new file mode 100644
index 0000000..275bbad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-1.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-final { scan-assembler-times "vmovd\t4\\(%esp\\), %xmm0" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vmovss\t4\\(%esp\\), %xmm0" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vmovd\t%xmm0, %xmm0" 3 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vmovd\t%edi, %xmm0" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vmovd\t%xmm0, %xmm0" 4 { target { ! ia32 } } } } */
+
+
+#include<immintrin.h>
+
+typedef int v4si __attribute__((vector_size(16)));
+typedef float v4sf __attribute__((vector_size(16)));
+
+v4si
+__attribute__((noipa, unused))
+f1 (int a)
+{
+ return __extension__(v4si){a, 0, 0, 0};
+}
+
+v4sf
+__attribute__((noipa, unused))
+f2 (float a)
+{
+ return __extension__(v4sf){a, 0, 0, 0};
+}
+
+v4si
+__attribute__((noipa, unused))
+f3 (v4si a)
+{
+ return __extension__(v4si){a[0], 0, 0, 0};
+}
+
+v4sf
+__attribute__((noipa, unused))
+f4 (v4sf a)
+{
+ return __extension__(v4sf){a[0], 0, 0, 0};
+}
+
+__m128i
+__attribute__((noipa, unused))
+f5 (__m128i a)
+{
+ return _mm_set_epi32 (0, 0, 0,((__v4si)a)[0]);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-2.c
new file mode 100644
index 0000000..7d659300
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vmovd-2.c
@@ -0,0 +1,44 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+
+#include "avx10-helper.h"
+#include "avx10_2-vmovd-1.c"
+
+static void
+TEST (void)
+{
+ union128i_d u1, s1;
+ int e1[4] = {0};
+
+ s1.x = _mm_set_epi32(-12876, -12886, -12776, 3376590);
+ e1[0] = s1.a[0];
+
+ u1.x = _mm_set_epi32(-1, -1, -1, -1);
+ u1.x = (__m128i)f1((int)s1.a[0]);
+ if (check_union128i_d (u1, e1))
+ abort ();
+
+ u1.x = _mm_set_epi32(-1, -1, -1, -1);
+ u1.x = (__m128i)f2(((float*)s1.a)[0]);
+ if (check_union128i_d (u1, e1))
+ abort ();
+
+ u1.x = _mm_set_epi32(-1, -1, -1, -1);
+ u1.x = (__m128i)f3((v4si)s1.x);
+ if (check_union128i_d (u1, e1))
+ abort ();
+
+ u1.x = _mm_set_epi32(-1, -1, -1, -1);
+ u1.x = (__m128i)f4((v4sf)s1.x);
+ if (check_union128i_d (u1, e1))
+ abort ();
+
+ u1.x = _mm_set_epi32(-1, -1, -1, -1);
+ u1.x = (__m128i)f5((__m128i)s1.x);
+ if (check_union128i_d (u1, e1))
+ abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-1.c b/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-1.c
new file mode 100644
index 0000000..ec19a9a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-1.c
@@ -0,0 +1,69 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-final { scan-assembler-times "vmovw\t4\\(%esp\\), %xmm0" 3 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vmovw\t8\\(%ebp\\), %xmm0" 1 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vmovw\t%xmm0, %xmm0" 4 { target ia32 } } } */
+/* { dg-final { scan-assembler-times "vmovw\t%edi, %xmm0" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vmovw\t%xmm0, %xmm0" 7 { target { ! ia32 } } } } */
+
+#include<immintrin.h>
+
+typedef _Float16 v8hf __attribute__((vector_size(16)));
+typedef __bf16 v8bf __attribute__((vector_size(16)));
+typedef short v8hi __attribute__((vector_size(16)));
+
+v8hf
+__attribute__((noipa, unused))
+f1 (_Float16 a)
+{
+ return __extension__(v8hf){a, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v8bf
+__attribute__((noipa, unused))
+f2 (__bf16 a)
+{
+ return __extension__(v8bf){a, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v8hi
+__attribute__((noipa, unused))
+f3 (short a)
+{
+ return __extension__(v8hi){a, 0, 0, 0, 0, 0, 0, 0};
+}
+
+v8hf
+__attribute__((noipa, unused))
+f4 (v8hf a)
+{
+ return __extension__(v8hf){a[0], 0, 0, 0, 0, 0, 0, 0};
+}
+
+v8bf
+__attribute__((noipa, unused))
+f5 (v8bf a)
+{
+ return __extension__(v8bf){a[0], 0, 0, 0, 0, 0, 0, 0};
+}
+
+v8hi
+__attribute__((noipa, unused))
+f6 (v8hi a)
+{
+ return __extension__(v8hi){a[0], 0, 0, 0, 0, 0, 0, 0};
+}
+
+__m128i
+__attribute__((noipa, unused))
+f7 (__m128i a)
+{
+ return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, ((__v8hi)a)[0]);
+}
+
+__m256h
+__attribute__((noipa, unused))
+f8 (_Float16 a)
+{
+ return _mm256_set_ph (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, a);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-2.c b/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-2.c
new file mode 100644
index 0000000..d63739e68
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx10_2-vmovw-2.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx10.2" } */
+/* { dg-require-effective-target avx10_2 } */
+
+#define AVX10_2
+#define AVX10_SCALAR
+
+#include "avx10-helper.h"
+#include "avx10_2-vmovw-1.c"
+
+static void
+TEST (void)
+{
+ union128i_w u1, s1;
+ union256i_w u2, s2;
+ short e1[8] = {0};
+ short e2[16] = {0};
+
+ s1.x = _mm_set_epi16(-12876, -12886, -12776, -22876, -22886, -22776, -32766, 30158);
+ e1[0] = s1.a[0];
+
+ u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1);
+ u1.x = (__m128i)f1(((_Float16*)s1.a)[0]);
+ if (check_union128i_w (u1, e1))
+ abort ();
+
+ u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1);
+ u1.x = (__m128i)f2(((__bf16*)s1.a)[0]);
+ if (check_union128i_w (u1, e1))
+ abort ();
+
+ u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1);
+ u1.x = (__m128i)f3((short)s1.a[0]);
+ if (check_union128i_w (u1, e1))
+ abort ();
+
+ u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1);
+ u1.x = (__m128i)f4((v8hf)s1.x);
+ if (check_union128i_w (u1, e1))
+ abort ();
+
+ u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1);
+ u1.x = (__m128i)f5((v8bf)s1.x);
+ if (check_union128i_w (u1, e1))
+ abort ();
+
+ u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1);
+ u1.x = (__m128i)f6((v8hi)s1.x);
+ if (check_union128i_w (u1, e1))
+ abort ();
+
+ u1.x = _mm_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1);
+ u1.x = (__m128i)f7((__m128i)s1.x);
+ if (check_union128i_w (u1, e1))
+ abort ();
+
+ s2.x = _mm256_set_epi16(-12876, -12886, -12776, -22876, -22886, -22776, -32766, 30158,
+ -12876, -12886, -12776, -22876, -22886, -22776, -32766, 30158);
+ e2[0] = s2.a[0];
+ u2.x = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
+ u2.x = (__m256i)f8(((_Float16*)s2.a)[0]);
+ if (check_union256i_w (u2, e2))
+ abort ();
+}