aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorHongyu Wang <hongyu.wang@intel.com>2021-07-01 13:17:32 +0800
committerliuhongt <hongtao.liu@intel.com>2021-09-23 17:05:02 +0800
commit60698a19c77aef8c96d14238975e12fad653e7fb (patch)
tree03554bebd20e7ef196af0681af3c9ff9ea27166f /gcc
parentfccd8855ed4ed9d1cc0e55fc1758b3288468657e (diff)
downloadgcc-60698a19c77aef8c96d14238975e12fad653e7fb.zip
gcc-60698a19c77aef8c96d14238975e12fad653e7fb.tar.gz
gcc-60698a19c77aef8c96d14238975e12fad653e7fb.tar.bz2
AVX512FP16: Add fix(uns)?_truncmn2 for HF scalar and vector modes
NB: 64bit/32bit vectorize for HFmode is not supported for now, will adjust this patch when V2HF/V4HF operations supported. gcc/ChangeLog: * config/i386/i386.md (fix<fixunssuffix>_trunchf<mode>2): New expander. (fixuns_trunchfhi2): Likewise. (*fixuns_trunchfsi2zext): New define_insn. * config/i386/sse.md (ssePHmodelower): New mode_attr. (fix<fixunssuffix>_trunc<ssePHmodelower><mode>2): New expander for same element vector fix_truncate. (fix<fixunssuffix>_trunc<ssePHmodelower><mode>2): Likewise for V4HF to V4SI/V4DI fix_truncate. (fix<fixunssuffix>_truncv2hfv2di2): Likeise for V2HF to V2DI fix_truncate. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512fp16-trunchf.c: New test. * gcc.target/i386/avx512fp16-truncvnhf.c: Ditto.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/i386/i386.md29
-rw-r--r--gcc/config/i386/sse.md43
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512fp16-trunchf.c59
-rw-r--r--gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c61
4 files changed, 192 insertions, 0 deletions
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index a087e55..c6279e6 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -4810,6 +4810,16 @@
}
})
+(define_insn "fix<fixunssuffix>_trunchf<mode>2"
+ [(set (match_operand:SWI48 0 "register_operand" "=r")
+ (any_fix:SWI48
+ (match_operand:HF 1 "nonimmediate_operand" "vm")))]
+ "TARGET_AVX512FP16"
+ "vcvttsh2<fixsuffix>si\t{%1, %0|%0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODE>")])
+
;; Signed conversion to SImode.
(define_expand "fix_truncxfsi2"
@@ -4917,6 +4927,17 @@
(set_attr "prefix" "evex")
(set_attr "mode" "SI")])
+(define_insn "*fixuns_trunchfsi2zext"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (unsigned_fix:SI
+ (match_operand:HF 1 "nonimmediate_operand" "vm"))))]
+ "TARGET_64BIT && TARGET_AVX512FP16"
+ "vcvttsh2usi\t{%1, %k0|%k0, %1}"
+ [(set_attr "type" "sseicvt")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "SI")])
+
(define_insn "*fixuns_trunc<mode>si2_avx512f_zext"
[(set (match_operand:DI 0 "register_operand" "=r")
(zero_extend:DI
@@ -4949,6 +4970,14 @@
;; Without these patterns, we'll try the unsigned SI conversion which
;; is complex for SSE, rather than the signed SI conversion, which isn't.
+(define_expand "fixuns_trunchfhi2"
+ [(set (match_dup 2)
+ (fix:SI (match_operand:HF 1 "nonimmediate_operand")))
+ (set (match_operand:HI 0 "nonimmediate_operand")
+ (subreg:HI (match_dup 2) 0))]
+ "TARGET_AVX512FP16"
+ "operands[2] = gen_reg_rtx (SImode);")
+
(define_expand "fixuns_trunc<mode>hi2"
[(set (match_dup 2)
(fix:SI (match_operand:MODEF 1 "nonimmediate_operand")))
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 1ca9598..f8a5f19 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1034,6 +1034,13 @@
(V8DI "V8HF") (V4DI "V8HF") (V2DI "V8HF")
(V8DF "V8HF") (V16SF "V16HF") (V8SF "V8HF")])
+;; Mapping of vector modes to vector hf modes of same element.
+(define_mode_attr ssePHmodelower
+ [(V32HI "v32hf") (V16HI "v16hf") (V8HI "v8hf")
+ (V16SI "v16hf") (V8SI "v8hf") (V4SI "v4hf")
+ (V8DI "v8hf") (V4DI "v4hf") (V2DI "v2hf")
+ (V8DF "v8hf") (V16SF "v16hf") (V8SF "v8hf")])
+
;; Mapping of vector modes to packed single mode of the same size
(define_mode_attr ssePSmode
[(V16SI "V16SF") (V8DF "V16SF")
@@ -6175,6 +6182,12 @@
(set_attr "prefix" "evex")
(set_attr "mode" "HF")])
+(define_expand "fix<fixunssuffix>_trunc<ssePHmodelower><mode>2"
+ [(set (match_operand:VI2H_AVX512VL 0 "register_operand")
+ (any_fix:VI2H_AVX512VL
+ (match_operand:<ssePHmode> 1 "nonimmediate_operand")))]
+ "TARGET_AVX512FP16")
+
(define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2<mask_name><round_saeonly_name>"
[(set (match_operand:VI2H_AVX512VL 0 "register_operand" "=v")
(any_fix:VI2H_AVX512VL
@@ -6185,6 +6198,21 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_expand "fix<fixunssuffix>_truncv4hf<mode>2"
+ [(set (match_operand:VI4_128_8_256 0 "register_operand")
+ (any_fix:VI4_128_8_256
+ (match_operand:V4HF 1 "nonimmediate_operand")))]
+ "TARGET_AVX512FP16 && TARGET_AVX512VL"
+{
+ if (!MEM_P (operands[1]))
+ {
+ operands[1] = lowpart_subreg (V8HFmode, operands[1], V4HFmode);
+ emit_insn (gen_avx512fp16_fix<fixunssuffix>_trunc<mode>2 (operands[0],
+ operands[1]));
+ DONE;
+ }
+})
+
(define_insn "avx512fp16_fix<fixunssuffix>_trunc<mode>2<mask_name>"
[(set (match_operand:VI4_128_8_256 0 "register_operand" "=v")
(any_fix:VI4_128_8_256
@@ -6207,6 +6235,21 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
+(define_expand "fix<fixunssuffix>_truncv2hfv2di2"
+ [(set (match_operand:V2DI 0 "register_operand")
+ (any_fix:V2DI
+ (match_operand:V2HF 1 "nonimmediate_operand")))]
+ "TARGET_AVX512FP16 && TARGET_AVX512VL"
+{
+ if (!MEM_P (operands[1]))
+ {
+ operands[1] = lowpart_subreg (V8HFmode, operands[1], V2HFmode);
+ emit_insn (gen_avx512fp16_fix<fixunssuffix>_truncv2di2 (operands[0],
+ operands[1]));
+ DONE;
+ }
+})
+
(define_insn "avx512fp16_fix<fixunssuffix>_truncv2di2<mask_name>"
[(set (match_operand:V2DI 0 "register_operand" "=v")
(any_fix:V2DI
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-trunchf.c b/gcc/testsuite/gcc.target/i386/avx512fp16-trunchf.c
new file mode 100644
index 0000000..2c025b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-trunchf.c
@@ -0,0 +1,59 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16" } */
+/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\[^\{\n\]*(?:%xmm\[0-9\]|\\(%esp\\))+, %eax(?:\n|\[ \\t\]+#)" 3 } } */
+/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\[^\{\n\]*(?:%xmm\[0-9\]|\\(%esp\\))+, %eax(?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttsh2si\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+, %rax(?:\n|\[ \\t\]+#)" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-times "vcvttsh2usi\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+, %rax(?:\n|\[ \\t\]+#)" 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler "xorl\[ \\t\]+%edx, %edx" { target ia32 } } } */
+
+#include <immintrin.h>
+
+short
+__attribute__ ((noinline, noclone))
+trunc_f16_to_si16 (_Float16 f)
+{
+ return f;
+}
+
+unsigned short
+__attribute__ ((noinline, noclone))
+trunc_f16_to_su16 (_Float16 f)
+{
+ return f;
+}
+
+int
+__attribute__ ((noinline, noclone))
+trunc_f16_to_si32 (_Float16 f)
+{
+ return f;
+}
+
+unsigned int
+__attribute__ ((noinline, noclone))
+trunc_f16_to_su32 (_Float16 f)
+{
+ return f;
+}
+
+long long
+__attribute__ ((noinline, noclone))
+trunc_f16_to_si64 (_Float16 f)
+{
+ return f;
+}
+
+unsigned long long
+__attribute__ ((noinline, noclone))
+trunc_f16_to_su64 (_Float16 f)
+{
+ return f;
+}
+
+unsigned long long
+__attribute__ ((noinline, noclone))
+trunc_f16_to_su64_zext (_Float16 f)
+{
+ return (unsigned int) f;
+}
+
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c b/gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c
new file mode 100644
index 0000000..ee55cd1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-truncvnhf.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx512fp16 -mavx512vl -ftree-slp-vectorize -mprefer-vector-width=512" } */
+
+extern long long di[8];
+extern unsigned long long udi[8];
+extern int si[16];
+extern unsigned int usi[16];
+extern short hi[32];
+extern unsigned short uhi[32];
+extern _Float16 hf[32];
+
+#define DO_PRAGMA(X) _Pragma(#X)
+
+#define FIX_TRUNCHFVV(size, mode) \
+ void __attribute__ ((noinline, noclone)) \
+fix_trunc##size##hf##v##size##mode () \
+{\
+ int i; \
+ DO_PRAGMA (GCC unroll size) \
+ for (i = 0; i < size; i++) \
+ mode[i] = hf[i]; \
+}
+
+FIX_TRUNCHFVV(32, hi)
+FIX_TRUNCHFVV(16, hi)
+FIX_TRUNCHFVV(8, hi)
+FIX_TRUNCHFVV(16, si)
+FIX_TRUNCHFVV(8, si)
+FIX_TRUNCHFVV(4, si)
+FIX_TRUNCHFVV(8, di)
+FIX_TRUNCHFVV(4, di)
+FIX_TRUNCHFVV(2, di)
+
+FIX_TRUNCHFVV(32, uhi)
+FIX_TRUNCHFVV(16, uhi)
+FIX_TRUNCHFVV(8, uhi)
+FIX_TRUNCHFVV(16, usi)
+FIX_TRUNCHFVV(8, usi)
+FIX_TRUNCHFVV(4, usi)
+FIX_TRUNCHFVV(8, udi)
+FIX_TRUNCHFVV(4, udi)
+FIX_TRUNCHFVV(2, udi)
+
+/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2qq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2uqq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2dq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2udq\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*\[^\n\r]*%zmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2w\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttph2uw\[ \\t\]+\[^\{\n\]*\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */