aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorHaochen Jiang <haochen.jiang@intel.com>2024-11-01 10:04:27 +0800
committerHaochen Jiang <haochen.jiang@intel.com>2024-11-01 10:04:27 +0800
commit8ee5cd4b84489bee0f72153e96a9afe9493e170d (patch)
tree07b6d79f0496a351408676123fb0c6c6e1d6797d /gcc
parenta287ff2697f0788856a21c99098611a5ae0a4749 (diff)
downloadgcc-8ee5cd4b84489bee0f72153e96a9afe9493e170d.zip
gcc-8ee5cd4b84489bee0f72153e96a9afe9493e170d.tar.gz
gcc-8ee5cd4b84489bee0f72153e96a9afe9493e170d.tar.bz2
Support Intel SM4 EVEX instructions
gcc/ChangeLog: * config/i386/i386-builtin-types.def: Add DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI). * config/i386/i386-builtin.def (BDESC): Add new builtins. * config/i386/i386-expand.cc (ix86_expand_args_builtin): Handle V16SI_FTYPE_V16SI_V16SI. * config/i386/sm4intrin.h: Add zmm insns. * config/i386/sse.md (vsm4key4_<mode>): Add EVEX pattern. (vsm4rnds4_<mode>): Ditto. gcc/testsuite/ChangeLog: * gcc.target/i386/sm4-check.h: Add zmm test. * gcc.target/i386/sm4-avx10_2-1.c: New test. * gcc.target/i386/sm4-avx10_2-512-1.c: Ditto. * gcc.target/i386/sm4key4-avx10_2-512-2.c: Ditto. * gcc.target/i386/sm4rnds4-avx10_2-512-2.c: Ditto.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/i386/i386-builtin-types.def3
-rw-r--r--gcc/config/i386/i386-builtin.def2
-rw-r--r--gcc/config/i386/i386-expand.cc1
-rw-r--r--gcc/config/i386/sm4intrin.h25
-rw-r--r--gcc/config/i386/sse.md20
-rw-r--r--gcc/testsuite/gcc.target/i386/sm4-avx10_2-1.c58
-rw-r--r--gcc/testsuite/gcc.target/i386/sm4-avx10_2-512-1.c15
-rw-r--r--gcc/testsuite/gcc.target/i386/sm4-check.h36
-rw-r--r--gcc/testsuite/gcc.target/i386/sm4key4-avx10_2-512-2.c18
-rw-r--r--gcc/testsuite/gcc.target/i386/sm4rnds4-avx10_2-512-2.c18
10 files changed, 186 insertions, 10 deletions
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 290f6e6..c603423 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -1507,3 +1507,6 @@ DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, INT, V8DF, UQI, INT)
DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, INT, V32HF, USI, INT)
DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, INT, V16HF, UHI, INT)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT, V16SF, UHI, INT)
+
+# SM4 builtins
+DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 1eb631d..3958027 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -1668,8 +1668,10 @@ BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_SM3, CODE_FOR_vsm3rnds2, "__builtin
/* SM4 */
BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4key4_v4si, "__builtin_ia32_vsm4key4128", IX86_BUILTIN_VSM4KEY4128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI)
BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4key4_v8si, "__builtin_ia32_vsm4key4256", IX86_BUILTIN_VSM4KEY4256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI)
+BDESC (0, OPTION_MASK_ISA2_SM4 | OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vsm4key4_v16si, "__builtin_ia32_vsm4key4512", IX86_BUILTIN_VSM4KEY4512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI)
BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4rnds4_v4si, "__builtin_ia32_vsm4rnds4128", IX86_BUILTIN_VSM4RNDS4128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI)
BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4rnds4_v8si, "__builtin_ia32_vsm4rnds4256", IX86_BUILTIN_VSM4RNDS4256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI)
+BDESC (0, OPTION_MASK_ISA2_SM4 | OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vsm4rnds4_v16si, "__builtin_ia32_vsm4rnds4512", IX86_BUILTIN_VSM4RNDS4512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI)
/* SHA512 */
BDESC (0, OPTION_MASK_ISA2_SHA512, CODE_FOR_vsha512msg1, "__builtin_ia32_vsha512msg1", IX86_BUILTIN_VSHA512MSG1, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI)
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 0de0e84..768987c 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -11415,6 +11415,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V16QI_FTYPE_V8HI_V8HI:
case V16HF_FTYPE_V16HF_V16HF:
case V16SF_FTYPE_V16SF_V16SF:
+ case V16SI_FTYPE_V16SI_V16SI:
case V8QI_FTYPE_V8QI_V8QI:
case V8QI_FTYPE_V4HI_V4HI:
case V8HI_FTYPE_V8HI_V8HI:
diff --git a/gcc/config/i386/sm4intrin.h b/gcc/config/i386/sm4intrin.h
index 4c212cc..e2d78f0 100644
--- a/gcc/config/i386/sm4intrin.h
+++ b/gcc/config/i386/sm4intrin.h
@@ -67,4 +67,29 @@ _mm256_sm4rnds4_epi32 (__m256i __A, __m256i __B)
#pragma GCC pop_options
#endif /* __DISABLE_SM4__ */
+#if !defined (__SM4__) || !defined (__AVX10_2_512__)
+#pragma GCC push_options
+#pragma GCC target("sm4,avx10.2-512")
+#define __DISABLE_SM4_512__
+#endif /* __SM4_512__ */
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sm4key4_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vsm4key4512 ((__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sm4rnds4_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vsm4rnds4512 ((__v16si) __A, (__v16si) __B);
+}
+
+#ifdef __DISABLE_SM4_512__
+#undef __DISABLE_SM4_512__
+#pragma GCC pop_options
+#endif /* __DISABLE_SM4_512__ */
+
#endif /* _SM4INTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 36f8567..319c3c7 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -30067,25 +30067,29 @@
(set_attr "mode" "OI")])
(define_insn "vsm4key4_<mode>"
- [(set (match_operand:VI4_AVX 0 "register_operand" "=x")
- (unspec:VI4_AVX
- [(match_operand:VI4_AVX 1 "register_operand" "x")
- (match_operand:VI4_AVX 2 "vector_operand" "xBm")]
+ [(set (match_operand:VI4_AVX10_2 0 "register_operand" "=x,v")
+ (unspec:VI4_AVX10_2
+ [(match_operand:VI4_AVX10_2 1 "register_operand" "x,v")
+ (match_operand:VI4_AVX10_2 2 "vector_operand" "xBm,vBm")]
UNSPEC_SM4KEY4))]
"TARGET_SM4"
"vsm4key4\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "other")
+ (set_attr "prefix" "maybe_evex")
+ (set_attr "isa" "avx,avx10_2")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "vsm4rnds4_<mode>"
- [(set (match_operand:VI4_AVX 0 "register_operand" "=x")
- (unspec:VI4_AVX
- [(match_operand:VI4_AVX 1 "register_operand" "x")
- (match_operand:VI4_AVX 2 "vector_operand" "xBm")]
+ [(set (match_operand:VI4_AVX10_2 0 "register_operand" "=x,v")
+ (unspec:VI4_AVX10_2
+ [(match_operand:VI4_AVX10_2 1 "register_operand" "x,v")
+ (match_operand:VI4_AVX10_2 2 "vector_operand" "xBm,vBm")]
UNSPEC_SM4RNDS4))]
"TARGET_SM4"
"vsm4rnds4\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "other")
+ (set_attr "prefix" "maybe_evex")
+ (set_attr "isa" "avx,avx10_2")
(set_attr "mode" "<sseinsnmode>")])
(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
diff --git a/gcc/testsuite/gcc.target/i386/sm4-avx10_2-1.c b/gcc/testsuite/gcc.target/i386/sm4-avx10_2-1.c
new file mode 100644
index 0000000..4746f6f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sm4-avx10_2-1.c
@@ -0,0 +1,58 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -march=x86-64-v3 -msm4 -mavx10.2" } */
+
+#include <immintrin.h>
+
+void
+f1 (__m128i x, __m128i y)
+{
+ register __m128i a __asm("xmm16");
+ register __m128i b __asm("xmm17");
+ a = x;
+ b = y;
+ asm volatile ("" : "+v" (a), "+v" (b));
+ a = _mm_sm4key4_epi32 (a, b);
+ asm volatile ("" : "+v" (a));
+}
+
+void
+f2 (__m256i x, __m256i y)
+{
+ register __m256i a __asm("ymm16");
+ register __m256i b __asm("ymm17");
+ a = x;
+ b = y;
+ asm volatile ("" : "+v" (a), "+v" (b));
+ a = _mm256_sm4key4_epi32 (a, b);
+ asm volatile ("" : "+v" (a));
+}
+
+void
+f3 (__m128i x, __m128i y)
+{
+ register __m128i a __asm("xmm16");
+ register __m128i b __asm("xmm17");
+ a = x;
+ b = y;
+ asm volatile ("" : "+v" (a), "+v" (b));
+ a = _mm_sm4rnds4_epi32 (a, b);
+ asm volatile ("" : "+v" (a));
+}
+
+void
+f4 (__m256i x, __m256i y)
+{
+ register __m256i a __asm("ymm16");
+ register __m256i b __asm("ymm17");
+ a = x;
+ b = y;
+ asm volatile ("" : "+v" (a), "+v" (b));
+ a = _mm256_sm4rnds4_epi32 (a, b);
+ asm volatile ("" : "+v" (a));
+}
+
+/* { dg-final { scan-assembler "vsm4key4\[ \\t\]+\[^\n\]*%xmm17\[^\n\]*%xmm16\[^\n\]*%xmm16" } } */
+/* { dg-final { scan-assembler "vsm4key4\[ \\t\]+\[^\n\]*%ymm17\[^\n\]*%ymm16\[^\n\]*%ymm16" } } */
+/* { dg-final { scan-assembler "vsm4rnds4\[ \\t\]+\[^\n\]*%xmm17\[^\n\]*%xmm16\[^\n\]*%xmm16" } } */
+/* { dg-final { scan-assembler "vsm4rnds4\[ \\t\]+\[^\n\]*%ymm17\[^\n\]*%ymm16\[^\n\]*%ymm16" } } */
+
diff --git a/gcc/testsuite/gcc.target/i386/sm4-avx10_2-512-1.c b/gcc/testsuite/gcc.target/i386/sm4-avx10_2-512-1.c
new file mode 100644
index 0000000..546472a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sm4-avx10_2-512-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64-v3 -msm4 -mavx10.2-512" } */
+/* { dg-final { scan-assembler "vsm4key4\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]" } } */
+/* { dg-final { scan-assembler "vsm4rnds4\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]" } } */
+
+#include <immintrin.h>
+
+volatile __m512i x, y, z;
+
+void extern
+sm4_test (void)
+{
+ x = _mm512_sm4key4_epi32 (y, z);
+ x = _mm512_sm4rnds4_epi32 (y, z);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sm4-check.h b/gcc/testsuite/gcc.target/i386/sm4-check.h
index 435fcf2..72543a7 100644
--- a/gcc/testsuite/gcc.target/i386/sm4-check.h
+++ b/gcc/testsuite/gcc.target/i386/sm4-check.h
@@ -1,7 +1,11 @@
#include <stdlib.h>
-#include "m256-check.h"
+#include "m512-check.h"
+#ifdef AVX10_2_512
+static void sm4_avx512f_test (void);
+#else
static void sm4_test (void);
+#endif
typedef union
{
@@ -156,18 +160,46 @@ compute_sm4##name##4 (int *dst, int *src1, int *src2, int vl) \
if (check_union256i_d (res2, dst2)) \
abort ();
+#define SM4_AVX512F_SIMULATE(name) \
+ union512i_d src5, src6, res3; \
+ int dst3[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; \
+ \
+ src5.x = _mm512_set_epi32 (111, 222, 333, 444, 555, 666, 777, 888, \
+ 999, 123, 456, 789, 135, 792, 468, 147); \
+ src6.x = _mm512_set_epi32 (258, 369, 159, 483, 726, 162, 738, 495, \
+ 174, 285, 396, 186, 429, 752, 198, 765); \
+ res3.x = _mm512_set_epi32 (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); \
+ \
+ res3.x = _mm512_sm4##name##4_epi32 (src5.x, src6.x); \
+ \
+ compute_sm4##name##4 (dst3, src5.a, src6.a, 512); \
+ \
+ if (check_union512i_d (res3, dst3)) \
+ abort ();
+
static void
__attribute__ ((noinline))
do_test (void)
{
+#ifdef AVX10_512BIT
+ sm4_avx512f_test ();
+#else
sm4_test ();
+#endif
}
int
main ()
{
/* Check CPU support for SM4. */
- if (__builtin_cpu_supports ("sm4"))
+ if (__builtin_cpu_supports ("sm4")
+#ifdef AVX10_2
+ && __builtin_cpu_supports ("avx10.2")
+#endif
+#ifdef AVX10_2_512
+ && __builtin_cpu_supports ("avx10.2-512")
+#endif
+ )
{
do_test ();
#ifdef DEBUG
diff --git a/gcc/testsuite/gcc.target/i386/sm4key4-avx10_2-512-2.c b/gcc/testsuite/gcc.target/i386/sm4key4-avx10_2-512-2.c
new file mode 100644
index 0000000..85b7e3e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sm4key4-avx10_2-512-2.c
@@ -0,0 +1,18 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -march=x86-64-v3 -msm4 -mavx10.2-512" } */
+/* { dg-require-effective-target sm4 } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#include "sm4-check.h"
+
+char key;
+SM4_FUNC (key);
+
+static void
+sm4_avx512f_test (void)
+{
+ SM4_AVX512F_SIMULATE (key);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sm4rnds4-avx10_2-512-2.c b/gcc/testsuite/gcc.target/i386/sm4rnds4-avx10_2-512-2.c
new file mode 100644
index 0000000..1eaf08b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sm4rnds4-avx10_2-512-2.c
@@ -0,0 +1,18 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -march=x86-64-v3 -msm4 -mavx10.2-512" } */
+/* { dg-require-effective-target sm4 } */
+/* { dg-require-effective-target avx10_2_512 } */
+
+#define AVX10_2
+#define AVX10_2_512
+#define AVX10_512BIT
+#include "sm4-check.h"
+
+char rnds;
+SM4_FUNC (rnds);
+
+static void
+sm4_avx512f_test (void)
+{
+ SM4_AVX512F_SIMULATE (rnds);
+}