diff options
author | Kong Lingling <lingling.kong@intel.com> | 2023-03-27 10:09:00 +0800 |
---|---|---|
committer | Hongyu Wang <hongyu.wang@intel.com> | 2023-10-07 16:34:31 +0800 |
commit | 1328bb72548c7547629419e1b4f172ad94cdd6ff (patch) | |
tree | fb911e9b9e1a4483e3bc68fdeb33899e5c0bc432 | |
parent | 797b89329606501dcd56919264b5b6e39ebc7b43 (diff) | |
download | gcc-1328bb72548c7547629419e1b4f172ad94cdd6ff.zip gcc-1328bb72548c7547629419e1b4f172ad94cdd6ff.tar.gz gcc-1328bb72548c7547629419e1b4f172ad94cdd6ff.tar.bz2 |
[APX EGPR] Handle legacy insns that only support GPR16 (3/5)
Disable EGPR usage for below legacy insns in opcode map2/3 that have vex
but no evex counterpart.
insn list:
1. phminposuw/vphminposuw
2. ptest/vptest
3. roundps/vroundps, roundpd/vroundpd,
roundss/vroundss, roundsd/vroundsd
4. pcmpestri/vpcmpestri, pcmpestrm/vpcmpestrm
5. pcmpistri/vpcmpistri, pcmpistrm/vpcmpistrm
6. aesimc/vaesimc, aeskeygenassist/vaeskeygenassist
gcc/ChangeLog:
* config/i386/i386-protos.h (x86_evex_reg_mentioned_p): New
prototype.
* config/i386/i386.cc (x86_evex_reg_mentioned_p): New
function.
* config/i386/i386.md (sse4_1_round<mode>2): Set attr gpr32 0
and constraint jm to all non-evex alternatives, adjust
alternative outputs if evex reg is mentioned.
* config/i386/sse.md (<sse4_1>_ptest<mode>): Set attr gpr32 0
and constraint jm/ja to all non-evex alternatives.
(ptesttf2): Likewise.
(<sse4_1>_round<ssemodesuffix><avxsizesuffix): Likewise.
(sse4_1_round<ssescalarmodesuffix>): Likewise.
(sse4_2_pcmpestri): Likewise.
(sse4_2_pcmpestrm): Likewise.
(sse4_2_pcmpestr_cconly): Likewise.
(sse4_2_pcmpistr): Likewise.
(sse4_2_pcmpistri): Likewise.
(sse4_2_pcmpistrm): Likewise.
(sse4_2_pcmpistr_cconly): Likewise.
(aesimc): Likewise.
(aeskeygenassist): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-legacy-insn-check-norex2.c: Add intrinsic
tests.
Co-authored-by: Hongyu Wang <hongyu.wang@intel.com>
Co-authored-by: Hongtao Liu <hongtao.liu@intel.com>
-rw-r--r-- | gcc/config/i386/i386-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/i386/i386.cc | 13 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 3 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 93 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/i386/apx-legacy-insn-check-norex2.c | 55 |
5 files changed, 132 insertions, 33 deletions
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index a54e3f6..28d0eab 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -65,6 +65,7 @@ extern bool extended_reg_mentioned_p (rtx); extern bool x86_extended_QIreg_mentioned_p (rtx_insn *); extern bool x86_extended_reg_mentioned_p (rtx); extern bool x86_extended_rex2reg_mentioned_p (rtx); +extern bool x86_evex_reg_mentioned_p (rtx [], int); extern bool x86_maybe_negate_const_int (rtx *, machine_mode); extern machine_mode ix86_cc_mode (enum rtx_code, rtx, rtx); diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 62fba99..652e880 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -22944,6 +22944,19 @@ x86_extended_rex2reg_mentioned_p (rtx insn) return false; } +/* Return true when rtx operands mentions register that must be encoded using + evex prefix. */ +bool +x86_evex_reg_mentioned_p (rtx operands[], int nops) +{ + int i; + for (i = 0; i < nops; i++) + if (EXT_REX_SSE_REG_P (operands[i]) + || x86_extended_rex2reg_mentioned_p (operands[i])) + return true; + return false; +} + /* If profitable, negate (without causing overflow) integer constant of mode MODE at location LOC. Return true in this case. */ bool diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index e29fc8e..d1651bc 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -21603,7 +21603,7 @@ (define_insn "sse4_1_round<mode>2" [(set (match_operand:MODEFH 0 "register_operand" "=x,x,x,v,v") (unspec:MODEFH - [(match_operand:MODEFH 1 "nonimmediate_operand" "0,x,m,v,m") + [(match_operand:MODEFH 1 "nonimmediate_operand" "0,x,jm,v,m") (match_operand:SI 2 "const_0_to_15_operand")] UNSPEC_ROUND))] "TARGET_SSE4_1" @@ -21616,6 +21616,7 @@ [(set_attr "type" "ssecvt") (set_attr "prefix_extra" "1,1,1,*,*") (set_attr "length_immediate" "1") + (set_attr "gpr32" "1,1,0,1,1") (set_attr "prefix" "maybe_vex,maybe_vex,maybe_vex,evex,evex") (set_attr "isa" "noavx512f,noavx512f,noavx512f,avx512f,avx512f") (set_attr "avx_partial_xmm_update" "false,false,true,false,true") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index a7858a7..4db3940 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -22610,11 +22610,12 @@ (define_insn "sse4_1_phminposuw" [(set (match_operand:V8HI 0 "register_operand" "=Yr,*x,x") - (unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "YrBm,*xBm,xm")] + (unspec:V8HI [(match_operand:V8HI 1 "vector_operand" "Yrja,*xja,xjm")] UNSPEC_PHMINPOSUW))] "TARGET_SSE4_1" "%vphminposuw\t{%1, %0|%0, %1}" [(set_attr "isa" "noavx,noavx,avx") + (set_attr "gpr32" "0") (set_attr "type" "sselog1") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,orig,vex") @@ -23803,12 +23804,13 @@ (define_insn "*<sse4_1>_ptest<mode>" [(set (reg FLAGS_REG) (unspec [(match_operand:V_AVX 0 "register_operand" "Yr, *x, x") - (match_operand:V_AVX 1 "vector_operand" "YrBm, *xBm, xm")] + (match_operand:V_AVX 1 "vector_operand" "Yrja, *xja, xjm")] UNSPEC_PTEST))] "TARGET_SSE4_1 && ix86_match_ptest_ccmode (insn)" "%vptest\t{%1, %0|%0, %1}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssecomi") + (set_attr "gpr32" "0") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,orig,vex") (set (attr "btver2_decode") @@ -23845,12 +23847,13 @@ (define_insn "ptesttf2" [(set (reg:CC FLAGS_REG) (unspec:CC [(match_operand:TF 0 "register_operand" "Yr, *x, x") - (match_operand:TF 1 "vector_operand" "YrBm, *xBm, xm")] + (match_operand:TF 1 "vector_operand" "Yrja, *xja, xjm")] UNSPEC_PTEST))] "TARGET_SSE4_1" "%vptest\t{%1, %0|%0, %1}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssecomi") + (set_attr "gpr32" "0") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,orig,vex") (set_attr "mode" "TI")]) @@ -23961,13 +23964,14 @@ (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>" [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x") (unspec:VF_128_256 - [(match_operand:VF_128_256 1 "vector_operand" "YrBm,*xBm,xm") + [(match_operand:VF_128_256 1 "vector_operand" "Yrja,*xja,xjm") (match_operand:SI 2 "const_0_to_15_operand")] UNSPEC_ROUND))] "TARGET_SSE4_1" "%vround<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,noavx,avx") (set_attr "type" "ssecvt") + (set_attr "gpr32" "0") (set_attr "prefix_data16" "1,1,*") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") @@ -24054,19 +24058,32 @@ [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v") (vec_merge:VF_128 (unspec:VF_128 - [(match_operand:VF_128 2 "nonimmediate_operand" "Yrm,*xm,xm,vm") + [(match_operand:VF_128 2 "nonimmediate_operand" "Yrjm,*xjm,xjm,vm") (match_operand:SI 3 "const_0_to_15_operand")] UNSPEC_ROUND) (match_operand:VF_128 1 "register_operand" "0,0,x,v") (const_int 1)))] "TARGET_SSE4_1" - "@ - round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3} - round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3} - vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3} - vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}" - [(set_attr "isa" "noavx,noavx,avx,avx512f") +{ + switch (which_alternative) + { + case 0: + case 1: + return "round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %<iptr>2, %3}"; + case 2: + return "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"; + case 3: + if (x86_evex_reg_mentioned_p (operands, 3)) + return "vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"; + else + return "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %<iptr>2, %3}"; + default: + gcc_unreachable (); + } +} + [(set_attr "isa" "noavx,noavx,noavx512f,avx512f") (set_attr "type" "ssecvt") + (set_attr "gpr32" "0,0,0,1") (set_attr "length_immediate" "1") (set_attr "prefix_data16" "1,1,*,*") (set_attr "prefix_extra" "1") @@ -24078,19 +24095,32 @@ (vec_merge:VFH_128 (vec_duplicate:VFH_128 (unspec:<ssescalarmode> - [(match_operand:<ssescalarmode> 2 "nonimmediate_operand" "Yrm,*xm,xm,vm") + [(match_operand:<ssescalarmode> 2 "nonimmediate_operand" "Yrjm,*xjm,xjm,vm") (match_operand:SI 3 "const_0_to_15_operand")] UNSPEC_ROUND)) (match_operand:VFH_128 1 "register_operand" "0,0,x,v") (const_int 1)))] "TARGET_SSE4_1" - "@ - round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3} - round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3} - vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3} - vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "noavx,noavx,avx,avx512f") +{ + switch (which_alternative) + { + case 0: + case 1: + return "round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}"; + case 2: + return "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + case 3: + if (x86_evex_reg_mentioned_p (operands, 3) || <MODE>mode == V8HFmode) + return "vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + else + return "vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"; + default: + gcc_unreachable (); + } +} + [(set_attr "isa" "noavx,noavx,noavx512f,avx512f") (set_attr "type" "ssecvt") + (set_attr "gpr32" "0,0,0,1") (set_attr "length_immediate" "1") (set_attr "prefix_data16" "1,1,*,*") (set_attr "prefix_extra" "1") @@ -24311,7 +24341,7 @@ (unspec:SI [(match_operand:V16QI 1 "register_operand" "x,x") (match_operand:SI 2 "register_operand" "a,a") - (match_operand:V16QI 3 "nonimmediate_operand" "x,m") + (match_operand:V16QI 3 "nonimmediate_operand" "x,jm") (match_operand:SI 4 "register_operand" "d,d") (match_operand:SI 5 "const_0_to_255_operand")] UNSPEC_PCMPESTR)) @@ -24326,6 +24356,7 @@ "TARGET_SSE4_2" "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}" [(set_attr "type" "sselog") + (set_attr "gpr32" "0") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_vex") (set_attr "length_immediate" "1") @@ -24338,7 +24369,7 @@ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x,x") (match_operand:SI 2 "register_operand" "a,a") - (match_operand:V16QI 3 "nonimmediate_operand" "x,m") + (match_operand:V16QI 3 "nonimmediate_operand" "x,jm") (match_operand:SI 4 "register_operand" "d,d") (match_operand:SI 5 "const_0_to_255_operand")] UNSPEC_PCMPESTR)) @@ -24353,6 +24384,7 @@ "TARGET_SSE4_2" "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}" [(set_attr "type" "sselog") + (set_attr "gpr32" "0") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") @@ -24365,7 +24397,7 @@ (unspec:CC [(match_operand:V16QI 2 "register_operand" "x,x,x,x") (match_operand:SI 3 "register_operand" "a,a,a,a") - (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m") + (match_operand:V16QI 4 "nonimmediate_operand" "x,jm,x,jm") (match_operand:SI 5 "register_operand" "d,d,d,d") (match_operand:SI 6 "const_0_to_255_operand")] UNSPEC_PCMPESTR)) @@ -24378,6 +24410,7 @@ %vpcmpestri\t{%6, %4, %2|%2, %4, %6} %vpcmpestri\t{%6, %4, %2|%2, %4, %6}" [(set_attr "type" "sselog") + (set_attr "gpr32" "0") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "memory" "none,load,none,load") @@ -24389,7 +24422,7 @@ [(set (match_operand:SI 0 "register_operand" "=c,c") (unspec:SI [(match_operand:V16QI 2 "register_operand" "x,x") - (match_operand:V16QI 3 "nonimmediate_operand" "x,m") + (match_operand:V16QI 3 "nonimmediate_operand" "x,jm") (match_operand:SI 4 "const_0_to_255_operand")] UNSPEC_PCMPISTR)) (set (match_operand:V16QI 1 "register_operand" "=Yz,Yz") @@ -24432,6 +24465,7 @@ DONE; } [(set_attr "type" "sselog") + (set_attr "gpr32" "0") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "memory" "none,load") @@ -24441,7 +24475,7 @@ [(set (match_operand:SI 0 "register_operand" "=c,c") (unspec:SI [(match_operand:V16QI 1 "register_operand" "x,x") - (match_operand:V16QI 2 "nonimmediate_operand" "x,m") + (match_operand:V16QI 2 "nonimmediate_operand" "x,jm") (match_operand:SI 3 "const_0_to_255_operand")] UNSPEC_PCMPISTR)) (set (reg:CC FLAGS_REG) @@ -24453,6 +24487,7 @@ "TARGET_SSE4_2" "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}" [(set_attr "type" "sselog") + (set_attr "gpr32" "0") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") @@ -24464,7 +24499,7 @@ [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz") (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x,x") - (match_operand:V16QI 2 "nonimmediate_operand" "x,m") + (match_operand:V16QI 2 "nonimmediate_operand" "x,jm") (match_operand:SI 3 "const_0_to_255_operand")] UNSPEC_PCMPISTR)) (set (reg:CC FLAGS_REG) @@ -24476,6 +24511,7 @@ "TARGET_SSE4_2" "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}" [(set_attr "type" "sselog") + (set_attr "gpr32" "0") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") @@ -24487,7 +24523,7 @@ [(set (reg:CC FLAGS_REG) (unspec:CC [(match_operand:V16QI 2 "register_operand" "x,x,x,x") - (match_operand:V16QI 3 "nonimmediate_operand" "x,m,x,m") + (match_operand:V16QI 3 "nonimmediate_operand" "x,jm,x,jm") (match_operand:SI 4 "const_0_to_255_operand")] UNSPEC_PCMPISTR)) (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X")) @@ -24499,6 +24535,7 @@ %vpcmpistri\t{%4, %3, %2|%2, %3, %4} %vpcmpistri\t{%4, %3, %2|%2, %3, %4}" [(set_attr "type" "sselog") + (set_attr "gpr32" "0") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "memory" "none,load,none,load") @@ -25983,23 +26020,25 @@ (define_insn "aesimc" [(set (match_operand:V2DI 0 "register_operand" "=x") - (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm")] + (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xja")] UNSPEC_AESIMC))] "TARGET_AES" "%vaesimc\t{%1, %0|%0, %1}" [(set_attr "type" "sselog1") + (set_attr "gpr32" "0") (set_attr "prefix_extra" "1") (set_attr "prefix" "maybe_vex") (set_attr "mode" "TI")]) (define_insn "aeskeygenassist" [(set (match_operand:V2DI 0 "register_operand" "=x") - (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xBm") + (unspec:V2DI [(match_operand:V2DI 1 "vector_operand" "xja") (match_operand:SI 2 "const_0_to_255_operand")] UNSPEC_AESKEYGENASSIST))] "TARGET_AES" "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sselog1") + (set_attr "gpr32" "0") (set_attr "prefix_extra" "1") (set_attr "length_immediate" "1") (set_attr "prefix" "maybe_vex") diff --git a/gcc/testsuite/gcc.target/i386/apx-legacy-insn-check-norex2.c b/gcc/testsuite/gcc.target/i386/apx-legacy-insn-check-norex2.c index 510213a..771bcb0 100644 --- a/gcc/testsuite/gcc.target/i386/apx-legacy-insn-check-norex2.c +++ b/gcc/testsuite/gcc.target/i386/apx-legacy-insn-check-norex2.c @@ -45,13 +45,22 @@ typedef union DTYPE a[16]; } tmp_u; -__attribute__((target("sse4.2"))) +__attribute__((target("sse4.2,aes"))) void sse_test () { register tmp_u *tdst __asm__("%r16"); register tmp_u *src1 __asm__("%r17"); register tmp_u *src2 __asm__("%r18"); - + + src1->xi[0] = _mm_minpos_epu16 (src1->xi[1]); + src1->a[2] = _mm_testc_si128 (src1->xi[3], src2->xi[4]); + src1->xf[3] = _mm_round_ss (src1->xf[5], src2->xf[6], + _MM_FROUND_CUR_DIRECTION); + src1->xf[4] = _mm_round_ps (src1->xf[7], _MM_FROUND_CUR_DIRECTION); + src1->xd[0] = _mm_round_sd (src1->xd[2], src2->xd[3], + _MM_FROUND_CUR_DIRECTION); + src1->xd[1] = _mm_round_pd (src1->xd[4], _MM_FROUND_CUR_DIRECTION); + src1->xi[0] = _mm_hadd_epi16 (tdst->xi[2], src2->xi[3]); src1->xi[1] = _mm_hadd_epi32 (tdst->xi[0], src2->xi[1]); tdst->xi[2] = _mm_hadds_epi16 (src1->xi[4], src2->xi[5]); @@ -77,16 +86,33 @@ void sse_test () tdst->xi[1] = _mm_sign_epi8 (src1->xi[5], src2->xi[6]); tdst->xi[2] = _mm_sign_epi16 (src1->xi[7], src2->xi[0]); tdst->xi[3] = _mm_sign_epi32 (src1->xi[1], src2->xi[2]); + + tdst->a[2] = _mm_cmpestri (src1->xi[3], 16, src2->xi[4], 16, 0x0c); + tdst->xi[4] = _mm_cmpestrm (src1->xi[3], 16, src2->xi[4], 16, 0x20); + tdst->a[5] = _mm_cmpistri (src1->xi[5], src2->xi[6], 0x30); + tdst->xi[6] = _mm_cmpistrm (src1->xi[5], src2->xi[6], 0x40); + + tdst->xi[7] = _mm_aesimc_si128 (src1->xi[7]); + tdst->xi[0] = _mm_aeskeygenassist_si128 (src1->xi[1], 0x1b); } -__attribute__((target("avx2"))) +__attribute__((target("avx2,aes"))) void vex_test () { register tmp_u *tdst __asm__("%r16"); register tmp_u *src1 __asm__("%r17"); register tmp_u *src2 __asm__("%r18"); - + + src1->xi[0] = _mm_minpos_epu16 (src1->xi[1]); + src1->a[2] = _mm256_testc_si256 (src1->yi[2], src2->yi[3]); + src1->xf[3] = _mm_round_ss (src1->xf[5], src2->xf[6], + _MM_FROUND_CUR_DIRECTION); + src1->yf[4] = _mm256_round_ps (src1->yf[2], _MM_FROUND_CUR_DIRECTION); + src1->xd[0] = _mm_round_sd (src1->xd[2], src2->xd[3], + _MM_FROUND_CUR_DIRECTION); + src1->yd[1] = _mm256_round_pd (src1->yd[3], _MM_FROUND_CUR_DIRECTION); + src1->yi[1] = _mm256_hadd_epi16 (tdst->yi[2], src2->yi[3]); src1->yi[2] = _mm256_hadd_epi32 (tdst->yi[0], src2->yi[1]); tdst->yi[3] = _mm256_hadds_epi16 (src1->yi[1], src2->yi[2]); @@ -98,7 +124,6 @@ void vex_test () src1->yi[1] = _mm256_cmpgt_epi64 (tdst->yi[3], src2->yi[0]); tdst->yf[2] = _mm256_dp_ps (src1->yf[0], src2->yf[1], 0xbf); - tdst->xd[3] = _mm_dp_pd (src1->xd[0], src2->xd[1], 0xbf); tdst->yi[3] = _mm256_mpsadbw_epu8 (src1->yi[1], src2->yi[1], 0xc1); @@ -112,6 +137,14 @@ void vex_test () tdst->yi[2] = _mm256_sign_epi8 (src1->yi[0], src2->yi[1]); tdst->yi[3] = _mm256_sign_epi16 (src1->yi[2], src2->yi[3]); tdst->yi[0] = _mm256_sign_epi32 (src1->yi[0], src2->yi[1]); + + tdst->a[2] = _mm_cmpestri (src1->xi[3], 16, src2->xi[4], 16, 0x0c); + tdst->xi[4] = _mm_cmpestrm (src1->xi[3], 16, src2->xi[4], 16, 0x20); + tdst->a[5] = _mm_cmpistri (src1->xi[5], src2->xi[6], 0x30); + tdst->xi[6] = _mm_cmpistrm (src1->xi[5], src2->xi[6], 0x40); + + tdst->xi[7] = _mm_aesimc_si128 (src1->xi[7]); + tdst->xi[0] = _mm_aeskeygenassist_si128 (src1->xi[1], 0x1b); } /* { dg-final { scan-assembler-not "v?pcmpeqq\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */ @@ -134,3 +167,15 @@ void vex_test () /* { dg-final { scan-assembler-not "v?psignb\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */ /* { dg-final { scan-assembler-not "v?psignw\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */ /* { dg-final { scan-assembler-not "v?psignd\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */ +/* { dg-final { scan-assembler-not "v?phminposuw\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */ +/* { dg-final { scan-assembler-not "v?ptest\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */ +/* { dg-final { scan-assembler-not "v?roundss\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */ +/* { dg-final { scan-assembler-not "v?roundsd\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */ +/* { dg-final { scan-assembler-not "v?roundps\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */ +/* { dg-final { scan-assembler-not "v?roundpd\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */ +/* { dg-final { scan-assembler-not "v?pcmpestri\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */ +/* { dg-final { scan-assembler-not "v?pcmpistri\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */ +/* { dg-final { scan-assembler-not "v?pcmpestrm\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */ +/* { dg-final { scan-assembler-not "v?pcmpistrm\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */ +/* { dg-final { scan-assembler-not "v?aesimc\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */ +/* { dg-final { scan-assembler-not "v?aeskeygenassist\[ \\t]+\\\.\\\*r\(1\[6-9\]\|2\[0-9\]|30\|31\)" } } */ |