diff options
author | Steven Munroe <munroesj@gcc.gnu.org> | 2017-05-12 18:34:44 +0000 |
---|---|---|
committer | Steven Munroe <munroesj@gcc.gnu.org> | 2017-05-12 18:34:44 +0000 |
commit | b76f15500970fd9b00ae4c6979fbe1b4ab7ed126 (patch) | |
tree | a1b61fb8d8159e84865f0ec3c9036d0dd3de5a76 | |
parent | 2a3fa75aac85ff2583799db0204c20e2b8ca8ba8 (diff) | |
download | gcc-b76f15500970fd9b00ae4c6979fbe1b4ab7ed126.zip gcc-b76f15500970fd9b00ae4c6979fbe1b4ab7ed126.tar.gz gcc-b76f15500970fd9b00ae4c6979fbe1b4ab7ed126.tar.bz2 |
config.gcc (powerpc*-*-*): Add bmi2intrin.h, bmiintrin.h, and x86intrin.h
[gcc]
2017-05-12 Steven Munroe <munroesj@gcc.gnu.org>
* config.gcc (powerpc*-*-*): Add bmi2intrin.h, bmiintrin.h,
and x86intrin.h.
* config/rs6000/bmiintrin.h: New file.
* config/rs6000/bmi2intrin.h: New file.
* config/rs6000/x86intrin.h: New file.
[gcc/testsuite]
2017-05-12 Steven Munroe <munroesj@gcc.gnu.org>
* gcc.target/powerpc/bmi-andn-1.c: New file.
* gcc.target/powerpc/bmi-andn-2.c: New file.
* gcc.target/powerpc/bmi-bextr-1.c: New file.
* gcc.target/powerpc/bmi-bextr-2.c: New file.
* gcc.target/powerpc/bmi-bextr-4.c: New file.
* gcc.target/powerpc/bmi-bextr-5.c: New file.
* gcc.target/powerpc/bmi-blsi-1.c: New file.
* gcc.target/powerpc/bmi-blsi-2.c: New file.
* gcc.target/powerpc/bmi-blsmsk-1.c: New file.
* gcc.target/powerpc/bmi-blsmsk-2.c: New file.
* gcc.target/powerpc/bmi-blsr-1.c: New file.
* gcc.target/powerpc/bmi-blsr-2.c: New file.
* gcc.target/powerpc/bmi-check.h: New file.
* gcc.target/powerpc/bmi-tzcnt-1.c: New file.
* gcc.target/powerpc/bmi-tzcnt-2.c: New file.
* gcc.target/powerpc/bmi2-bzhi32-1.c: New file.
* gcc.target/powerpc/bmi2-bzhi64-1.c: New file.
* gcc.target/powerpc/bmi2-bzhi64-1a.c: New file.
* gcc.target/powerpc/bmi2-check.h: New file.
* gcc.target/powerpc/bmi2-mulx32-1.c: New file.
* gcc.target/powerpc/bmi2-mulx32-2.c: New file.
* gcc.target/powerpc/bmi2-mulx64-1.c: New file.
* gcc.target/powerpc/bmi2-mulx64-2.c: New file.
* gcc.target/powerpc/bmi2-pdep32-1.c: New file.
* gcc.target/powerpc/bmi2-pdep64-1.c: New file.
* gcc.target/powerpc/bmi2-pext32-1.c: New file.
* gcc.target/powerpc/bmi2-pext64-1.c: New file.
* gcc.target/powerpc/bmi2-pext64-1a.c: New file.
From-SVN: r247988
34 files changed, 1516 insertions, 1 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6cd0666..99f1648 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,11 @@ +2017-05-12 Steven Munroe <munroesj@gcc.gnu.org> + + * config.gcc (powerpc*-*-*): Add bmi2intrin.h, bmiintrin.h, + and x86intrin.h + * config/rs6000/bmiintrin.h: New file. + * config/rs6000/bmi2intrin.h: New file. + * config/rs6000/x86intrin.h: New file. + 2017-05-12 Jeff Law <law@redhat.com> * tree-vrp.c (vrp_dom_walker::before_dom_childern): Push unwinding diff --git a/gcc/config.gcc b/gcc/config.gcc index e8aaf2d..8ce94d0 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -444,7 +444,10 @@ nvptx-*-*) ;; powerpc*-*-*) cpu_type=rs6000 - extra_headers="ppc-asm.h altivec.h spe.h ppu_intrinsics.h paired.h spu2vmx.h vec_types.h si2vmx.h htmintrin.h htmxlintrin.h" + extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h" + extra_headers="${extra_headers} bmi2intrin.h bmiintrin.h x86intrin.h" + extra_headers="${extra_headers} ppu_intrinsics.h spu2vmx.h vec_types.h si2vmx.h" + extra_headers="${extra_headers} spe.h paired.h" case x$with_cpu in xpowerpc64|xdefault64|x6[23]0|x970|xG5|xpower[3456789]|xpower6x|xrs64a|xcell|xa2|xe500mc64|xe5500|xe6500) cpu_is_64bit=yes diff --git a/gcc/config/rs6000/bmi2intrin.h b/gcc/config/rs6000/bmi2intrin.h new file mode 100644 index 0000000..fc634c1 --- /dev/null +++ b/gcc/config/rs6000/bmi2intrin.h @@ -0,0 +1,169 @@ +/* Copyright (C) 2011-2017 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This header is distributed to simplify porting x86_64 code that + makes explicit use of Intel intrinsics to powerpc64le. + It is the user's responsibility to determine if the results are + acceptable and make additional changes as necessary. + Note that much code that uses Intel intrinsics can be rewritten in + standard C or GNU C extensions, which are more portable and better + optimized across multiple targets. */ + +#if !defined _X86INTRIN_H_INCLUDED +# error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead." +#endif + +#ifndef _BMI2INTRIN_H_INCLUDED +#define _BMI2INTRIN_H_INCLUDED + +extern __inline unsigned int +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_bzhi_u32 (unsigned int __X, unsigned int __Y) +{ + return ((__X << (32 - __Y)) >> (32 - __Y)); +} + +extern __inline unsigned int +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P) +{ + unsigned long long __res = (unsigned long long) __X * __Y; + *__P = (unsigned int) (__res >> 32); + return (unsigned int) __res; +} + +#ifdef __PPC64__ +extern __inline unsigned long long +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_bzhi_u64 (unsigned long long __X, unsigned long long __Y) +{ + return ((__X << (64 - __Y)) >> (64 - __Y)); +} + +/* __int128 requires base 64-bit. 
*/ +extern __inline unsigned long long +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mulx_u64 (unsigned long long __X, unsigned long long __Y, + unsigned long long *__P) +{ + unsigned __int128 __res = (unsigned __int128) __X * __Y; + *__P = (unsigned long long) (__res >> 64); + return (unsigned long long) __res; +} + +#ifdef _ARCH_PWR7 +/* popcount and bpermd require power7 minimum. */ +extern __inline unsigned long long +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_pdep_u64 (unsigned long long __X, unsigned long long __M) +{ + unsigned long result = 0x0UL; + const unsigned long mask = 0x8000000000000000UL; + unsigned long m = __M; + unsigned long c, t; + unsigned long p; + + /* The pop-count of the mask gives the number of the bits from + source to process. This is also needed to shift bits from the + source into the correct position for the result. */ + p = 64 - __builtin_popcountl (__M); + + /* The loop is for the number of '1' bits in the mask and clearing + each mask bit as it is processed. */ + while (m != 0) + { + c = __builtin_clzl (m); + t = __X << (p - c); + m ^= (mask >> c); + result |= (t & (mask >> c)); + p++; + } + return (result); +} + +extern __inline unsigned long long +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_pext_u64 (unsigned long long __X, unsigned long long __M) +{ + unsigned long p = 0x4040404040404040UL; // initial bit permute control + const unsigned long mask = 0x8000000000000000UL; + unsigned long m = __M; + unsigned long c; + unsigned long result; + + /* if the mask is constant and selects 8 bits or less we can use + the Power8 Bit permute instruction. */ + if (__builtin_constant_p (__M) && (__builtin_popcountl (__M) <= 8)) + { + /* Also if the pext mask is constant, then the popcount is + constant, we can evaluate the following loop at compile + time and use a constant bit permute vector. 
*/ + for (long i = 0; i < __builtin_popcountl (__M); i++) + { + c = __builtin_clzl (m); + p = (p << 8) | c; + m ^= (mask >> c); + } + result = __builtin_bpermd (p, __X); + } + else + { + p = 64 - __builtin_popcountl (__M); + result = 0; + /* We could a use a for loop here, but that combined with + -funroll-loops can expand to a lot of code. The while + loop avoids unrolling and the compiler commons the xor + from clearing the mask bit with the (m != 0) test. The + result is a more compact loop setup and body. */ + while (m != 0) + { + unsigned long t; + c = __builtin_clzl (m); + t = (__X & (mask >> c)) >> (p - c); + m ^= (mask >> c); + result |= (t); + p++; + } + } + return (result); +} + +/* these 32-bit implementations depend on 64-bit pdep/pext + which depend on _ARCH_PWR7. */ +extern __inline unsigned int +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_pdep_u32 (unsigned int __X, unsigned int __Y) +{ + return _pdep_u64 (__X, __Y); +} + +extern __inline unsigned int +__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_pext_u32 (unsigned int __X, unsigned int __Y) +{ + return _pext_u64 (__X, __Y); +} +#endif /* _ARCH_PWR7 */ +#endif /* __PPC64__ */ + +#endif /* _BMI2INTRIN_H_INCLUDED */ diff --git a/gcc/config/rs6000/bmiintrin.h b/gcc/config/rs6000/bmiintrin.h new file mode 100644 index 0000000..28671ca --- /dev/null +++ b/gcc/config/rs6000/bmiintrin.h @@ -0,0 +1,187 @@ +/* Copyright (C) 2010-2017 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* This header is distributed to simplify porting x86_64 code that + makes explicit use of Intel intrinsics to powerpc64le. + It is the user's responsibility to determine if the results are + acceptable and make additional changes as necessary. + Note that much code that uses Intel intrinsics can be rewritten in + standard C or GNU C extensions, which are more portable and better + optimized across multiple targets. */ + +#if !defined _X86INTRIN_H_INCLUDED +# error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead." +#endif + +#ifndef _BMIINTRIN_H_INCLUDED +#define _BMIINTRIN_H_INCLUDED + +extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__tzcnt_u16 (unsigned short __X) +{ + return __builtin_ctz (__X); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__andn_u32 (unsigned int __X, unsigned int __Y) +{ + return (~__X & __Y); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_bextr_u32 (unsigned int __X, unsigned int __P, unsigned int __L) +{ + return ((__X << (32 - (__L + __P))) >> (32 - __L)); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__bextr_u32 (unsigned int __X, unsigned int __Y) +{ + unsigned int __P, __L; + __P = __Y & 0xFF; + __L = (__Y >> 8) & 0xFF; + return (_bextr_u32 (__X, __P, __L)); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
+__blsi_u32 (unsigned int __X) +{ + return (__X & -__X); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_blsi_u32 (unsigned int __X) +{ + return __blsi_u32 (__X); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blsmsk_u32 (unsigned int __X) +{ + return (__X ^ (__X - 1)); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_blsmsk_u32 (unsigned int __X) +{ + return __blsmsk_u32 (__X); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blsr_u32 (unsigned int __X) +{ + return (__X & (__X - 1)); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_blsr_u32 (unsigned int __X) +{ + return __blsr_u32 (__X); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__tzcnt_u32 (unsigned int __X) +{ + return __builtin_ctz (__X); +} + +extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_tzcnt_u32 (unsigned int __X) +{ + return __builtin_ctz (__X); +} + +/* use the 64-bit shift, rotate, and count leading zeros instructions + for long long. 
*/ +#ifdef __PPC64__ +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__andn_u64 (unsigned long long __X, unsigned long long __Y) +{ + return (~__X & __Y); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_bextr_u64 (unsigned long long __X, unsigned int __P, unsigned int __L) +{ + return ((__X << (64 - (__L + __P))) >> (64 - __L)); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__bextr_u64 (unsigned long long __X, unsigned long long __Y) +{ + unsigned int __P, __L; + __P = __Y & 0xFF; + __L = (__Y & 0xFF00) >> 8; + return (_bextr_u64 (__X, __P, __L)); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blsi_u64 (unsigned long long __X) +{ + return __X & -__X; +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_blsi_u64 (unsigned long long __X) +{ + return __blsi_u64 (__X); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blsmsk_u64 (unsigned long long __X) +{ + return (__X ^ (__X - 1)); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_blsmsk_u64 (unsigned long long __X) +{ + return __blsmsk_u64 (__X); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__blsr_u64 (unsigned long long __X) +{ + return (__X & (__X - 1)); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_blsr_u64 (unsigned long long __X) +{ + return __blsr_u64 (__X); +} + +extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +__tzcnt_u64 (unsigned long long __X) +{ + return __builtin_ctzll (__X); +} + +extern __inline unsigned 
long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_tzcnt_u64 (unsigned long long __X) +{ + return __builtin_ctzll (__X); +} +#endif /* __PPC64__ */ + +#endif /* _BMIINTRIN_H_INCLUDED */ diff --git a/gcc/config/rs6000/x86intrin.h b/gcc/config/rs6000/x86intrin.h new file mode 100644 index 0000000..4aa33fd --- /dev/null +++ b/gcc/config/rs6000/x86intrin.h @@ -0,0 +1,43 @@ +/* Copyright (C) 2008-2017 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef NO_WARN_X86_INTRINSICS +/* This header is distributed to simplify porting x86_64 code that + makes explicit use of Intel intrinsics to powerpc64le. + It is the user's responsibility to determine if the results are + acceptable and make additional changes as necessary. + Note that much code that uses Intel intrinsics can be rewritten in + standard C or GNU C extensions, which are more portable and better + optimized across multiple targets. */ +#warning "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this warning." 
+#endif + +#ifndef _X86INTRIN_H_INCLUDED +#define _X86INTRIN_H_INCLUDED + +#include <bmiintrin.h> + +#include <bmi2intrin.h> + + +#endif /* _X86INTRIN_H_INCLUDED */ diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c00c162..3b56e73 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,34 @@ +2017-05-12 Steven Munroe <munroesj@gcc.gnu.org> + + * gcc.target/powerpc/bmi-andn-1.c: New file + * gcc.target/powerpc/bmi-andn-2.c: New file. + * gcc.target/powerpc/bmi-bextr-1.c: New file. + * gcc.target/powerpc/bmi-bextr-2.c: New file. + * gcc.target/powerpc/bmi-bextr-4.c: New file. + * gcc.target/powerpc/bmi-bextr-5.c: New file. + * gcc.target/powerpc/bmi-blsi-1.c: New file. + * gcc.target/powerpc/bmi-blsi-2.c: New file. + * gcc.target/powerpc/bmi-blsmsk-1.c: new file. + * gcc.target/powerpc/bmi-blsmsk-2.c: New file. + * gcc.target/powerpc/bmi-blsr-1.c: New file. + * gcc.target/powerpc/bmi-blsr-2.c: New File. + * gcc.target/powerpc/bmi-check.h: New File. + * gcc.target/powerpc/bmi-tzcnt-1.c: new file. + * gcc.target/powerpc/bmi-tzcnt-2.c: New file. + * gcc.target/powerpc/bmi2-bzhi32-1.c: New file. + * gcc.target/powerpc/bmi2-bzhi64-1.c: New file. + * gcc.target/powerpc/bmi2-bzhi64-1a.c: New file. + * gcc.target/powerpc/bmi2-check.h: New file. + * gcc.target/powerpc/bmi2-mulx32-1.c: New file. + * gcc.target/powerpc/bmi2-mulx32-2.c: New file. + * gcc.target/powerpc/bmi2-mulx64-1.c: New file. + * gcc.target/powerpc/bmi2-mulx64-2.c: New file. + * gcc.target/powerpc/bmi2-pdep32-1.c: New file. + * gcc.target/powerpc/bmi2-pdep64-1.c: New file. + * gcc.target/powerpc/bmi2-pext32-1.c: New File. + * gcc.target/powerpc/bmi2-pext64-1.c: New file. + * gcc.target/powerpc/bmi2-pext64-1a.c: New File. 
+ 2017-05-12 Paolo Carlini <paolo.carlini@oracle.com> PR c++/60430 diff --git a/gcc/testsuite/gcc.target/powerpc/bmi-andn-1.c b/gcc/testsuite/gcc.target/powerpc/bmi-andn-1.c new file mode 100644 index 0000000..6be887a --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi-andn-1.c @@ -0,0 +1,33 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -m64" } */ +/* { dg-require-effective-target lp64 } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> +#include "bmi-check.h" + +long long calc_andn_u64 (long long src1, + long long src2, + long long dummy) +{ + return (~src1 + dummy) & (src2); +} + +static void +bmi_test() +{ + unsigned i; + + long long src = 0xfacec0ffeefacec0; + long long res, res_ref; + + for (i=0; i<5; ++i) { + src = (i + src) << i; + + res_ref = calc_andn_u64 (src, src+i, 0); + res = __andn_u64 (src, src+i); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi-andn-2.c b/gcc/testsuite/gcc.target/powerpc/bmi-andn-2.c new file mode 100644 index 0000000..c36a520 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi-andn-2.c @@ -0,0 +1,31 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -m64" } */ +/* { dg-require-effective-target lp64 } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> +#include "bmi-check.h" + +long long calc_andn_u32 (int src1, int src2, int dummy) +{ + return (~src1+dummy) & (src2); +} + +static void +bmi_test() +{ + unsigned i; + + int src = 0xfacec0ff; + int res, res_ref; + + for (i=0; i<5; ++i) { + src = (i + src) << i; + + res_ref = calc_andn_u32 (src, src+i, 0); + res = __andn_u32 (src, src+i); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi-bextr-1.c b/gcc/testsuite/gcc.target/powerpc/bmi-bextr-1.c new file mode 100644 index 0000000..9e593eb --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi-bextr-1.c @@ -0,0 +1,50 @@ +/* { dg-do run } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O2 -m64 
-fno-inline" } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> +#include "bmi-check.h" + +long long calc_bextr_u64 (unsigned long long src1, + unsigned long long src2) +{ + long long res = 0; + unsigned char start = (src2 & 0xff); + unsigned char len = (int) ((src2 >> 8) & 0xff); + if (start < 64) { + unsigned i; + unsigned last = (start+len) < 64 ? start+len : 64; + + src1 >>= start; + for (i=start; i<last; ++i) { + res |= (src1 & 1) << (i-start); + src1 >>= 1; + } + } + + return res; +} + +static void +bmi_test () +{ + unsigned i; + unsigned char start, len; + unsigned long long src1 = 0xfacec0ffeefacec0; + unsigned long long res, res_ref, src2; + + for (i=0; i<5; ++i) { + start = (i * 1983) % 64; + len = (i + (i * 1983)) % 64; + + src1 = src1 * 3; + src2 = start | (((unsigned long long)len) << 8); + + res_ref = calc_bextr_u64 (src1, src2); + res = __bextr_u64 (src1, src2); + + if (res != res_ref) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi-bextr-2.c b/gcc/testsuite/gcc.target/powerpc/bmi-bextr-2.c new file mode 100644 index 0000000..7afe41e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi-bextr-2.c @@ -0,0 +1,49 @@ +/* { dg-do run } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O3 -m64 -fno-inline" } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> +#include "bmi-check.h" + +unsigned calc_bextr_u32 (unsigned src1, unsigned src2) +{ + unsigned res = 0; + unsigned char start = (src2 & 0xff); + unsigned char len = (int) ((src2 >> 8) & 0xff); + if (start < 32) { + unsigned i; + unsigned last = (start+len) < 32 ? 
start+len : 32; + + src1 >>= start; + for (i=start; i<last; ++i) { + res |= (src1 & 1) << (i-start); + src1 >>= 1; + } + } + + return res; +} + +static void +bmi_test () +{ + unsigned i; + unsigned char start, len; + unsigned src1 = 0xfacec0ff; + unsigned res, res_ref, src2; + + for (i=0; i<5; ++i) { + start = (i * 1983) % 32; + len = (i + (i * 1983)) % 32; + + src1 = src1 * 3; + src2 = start | (((unsigned)len) << 8); + + res_ref = calc_bextr_u32 (src1, src2); + res = __bextr_u32 (src1, src2); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi-bextr-4.c b/gcc/testsuite/gcc.target/powerpc/bmi-bextr-4.c new file mode 100644 index 0000000..23b9abf --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi-bextr-4.c @@ -0,0 +1,49 @@ +/* { dg-do run } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O3 -m64 -fno-inline" } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> +#include "bmi-check.h" + +unsigned calc_bextr_u32 (unsigned src1, unsigned src2) +{ + unsigned res = 0; + unsigned char start = (src2 & 0xff); + unsigned char len = (int) ((src2 >> 8) & 0xff); + if (start < 32) { + unsigned i; + unsigned last = (start+len) < 32 ? 
start+len : 32; + + src1 >>= start; + for (i=start; i<last; ++i) { + res |= (src1 & 1) << (i-start); + src1 >>= 1; + } + } + + return res; +} + +static void +bmi_test () +{ + unsigned i; + unsigned char start, len; + unsigned src1 = 0xfacec0ff; + unsigned res, res_ref, src2; + + for (i=0; i<5; ++i) { + start = i * 4; + len = i * 4; + + src1 = src1 * 3; + src2 = (start & 0xff) | ((len & 0xff) << 8); + + res_ref = calc_bextr_u32 (src1, src2); + res = _bextr_u32 (src1, start, len); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi-bextr-5.c b/gcc/testsuite/gcc.target/powerpc/bmi-bextr-5.c new file mode 100644 index 0000000..96101d1 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi-bextr-5.c @@ -0,0 +1,49 @@ +/* { dg-do run } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O3 -m64 -fno-inline" } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> +#include "bmi-check.h" + +long long calc_bextr_u64 (unsigned long long src1, + unsigned long long src2) +{ + long long res = 0; + unsigned char start = (src2 & 0xff); + unsigned char len = (int) ((src2 >> 8) & 0xff); + if (start < 64) { + unsigned i; + unsigned last = (start+len) < 64 ? 
start+len : 64; + + src1 >>= start; + for (i=start; i<last; ++i) { + res |= (src1 & 1) << (i-start); + src1 >>= 1; + } + } + + return res; +} + +static void +bmi_test () +{ + unsigned i; + unsigned char start, len; + unsigned long long src1 = 0xfacec0ffeefacec0; + unsigned long long res, res_ref, src2; + + for (i=0; i<5; ++i) { + start = i * 4; + len = i * 3; + src1 = src1 * 3; + src2 = (start & 0xff) | ((len & 0xff) << 8); + + res_ref = calc_bextr_u64 (src1, src2); + res = _bextr_u64 (src1, start, len); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi-blsi-1.c b/gcc/testsuite/gcc.target/powerpc/bmi-blsi-1.c new file mode 100644 index 0000000..4d42ca6 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi-blsi-1.c @@ -0,0 +1,32 @@ +/* { dg-do run } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O3 -m64 -fno-inline" } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> +#include "bmi-check.h" + +/* To fool the compiler, so it does not generate blsi here. */ +long long calc_blsi_u64 (long long src1, long long src2) +{ + return (-src1) & (src2); +} + +static void +bmi_test() +{ + unsigned i; + + long long src = 0xfacec0ffeefacec0; + long long res, res_ref; + + for (i=0; i<5; ++i) { + src = (i + src) << i; + + res_ref = calc_blsi_u64 (src, src); + res = __blsi_u64 (src); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi-blsi-2.c b/gcc/testsuite/gcc.target/powerpc/bmi-blsi-2.c new file mode 100644 index 0000000..6e3ccef --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi-blsi-2.c @@ -0,0 +1,31 @@ +/* { dg-do run } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O3 -m64 -fno-inline" } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> +#include "bmi-check.h" + +/* To fool the compiler, so it does not generate blsi here. 
*/ +int calc_blsi_u32 (int src1, int src2) +{ + return (-src1) & (src2); +} + +static void +bmi_test() +{ + unsigned i; + int src = 0xfacec0ff; + int res, res_ref; + + for (i=0; i<5; ++i) { + src = (i + src) << i; + + res_ref = calc_blsi_u32 (src, src); + res = __blsi_u32 (src); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi-blsmsk-1.c b/gcc/testsuite/gcc.target/powerpc/bmi-blsmsk-1.c new file mode 100644 index 0000000..a1b86ee --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi-blsmsk-1.c @@ -0,0 +1,31 @@ +/* { dg-do run } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O3 -m64 -fno-inline" } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> +#include "bmi-check.h" + +/* Trick compiler in order not to generate target insn here. */ +long long calc_blsmsk_u64 (long long src1, long long src2) +{ + return (src1-1) ^ (src2); +} + +static void +bmi_test () +{ + unsigned i; + long long src = 0xfacec0ffeefacec0; + long long res, res_ref; + + for (i=0; i<5; ++i) { + src = (i + src) << i; + + res_ref = calc_blsmsk_u64 (src, src); + res = __blsmsk_u64 (src); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi-blsmsk-2.c b/gcc/testsuite/gcc.target/powerpc/bmi-blsmsk-2.c new file mode 100644 index 0000000..5c4a359 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi-blsmsk-2.c @@ -0,0 +1,31 @@ +/* { dg-do run } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O3 -m64 -fno-inline" } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> +#include "bmi-check.h" + +/* Trick compiler in order not to generate target insn here. 
*/ +int calc_blsmsk_u32 (int src1, int src2) +{ + return (src1-1) ^ (src2); +} + +static void +bmi_test () +{ + unsigned i; + int src = 0xfacec0ff; + int res, res_ref; + + for (i=0; i<5; ++i) { + src = (i + src) << i; + + res_ref = calc_blsmsk_u32 (src, src); + res = __blsmsk_u32 (src); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi-blsr-1.c b/gcc/testsuite/gcc.target/powerpc/bmi-blsr-1.c new file mode 100644 index 0000000..d59d41b --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi-blsr-1.c @@ -0,0 +1,30 @@ +/* { dg-do run } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O3 -m64 -fno-inline" } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> +#include "bmi-check.h" + +long long calc_blsr_u64 (long long src1, long long src2) +{ + return (src1-1) & (src2); +} + +static void +bmi_test() +{ + unsigned i; + long long src = 0xfacec0ffeefacec0; + long long res, res_ref; + + for (i=0; i<5; ++i) { + src = (i + src) << i; + + res_ref = calc_blsr_u64 (src, src); + res = __blsr_u64 (src); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi-blsr-2.c b/gcc/testsuite/gcc.target/powerpc/bmi-blsr-2.c new file mode 100644 index 0000000..bc80e42 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi-blsr-2.c @@ -0,0 +1,30 @@ +/* { dg-do run } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O3 -m64 -fno-inline" } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> +#include "bmi-check.h" + +int calc_blsr_u32 (int src1, int src2) +{ + return (src1-1) & (src2); +} + +static void +bmi_test () +{ + unsigned i; + int src = 0xfacec0ff; + int res, res_ref; + + for (i=0; i<5; ++i) { + src = (i + src) << i; + + res_ref = calc_blsr_u32 (src, src); + res = __blsr_u32 (src); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi-check.h b/gcc/testsuite/gcc.target/powerpc/bmi-check.h new file mode 100644 
index 0000000..35a2ac2 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi-check.h @@ -0,0 +1,30 @@ +#include <stdio.h> +#include <stdlib.h> + +static void bmi_test (void); + +static void +__attribute__ ((noinline)) +do_test (void) +{ + bmi_test (); +} + +int +main () +{ + /* Need 64-bit for 64-bit longs as single instruction. */ + if ( __builtin_cpu_supports ("ppc64") ) + { + do_test (); +#ifdef DEBUG + printf ("PASSED\n"); +#endif + } +#ifdef DEBUG + else + printf ("SKIPPED\n"); +#endif + + return 0; +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi-tzcnt-1.c b/gcc/testsuite/gcc.target/powerpc/bmi-tzcnt-1.c new file mode 100644 index 0000000..572fa0c --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi-tzcnt-1.c @@ -0,0 +1,38 @@ +/* { dg-do run } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O3 -m64 -fno-inline" } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> +#include "bmi-check.h" + +long long calc_tzcnt_u64 (long long src) +{ + int i; + int res = 0; + + while ( (res<64) && ((src&1) == 0)) { + ++res; + src >>= 1; + } + + return res; +} + +static void +bmi_test () +{ + unsigned i; + long long src = 0xfacec0ffeefacec0; + long long res, res_ref; + + for (i=0; i<5; ++i) { + src = (i + src) << i; + + res_ref = calc_tzcnt_u64 (src); + res = __tzcnt_u64 (src); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi-tzcnt-2.c b/gcc/testsuite/gcc.target/powerpc/bmi-tzcnt-2.c new file mode 100644 index 0000000..37401b0 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi-tzcnt-2.c @@ -0,0 +1,37 @@ +/* { dg-do run } */ +/* { dg-require-effective-target lp64 } */ +/* { dg-options "-O3 -m64 -fno-inline" } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> +#include "bmi-check.h" + +int calc_tzcnt_u32 (int src) +{ + int i; + int res = 0; + + while ( (res<32) && ((src&1) == 0)) { + ++res; + src >>= 1; + } + return res; +} + +static void +bmi_test () +{ + unsigned i; + int src 
= 0xfacec0ff; + int res, res_ref; + + for (i=0; i<5; ++i) { + src = i + (src << i); + + res_ref = calc_tzcnt_u32 (src); + res = __tzcnt_u32 (src); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi2-bzhi32-1.c b/gcc/testsuite/gcc.target/powerpc/bmi2-bzhi32-1.c new file mode 100644 index 0000000..51a0e89 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi2-bzhi32-1.c @@ -0,0 +1,37 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -m64 -mcpu=power7" } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned +calc_bzhi_u32 (unsigned a, int l) +{ + unsigned res = a; + int i; + for (i = 0; i < 32 - l; ++i) + res &= ~(1 << (31 - i)); + + return res; +} + +static void +bmi2_test () +{ + unsigned i; + unsigned src = 0xce7ace0f; + unsigned res, res_ref; + + for (i = 0; i < 5; ++i) { + src = src * (i + 1); + + res_ref = calc_bzhi_u32 (src, i * 2); + res = _bzhi_u32 (src, i * 2); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi2-bzhi64-1.c b/gcc/testsuite/gcc.target/powerpc/bmi2-bzhi64-1.c new file mode 100644 index 0000000..49804d9 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi2-bzhi64-1.c @@ -0,0 +1,37 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -m64 -mcpu=power7" } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned long long +calc_bzhi_u64 (unsigned long long a, int l) +{ + unsigned long long res = a; + int i; + for (i = 0; i < 64 - l; ++i) + res &= ~(1LL << (63 - i)); + + return res; +} + +static void +bmi2_test () +{ + unsigned i; + unsigned long long src = 0xce7ace0ce7ace0ff; + unsigned long long res, res_ref; + + for (i = 0; i < 5; ++i) { + src = src * (i + 1); + + res_ref = calc_bzhi_u64 (src, i * 2); + res = _bzhi_u64 
(src, i * 2); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi2-bzhi64-1a.c b/gcc/testsuite/gcc.target/powerpc/bmi2-bzhi64-1a.c new file mode 100644 index 0000000..3118198 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi2-bzhi64-1a.c @@ -0,0 +1,30 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -m64 -mcpu=power7" } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> + +unsigned long long +test__bzhi_u64_group (unsigned long long a) +{ + /* bzhi is implemented in source as shift left then shift right + to clear the high order bits. + For the case where the starting index is const, the compiler + should reduce this to a single Rotate Left Doubleword + Immediate then Clear Left (rldicl) instruction. */ + unsigned long long res; + res = _bzhi_u64 (a, 8); + res += _bzhi_u64 (a, 16); + res += _bzhi_u64 (a, 24); + res += _bzhi_u64 (a, 32); + res += _bzhi_u64 (a, 40); + res += _bzhi_u64 (a, 48); + return (res); +} +/* the resulting assembler should have 6 X rldicl and no sld or + srd instructions. */ + +/* { dg-final { scan-assembler-times "rldicl" 6 } } */ +/* { dg-final { scan-assembler-not "sld" } } */ +/* { dg-final { scan-assembler-not "srd" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/bmi2-check.h b/gcc/testsuite/gcc.target/powerpc/bmi2-check.h new file mode 100644 index 0000000..fa7d4c0 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi2-check.h @@ -0,0 +1,33 @@ +#include <stdio.h> +#include <stdlib.h> + +static void bmi2_test (void); + +static void +__attribute__ ((noinline)) +do_test (void) +{ + bmi2_test (); +} + +int +main () +{ + /* The BMI2 pext test requires the Bit Permute doubleword + (bpermd) instruction added in PowerISA 2.06 along with the VSX + facility. So we can test for arch_2_06. 
*/ + if ( __builtin_cpu_supports ("arch_2_06") ) + { + do_test (); +#ifdef DEBUG + printf ("PASSED\n"); +#endif + } +#ifdef DEBUG + else + printf ("SKIPPED\n"); +#endif + + return 0; +} + diff --git a/gcc/testsuite/gcc.target/powerpc/bmi2-mulx32-1.c b/gcc/testsuite/gcc.target/powerpc/bmi2-mulx32-1.c new file mode 100644 index 0000000..5b46577 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi2-mulx32-1.c @@ -0,0 +1,49 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -m64 -mcpu=power7" } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned long long +calc_mul_u32 (unsigned volatile a, unsigned b) +{ + unsigned long long res = 0; + int i; + for (i = 0; i < b; ++i) + res += a; + + return res; +} + +__attribute__((noinline)) +unsigned long long +gen_mulx (unsigned a, unsigned b) +{ + unsigned long long res; + + res = (unsigned long long)a * b; + + return res; +} + +static void +bmi2_test () +{ + unsigned i; + unsigned a = 0xce7ace0; + unsigned b = 0xfacefff; + unsigned long long res, res_ref; + + for (i = 0; i < 5; ++i) { + a = a * (i + 1); + b = b / (i + 1); + + res_ref = calc_mul_u32 (a, b); + res = gen_mulx (a, b); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi2-mulx32-2.c b/gcc/testsuite/gcc.target/powerpc/bmi2-mulx32-2.c new file mode 100644 index 0000000..6001d8e --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi2-mulx32-2.c @@ -0,0 +1,48 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -m64 -mcpu=power7" } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned long long +calc_mul_u32 (unsigned volatile a, unsigned b) +{ + unsigned long long res = 0; + int i; + for (i = 0; i < b; ++i) + res += a; + + return res; +} + +__attribute__((noinline)) +unsigned calc_mulx_u32 (unsigned x, 
unsigned y, unsigned *res_h) +{ + return (unsigned) _mulx_u32 (x, y, res_h); +} + +static void +bmi2_test () +{ + unsigned i; + unsigned a = 0xce7ace0; + unsigned b = 0xfacefff; + unsigned res_l, res_h; + unsigned long long res, res_ref; + + for (i = 0; i < 5; ++i) { + a = a * (i + 1); + b = b / (i + 1); + + res_ref = calc_mul_u32 (a, b); + res_l = calc_mulx_u32 (a, b, &res_h); + + res = ((unsigned long long) res_h << 32) | res_l; + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi2-mulx64-1.c b/gcc/testsuite/gcc.target/powerpc/bmi2-mulx64-1.c new file mode 100644 index 0000000..2e449c2 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi2-mulx64-1.c @@ -0,0 +1,38 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -m64 -mcpu=power7" } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned __int128 +calc_mul_u64 (unsigned long long volatile a, unsigned long long b) +{ + unsigned __int128 res = 0; + int i; + for (i = 0; i < b; ++i) + res += (unsigned __int128) a; + + return res; +} + +static void +bmi2_test () +{ + unsigned i; + unsigned long long a = 0xce7ace0ce7ace0; + unsigned long long b = 0xface; + unsigned __int128 res, res_ref; + + for (i=0; i<5; ++i) { + a = a * (i + 1); + b = b / (i + 1); + + res_ref = calc_mul_u64 (a, b); + res = (unsigned __int128) a * b; + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi2-mulx64-2.c b/gcc/testsuite/gcc.target/powerpc/bmi2-mulx64-2.c new file mode 100644 index 0000000..5aa3f80 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi2-mulx64-2.c @@ -0,0 +1,53 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -m64 -mcpu=power7" } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned __int128 +calc_mul_u64 (unsigned long long 
volatile a, unsigned long long b) +{ + unsigned __int128 res = 0; + int i; + for (i = 0; i < b; ++i) + res += (unsigned __int128) a; + + return res; +} + +__attribute__((noinline)) +unsigned long long +calc_mulx_u64 (unsigned long long x, + unsigned long long y, + unsigned long long *res_h) +{ + return _mulx_u64 (x, y, res_h); +} + + +static void +bmi2_test () +{ + unsigned i; + unsigned long long a = 0xce7ace0ce7ace0; + unsigned long long b = 0xface; + unsigned long long res_l, res_h; + unsigned __int128 res, res_ref; + + for (i=0; i<5; ++i) { + a = a * (i + 1); + b = b / (i + 1); + + res_ref = calc_mul_u64 (a, b); + + res_l = calc_mulx_u64 (a, b, &res_h); + + res = ((unsigned __int128) res_h << 64) | res_l; + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi2-pdep32-1.c b/gcc/testsuite/gcc.target/powerpc/bmi2-pdep32-1.c new file mode 100644 index 0000000..b5fb327 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi2-pdep32-1.c @@ -0,0 +1,41 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -m64 -mcpu=power7" } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned +calc_pdep_u32 (unsigned a, int mask) +{ + unsigned res = 0; + int i, k = 0; + + for (i = 0; i < 32; ++i) + if (mask & (1 << i)) { + res |= ((a & (1 << k)) >> k) << i; + ++k; + } + + return res; +} + +static void +bmi2_test () +{ + unsigned i; + unsigned src = 0xce7acc; + unsigned res, res_ref; + + for (i = 0; i < 5; ++i) { + src = src * (i + 1); + + res_ref = calc_pdep_u32 (src, i * 3); + res = _pdep_u32 (src, i * 3); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi2-pdep64-1.c b/gcc/testsuite/gcc.target/powerpc/bmi2-pdep64-1.c new file mode 100644 index 0000000..6dddb64 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi2-pdep64-1.c @@ -0,0 +1,41 @@ +/* { dg-do run } */ +/* { dg-options "-O3 
-m64 -mcpu=power7" } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned long long +calc_pdep_u64 (unsigned long long a, unsigned long long mask) +{ + unsigned long long res = 0; + unsigned long long i, k = 0; + + for (i = 0; i < 64; ++i) + if (mask & (1LL << i)) { + res |= ((a & (1LL << k)) >> k) << i; + ++k; + } + return res; +} + +static +void +bmi2_test () +{ + unsigned long long i; + unsigned long long src = 0xce7acce7acce7ac; + unsigned long long res, res_ref; + + for (i = 0; i < 5; ++i) { + src = src * (i + 1); + + res_ref = calc_pdep_u64 (src, ~(i * 3)); + res = _pdep_u64 (src, ~(i * 3)); + + if (res != res_ref) + abort (); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi2-pext32-1.c b/gcc/testsuite/gcc.target/powerpc/bmi2-pext32-1.c new file mode 100644 index 0000000..9e10c73 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi2-pext32-1.c @@ -0,0 +1,41 @@ +/* { dg-do run } */ +/* { dg-options "-O3 -m64 -mcpu=power7" } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned +calc_pext_u32 (unsigned a, unsigned mask) +{ + unsigned res = 0; + int i, k = 0; + + for (i = 0; i < 32; ++i) + if (mask & (1 << i)) { + res |= ((a & (1 << i)) >> i) << k; + ++k; + } + + return res; +} + +static void +bmi2_test () +{ + unsigned i; + unsigned src = 0xce7acc; + unsigned res, res_ref; + + for (i = 0; i < 5; ++i) { + src = src * (i + 1); + + res_ref = calc_pext_u32 (src, ~(i * 3)); + res = _pext_u32 (src, ~(i * 3)); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi2-pext64-1.c b/gcc/testsuite/gcc.target/powerpc/bmi2-pext64-1.c new file mode 100644 index 0000000..76fc9ab --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi2-pext64-1.c @@ -0,0 +1,41 @@ +/* { dg-do run 
} */ +/* { dg-options "-O3 -m64 -mcpu=power7" } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> +#include "bmi2-check.h" + +__attribute__((noinline)) +unsigned long long +calc_pext_u64 (unsigned long long a, unsigned long long mask) +{ + unsigned long long res = 0; + int i, k = 0; + + for (i = 0; i < 64; ++i) + if (mask & (1LL << i)) { + res |= ((a & (1LL << i)) >> i) << k; + ++k; + } + + return res; +} + +static void +bmi2_test () +{ + unsigned long long i; + unsigned long long src = 0xce7acce7acce7ac; + unsigned long long res, res_ref; + + for (i = 0; i < 5; ++i) { + src = src * (i + 1); + + res_ref = calc_pext_u64 (src, ~(i * 3)); + res = _pext_u64 (src, ~(i * 3)); + + if (res != res_ref) + abort(); + } +} diff --git a/gcc/testsuite/gcc.target/powerpc/bmi2-pext64-1a.c b/gcc/testsuite/gcc.target/powerpc/bmi2-pext64-1a.c new file mode 100644 index 0000000..a958d10 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/bmi2-pext64-1a.c @@ -0,0 +1,34 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -m64 -mcpu=power7" } */ +/* { dg-require-effective-target powerpc_vsx_ok } */ + +#define NO_WARN_X86_INTRINSICS 1 +#include <x86intrin.h> + +unsigned long long +test__pexp_cmask_u64 (unsigned long long a[4]) +{ + /* The _pext implementation is nominally a popcount of the mask, + followed by a loop using count leading zeros to find the + next bit to process. + If the mask is a const, the popcount should be folded and + the constant propagation should eliminate the mask + generation loop and produce a single constant bpermd permute + control word. + This test verifies that the compiler is replacing the mask + popcount and loop with a const bperm control and generating + the bpermd for this case. 
*/ + const unsigned long mask = 0x00000000100000a4UL; + unsigned long res; + res = _pext_u64 (a[0], mask); + res = (res << 8) | _pext_u64 (a[1], mask); + res = (res << 8) | _pext_u64 (a[2], mask); + res = (res << 8) | _pext_u64 (a[3], mask); + return (res); +} +/* the resulting assembler should have 4 X bpermd and no popcntd or + cntlzd instructions. */ + +/* { dg-final { scan-assembler-times "bpermd" 4 } } */ +/* { dg-final { scan-assembler-not "popcntd" } } */ +/* { dg-final { scan-assembler-not "cntlzd" } } */ |