aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Edelsohn <dje.gcc@gmail.com>2020-10-19 08:59:46 -0400
committerDavid Edelsohn <dje.gcc@gmail.com>2020-10-19 09:19:52 -0400
commit8d31eb8f17fa17f63d46651af1c69fb8eca2d04a (patch)
tree2cabaa3f9c501b5dfb3dac838d70967960f2bde7
parent2d2f4ffc97a8510e72a99ee106159aeae2627a42 (diff)
downloadgcc-8d31eb8f17fa17f63d46651af1c69fb8eca2d04a.zip
gcc-8d31eb8f17fa17f63d46651af1c69fb8eca2d04a.tar.gz
gcc-8d31eb8f17fa17f63d46651af1c69fb8eca2d04a.tar.bz2
rs6000: correct BE vextract_fp_from_short[hl] vperm masks
The xvcvhpsp instruction converts a vector of bfloat16 half precision values to single precision. The intrinsics vextract_fp_from_shorth and vextract_fp_from_shortl select the high or low four elements of a half precision vector to convert. The intrinsics use vperm to select the appropriate portion of the half precision vector and redistribute the values for the xvcvhpsp instruction. The big endian versions of the masks for the intrinsics were initialized incorrectly. This patch replaces the masks with the correct values. This corrects the failure of the builtins-3-p9-runnable.c testcase on big endian systems. Bootstrapped on powerpc-ibm-aix7.2.3.0 Power9. gcc/ChangeLog: * config/rs6000/vsx.md (vextract_fp_from_shorth): Fix vals_be. (vextract_fp_from_shortl): Same.
-rw-r--r--gcc/config/rs6000/vsx.md4
1 files changed, 2 insertions, 2 deletions
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 4ff5245..c023bc0 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5659,7 +5659,7 @@
{
int i;
int vals_le[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
- int vals_be[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
+ int vals_be[16] = {0, 0, 0, 1, 0, 0, 2, 3, 0, 0, 4, 5, 0, 0, 6, 7};
rtx rvals[16];
rtx mask = gen_reg_rtx (V16QImode);
@@ -5693,7 +5693,7 @@
"TARGET_P9_VECTOR"
{
int vals_le[16] = {7, 6, 0, 0, 5, 4, 0, 0, 3, 2, 0, 0, 1, 0, 0, 0};
- int vals_be[16] = {15, 14, 0, 0, 13, 12, 0, 0, 11, 10, 0, 0, 9, 8, 0, 0};
+ int vals_be[16] = {0, 0, 8, 9, 0, 0, 10, 11, 0, 0, 12, 13, 0, 0, 14, 15};
int i;
rtx rvals[16];