aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author Peter Bergner <bergner@linux.ibm.com> 2020-07-22 11:44:35 -0500
committer Peter Bergner <bergner@linux.ibm.com> 2020-07-22 13:36:28 -0500
commit ae575662833d70cb7d74b9538096c7becc79af14 (patch)
tree 54c823e58d7e47cc8d6752ac099a147bddabf64e /gcc
parent 6e1e0decc9e17a4283d1b5508e892be5215b8ab9 (diff)
downloadgcc-ae575662833d70cb7d74b9538096c7becc79af14.zip
gcc-ae575662833d70cb7d74b9538096c7becc79af14.tar.gz
gcc-ae575662833d70cb7d74b9538096c7becc79af14.tar.bz2
rs6000: __builtin_mma_disassemble_acc() doesn't store elements correctly in LE mode
PR96236 shows a problem where we don't store our 512-bit accumulators correctly in little-endian mode. The patch below detects when we're doing a little-endian memory access and stores to the correct memory locations. 2020-07-22 Peter Bergner <bergner@linux.ibm.com> gcc/ PR target/96236 * config/rs6000/rs6000-call.c (rs6000_gimple_fold_mma_builtin): Handle little-endian memory ordering. gcc/testsuite/ PR target/96236 * gcc.target/powerpc/mma-double-test.c: Update storing results for correct little-endian ordering. * gcc.target/powerpc/mma-single-test.c: Likewise.
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/config/rs6000/rs6000-call.c | 3
-rwxr-xr-x gcc/testsuite/gcc.target/powerpc/mma-double-test.c | 8
-rwxr-xr-x gcc/testsuite/gcc.target/powerpc/mma-single-test.c | 16
3 files changed, 14 insertions, 13 deletions
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 5ec3f2c..bb0fdf2 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -11154,11 +11154,12 @@ rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi)
tree src_array = build1 (VIEW_CONVERT_EXPR, array_type, src);
for (unsigned i = 0; i < 4; i++)
{
+ unsigned index = WORDS_BIG_ENDIAN ? i : 3 - i;
tree ref = build4 (ARRAY_REF, unsigned_V16QI_type_node, src_array,
build_int_cst (size_type_node, i),
NULL_TREE, NULL_TREE);
tree dst = build2 (MEM_REF, unsigned_V16QI_type_node, dst_base,
- build_int_cst (dst_type, i * 16));
+ build_int_cst (dst_type, index * 16));
gimplify_assign (dst, ref, &new_seq);
}
pop_gimplify_context (NULL);
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-double-test.c b/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
index ac84ae3..044a288 100755
--- a/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
+++ b/gcc/testsuite/gcc.target/powerpc/mma-double-test.c
@@ -12,13 +12,13 @@ typedef double v4sf_t __attribute__ ((vector_size (16)));
#define SAVE_ACC(ACC, ldc, J) \
__builtin_mma_disassemble_acc (result, ACC); \
rowC = (v4sf_t *) &CO[0*ldc+J]; \
- rowC[0] += result[3] ; \
+ rowC[0] += result[0]; \
rowC = (v4sf_t *) &CO[1*ldc+J]; \
- rowC[0] += result[2] ; \
+ rowC[0] += result[1]; \
rowC = (v4sf_t *) &CO[2*ldc+J]; \
- rowC[0] += result[1] ; \
+ rowC[0] += result[2]; \
rowC = (v4sf_t *) &CO[3*ldc+J]; \
- rowC[0] += result[0] ;
+ rowC[0] += result[3];
void
MMA (int m, int n, int k, double *A, double *B, double *C)
diff --git a/gcc/testsuite/gcc.target/powerpc/mma-single-test.c b/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
index 15369a6..7e628df 100755
--- a/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
+++ b/gcc/testsuite/gcc.target/powerpc/mma-single-test.c
@@ -12,24 +12,24 @@ typedef float v4sf_t __attribute__ ((vector_size (16)));
#define SAVE_ACC(ACC, ldc,J) \
__builtin_mma_disassemble_acc (result, ACC); \
rowC = (v4sf_t *) &CO[0*ldc+J]; \
- rowC[0] += result[3] ; \
+ rowC[0] += result[0]; \
rowC = (v4sf_t *) &CO[1*ldc+J]; \
- rowC[0] += result[2] ; \
+ rowC[0] += result[1]; \
rowC = (v4sf_t *) &CO[2*ldc+J]; \
- rowC[0] += result[1] ; \
+ rowC[0] += result[2]; \
rowC = (v4sf_t *) &CO[3*ldc+J]; \
- rowC[0] += result[0] ;
+ rowC[0] += result[3];
#define SAVE_ACC1(ACC,ldc, J) \
__builtin_mma_disassemble_acc (result, ACC); \
rowC = (v4sf_t *) &CO[4* ldc+J]; \
- rowC[0] += result[3] ; \
+ rowC[0] += result[0]; \
rowC = (v4sf_t *) &CO[5*ldc+J]; \
- rowC[0] += result[2] ; \
+ rowC[0] += result[1]; \
rowC = (v4sf_t *) &CO[6*ldc+J]; \
- rowC[0] += result[1] ; \
+ rowC[0] += result[2]; \
rowC = (v4sf_t *) &CO[7*ldc+J]; \
- rowC[0] += result[0] ;
+ rowC[0] += result[3];
void
MMA (int m, int n, int k, float *A, float *B, float *C)
{