diff options
-rw-r--r-- | gcc/config/rs6000/altivec.md | 80 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.cc | 8 | ||||
-rw-r--r-- | gcc/config/rs6000/vsx.md | 28 | ||||
-rw-r--r-- | gcc/testsuite/g++.target/powerpc/pr106069.C | 119 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/pr115355.c | 37 |
5 files changed, 232 insertions, 40 deletions
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index bb20441..dcc71cc 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -1212,16 +1212,18 @@ (use (match_operand:V4SI 2 "register_operand"))] "VECTOR_MEM_ALTIVEC_P (V4SImode)" { - rtx (*fun) (rtx, rtx, rtx); - fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrghw_direct_v4si - : gen_altivec_vmrglw_direct_v4si; - if (!BYTES_BIG_ENDIAN) - std::swap (operands[1], operands[2]); - emit_insn (fun (operands[0], operands[1], operands[2])); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_altivec_vmrghw_direct_v4si_be (operands[0], + operands[1], + operands[2])); + else + emit_insn (gen_altivec_vmrglw_direct_v4si_le (operands[0], + operands[2], + operands[1])); DONE; }) -(define_insn "altivec_vmrghw_direct_<mode>" +(define_insn "altivec_vmrghw_direct_<mode>_be" [(set (match_operand:VSX_W 0 "register_operand" "=wa,v") (vec_select:VSX_W (vec_concat:<VS_double> @@ -1229,7 +1231,21 @@ (match_operand:VSX_W 2 "register_operand" "wa,v")) (parallel [(const_int 0) (const_int 4) (const_int 1) (const_int 5)])))] - "TARGET_ALTIVEC" + "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" + "@ + xxmrghw %x0,%x1,%x2 + vmrghw %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vmrghw_direct_<mode>_le" + [(set (match_operand:VSX_W 0 "register_operand" "=wa,v") + (vec_select:VSX_W + (vec_concat:<VS_double> + (match_operand:VSX_W 2 "register_operand" "wa,v") + (match_operand:VSX_W 1 "register_operand" "wa,v")) + (parallel [(const_int 2) (const_int 6) + (const_int 3) (const_int 7)])))] + "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" "@ xxmrghw %x0,%x1,%x2 vmrghw %0,%1,%2" @@ -1318,16 +1334,18 @@ (use (match_operand:V4SI 2 "register_operand"))] "VECTOR_MEM_ALTIVEC_P (V4SImode)" { - rtx (*fun) (rtx, rtx, rtx); - fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrglw_direct_v4si - : gen_altivec_vmrghw_direct_v4si; - if (!BYTES_BIG_ENDIAN) - std::swap (operands[1], operands[2]); - emit_insn (fun (operands[0], operands[1], operands[2])); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_altivec_vmrglw_direct_v4si_be (operands[0], + operands[1], + operands[2])); + else + emit_insn (gen_altivec_vmrghw_direct_v4si_le (operands[0], + operands[2], + operands[1])); DONE; }) -(define_insn "altivec_vmrglw_direct_<mode>" +(define_insn "altivec_vmrglw_direct_<mode>_be" [(set (match_operand:VSX_W 0 "register_operand" "=wa,v") (vec_select:VSX_W (vec_concat:<VS_double> @@ -1335,7 +1353,21 @@ (match_operand:VSX_W 2 "register_operand" "wa,v")) (parallel [(const_int 2) (const_int 6) (const_int 3) (const_int 7)])))] - "TARGET_ALTIVEC" + "TARGET_ALTIVEC && BYTES_BIG_ENDIAN" + "@ + xxmrglw %x0,%x1,%x2 + vmrglw %0,%1,%2" + [(set_attr "type" "vecperm")]) + +(define_insn "altivec_vmrglw_direct_<mode>_le" + [(set (match_operand:VSX_W 0 "register_operand" "=wa,v") + (vec_select:VSX_W + (vec_concat:<VS_double> + (match_operand:VSX_W 2 "register_operand" "wa,v") + (match_operand:VSX_W 1 "register_operand" "wa,v")) + (parallel [(const_int 0) (const_int 4) + (const_int 1) (const_int 5)])))] + "TARGET_ALTIVEC && !BYTES_BIG_ENDIAN" "@ xxmrglw %x0,%x1,%x2 vmrglw %0,%1,%2" @@ -3861,13 +3893,13 @@ { emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2])); emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], ve, vo)); + emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); } else { emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2])); emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], vo, ve)); + emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); } DONE; }) @@ -3886,13 +3918,13 @@ { emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2])); emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], ve, vo)); + emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); } else { emit_insn (gen_altivec_vmulouh (ve, operands[1], operands[2])); emit_insn (gen_altivec_vmuleuh (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], vo, ve)); + emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); } DONE; }) @@ -3911,13 +3943,13 @@ { emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2])); emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], ve, vo)); + emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); } else { emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2])); emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrghw_direct_v4si (operands[0], vo, ve)); + emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); } DONE; }) @@ -3936,13 +3968,13 @@ { emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2])); emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], ve, vo)); + emit_insn (gen_altivec_vmrglw (operands[0], ve, vo)); } else { emit_insn (gen_altivec_vmulosh (ve, operands[1], operands[2])); emit_insn (gen_altivec_vmulesh (vo, operands[1], operands[2])); - emit_insn (gen_altivec_vmrglw_direct_v4si (operands[0], vo, ve)); + emit_insn (gen_altivec_vmrghw (operands[0], ve, vo)); } DONE; }) diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index e4dc629..2046a83 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -23450,8 +23450,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, : CODE_FOR_altivec_vmrglh_direct, {0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23}}, {OPTION_MASK_ALTIVEC, - BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si - : CODE_FOR_altivec_vmrglw_direct_v4si, + BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct_v4si_be + : CODE_FOR_altivec_vmrglw_direct_v4si_le, {0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23}}, {OPTION_MASK_ALTIVEC, BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct @@ -23462,8 +23462,8 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1, : CODE_FOR_altivec_vmrghh_direct, {8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31}}, {OPTION_MASK_ALTIVEC, - BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si - : CODE_FOR_altivec_vmrghw_direct_v4si, + BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct_v4si_be + : CODE_FOR_altivec_vmrghw_direct_v4si_le, {8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31}}, {OPTION_MASK_P8_VECTOR, BYTES_BIG_ENDIAN ? CODE_FOR_p8_vmrgew_v4sf_direct diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index f135fa0..7a9c19a 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -4798,12 +4798,14 @@ (const_int 1) (const_int 5)])))] "VECTOR_MEM_VSX_P (<MODE>mode)" { - rtx (*fun) (rtx, rtx, rtx); - fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrghw_direct_<mode> - : gen_altivec_vmrglw_direct_<mode>; - if (!BYTES_BIG_ENDIAN) - std::swap (operands[1], operands[2]); - emit_insn (fun (operands[0], operands[1], operands[2])); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_altivec_vmrghw_direct_v4si_be (operands[0], + operands[1], + operands[2])); + else + emit_insn (gen_altivec_vmrglw_direct_v4si_le (operands[0], + operands[2], + operands[1])); DONE; } [(set_attr "type" "vecperm")]) @@ -4818,12 +4820,14 @@ (const_int 3) (const_int 7)])))] "VECTOR_MEM_VSX_P (<MODE>mode)" { - rtx (*fun) (rtx, rtx, rtx); - fun = BYTES_BIG_ENDIAN ? gen_altivec_vmrglw_direct_<mode> - : gen_altivec_vmrghw_direct_<mode>; - if (!BYTES_BIG_ENDIAN) - std::swap (operands[1], operands[2]); - emit_insn (fun (operands[0], operands[1], operands[2])); + if (BYTES_BIG_ENDIAN) + emit_insn (gen_altivec_vmrglw_direct_v4si_be (operands[0], + operands[1], + operands[2])); + else + emit_insn (gen_altivec_vmrghw_direct_v4si_le (operands[0], + operands[2], + operands[1])); DONE; } [(set_attr "type" "vecperm")]) diff --git a/gcc/testsuite/g++.target/powerpc/pr106069.C b/gcc/testsuite/g++.target/powerpc/pr106069.C new file mode 100644 index 0000000..537207d --- /dev/null +++ b/gcc/testsuite/g++.target/powerpc/pr106069.C @@ -0,0 +1,119 @@ +/* { dg-options "-O -fno-tree-forwprop -maltivec" } */ +/* { dg-require-effective-target vmx_hw } */ +/* { dg-do run } */ + +typedef __attribute__ ((altivec (vector__))) unsigned native_simd_type; + +union +{ + native_simd_type V; + int R[4]; +} store_le_vec; + +struct S +{ + S () = default; + S (unsigned B0) + { + native_simd_type val{B0}; + m_simd = val; + } + void store_le (unsigned int out[]) + { + store_le_vec.V = m_simd; + unsigned int x0 = store_le_vec.R[0]; + __builtin_memcpy (out, &x0, 4); + } + S rotl (unsigned int r) + { + native_simd_type rot{r}; + return __builtin_vec_rl (m_simd, rot); + } + void operator+= (S other) + { + m_simd = __builtin_vec_add (m_simd, other.m_simd); + } + void operator^= (S other) + { + m_simd = __builtin_vec_xor (m_simd, other.m_simd); + } + static void transpose (S &B0, S B1, S B2, S B3) + { + native_simd_type T0 = __builtin_vec_mergeh (B0.m_simd, B2.m_simd); + native_simd_type T1 = __builtin_vec_mergeh (B1.m_simd, B3.m_simd); + native_simd_type T2 = __builtin_vec_mergel (B0.m_simd, B2.m_simd); + native_simd_type T3 = __builtin_vec_mergel (B1.m_simd, B3.m_simd); + B0 = __builtin_vec_mergeh (T0, T1); + B3 = __builtin_vec_mergel (T2, T3); + } + S (native_simd_type x) : m_simd (x) {} + native_simd_type m_simd; +}; + +void +foo (unsigned int output[], unsigned state[]) +{ + S R00 = state[0]; + S R01 = state[0]; + S R02 = state[2]; + S R03 = state[0]; + S R05 = state[5]; + S R06 = state[6]; + S R07 = state[7]; + S R08 = state[8]; + S R09 = state[9]; + S R10 = state[10]; + S R11 = state[11]; + S R12 = state[12]; + S R13 = state[13]; + S R14 = state[4]; + S R15 = state[15]; + for (int r = 0; r != 10; ++r) + { + R09 += R13; + R11 += R15; + R05 ^= R09; + R06 ^= R10; + R07 ^= R11; + R07 = R07.rotl (7); + R00 += R05; + R01 += R06; + R02 += R07; + R15 ^= R00; + R12 ^= R01; + R13 ^= R02; + R00 += R05; + R01 += R06; + R02 += R07; + R15 ^= R00; + R12 = R12.rotl (8); + R13 = R13.rotl (8); + R10 += R15; + R11 += R12; + R08 += R13; + R09 += R14; + R05 ^= R10; + R06 ^= R11; + R07 ^= R08; + R05 = R05.rotl (7); + R06 = R06.rotl (7); + R07 = R07.rotl (7); + } + R00 += state[0]; + S::transpose (R00, R01, R02, R03); + R00.store_le (output); +} + +unsigned int res[1]; +unsigned main_state[]{1634760805, 60878, 2036477234, 6, + 0, 825562964, 1471091955, 1346092787, + 506976774, 4197066702, 518848283, 118491664, + 0, 0, 0, 0}; +int +main () +{ + foo (res, main_state); + if (res[0] != 0x41fcef98) + __builtin_abort (); + return 0; +} diff --git a/gcc/testsuite/gcc.target/powerpc/pr115355.c b/gcc/testsuite/gcc.target/powerpc/pr115355.c new file mode 100644 index 0000000..8955126 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr115355.c @@ -0,0 +1,37 @@ +/* { dg-do run } */ +/* { dg-require-effective-target p9vector_hw } */ +/* Force vectorization with -fno-vect-cost-model to have vector unpack + which exposes the issue in PR115355. */ +/* { dg-options "-O2 -mdejagnu-cpu=power9 -fno-vect-cost-model" } */ + +/* Verify it runs successfully. */ + +__attribute__((noipa)) +void setToIdentityGOOD(unsigned long long *mVec, unsigned int mLen) +{ + #pragma GCC novector + for (unsigned int i = 0; i < mLen; i++) + mVec[i] = i; +} + +__attribute__((noipa)) +void setToIdentityBAD(unsigned long long *mVec, unsigned int mLen) +{ + for (unsigned int i = 0; i < mLen; i++) + mVec[i] = i; +} + +unsigned long long vec1[100]; +unsigned long long vec2[100]; + +int main() +{ + unsigned int l = 29; + setToIdentityGOOD (vec1, 29); + setToIdentityBAD (vec2, 29); + + if (__builtin_memcmp (vec1, vec2, l * sizeof (vec1[0])) != 0) + __builtin_abort (); + + return 0; +} |