author     Andrew Stubbs <ams@codesourcery.com>    2023-04-27 15:34:28 +0100
committer  Andrew Stubbs <ams@codesourcery.com>    2023-06-19 12:35:35 +0100
commit     d9d6774527bccc5ce0394851aa232f8abdaade4c (patch)
tree       089556cc71cd794fc7a61351af5a8a50c1a9944a /libgcc
parent     1ff8ba48a2958b5917653e1bc2ddd5ff22097fe7 (diff)
download   gcc-d9d6774527bccc5ce0394851aa232f8abdaade4c.zip
           gcc-d9d6774527bccc5ce0394851aa232f8abdaade4c.tar.gz
           gcc-d9d6774527bccc5ce0394851aa232f8abdaade4c.tar.bz2
amdgcn: implement vector div and mod libfuncs
Also divmod, but only for scalar modes, for now (because there are no
complex int vectors yet).

gcc/ChangeLog:

	* config/gcn/gcn.cc (gcn_expand_divmod_libfunc): New function.
	(gcn_init_libfuncs): Add div and mod functions for all modes.
	Add placeholders for divmod functions.
	(TARGET_EXPAND_DIVMOD_LIBFUNC): Define.

libgcc/ChangeLog:

	* config/gcn/lib2-divmod-di.c: Reimplement like lib2-divmod.c.
	* config/gcn/lib2-divmod.c: Likewise.
	* config/gcn/lib2-gcn.h: Add new types and prototypes for all
	the new vector libfuncs.
	* config/gcn/t-amdgcn: Add new files.
	* config/gcn/amdgcn_veclib.h: New file.
	* config/gcn/lib2-vec_divmod-di.c: New file.
	* config/gcn/lib2-vec_divmod-hi.c: New file.
	* config/gcn/lib2-vec_divmod-qi.c: New file.
	* config/gcn/lib2-vec_divmod.c: New file.

gcc/testsuite/ChangeLog:

	* gcc.dg/tree-ssa/predcom-2.c: Avoid vectors on amdgcn.
	* gcc.dg/unroll-8.c: Likewise.
	* gcc.dg/vect/slp-26.c: Change expected results on amdgcn.
	* lib/target-supports.exp
	(check_effective_target_vect_int_mod): Add amdgcn.
	(check_effective_target_divmod): Likewise.
	* gcc.target/gcn/simd-math-3-16.c: New test.
	* gcc.target/gcn/simd-math-3-2.c: New test.
	* gcc.target/gcn/simd-math-3-32.c: New test.
	* gcc.target/gcn/simd-math-3-4.c: New test.
	* gcc.target/gcn/simd-math-3-8.c: New test.
	* gcc.target/gcn/simd-math-3-char-16.c: New test.
	* gcc.target/gcn/simd-math-3-char-2.c: New test.
	* gcc.target/gcn/simd-math-3-char-32.c: New test.
	* gcc.target/gcn/simd-math-3-char-4.c: New test.
	* gcc.target/gcn/simd-math-3-char-8.c: New test.
	* gcc.target/gcn/simd-math-3-char-run-16.c: New test.
	* gcc.target/gcn/simd-math-3-char-run-2.c: New test.
	* gcc.target/gcn/simd-math-3-char-run-32.c: New test.
	* gcc.target/gcn/simd-math-3-char-run-4.c: New test.
	* gcc.target/gcn/simd-math-3-char-run-8.c: New test.
	* gcc.target/gcn/simd-math-3-char-run.c: New test.
	* gcc.target/gcn/simd-math-3-char.c: New test.
	* gcc.target/gcn/simd-math-3-long-16.c: New test.
	* gcc.target/gcn/simd-math-3-long-2.c: New test.
	* gcc.target/gcn/simd-math-3-long-32.c: New test.
	* gcc.target/gcn/simd-math-3-long-4.c: New test.
	* gcc.target/gcn/simd-math-3-long-8.c: New test.
	* gcc.target/gcn/simd-math-3-long-run-16.c: New test.
	* gcc.target/gcn/simd-math-3-long-run-2.c: New test.
	* gcc.target/gcn/simd-math-3-long-run-32.c: New test.
	* gcc.target/gcn/simd-math-3-long-run-4.c: New test.
	* gcc.target/gcn/simd-math-3-long-run-8.c: New test.
	* gcc.target/gcn/simd-math-3-long-run.c: New test.
	* gcc.target/gcn/simd-math-3-long.c: New test.
	* gcc.target/gcn/simd-math-3-run-16.c: New test.
	* gcc.target/gcn/simd-math-3-run-2.c: New test.
	* gcc.target/gcn/simd-math-3-run-32.c: New test.
	* gcc.target/gcn/simd-math-3-run-4.c: New test.
	* gcc.target/gcn/simd-math-3-run-8.c: New test.
	* gcc.target/gcn/simd-math-3-run.c: New test.
	* gcc.target/gcn/simd-math-3-short-16.c: New test.
	* gcc.target/gcn/simd-math-3-short-2.c: New test.
	* gcc.target/gcn/simd-math-3-short-32.c: New test.
	* gcc.target/gcn/simd-math-3-short-4.c: New test.
	* gcc.target/gcn/simd-math-3-short-8.c: New test.
	* gcc.target/gcn/simd-math-3-short-run-16.c: New test.
	* gcc.target/gcn/simd-math-3-short-run-2.c: New test.
	* gcc.target/gcn/simd-math-3-short-run-32.c: New test.
	* gcc.target/gcn/simd-math-3-short-run-4.c: New test.
	* gcc.target/gcn/simd-math-3-short-run-8.c: New test.
	* gcc.target/gcn/simd-math-3-short-run.c: New test.
	* gcc.target/gcn/simd-math-3-short.c: New test.
	* gcc.target/gcn/simd-math-3.c: New test.
	* gcc.target/gcn/simd-math-4-char-run.c: New test.
	* gcc.target/gcn/simd-math-4-char.c: New test.
	* gcc.target/gcn/simd-math-4-long-run.c: New test.
	* gcc.target/gcn/simd-math-4-long.c: New test.
	* gcc.target/gcn/simd-math-4-run.c: New test.
	* gcc.target/gcn/simd-math-4-short-run.c: New test.
	* gcc.target/gcn/simd-math-4-short.c: New test.
	* gcc.target/gcn/simd-math-4.c: New test.
	* gcc.target/gcn/simd-math-5-16.c: New test.
	* gcc.target/gcn/simd-math-5-32.c: New test.
	* gcc.target/gcn/simd-math-5-4.c: New test.
	* gcc.target/gcn/simd-math-5-8.c: New test.
	* gcc.target/gcn/simd-math-5-char-16.c: New test.
	* gcc.target/gcn/simd-math-5-char-32.c: New test.
	* gcc.target/gcn/simd-math-5-char-4.c: New test.
	* gcc.target/gcn/simd-math-5-char-8.c: New test.
	* gcc.target/gcn/simd-math-5-char-run-16.c: New test.
	* gcc.target/gcn/simd-math-5-char-run-32.c: New test.
	* gcc.target/gcn/simd-math-5-char-run-4.c: New test.
	* gcc.target/gcn/simd-math-5-char-run-8.c: New test.
	* gcc.target/gcn/simd-math-5-char-run.c: New test.
	* gcc.target/gcn/simd-math-5-char.c: New test.
	* gcc.target/gcn/simd-math-5-long-16.c: New test.
	* gcc.target/gcn/simd-math-5-long-32.c: New test.
	* gcc.target/gcn/simd-math-5-long-4.c: New test.
	* gcc.target/gcn/simd-math-5-long-8.c: New test.
	* gcc.target/gcn/simd-math-5-long-run-16.c: New test.
	* gcc.target/gcn/simd-math-5-long-run-32.c: New test.
	* gcc.target/gcn/simd-math-5-long-run-4.c: New test.
	* gcc.target/gcn/simd-math-5-long-run-8.c: New test.
	* gcc.target/gcn/simd-math-5-long-run.c: New test.
	* gcc.target/gcn/simd-math-5-long.c: New test.
	* gcc.target/gcn/simd-math-5-run-16.c: New test.
	* gcc.target/gcn/simd-math-5-run-32.c: New test.
	* gcc.target/gcn/simd-math-5-run-4.c: New test.
	* gcc.target/gcn/simd-math-5-run-8.c: New test.
	* gcc.target/gcn/simd-math-5-run.c: New test.
	* gcc.target/gcn/simd-math-5-short-16.c: New test.
	* gcc.target/gcn/simd-math-5-short-32.c: New test.
	* gcc.target/gcn/simd-math-5-short-4.c: New test.
	* gcc.target/gcn/simd-math-5-short-8.c: New test.
	* gcc.target/gcn/simd-math-5-short-run-16.c: New test.
	* gcc.target/gcn/simd-math-5-short-run-32.c: New test.
	* gcc.target/gcn/simd-math-5-short-run-4.c: New test.
	* gcc.target/gcn/simd-math-5-short-run-8.c: New test.
	* gcc.target/gcn/simd-math-5-short-run.c: New test.
	* gcc.target/gcn/simd-math-5-short.c: New test.
	* gcc.target/gcn/simd-math-5.c: New test.
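The new simd-math tests themselves fall outside the libgcc-limited diff
below.  As an illustration only (the function name is hypothetical, not
taken from the test files), the shape of loop they exercise, which the
amdgcn vectorizer can now lower to the new libfuncs such as __divv64si3,
__modv64si3 and __divmodv64si4, is roughly:

/* Hypothetical example: with vectorization enabled for amdgcn, each of
   these element-wise operations becomes a call into the new vector
   division libfuncs instead of blocking vectorization.  */
void
vec_divmod_kernel (int *__restrict q, int *__restrict r,
                   int *__restrict a, int *__restrict b, int n)
{
  for (int i = 0; i < n; i++)
    {
      q[i] = a[i] / b[i];   /* lowered to __divv64si3 (or a divmod call) */
      r[i] = a[i] % b[i];   /* lowered to __modv64si3 */
    }
}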
Diffstat (limited to 'libgcc')
-rw-r--r--  libgcc/config/gcn/amdgcn_veclib.h       | 322
-rw-r--r--  libgcc/config/gcn/lib2-divmod-di.c      | 105
-rw-r--r--  libgcc/config/gcn/lib2-divmod.c         |  82
-rw-r--r--  libgcc/config/gcn/lib2-gcn.h            | 114
-rw-r--r--  libgcc/config/gcn/lib2-vec_divmod-di.c  | 118
-rw-r--r--  libgcc/config/gcn/lib2-vec_divmod-hi.c  | 118
-rw-r--r--  libgcc/config/gcn/lib2-vec_divmod-qi.c  | 118
-rw-r--r--  libgcc/config/gcn/lib2-vec_divmod.c     | 118
-rw-r--r--  libgcc/config/gcn/t-amdgcn              |   4
9 files changed, 1052 insertions, 47 deletions
diff --git a/libgcc/config/gcn/amdgcn_veclib.h b/libgcc/config/gcn/amdgcn_veclib.h
new file mode 100644
index 0000000..15ea20b
--- /dev/null
+++ b/libgcc/config/gcn/amdgcn_veclib.h
@@ -0,0 +1,322 @@
+/* Macro library used to help during conversion of scalar math functions to
+ vectorized SIMD equivalents on AMD GCN.
+
+ Copyright (C) 2023 Free Software Foundation, Inc.
+ Contributed by Siemens.
+
+ This file is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 3, or (at your option) any
+ later version.
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+typedef union {
+ v2sf t_v2sf;
+ v4sf t_v4sf;
+ v8sf t_v8sf;
+ v16sf t_v16sf;
+ v32sf t_v32sf;
+ v64sf t_v64sf;
+
+ v2df t_v2df;
+ v4df t_v4df;
+ v8df t_v8df;
+ v16df t_v16df;
+ v32df t_v32df;
+ v64df t_v64df;
+
+ v64qi t_v64qi;
+ v64hi t_v64hi;
+
+ v2si t_v2si;
+ v4si t_v4si;
+ v8si t_v8si;
+ v16si t_v16si;
+ v32si t_v32si;
+ v64si t_v64si;
+
+ v64usi t_v64usi;
+
+ v2di t_v2di;
+ v4di t_v4di;
+ v8di t_v8di;
+ v16di t_v16di;
+ v32di t_v32di;
+ v64di t_v64di;
+} vector_union;
+
+/* Cast between vectors with a different number of elements, or type. */
+
+#define VGPR_CAST(to_t, from) \
+({ \
+ to_t __res; \
+ __asm__ ("" : "=v"(__res) : "0"(from)); \
+ __res; \
+})
+
+#define PACK_SI_PAIR(low, high) \
+({ \
+ v64udi __res; \
+ asm ("v_mov_b32\t%L0, %1\n\t" \
+ "v_mov_b32\t%H0, %2" \
+ : "=&v"(__res) : "v0"(low), "v"(high), "e"(-1L)); \
+ __res; \
+ })
+
+#define UNPACK_SI_LOW(to_t, pair) VGPR_CAST(to_t, pair)
+#define UNPACK_SI_HIGH(to_t, pair) \
+({ \
+ to_t __res; \
+ asm ("v_mov_b32\t%0, %H1" : "=v"(__res) : "v"(pair), "e"(-1L)); \
+ __res; \
+ })
+
+#define PACK_DI_PAIR(low, high) \
+({ \
+ v64uti __res; \
+ asm ("v_mov_b32\t%L0, %L1\n\t" \
+ "v_mov_b32\t%H0, %H1\n\t" \
+ "v_mov_b32\t%J0, %L2\n\t" \
+ "v_mov_b32\t%K0, %H2" \
+ : "=&v"(__res) : "v0"(low), "v"(high), "e"(-1L)); \
+ __res; \
+ })
+
+#define UNPACK_DI_LOW(to_t, pair) VGPR_CAST(to_t, pair)
+#define UNPACK_DI_HIGH(to_t, pair) \
+({ \
+ to_t __res; \
+ asm ("v_mov_b32\t%L0, %J1\n\t" \
+ "v_mov_b32\t%H0, %K1" : "=v"(__res) : "v"(pair), "e"(-1L)); \
+ __res; \
+ })
+
+#define NO_COND __mask
+
+/* Note - __mask is _not_ accounted for in VECTOR_MERGE! */
+#define VECTOR_MERGE(vec1, vec2, cond) \
+({ \
+ _Static_assert (__builtin_types_compatible_p (typeof (vec1), typeof (vec2))); \
+ union { \
+ typeof (vec1) val; \
+ v64qi t_v64qi; \
+ v64hi t_v64hi; \
+ v64si t_v64si; \
+ v64di t_v64di; \
+ } __vec1, __vec2, __res; \
+ __vec1.val = (vec1); \
+ __vec2.val = (vec2); \
+ __builtin_choose_expr ( \
+ sizeof (vec1) == sizeof (v64si), \
+ ({ \
+ v64si __bitmask = __builtin_convertvector ((cond), v64si); \
+ __res.t_v64si = (__vec1.t_v64si & __bitmask) \
+ | (__vec2.t_v64si & ~__bitmask); \
+ }), \
+ __builtin_choose_expr ( \
+ sizeof (vec1) == sizeof (v64hi), \
+ ({ \
+ v64hi __bitmask = __builtin_convertvector ((cond), v64hi); \
+ __res.t_v64hi = (__vec1.t_v64hi & __bitmask) \
+ | (__vec2.t_v64hi & ~__bitmask); \
+ }), \
+ __builtin_choose_expr ( \
+ sizeof (vec1) == sizeof (v64qi), \
+ ({ \
+ v64qi __bitmask = __builtin_convertvector ((cond), v64qi); \
+ __res.t_v64qi = (__vec1.t_v64qi & __bitmask) \
+ | (__vec2.t_v64qi & ~__bitmask); \
+ }), \
+ ({ \
+ v64di __bitmask = __builtin_convertvector ((cond), v64di); \
+ __res.t_v64di = (__vec1.t_v64di & __bitmask) \
+ | (__vec2.t_v64di & ~__bitmask); \
+ })))); \
+ __res.val; \
+})
+
+#define VECTOR_COND_MOVE(var, val, cond) \
+do { \
+ _Static_assert (__builtin_types_compatible_p (typeof (var), typeof (val))); \
+ __auto_type __cond = __builtin_convertvector ((cond), typeof (__mask)); \
+ var = VECTOR_MERGE ((val), var, __cond & __mask); \
+} while (0)
+
+#define VECTOR_IF(cond, cond_var) \
+{ \
+ __auto_type cond_var = (cond); \
+ __auto_type __inv_cond __attribute__((unused)) = ~cond_var; \
+ if (!ALL_ZEROES_P (cond_var)) \
+ {
+
+#define VECTOR_ELSEIF(cond, cond_var) \
+ } \
+ cond_var = __inv_cond & (cond); \
+ __inv_cond &= ~(cond); \
+ if (!ALL_ZEROES_P (cond_var)) \
+ {
+
+#define VECTOR_ELSE(cond_var) \
+ } \
+ cond_var = __inv_cond; \
+ if (!ALL_ZEROES_P (cond_var)) \
+ {
+
+#define VECTOR_IF2(cond, cond_var, prev_cond_var) \
+{ \
+ __auto_type cond_var = (cond) & __builtin_convertvector (prev_cond_var, typeof (cond)); \
+ __auto_type __inv_cond __attribute__((unused)) = ~cond_var; \
+ if (!ALL_ZEROES_P (cond_var)) \
+ {
+
+#define VECTOR_ELSEIF2(cond, cond_var, prev_cond_var) \
+ } \
+ cond_var = (cond) & __inv_cond & __builtin_convertvector (prev_cond_var, typeof (cond)); \
+ __inv_cond &= ~(cond); \
+ if (!ALL_ZEROES_P (cond_var)) \
+ {
+
+#define VECTOR_ELSE2(cond_var, prev_cond_var) \
+ } \
+ cond_var = __inv_cond & __builtin_convertvector (prev_cond_var, typeof (__inv_cond)); \
+ if (!ALL_ZEROES_P (cond_var)) \
+ {
+
+
+#define VECTOR_ENDIF \
+ } \
+}
+
+#define VECTOR_INIT_AUX(x, type) \
+({ \
+ typeof (x) __e = (x); \
+ type __tmp = { \
+ __e, __e, __e, __e, __e, __e, __e, __e, \
+ __e, __e, __e, __e, __e, __e, __e, __e, \
+ __e, __e, __e, __e, __e, __e, __e, __e, \
+ __e, __e, __e, __e, __e, __e, __e, __e, \
+ __e, __e, __e, __e, __e, __e, __e, __e, \
+ __e, __e, __e, __e, __e, __e, __e, __e, \
+ __e, __e, __e, __e, __e, __e, __e, __e, \
+ __e, __e, __e, __e, __e, __e, __e, __e }; \
+ __tmp; \
+})
+
+#define VECTOR_INIT(x) \
+ (_Generic ((x), int: VECTOR_INIT_AUX ((x), v64si), \
+ unsigned: VECTOR_INIT_AUX ((x), v64usi), \
+ char: VECTOR_INIT_AUX ((x), v64qi), \
+ unsigned char: VECTOR_INIT_AUX ((x), v64uqi), \
+ short: VECTOR_INIT_AUX ((x), v64hi), \
+ unsigned short: VECTOR_INIT_AUX ((x), v64uhi), \
+ long: VECTOR_INIT_AUX ((x), v64di), \
+ unsigned long: VECTOR_INIT_AUX ((x), v64udi), \
+ float: VECTOR_INIT_AUX ((x), v64sf), \
+ double: VECTOR_INIT_AUX ((x), v64df)))
+
+
+#if defined (__GCN3__) || defined (__GCN5__) \
+ || defined (__CDNA1__) || defined (__CDNA2__)
+#define CDNA3_PLUS 0
+#else
+#define CDNA3_PLUS 1
+#endif
+
+#define VECTOR_INIT_MASK(COUNT) \
+({ \
+ MASKMODE __mask; \
+ int count = (COUNT); \
+ if (count == 64) \
+ { \
+ if (sizeof (MASKMODE) < 512 || CDNA3_PLUS) \
+ asm ("v_mov%B0\t%0, -1" : "=v"(__mask) : "e"(-1L)); \
+ else \
+ asm ("v_mov_b32\t%L0, -1\n\t" \
+ "v_mov_b32\t%H0, -1" : "=v"(__mask) : "e"(-1L)); \
+ } \
+ else \
+ { \
+ long bitmask = (count == 64 ? -1 : (1L<<count)-1); \
+ if (sizeof (MASKMODE) < 512 || CDNA3_PLUS) \
+ { \
+ asm ("v_mov%B0\t%0, 0" : "=v"(__mask) : "e"(-1L)); \
+ asm ("v_mov%B0\t%0, -1" : "+v"(__mask) : "e"(bitmask)); \
+ } \
+ else \
+ { \
+ asm ("v_mov_b32\t%L0, 0\n\t" \
+ "v_mov_b32\t%H0, 0" : "=v"(__mask) : "e"(-1L)); \
+ asm ("v_mov_b32\t%L0, -1\n\t" \
+ "v_mov_b32\t%H0, -1" : "+v"(__mask) : "e"(bitmask)); \
+ } \
+ } \
+ __mask; \
+})
+
+#define ALL_ZEROES_P(x) (COND_TO_BITMASK(x) == 0)
+
+#define COND_TO_BITMASK(x) \
+({ \
+ long __tmp = 0; \
+ __auto_type __x = __builtin_convertvector((x), typeof (__mask)) & __mask; \
+ __builtin_choose_expr (sizeof (__mask) != 512, \
+ ({ asm ("v_cmp_ne_u32_e64 %0, %1, 0" \
+ : "=Sg" (__tmp) \
+ : "v" (__x)); }), \
+ ({ asm ("v_cmp_ne_u64_e64 %0, %1, 0" \
+ : "=Sg" (__tmp) \
+ : "v" (__x)); })); \
+ __tmp; \
+})
+
+#define VECTOR_WHILE(cond, cond_var, prev_cond_var) \
+{ \
+ __auto_type cond_var = prev_cond_var; \
+ for (;;) { \
+ cond_var &= (cond); \
+ if (ALL_ZEROES_P (cond_var)) \
+ break;
+
+#define VECTOR_ENDWHILE \
+ } \
+}
+
+#define DEF_VARIANT(FUN, SUFFIX, OTYPE, TYPE, COUNT) \
+v##COUNT##OTYPE \
+FUN##v##COUNT##SUFFIX (v##COUNT##TYPE __arg1, v##COUNT##TYPE __arg2) \
+{ \
+ __auto_type __upsized_arg1 = VGPR_CAST (v64##TYPE, __arg1); \
+ __auto_type __upsized_arg2 = VGPR_CAST (v64##TYPE, __arg2); \
+ __auto_type __mask = VECTOR_INIT_MASK (COUNT); \
+ __auto_type __result = FUN##v64##SUFFIX##_aux (__upsized_arg1, __upsized_arg2, __mask); \
+ return VGPR_CAST (v##COUNT##OTYPE, __result); \
+}
+
+#define DEF_VARIANTS(FUN, SUFFIX, TYPE) \
+ DEF_VARIANT (FUN, SUFFIX, TYPE, TYPE, 2) \
+ DEF_VARIANT (FUN, SUFFIX, TYPE, TYPE, 4) \
+ DEF_VARIANT (FUN, SUFFIX, TYPE, TYPE, 8) \
+ DEF_VARIANT (FUN, SUFFIX, TYPE, TYPE, 16) \
+ DEF_VARIANT (FUN, SUFFIX, TYPE, TYPE, 32) \
+ DEF_VARIANT (FUN, SUFFIX, TYPE, TYPE, 64)
+
+#define DEF_VARIANTS_B(FUN, SUFFIX, OTYPE, TYPE) \
+ DEF_VARIANT (FUN, SUFFIX, OTYPE, TYPE, 2) \
+ DEF_VARIANT (FUN, SUFFIX, OTYPE, TYPE, 4) \
+ DEF_VARIANT (FUN, SUFFIX, OTYPE, TYPE, 8) \
+ DEF_VARIANT (FUN, SUFFIX, OTYPE, TYPE, 16) \
+ DEF_VARIANT (FUN, SUFFIX, OTYPE, TYPE, 32) \
+ DEF_VARIANT (FUN, SUFFIX, OTYPE, TYPE, 64)
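amdgcn_veclib.h gives the divmod files below a branchless, whole-wavefront
programming model: conditions are lane masks, and VECTOR_IF /
VECTOR_COND_MOVE touch only the active lanes.  A minimal sketch of the
style, using a hypothetical helper that is not part of this commit:

/* Hypothetical example of the macro style: a masked vector absolute
   value in the same shape as the _aux workers below.  MASKMODE must be
   defined before including the header, exactly as the
   lib2-vec_divmod*.c files do.  */
#include "lib2-gcn.h"

#define MASKMODE v64si
#include "amdgcn_veclib.h"

static v64si
vabs_aux (v64si a, v64si __mask)
{
  VECTOR_IF (a < 0, cond)            /* cond selects the negative lanes */
    VECTOR_COND_MOVE (a, -a, cond);  /* negate only those lanes */
  VECTOR_ENDIF
  return a;                          /* other lanes pass through unchanged */
}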
diff --git a/libgcc/config/gcn/lib2-divmod-di.c b/libgcc/config/gcn/lib2-divmod-di.c
index a902377..d0385f3 100644
--- a/libgcc/config/gcn/lib2-divmod-di.c
+++ b/libgcc/config/gcn/lib2-divmod-di.c
@@ -22,14 +22,101 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#include "lib2-gcn.h"
-/* We really want DImode here: override LIBGCC2_UNITS_PER_WORD. */
-#define LIBGCC2_UNITS_PER_WORD 4
-#define TARGET_HAS_NO_HW_DIVIDE
+/* 64-bit DI divide and modulo as used in gcn. */
-#define L_divmoddi4
-#define L_divdi3
-#define L_moddi3
-#define L_udivdi3
-#define L_umoddi3
+union pack {
+ UTItype ti;
+ struct {DItype quot, rem;} pair;
+};
+union upack {
+ UTItype ti;
+ struct {UDItype quot, rem;} pair;
+};
+
+UTItype
+__udivmoddi4 (UDItype num, UDItype den)
+{
+ UDItype bit = 1;
+ union upack res = {0};
+
+ while (den < num && bit && !(den & (1L<<63)))
+ {
+ den <<=1;
+ bit <<=1;
+ }
+ while (bit)
+ {
+ if (num >= den)
+ {
+ num -= den;
+ res.pair.quot |= bit;
+ }
+ bit >>=1;
+ den >>=1;
+ }
+ res.pair.rem = num;
+ return res.ti;
+}
+
+UTItype
+__divmoddi4 (DItype a, DItype b)
+{
+ word_type nega = 0, negb = 0;
+ union pack res;
+
+ if (a < 0)
+ {
+ a = -a;
+ nega = 1;
+ }
+
+ if (b < 0)
+ {
+ b = -b;
+ negb = 1;
+ }
+
+ res.ti = __udivmoddi4 (a, b);
+
+ if (nega)
+ res.pair.rem = -res.pair.rem;
+ if (nega ^ negb)
+ res.pair.quot = -res.pair.quot;
+
+ return res.ti;
+}
+
+
+DItype
+__divdi3 (DItype a, DItype b)
+{
+ union pack u;
+ u.ti = __divmoddi4 (a, b);
+ return u.pair.quot;
+}
+
+DItype
+__moddi3 (DItype a, DItype b)
+{
+ union pack u;
+ u.ti = __divmoddi4 (a, b);
+ return u.pair.rem;
+}
+
+
+UDItype
+__udivdi3 (UDItype a, UDItype b)
+{
+ union pack u;
+ u.ti = __udivmoddi4 (a, b);
+ return u.pair.quot;
+}
+
+UDItype
+__umoddi3 (UDItype a, UDItype b)
+{
+ union pack u;
+ u.ti = __udivmoddi4 (a, b);
+ return u.pair.rem;
+}
-#include "libgcc2.c"
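Both __divmoddi4 and __udivmoddi4 return the quotient/remainder pair
packed into a single TImode value, which is what the new
TARGET_EXPAND_DIVMOD_LIBFUNC hook on the compiler side unpacks.  A sketch
of a hand-written caller (the helper is hypothetical, not part of the
commit):

/* Hypothetical caller showing the packing convention: on this
   little-endian target the first struct member (quot) is the low half
   of the TImode value, matching `union pack' above.  */
#include "lib2-gcn.h"

static void
divmod64 (DItype a, DItype b, DItype *quot, DItype *rem)
{
  union { UTItype ti; struct { DItype quot, rem; } pair; } u;
  u.ti = __divmoddi4 (a, b);
  *quot = u.pair.quot;  /* low 64 bits */
  *rem = u.pair.rem;    /* high 64 bits */
}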
diff --git a/libgcc/config/gcn/lib2-divmod.c b/libgcc/config/gcn/lib2-divmod.c
index c350f78..d701d1a 100644
--- a/libgcc/config/gcn/lib2-divmod.c
+++ b/libgcc/config/gcn/lib2-divmod.c
@@ -24,11 +24,20 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
/* 32-bit SI divide and modulo as used in gcn. */
-static USItype
-udivmodsi4 (USItype num, USItype den, word_type modwanted)
+union pack {
+ UDItype di;
+ struct {SItype quot, rem;} pair;
+};
+union upack {
+ UDItype di;
+ struct {USItype quot, rem;} pair;
+};
+
+UDItype
+__udivmodsi4 (USItype num, USItype den)
{
USItype bit = 1;
- USItype res = 0;
+ union upack res = {0};
while (den < num && bit && !(den & (1L<<31)))
{
@@ -40,78 +49,75 @@ udivmodsi4 (USItype num, USItype den, word_type modwanted)
if (num >= den)
{
num -= den;
- res |= bit;
+ res.pair.quot |= bit;
}
bit >>=1;
den >>=1;
}
- if (modwanted)
- return num;
- return res;
+ res.pair.rem = num;
+ return res.di;
}
-
-SItype
-__divsi3 (SItype a, SItype b)
+UDItype
+__divmodsi4 (SItype a, SItype b)
{
- word_type neg = 0;
- SItype res;
+ word_type nega = 0, negb = 0;
+ union pack res;
if (a < 0)
{
a = -a;
- neg = !neg;
+ nega = 1;
}
if (b < 0)
{
b = -b;
- neg = !neg;
+ negb = 1;
}
- res = udivmodsi4 (a, b, 0);
+ res.di = __udivmodsi4 (a, b);
- if (neg)
- res = -res;
+ if (nega)
+ res.pair.rem = -res.pair.rem;
+ if (nega ^ negb)
+ res.pair.quot = -res.pair.quot;
- return res;
+ return res.di;
}
SItype
-__modsi3 (SItype a, SItype b)
+__divsi3 (SItype a, SItype b)
{
- word_type neg = 0;
- SItype res;
-
- if (a < 0)
- {
- a = -a;
- neg = 1;
- }
-
- if (b < 0)
- b = -b;
-
- res = udivmodsi4 (a, b, 1);
-
- if (neg)
- res = -res;
+ union pack u;
+ u.di = __divmodsi4 (a, b);
+ return u.pair.quot;
+}
- return res;
+SItype
+__modsi3 (SItype a, SItype b)
+{
+ union pack u;
+ u.di = __divmodsi4 (a, b);
+ return u.pair.rem;
}
USItype
__udivsi3 (USItype a, USItype b)
{
- return udivmodsi4 (a, b, 0);
+ union pack u;
+ u.di = __udivmodsi4 (a, b);
+ return u.pair.quot;
}
USItype
__umodsi3 (USItype a, USItype b)
{
- return udivmodsi4 (a, b, 1);
+ union pack u;
+ u.di = __udivmodsi4 (a, b);
+ return u.pair.rem;
}
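The algorithm itself is plain restoring (shift-and-subtract) division.  A
quick sanity check, written as a hypothetical host-side transcription
with plain C types instead of the mode typedefs:

/* Hypothetical host-side copy of __udivmodsi4's loops.  For num=100,
   den=7: the first loop shifts den/bit up to 112/16, the second peels
   off 56, 28 and 14, ending with quot=14 and rem=2.  */
#include <assert.h>

static void
check_udivmod (void)
{
  unsigned num = 100, den = 7, bit = 1, quot = 0;
  while (den < num && bit && !(den & (1U << 31)))
    {
      den <<= 1;
      bit <<= 1;
    }
  while (bit)
    {
      if (num >= den)
        {
          num -= den;   /* subtract the shifted divisor... */
          quot |= bit;  /* ...and set the matching quotient bit */
        }
      bit >>= 1;
      den >>= 1;
    }
  assert (quot == 14 && num == 2);  /* 100 = 14*7 + 2 */
}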
diff --git a/libgcc/config/gcn/lib2-gcn.h b/libgcc/config/gcn/lib2-gcn.h
index 67ad9ba..dc071c0 100644
--- a/libgcc/config/gcn/lib2-gcn.h
+++ b/libgcc/config/gcn/lib2-gcn.h
@@ -35,15 +35,129 @@ typedef int TItype __attribute__ ((mode (TI)));
typedef unsigned int UTItype __attribute__ ((mode (TI)));
typedef int word_type __attribute__ ((mode (__word__)));
+typedef float v2sf __attribute__ ((vector_size (8)));
+typedef float v4sf __attribute__ ((vector_size (16)));
+typedef float v8sf __attribute__ ((vector_size (32)));
+typedef float v16sf __attribute__ ((vector_size (64)));
+typedef float v32sf __attribute__ ((vector_size (128)));
+typedef float v64sf __attribute__ ((vector_size (256)));
+
+typedef double v2df __attribute__ ((vector_size (16)));
+typedef double v4df __attribute__ ((vector_size (32)));
+typedef double v8df __attribute__ ((vector_size (64)));
+typedef double v16df __attribute__ ((vector_size (128)));
+typedef double v32df __attribute__ ((vector_size (256)));
+typedef double v64df __attribute__ ((vector_size (512)));
+
+typedef signed char v2qi __attribute__ ((vector_size (2)));
+typedef signed char v4qi __attribute__ ((vector_size (4)));
+typedef signed char v8qi __attribute__ ((vector_size (8)));
+typedef signed char v16qi __attribute__ ((vector_size (16)));
+typedef signed char v32qi __attribute__ ((vector_size (32)));
+typedef signed char v64qi __attribute__ ((vector_size (64)));
+
+typedef unsigned char v2uqi __attribute__ ((vector_size (2)));
+typedef unsigned char v4uqi __attribute__ ((vector_size (4)));
+typedef unsigned char v8uqi __attribute__ ((vector_size (8)));
+typedef unsigned char v16uqi __attribute__ ((vector_size (16)));
+typedef unsigned char v32uqi __attribute__ ((vector_size (32)));
+typedef unsigned char v64uqi __attribute__ ((vector_size (64)));
+
+typedef short v2hi __attribute__ ((vector_size (4)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+typedef short v16hi __attribute__ ((vector_size (32)));
+typedef short v32hi __attribute__ ((vector_size (64)));
+typedef short v64hi __attribute__ ((vector_size (128)));
+
+typedef unsigned short v2uhi __attribute__ ((vector_size (4)));
+typedef unsigned short v4uhi __attribute__ ((vector_size (8)));
+typedef unsigned short v8uhi __attribute__ ((vector_size (16)));
+typedef unsigned short v16uhi __attribute__ ((vector_size (32)));
+typedef unsigned short v32uhi __attribute__ ((vector_size (64)));
+typedef unsigned short v64uhi __attribute__ ((vector_size (128)));
+
+typedef int v2si __attribute__ ((vector_size (8)));
+typedef int v4si __attribute__ ((vector_size (16)));
+typedef int v8si __attribute__ ((vector_size (32)));
+typedef int v16si __attribute__ ((vector_size (64)));
+typedef int v32si __attribute__ ((vector_size (128)));
+typedef int v64si __attribute__ ((vector_size (256)));
+
+typedef unsigned int v2usi __attribute__ ((vector_size (8)));
+typedef unsigned int v4usi __attribute__ ((vector_size (16)));
+typedef unsigned int v8usi __attribute__ ((vector_size (32)));
+typedef unsigned int v16usi __attribute__ ((vector_size (64)));
+typedef unsigned int v32usi __attribute__ ((vector_size (128)));
+typedef unsigned int v64usi __attribute__ ((vector_size (256)));
+
+typedef long v2di __attribute__ ((vector_size (16)));
+typedef long v4di __attribute__ ((vector_size (32)));
+typedef long v8di __attribute__ ((vector_size (64)));
+typedef long v16di __attribute__ ((vector_size (128)));
+typedef long v32di __attribute__ ((vector_size (256)));
+typedef long v64di __attribute__ ((vector_size (512)));
+
+typedef unsigned long v2udi __attribute__ ((vector_size (16)));
+typedef unsigned long v4udi __attribute__ ((vector_size (32)));
+typedef unsigned long v8udi __attribute__ ((vector_size (64)));
+typedef unsigned long v16udi __attribute__ ((vector_size (128)));
+typedef unsigned long v32udi __attribute__ ((vector_size (256)));
+typedef unsigned long v64udi __attribute__ ((vector_size (512)));
+
+typedef UTItype v2uti __attribute__ ((vector_size (32)));
+typedef UTItype v4uti __attribute__ ((vector_size (64)));
+typedef UTItype v8uti __attribute__ ((vector_size (128)));
+typedef UTItype v16uti __attribute__ ((vector_size (256)));
+typedef UTItype v32uti __attribute__ ((vector_size (512)));
+typedef UTItype v64uti __attribute__ ((vector_size (1024)));
+
/* Exported functions. */
extern DItype __divdi3 (DItype, DItype);
extern DItype __moddi3 (DItype, DItype);
+extern UTItype __divmoddi4 (DItype, DItype);
extern UDItype __udivdi3 (UDItype, UDItype);
extern UDItype __umoddi3 (UDItype, UDItype);
+extern UTItype __udivmoddi4 (UDItype, UDItype);
extern SItype __divsi3 (SItype, SItype);
extern SItype __modsi3 (SItype, SItype);
+extern UDItype __divmodsi4 (SItype, SItype);
extern USItype __udivsi3 (USItype, USItype);
extern USItype __umodsi3 (USItype, USItype);
+extern UDItype __udivmodsi4 (USItype, USItype);
extern SItype __mulsi3 (SItype, SItype);
+#define VECTOR_PROTOTYPES(SIZE) \
+ extern v##SIZE##qi __divv##SIZE##qi3 (v##SIZE##qi, v##SIZE##qi); \
+ extern v##SIZE##qi __modv##SIZE##qi3 (v##SIZE##qi, v##SIZE##qi); \
+ extern v##SIZE##udi __divmodv##SIZE##qi4 (v##SIZE##qi, v##SIZE##qi); \
+ extern v##SIZE##uqi __udivv##SIZE##qi3 (v##SIZE##uqi, v##SIZE##uqi); \
+ extern v##SIZE##uqi __umodv##SIZE##qi3 (v##SIZE##uqi, v##SIZE##uqi); \
+ extern v##SIZE##udi __udivmodv##SIZE##qi4 (v##SIZE##uqi, v##SIZE##uqi); \
+ extern v##SIZE##hi __divv##SIZE##hi3 (v##SIZE##hi, v##SIZE##hi); \
+ extern v##SIZE##hi __modv##SIZE##hi3 (v##SIZE##hi, v##SIZE##hi); \
+ extern v##SIZE##udi __divmodv##SIZE##hi4 (v##SIZE##hi, v##SIZE##hi); \
+ extern v##SIZE##uhi __udivv##SIZE##hi3 (v##SIZE##uhi, v##SIZE##uhi); \
+ extern v##SIZE##uhi __umodv##SIZE##hi3 (v##SIZE##uhi, v##SIZE##uhi); \
+ extern v##SIZE##udi __udivmodv##SIZE##hi4 (v##SIZE##uhi, v##SIZE##uhi); \
+ extern v##SIZE##si __divv##SIZE##si3 (v##SIZE##si, v##SIZE##si); \
+ extern v##SIZE##si __modv##SIZE##si3 (v##SIZE##si, v##SIZE##si); \
+ extern v##SIZE##udi __divmodv##SIZE##si4 (v##SIZE##si, v##SIZE##si); \
+ extern v##SIZE##usi __udivv##SIZE##si3 (v##SIZE##usi, v##SIZE##usi); \
+ extern v##SIZE##usi __umodv##SIZE##si3 (v##SIZE##usi, v##SIZE##usi); \
+ extern v##SIZE##udi __udivmodv##SIZE##si4 (v##SIZE##usi, v##SIZE##usi); \
+ extern v##SIZE##di __divv##SIZE##di3 (v##SIZE##di, v##SIZE##di); \
+ extern v##SIZE##di __modv##SIZE##di3 (v##SIZE##di, v##SIZE##di); \
+ extern v##SIZE##uti __divmodv##SIZE##di4 (v##SIZE##di, v##SIZE##di); \
+ extern v##SIZE##udi __udivv##SIZE##di3 (v##SIZE##udi, v##SIZE##udi); \
+ extern v##SIZE##udi __umodv##SIZE##di3 (v##SIZE##udi, v##SIZE##udi); \
+ extern v##SIZE##uti __udivmodv##SIZE##di4 (v##SIZE##udi, v##SIZE##udi);
+VECTOR_PROTOTYPES (2)
+VECTOR_PROTOTYPES (4)
+VECTOR_PROTOTYPES (8)
+VECTOR_PROTOTYPES (16)
+VECTOR_PROTOTYPES (32)
+VECTOR_PROTOTYPES (64)
+#undef VECTOR_PROTOTYPES
+
#endif /* LIB2_GCN_H */
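Each VECTOR_PROTOTYPES(SIZE) expansion declares the full set of 24
libfuncs (div, mod, divmod and their unsigned variants, for each of the
four element widths) at one vector length.  For SIZE=2, for example, the
first three lines expand to:

/* Illustrative expansion only; the macro above is authoritative.  */
extern v2qi __divv2qi3 (v2qi, v2qi);
extern v2qi __modv2qi3 (v2qi, v2qi);
extern v2udi __divmodv2qi4 (v2qi, v2qi);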
diff --git a/libgcc/config/gcn/lib2-vec_divmod-di.c b/libgcc/config/gcn/lib2-vec_divmod-di.c
new file mode 100644
index 0000000..8f4a035
--- /dev/null
+++ b/libgcc/config/gcn/lib2-vec_divmod-di.c
@@ -0,0 +1,118 @@
+/* Copyright (C) 2012-2023 Free Software Foundation, Inc.
+ Contributed by Altera and Mentor Graphics, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "lib2-gcn.h"
+
+/* 64-bit V64DI divide and modulo as used in gcn.
+ This is a simple conversion from lib2-divmod.c. */
+
+#define MASKMODE v64di
+#include "amdgcn_veclib.h"
+
+static v64uti
+__udivmodv64di4_aux (v64udi num, v64udi den, v64di __mask)
+{
+ v64udi bit = VECTOR_INIT (1UL);
+ v64udi res = VECTOR_INIT (0UL);
+
+ VECTOR_WHILE ((den < num) & (bit != 0) & ((den & (1UL<<63)) == 0),
+ cond, NO_COND)
+ VECTOR_COND_MOVE (den, den << 1, cond);
+ VECTOR_COND_MOVE (bit, bit << 1, cond);
+ VECTOR_ENDWHILE
+ VECTOR_WHILE (bit != 0, loopcond, NO_COND)
+ VECTOR_IF2 (num >= den, ifcond, loopcond)
+ VECTOR_COND_MOVE (num, num - den, ifcond);
+ VECTOR_COND_MOVE (res, res | bit, ifcond);
+ VECTOR_ENDIF
+ VECTOR_COND_MOVE (bit, bit >> 1, loopcond);
+ VECTOR_COND_MOVE (den, den >> 1, loopcond);
+ VECTOR_ENDWHILE
+
+ return PACK_DI_PAIR (res, num);
+}
+
+static v64uti
+__divmodv64di4_aux (v64di a, v64di b, v64di __mask)
+{
+ v64di nega = VECTOR_INIT (0L);
+ v64di negb = VECTOR_INIT (0L);
+
+ VECTOR_IF (a < 0, cond)
+ VECTOR_COND_MOVE (a, -a, cond);
+ nega = cond;
+ VECTOR_ENDIF
+
+ VECTOR_IF (b < 0, cond)
+ VECTOR_COND_MOVE (b, -b, cond);
+ negb = cond;
+ VECTOR_ENDIF
+
+ v64udi ua = __builtin_convertvector (a, v64udi);
+ v64udi ub = __builtin_convertvector (b, v64udi);
+ v64uti pair = __udivmodv64di4_aux (ua, ub, __mask);
+
+ v64di quot = UNPACK_DI_LOW (v64di, pair);
+ v64di rem = UNPACK_DI_HIGH (v64di, pair);
+ VECTOR_COND_MOVE (quot, -quot, nega ^ negb);
+ VECTOR_COND_MOVE (rem, -rem, nega);
+ pair = PACK_DI_PAIR (quot, rem);
+
+ return pair;
+}
+
+
+static inline v64di
+__divv64di3_aux (v64di a, v64di b, v64di __mask)
+{
+ v64uti pair = __divmodv64di4_aux (a, b, __mask);
+ return UNPACK_DI_LOW (v64di, pair);
+}
+
+static inline v64di
+__modv64di3_aux (v64di a, v64di b, v64di __mask)
+{
+ v64uti pair = __divmodv64di4_aux (a, b, __mask);
+ return UNPACK_DI_HIGH (v64di, pair);
+}
+
+
+static inline v64udi
+__udivv64di3_aux (v64udi a, v64udi b, v64di __mask)
+{
+ v64uti pair = __udivmodv64di4_aux (a, b, __mask);
+ return UNPACK_DI_LOW (v64udi, pair);
+}
+
+static inline v64udi
+__umodv64di3_aux (v64udi a, v64udi b, v64di __mask)
+{
+ v64uti pair = __udivmodv64di4_aux (a, b, __mask);
+ return UNPACK_DI_HIGH (v64udi, pair);
+}
+
+DEF_VARIANTS (__div, di3, di)
+DEF_VARIANTS (__mod, di3, di)
+DEF_VARIANTS_B (__divmod, di4, uti, di)
+DEF_VARIANTS (__udiv, di3, udi)
+DEF_VARIANTS (__umod, di3, udi)
+DEF_VARIANTS_B (__udivmod, di4, uti, udi)
diff --git a/libgcc/config/gcn/lib2-vec_divmod-hi.c b/libgcc/config/gcn/lib2-vec_divmod-hi.c
new file mode 100644
index 0000000..175ddf8
--- /dev/null
+++ b/libgcc/config/gcn/lib2-vec_divmod-hi.c
@@ -0,0 +1,118 @@
+/* Copyright (C) 2012-2023 Free Software Foundation, Inc.
+ Contributed by Altera and Mentor Graphics, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "lib2-gcn.h"
+
+/* 16-bit V64HI divide and modulo as used in gcn.
+ This is a simple conversion from lib2-divmod.c. */
+
+#define MASKMODE v64hi
+#include "amdgcn_veclib.h"
+
+static v64udi
+__udivmodv64hi4_aux (v64uhi num, v64uhi den, v64hi __mask)
+{
+ v64uhi bit = VECTOR_INIT ((unsigned short)1U);
+ v64uhi res = VECTOR_INIT ((unsigned short)0U);
+
+ VECTOR_WHILE ((den < num) & (bit != 0) & ((den & (1L<<15)) == 0),
+ cond, NO_COND)
+ VECTOR_COND_MOVE (den, den << 1, cond);
+ VECTOR_COND_MOVE (bit, bit << 1, cond);
+ VECTOR_ENDWHILE
+ VECTOR_WHILE (bit != 0, loopcond, NO_COND)
+ VECTOR_IF2 (num >= den, ifcond, loopcond)
+ VECTOR_COND_MOVE (num, num - den, ifcond);
+ VECTOR_COND_MOVE (res, res | bit, ifcond);
+ VECTOR_ENDIF
+ VECTOR_COND_MOVE (bit, bit >> 1, loopcond);
+ VECTOR_COND_MOVE (den, den >> 1, loopcond);
+ VECTOR_ENDWHILE
+
+ return PACK_SI_PAIR (res, num);
+}
+
+static v64udi
+__divmodv64hi4_aux (v64hi a, v64hi b, v64hi __mask)
+{
+ v64hi nega = VECTOR_INIT ((short)0);
+ v64hi negb = VECTOR_INIT ((short)0);
+
+ VECTOR_IF (a < 0, cond)
+ VECTOR_COND_MOVE (a, -a, cond);
+ nega = cond;
+ VECTOR_ENDIF
+
+ VECTOR_IF (b < 0, cond)
+ VECTOR_COND_MOVE (b, -b, cond);
+ negb = cond;
+ VECTOR_ENDIF
+
+ v64uhi ua = __builtin_convertvector (a, v64uhi);
+ v64uhi ub = __builtin_convertvector (b, v64uhi);
+ v64udi pair = __udivmodv64hi4_aux (ua, ub, __mask);
+
+ v64hi quot = UNPACK_SI_LOW (v64hi, pair);
+ v64hi rem = UNPACK_SI_HIGH (v64hi, pair);
+ VECTOR_COND_MOVE (quot, -quot, nega ^ negb);
+ VECTOR_COND_MOVE (rem, -rem, nega);
+ pair = PACK_SI_PAIR (quot, rem);
+
+ return pair;
+}
+
+
+static inline v64hi
+__divv64hi3_aux (v64hi a, v64hi b, v64hi __mask)
+{
+ v64udi pair = __divmodv64hi4_aux (a, b, __mask);
+ return UNPACK_SI_LOW (v64hi, pair);
+}
+
+static inline v64hi
+__modv64hi3_aux (v64hi a, v64hi b, v64hi __mask)
+{
+ v64udi pair = __divmodv64hi4_aux (a, b, __mask);
+ return UNPACK_SI_HIGH (v64hi, pair);
+}
+
+
+static inline v64uhi
+__udivv64hi3_aux (v64uhi a, v64uhi b, v64hi __mask)
+{
+ v64udi pair = __udivmodv64hi4_aux (a, b, __mask);
+ return UNPACK_SI_LOW (v64uhi, pair);
+}
+
+static inline v64uhi
+__umodv64hi3_aux (v64uhi a, v64uhi b, v64hi __mask)
+{
+ v64udi pair = __udivmodv64hi4_aux (a, b, __mask);
+ return UNPACK_SI_HIGH (v64uhi, pair);
+}
+
+DEF_VARIANTS (__div, hi3, hi)
+DEF_VARIANTS (__mod, hi3, hi)
+DEF_VARIANTS_B (__divmod, hi4, udi, hi)
+DEF_VARIANTS (__udiv, hi3, uhi)
+DEF_VARIANTS (__umod, hi3, uhi)
+DEF_VARIANTS_B (__udivmod, hi4, udi, uhi)
diff --git a/libgcc/config/gcn/lib2-vec_divmod-qi.c b/libgcc/config/gcn/lib2-vec_divmod-qi.c
new file mode 100644
index 0000000..ff6b5c2
--- /dev/null
+++ b/libgcc/config/gcn/lib2-vec_divmod-qi.c
@@ -0,0 +1,118 @@
+/* Copyright (C) 2012-2023 Free Software Foundation, Inc.
+ Contributed by Altera and Mentor Graphics, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "lib2-gcn.h"
+
+/* 8-bit V64QI divide and modulo as used in gcn.
+ This is a simple conversion from lib2-divmod.c. */
+
+#define MASKMODE v64qi
+#include "amdgcn_veclib.h"
+
+static v64udi
+__udivmodv64qi4_aux (v64uqi num, v64uqi den, v64qi __mask)
+{
+ v64uqi bit = VECTOR_INIT ((unsigned char)1U);
+ v64uqi res = VECTOR_INIT ((unsigned char)0U);
+
+ VECTOR_WHILE ((den < num) & (bit != 0) & ((den & (1<<7)) == 0),
+ cond, NO_COND)
+ VECTOR_COND_MOVE (den, den << 1, cond);
+ VECTOR_COND_MOVE (bit, bit << 1, cond);
+ VECTOR_ENDWHILE
+ VECTOR_WHILE (bit != 0, loopcond, NO_COND)
+ VECTOR_IF2 (num >= den, ifcond, loopcond)
+ VECTOR_COND_MOVE (num, num - den, ifcond);
+ VECTOR_COND_MOVE (res, res | bit, ifcond);
+ VECTOR_ENDIF
+ VECTOR_COND_MOVE (bit, bit >> 1, loopcond);
+ VECTOR_COND_MOVE (den, den >> 1, loopcond);
+ VECTOR_ENDWHILE
+
+ return PACK_SI_PAIR (res, num);
+}
+
+static v64udi
+__divmodv64qi4_aux (v64qi a, v64qi b, v64qi __mask)
+{
+ v64qi nega = VECTOR_INIT ((char)0);
+ v64qi negb = VECTOR_INIT ((char)0);
+
+ VECTOR_IF (a < 0, cond)
+ VECTOR_COND_MOVE (a, -a, cond);
+ nega = cond;
+ VECTOR_ENDIF
+
+ VECTOR_IF (b < 0, cond)
+ VECTOR_COND_MOVE (b, -b, cond);
+ negb = cond;
+ VECTOR_ENDIF
+
+ v64uqi ua = __builtin_convertvector (a, v64uqi);
+ v64uqi ub = __builtin_convertvector (b, v64uqi);
+ v64udi pair = __udivmodv64qi4_aux (ua, ub, __mask);
+
+ v64qi quot = UNPACK_SI_LOW (v64qi, pair);
+ v64qi rem = UNPACK_SI_HIGH (v64qi, pair);
+ VECTOR_COND_MOVE (quot, -quot, nega ^ negb);
+ VECTOR_COND_MOVE (rem, -rem, nega);
+ pair = PACK_SI_PAIR (quot, rem);
+
+ return pair;
+}
+
+
+static inline v64qi
+__divv64qi3_aux (v64qi a, v64qi b, v64qi __mask)
+{
+ v64udi pair = __divmodv64qi4_aux (a, b, __mask);
+ return UNPACK_SI_LOW (v64qi, pair);
+}
+
+static inline v64qi
+__modv64qi3_aux (v64qi a, v64qi b, v64qi __mask)
+{
+ v64udi pair = __divmodv64qi4_aux (a, b, __mask);
+ return UNPACK_SI_HIGH (v64qi, pair);
+}
+
+
+static inline v64uqi
+__udivv64qi3_aux (v64uqi a, v64uqi b, v64qi __mask)
+{
+ v64udi pair = __udivmodv64qi4_aux (a, b, __mask);
+ return UNPACK_SI_LOW (v64uqi, pair);
+}
+
+static inline v64uqi
+__umodv64qi3_aux (v64uqi a, v64uqi b, v64qi __mask)
+{
+ v64udi pair = __udivmodv64qi4_aux (a, b, __mask);
+ return UNPACK_SI_HIGH (v64uqi, pair);
+}
+
+DEF_VARIANTS (__div, qi3, qi)
+DEF_VARIANTS (__mod, qi3, qi)
+DEF_VARIANTS_B (__divmod, qi4, udi, qi)
+DEF_VARIANTS (__udiv, qi3, uqi)
+DEF_VARIANTS (__umod, qi3, uqi)
+DEF_VARIANTS_B (__udivmod, qi4, udi, uqi)
diff --git a/libgcc/config/gcn/lib2-vec_divmod.c b/libgcc/config/gcn/lib2-vec_divmod.c
new file mode 100644
index 0000000..e166766
--- /dev/null
+++ b/libgcc/config/gcn/lib2-vec_divmod.c
@@ -0,0 +1,118 @@
+/* Copyright (C) 2012-2023 Free Software Foundation, Inc.
+ Contributed by Altera and Mentor Graphics, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+<http://www.gnu.org/licenses/>. */
+
+#include "lib2-gcn.h"
+
+/* 32-bit V64SI divide and modulo as used in gcn.
+ This is a simple conversion from lib2-divmod.c. */
+
+#define MASKMODE v64si
+#include "amdgcn_veclib.h"
+
+static v64udi
+__udivmodv64si4_aux (v64usi num, v64usi den, v64si __mask)
+{
+ v64usi bit = VECTOR_INIT (1U);
+ v64usi res = VECTOR_INIT (0U);
+
+ VECTOR_WHILE ((den < num) & (bit != 0) & ((den & (1L<<31)) == 0),
+ cond, NO_COND)
+ VECTOR_COND_MOVE (den, den << 1, cond);
+ VECTOR_COND_MOVE (bit, bit << 1, cond);
+ VECTOR_ENDWHILE
+ VECTOR_WHILE (bit != 0, loopcond, NO_COND)
+ VECTOR_IF2 (num >= den, ifcond, loopcond)
+ VECTOR_COND_MOVE (num, num - den, ifcond);
+ VECTOR_COND_MOVE (res, res | bit, ifcond);
+ VECTOR_ENDIF
+ VECTOR_COND_MOVE (bit, bit >> 1, loopcond);
+ VECTOR_COND_MOVE (den, den >> 1, loopcond);
+ VECTOR_ENDWHILE
+
+ return PACK_SI_PAIR (res, num);
+}
+
+static v64udi
+__divmodv64si4_aux (v64si a, v64si b, v64si __mask)
+{
+ v64si nega = VECTOR_INIT (0);
+ v64si negb = VECTOR_INIT (0);
+
+ VECTOR_IF (a < 0, cond)
+ VECTOR_COND_MOVE (a, -a, cond);
+ nega = cond;
+ VECTOR_ENDIF
+
+ VECTOR_IF (b < 0, cond)
+ VECTOR_COND_MOVE (b, -b, cond);
+ negb = cond;
+ VECTOR_ENDIF
+
+ v64usi ua = __builtin_convertvector (a, v64usi);
+ v64usi ub = __builtin_convertvector (b, v64usi);
+ v64udi pair = __udivmodv64si4_aux (ua, ub, __mask);
+
+ v64si quot = UNPACK_SI_LOW (v64si, pair);
+ v64si rem = UNPACK_SI_HIGH (v64si, pair);
+ VECTOR_COND_MOVE (quot, -quot, nega ^ negb);
+ VECTOR_COND_MOVE (rem, -rem, nega);
+ pair = PACK_SI_PAIR (quot, rem);
+
+ return pair;
+}
+
+
+static inline v64si
+__divv64si3_aux (v64si a, v64si b, v64si __mask)
+{
+ v64udi pair = __divmodv64si4_aux (a, b, __mask);
+ return UNPACK_SI_LOW (v64si, pair);
+}
+
+static inline v64si
+__modv64si3_aux (v64si a, v64si b, v64si __mask)
+{
+ v64udi pair = __divmodv64si4_aux (a, b, __mask);
+ return UNPACK_SI_HIGH (v64si, pair);
+}
+
+
+static inline v64usi
+__udivv64si3_aux (v64usi a, v64usi b, v64si __mask)
+{
+ v64udi pair = __udivmodv64si4_aux (a, b, __mask);
+ return UNPACK_SI_LOW (v64usi, pair);
+}
+
+static inline v64usi
+__umodv64si3_aux (v64usi a, v64usi b, v64si __mask)
+{
+ v64udi pair = __udivmodv64si4_aux (a, b, __mask);
+ return UNPACK_SI_HIGH (v64usi, pair);
+}
+
+DEF_VARIANTS (__div, si3, si)
+DEF_VARIANTS (__mod, si3, si)
+DEF_VARIANTS_B (__divmod, si4, udi, si)
+DEF_VARIANTS (__udiv, si3, usi)
+DEF_VARIANTS (__umod, si3, usi)
+DEF_VARIANTS_B (__udivmod, si4, udi, usi)
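DEF_VARIANTS then stamps out the narrow entry points: each one pads its
arguments up to 64 lanes with VGPR_CAST (a register-reuse no-op), builds
a mask covering only the live lanes, and calls the 64-lane worker.  For
illustration, DEF_VARIANT (__div, si3, si, si, 2) expands to roughly:

/* Illustrative macro expansion (whitespace added); DEF_VARIANT in
   amdgcn_veclib.h is the authoritative definition.  */
v2si
__divv2si3 (v2si __arg1, v2si __arg2)
{
  __auto_type __upsized_arg1 = VGPR_CAST (v64si, __arg1);
  __auto_type __upsized_arg2 = VGPR_CAST (v64si, __arg2);
  __auto_type __mask = VECTOR_INIT_MASK (2);   /* only 2 active lanes */
  __auto_type __result = __divv64si3_aux (__upsized_arg1, __upsized_arg2,
                                          __mask);
  return VGPR_CAST (v2si, __result);
}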
diff --git a/libgcc/config/gcn/t-amdgcn b/libgcc/config/gcn/t-amdgcn
index e64953e..d1d9a4f 100644
--- a/libgcc/config/gcn/t-amdgcn
+++ b/libgcc/config/gcn/t-amdgcn
@@ -1,6 +1,10 @@
LIB2ADD += $(srcdir)/config/gcn/atomic.c \
$(srcdir)/config/gcn/lib2-divmod.c \
$(srcdir)/config/gcn/lib2-divmod-di.c \
+ $(srcdir)/config/gcn/lib2-vec_divmod.c \
+ $(srcdir)/config/gcn/lib2-vec_divmod-qi.c \
+ $(srcdir)/config/gcn/lib2-vec_divmod-hi.c \
+ $(srcdir)/config/gcn/lib2-vec_divmod-di.c \
$(srcdir)/config/gcn/lib2-bswapti2.c \
$(srcdir)/config/gcn/unwind-gcn.c