From ddb0f66e6c1e846bdc217075c9a770bfd0b01970 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Wed, 5 Apr 2023 14:44:54 +0000 Subject: Add assembler CFI directives to millicode division and remainder routines. The millicode division and remainder routines trap division by zero. The unwinder needs these directives to unwind divide by zero traps. 2023-04-05 John David Anglin libgcc/ChangeLog: PR target/109374 * config/pa/milli64.S (RETURN_COLUMN): Define. ($$divI): Add CFI directives. ($$divU): Likewise. ($$remI): Likewise. ($$remU): Likewise. --- libgcc/config/pa/milli64.S | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'libgcc') diff --git a/libgcc/config/pa/milli64.S b/libgcc/config/pa/milli64.S index 2316f79..e1c84f4 100644 --- a/libgcc/config/pa/milli64.S +++ b/libgcc/config/pa/milli64.S @@ -105,8 +105,10 @@ sar: .reg %cr11 /* Shift Amount Register */ rp: .reg r2 /* return pointer */ #ifdef pa64 mrp: .reg r2 /* millicode return pointer */ +#define RETURN_COLUMN 2 #else mrp: .reg r31 /* millicode return pointer */ +#define RETURN_COLUMN 31 #endif ret0: .reg r28 /* return value */ ret1: .reg r29 /* return value (high part of double) */ @@ -313,6 +315,8 @@ RDEFINE(temp1,arg0) .import $$divI_15,millicode .export $$divI,millicode .export $$divoI,millicode + .cfi_startproc + .cfi_return_column RETURN_COLUMN .proc .callinfo millicode .entry @@ -484,6 +488,7 @@ LSYM(negative1) addo arg0,arg1,r0 /* trap iff dividend==0x80000000 && divisor==-1 */ .exit .procend + .cfi_endproc .end #endif @@ -543,6 +548,8 @@ RDEFINE(temp1,arg0) .import $$divU_12,millicode .import $$divU_14,millicode .import $$divU_15,millicode + .cfi_startproc + .cfi_return_column RETURN_COLUMN .proc .callinfo millicode .entry @@ -706,6 +713,7 @@ LSYM(big_divisor) addc r0,r0,retreg .exit .procend + .cfi_endproc .end #endif @@ -750,6 +758,8 @@ RDEFINE(retreg,ret1) SUBSPA_MILLI ATTR_MILLI + .cfi_startproc + .cfi_return_column RETURN_COLUMN .proc .callinfo millicode .entry @@ -875,6 
+885,7 @@ LSYM(finish) nop .exit .procend + .cfi_endproc #ifdef milliext .origin 0x00000200 #endif @@ -922,6 +933,8 @@ RDEFINE(rmndr,ret1) /* r29 */ SUBSPA_MILLI ATTR_MILLI .export $$remU,millicode + .cfi_startproc + .cfi_return_column RETURN_COLUMN .proc .callinfo millicode .entry @@ -1013,6 +1026,7 @@ LSYM(special_case) nop .exit .procend + .cfi_endproc .end #endif -- cgit v1.1 From 5229788da723442d51155693ab98f831e94d1d58 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Thu, 6 Apr 2023 00:16:43 +0000 Subject: Daily bump. --- libgcc/ChangeLog | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'libgcc') diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index 54b6930..26539c9 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,12 @@ +2023-04-05 John David Anglin + + PR target/109374 + * config/pa/milli64.S (RETURN_COLUMN): Define. + ($$divI): Add CFI directives. + ($$divU): Likewise. + ($$remI): Likewise. + ($$remU): Likewise. + 2023-03-19 Stafford Horne * config/or1k/sfp-machine.h (FP_HANDLE_EXCEPTIONS): Remove -- cgit v1.1 From 79fb2de020c499bd01708e1418965a11166b7d5b Mon Sep 17 00:00:00 2001 From: Tetsuma Hoshino Date: Sat, 8 Apr 2023 08:19:53 -0600 Subject: PR target/109402: v850 (not v850e) variant of __muldi3() moves sp in reversed direction [PR109402] muldi3 will deallocate stack space after the call to __save_r26_r31, then re-allocate the space a short while later. If an interrupt occurs in that window, it can clobber items on the stack. PR target/109402 libgcc/ * config/v850/lib1funcs.S (___muldi3): Remove unnecessary stack manipulations. 
--- libgcc/config/v850/lib1funcs.S | 2 -- 1 file changed, 2 deletions(-) (limited to 'libgcc') diff --git a/libgcc/config/v850/lib1funcs.S b/libgcc/config/v850/lib1funcs.S index 20fa499..35b5ab5 100644 --- a/libgcc/config/v850/lib1funcs.S +++ b/libgcc/config/v850/lib1funcs.S @@ -2211,7 +2211,6 @@ ___ucmpdi2: ___muldi3: #ifdef __v850__ jarl __save_r26_r31, r10 - addi 16, sp, sp mov r6, r28 shr 15, r28 movea lo(32767), r0, r14 @@ -2256,7 +2255,6 @@ ___muldi3: mulh r12, r6 mov r28, r17 mulh r10, r17 - add -16, sp mov r28, r12 mulh r8, r12 add r17, r18 -- cgit v1.1 From 0f04ebb49468bc99b58e8726b6ddcff47086d562 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Sun, 9 Apr 2023 00:16:37 +0000 Subject: Daily bump. --- libgcc/ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'libgcc') diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index 26539c9..40de969 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,9 @@ +2023-04-08 Tetsuma Hoshino + + PR target/109402 + * config/v850/lib1funcs.S (___muldi3): Remove unnecessary + stack manipulations. + 2023-04-05 John David Anglin PR target/109374 -- cgit v1.1 From f797260adaf52bee0ec0e16190bbefbe1bfc3692 Mon Sep 17 00:00:00 2001 From: Patrick O'Neill Date: Tue, 18 Apr 2023 14:33:13 -0700 Subject: RISCV: Inline subword atomic ops RISC-V has no support for subword atomic operations; code currently generates libatomic library calls. This patch changes the default behavior to inline subword atomic calls (using the same logic as the existing library call). Behavior can be specified using the -minline-atomics and -mno-inline-atomics command line flags. gcc/libgcc/config/riscv/atomic.c has the same logic implemented in asm. This will need to stay for backwards compatibility and the -mno-inline-atomics flag. 2023-04-18 Patrick O'Neill gcc/ChangeLog: PR target/104338 * config/riscv/riscv-protos.h: Add helper function stubs. * config/riscv/riscv.cc: Add helper functions for subword masking. 
* config/riscv/riscv.opt: Add command-line flag. * config/riscv/sync.md: Add masking logic and inline asm for fetch_and_op, fetch_and_nand, CAS, and exchange ops. * doc/invoke.texi: Add blurb regarding command-line flag. libgcc/ChangeLog: PR target/104338 * config/riscv/atomic.c: Add reference to duplicate logic. gcc/testsuite/ChangeLog: PR target/104338 * gcc.target/riscv/inline-atomics-1.c: New test. * gcc.target/riscv/inline-atomics-2.c: New test. * gcc.target/riscv/inline-atomics-3.c: New test. * gcc.target/riscv/inline-atomics-4.c: New test. * gcc.target/riscv/inline-atomics-5.c: New test. * gcc.target/riscv/inline-atomics-6.c: New test. * gcc.target/riscv/inline-atomics-7.c: New test. * gcc.target/riscv/inline-atomics-8.c: New test. Signed-off-by: Patrick O'Neill Signed-off-by: Palmer Dabbelt --- libgcc/config/riscv/atomic.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'libgcc') diff --git a/libgcc/config/riscv/atomic.c b/libgcc/config/riscv/atomic.c index 69f5362..573d163 100644 --- a/libgcc/config/riscv/atomic.c +++ b/libgcc/config/riscv/atomic.c @@ -30,6 +30,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see #define INVERT "not %[tmp1], %[tmp1]\n\t" #define DONT_INVERT "" +/* Logic duplicated in gcc/gcc/config/riscv/sync.md for use when inlining is enabled */ + #define GENERATE_FETCH_AND_OP(type, size, opname, insn, invert, cop) \ type __sync_fetch_and_ ## opname ## _ ## size (type *p, type v) \ { \ -- cgit v1.1 From d7f0bc05949481048d2fd524ecb2d892f6a04294 Mon Sep 17 00:00:00 2001 From: Hans-Peter Nilsson Date: Thu, 27 Apr 2023 02:00:33 +0200 Subject: libgcc CRIS: Define TARGET_HAS_NO_HW_DIVIDE With this, execution time for e.g. __moddi3 go from 59 to 40 cycles in the "fast" case or from 290 to 200 cycles in the "slow" case (when the !TARGET_HAS_NO_HW_DIVIDE variant calls division and modulus functions for 32-bit SImode), as exposed by gcc.c-torture/execute/arith-rand-ll.c compiled for -march=v10. 
Unfortunately, it just puts a performance improvement "dent" of 0.07% in a arith-rand-ll.c-based performance test - where all loops are also reduced to 1/10. The size of every affected libgcc function is reduced to less than half and they are all now leaf functions. * config/cris/t-cris (HOST_LIBGCC2_CFLAGS): Add -DTARGET_HAS_NO_HW_DIVIDE. --- libgcc/config/cris/t-cris | 3 +++ 1 file changed, 3 insertions(+) (limited to 'libgcc') diff --git a/libgcc/config/cris/t-cris b/libgcc/config/cris/t-cris index b582974..e002029 100644 --- a/libgcc/config/cris/t-cris +++ b/libgcc/config/cris/t-cris @@ -8,3 +8,6 @@ $(LIB2ADD): $(srcdir)/config/cris/arit.c echo "#define L$$name" > tmp-$@ \ && echo '#include "$<"' >> tmp-$@ \ && mv -f tmp-$@ $@ + +# Use an appropriate implementation when implementing DImode division. +HOST_LIBGCC2_CFLAGS += -DTARGET_HAS_NO_HW_DIVIDE -- cgit v1.1 From 4a3dbcbdb3d2b1f0759082987135965f6a185d17 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Thu, 27 Apr 2023 00:16:44 +0000 Subject: Daily bump. --- libgcc/ChangeLog | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'libgcc') diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index 40de969..ee2a5ba 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,13 @@ +2023-04-27 Hans-Peter Nilsson + + * config/cris/t-cris (HOST_LIBGCC2_CFLAGS): Add + -DTARGET_HAS_NO_HW_DIVIDE. + +2023-04-26 Patrick O'Neill + + PR target/104338 + * config/riscv/atomic.c: Add reference to duplicate logic. + 2023-04-08 Tetsuma Hoshino PR target/109402 -- cgit v1.1 From 1ee457a5febc8aa1e21d2298769bc70975f0206f Mon Sep 17 00:00:00 2001 From: Dimitar Dimitrov Date: Thu, 27 Apr 2023 19:07:47 +0300 Subject: libgcc pru: Define TARGET_HAS_NO_HW_DIVIDE This patch aligns the configuration to the actual PRU capabilities. It also reduces the size of the affected libgcc functions. 
For a real-world project using integer arithmetics the savings are significant: Before: text data bss dec hex filename 3688 865 544 5097 13e9 hc-sr04-range-sensor.elf With TARGET_HAS_NO_HW_DIVIDE defined: text data bss dec hex filename 2824 865 544 4233 1089 hc-sr04-range-sensor.elf Execution speed also appears to have improved. The moddi3 function is now executed in half the CPU cycles. libgcc/ChangeLog: * config/pru/t-pru (HOST_LIBGCC2_CFLAGS): Add -DTARGET_HAS_NO_HW_DIVIDE. Signed-off-by: Dimitar Dimitrov --- libgcc/config/pru/t-pru | 3 +++ 1 file changed, 3 insertions(+) (limited to 'libgcc') diff --git a/libgcc/config/pru/t-pru b/libgcc/config/pru/t-pru index a5b1871..7d5f5ee 100644 --- a/libgcc/config/pru/t-pru +++ b/libgcc/config/pru/t-pru @@ -42,6 +42,9 @@ LIB2ADD += \ HOST_LIBGCC2_CFLAGS += -Os -ffunction-sections -fdata-sections +# Use an appropriate implementation when implementing DImode division. +HOST_LIBGCC2_CFLAGS += -DTARGET_HAS_NO_HW_DIVIDE + LIB2FUNCS_EXCLUDE = _muldi3 SHLIB_MAPFILES += $(srcdir)/config/pru/libgcc-eabi.ver -- cgit v1.1 From 1fc8da95d93d1f1f151149178dc836d1ba09594e Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Tue, 2 May 2023 00:17:10 +0000 Subject: Daily bump. --- libgcc/ChangeLog | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'libgcc') diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index ee2a5ba..41556a7 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,8 @@ +2023-05-01 Dimitar Dimitrov + + * config/pru/t-pru (HOST_LIBGCC2_CFLAGS): Add + -DTARGET_HAS_NO_HW_DIVIDE. + 2023-04-27 Hans-Peter Nilsson * config/cris/t-cris (HOST_LIBGCC2_CFLAGS): Add -- cgit v1.1 From dcd7b2f5f7233a04c8b14b362d0befa76e9654c0 Mon Sep 17 00:00:00 2001 From: Patrick O'Neill Date: Wed, 5 Apr 2023 09:44:57 -0700 Subject: RISC-V: Enforce Libatomic LR/SC SEQ_CST Replace LR.aq/SC.rl pairs with the SEQ_CST LR.aqrl/SC.rl pairs recommended by table A.6 of the ISA manual. 
2023-04-27 Patrick O'Neill libgcc/ChangeLog: * config/riscv/atomic.c: Change LR.aq/SC.rl pairs into sequentially consistent LR.aqrl/SC.rl pairs. Signed-off-by: Patrick O'Neill --- libgcc/config/riscv/atomic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'libgcc') diff --git a/libgcc/config/riscv/atomic.c b/libgcc/config/riscv/atomic.c index 573d163..bd2b033 100644 --- a/libgcc/config/riscv/atomic.c +++ b/libgcc/config/riscv/atomic.c @@ -41,7 +41,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see unsigned old, tmp1, tmp2; \ \ asm volatile ("1:\n\t" \ - "lr.w.aq %[old], %[mem]\n\t" \ + "lr.w.aqrl %[old], %[mem]\n\t" \ #insn " %[tmp1], %[old], %[value]\n\t" \ invert \ "and %[tmp1], %[tmp1], %[mask]\n\t" \ @@ -75,7 +75,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see unsigned old, tmp1; \ \ asm volatile ("1:\n\t" \ - "lr.w.aq %[old], %[mem]\n\t" \ + "lr.w.aqrl %[old], %[mem]\n\t" \ "and %[tmp1], %[old], %[mask]\n\t" \ "bne %[tmp1], %[o], 1f\n\t" \ "and %[tmp1], %[old], %[not_mask]\n\t" \ -- cgit v1.1 From d7cb9720ed54687bd1135c5e6ef90776a9db0bd5 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Wed, 3 May 2023 00:17:11 +0000 Subject: Daily bump. --- libgcc/ChangeLog | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'libgcc') diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index 41556a7..1ebcf32 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,8 @@ +2023-05-02 Patrick O'Neill + + * config/riscv/atomic.c: Change LR.aq/SC.rl pairs into + sequentially consistent LR.aqrl/SC.rl pairs. 
+ 2023-05-01 Dimitar Dimitrov * config/pru/t-pru (HOST_LIBGCC2_CFLAGS): Add -- cgit v1.1 From 9be9be828dc9020735bc7eacddd1ceae1aeedb1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=B6ren=20Tempel?= Date: Sun, 14 May 2023 19:30:21 +0200 Subject: fix assert in __deregister_frame_info_bases MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The assertion in __deregister_frame_info_bases assumes that for every frame something was inserted into the lookup data structure by __register_frame_info_bases. Unfortunately, this does not necessarily hold true as the btree_insert call in __register_frame_info_bases will not insert anything for empty ranges. Therefore, we need to explicitly account for such empty ranges in the assertion as `ob` will be a null pointer for such ranges, hence causing the assertion to fail. Signed-off-by: Sören Tempel libgcc/ChangeLog: * unwind-dw2-fde.c: Accept empty ranges when deregistering frames. --- libgcc/unwind-dw2-fde.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'libgcc') diff --git a/libgcc/unwind-dw2-fde.c b/libgcc/unwind-dw2-fde.c index 7b74c39..8683a65 100644 --- a/libgcc/unwind-dw2-fde.c +++ b/libgcc/unwind-dw2-fde.c @@ -278,7 +278,9 @@ __deregister_frame_info_bases (const void *begin) __gthread_mutex_unlock (&object_mutex); #endif - gcc_assert (in_shutdown || ob); + // If we didn't find anything in the lookup data structures then they + // were either already destroyed or we tried to remove an empty range. + gcc_assert (in_shutdown || ((range[1] - range[0]) == 0 || ob)); return (void *) ob; } -- cgit v1.1 From 30adfb85ff994c0faa0cc556ba46838b218263f5 Mon Sep 17 00:00:00 2001 From: Thomas Neumann Date: Mon, 15 May 2023 14:59:22 +0200 Subject: fix assert in non-atomic path The non-atomic path does not have range information, we have to adjust the assert to handle that case, too. libgcc/ChangeLog: * unwind-dw2-fde.c: Fix assert in non-atomic path. 
--- libgcc/unwind-dw2-fde.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'libgcc') diff --git a/libgcc/unwind-dw2-fde.c b/libgcc/unwind-dw2-fde.c index 8683a65..a5786bf 100644 --- a/libgcc/unwind-dw2-fde.c +++ b/libgcc/unwind-dw2-fde.c @@ -240,6 +240,7 @@ __deregister_frame_info_bases (const void *begin) // And remove ob = btree_remove (&registered_frames, range[0]); + bool empty_table = (range[1] - range[0]) == 0; #else init_object_mutex_once (); __gthread_mutex_lock (&object_mutex); @@ -276,11 +277,12 @@ __deregister_frame_info_bases (const void *begin) out: __gthread_mutex_unlock (&object_mutex); + const int empty_table = 0; // The non-atomic path stores all tables. #endif // If we didn't find anything in the lookup data structures then they // were either already destroyed or we tried to remove an empty range. - gcc_assert (in_shutdown || ((range[1] - range[0]) == 0 || ob)); + gcc_assert (in_shutdown || (empty_table || ob)); return (void *) ob; } -- cgit v1.1 From 3427b51bccaca9c2df43f67f2dd3860e0e0f48c6 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Tue, 16 May 2023 00:17:47 +0000 Subject: Daily bump. --- libgcc/ChangeLog | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'libgcc') diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index 1ebcf32..78aa86c 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,11 @@ +2023-05-15 Thomas Neumann + + * unwind-dw2-fde.c: Fix assert in non-atomic path. + +2023-05-15 Sören Tempel + + * unwind-dw2-fde.c: Accept empty ranges when deregistering frames. + 2023-05-02 Patrick O'Neill * config/riscv/atomic.c: Change LR.aq/SC.rl pairs into -- cgit v1.1 From 20b8779ea9bd82b26eeb195b30f695168cd7ae1d Mon Sep 17 00:00:00 2001 From: Iain Sandoe Date: Thu, 11 May 2023 23:24:02 +0100 Subject: Darwin, libgcc : Adjust min version supported for the OS. Tools from later versions of the OS deprecate or fail to support earlier OS revisions. 
Signed-off-by: Iain Sandoe libgcc/ChangeLog: * config.host: Arrange to set min Darwin OS versions from the configured host version. * config/darwin10-unwind-find-enc-func.c: Do not use current headers, but declare the necessary structures locally to the versions in use for Mac OSX 10.6. * config/t-darwin: Amend to handle configured min OS versions. * config/t-darwin-min-1: New. * config/t-darwin-min-5: New. * config/t-darwin-min-8: New. --- libgcc/config.host | 18 ++++++++++++++ libgcc/config/darwin10-unwind-find-enc-func.c | 34 +++++++++++++++++++++++---- libgcc/config/t-darwin | 10 ++++---- libgcc/config/t-darwin-min-1 | 3 +++ libgcc/config/t-darwin-min-5 | 3 +++ libgcc/config/t-darwin-min-8 | 3 +++ 6 files changed, 63 insertions(+), 8 deletions(-) create mode 100644 libgcc/config/t-darwin-min-1 create mode 100644 libgcc/config/t-darwin-min-5 create mode 100644 libgcc/config/t-darwin-min-8 (limited to 'libgcc') diff --git a/libgcc/config.host b/libgcc/config.host index b9975de..9d72120 100644 --- a/libgcc/config.host +++ b/libgcc/config.host @@ -233,6 +233,24 @@ case ${host} in ;; esac tmake_file="$tmake_file t-slibgcc-darwin" + # newer toolsets produce warnings when building for unsupported versions. + case ${host} in + *-*-darwin1[89]* | *-*-darwin2* ) + tmake_file="t-darwin-min-8 $tmake_file" + ;; + *-*-darwin9* | *-*-darwin1[0-7]*) + tmake_file="t-darwin-min-5 $tmake_file" + ;; + *-*-darwin[4-8]*) + tmake_file="t-darwin-min-1 $tmake_file" + ;; + *) + # Fall back to configuring for the oldest system known to work with + # all archs and the current sources. 
+ tmake_file="t-darwin-min-5 $tmake_file" + echo "Warning: libgcc configured to support macOS 10.5" 1>&2 + ;; + esac extra_parts="crt3.o libd10-uwfef.a crttms.o crttme.o libemutls_w.a" ;; *-*-dragonfly*) diff --git a/libgcc/config/darwin10-unwind-find-enc-func.c b/libgcc/config/darwin10-unwind-find-enc-func.c index 882ec3a..b08396c 100644 --- a/libgcc/config/darwin10-unwind-find-enc-func.c +++ b/libgcc/config/darwin10-unwind-find-enc-func.c @@ -1,8 +1,34 @@ -#include "tconfig.h" -#include "tsystem.h" -#include "unwind-dw2-fde.h" #include "libgcc_tm.h" +/* This shim is special, it needs to be built for Mac OSX 10.6 + regardless of the current system version. + We must also build it to use the unwinder layout that was + present for 10.6 (and not update that). + So we copy the referenced structures from unwind-dw2-fde.h + to avoid pulling in newer system headers and/or changed + layouts. */ +struct dwarf_eh_bases +{ + void *tbase; + void *dbase; + void *func; +}; + +typedef int sword __attribute__ ((mode (SI))); +typedef unsigned int uword __attribute__ ((mode (SI))); + +/* The first few fields of an FDE. */ +struct dwarf_fde +{ + uword length; + sword CIE_delta; + unsigned char pc_begin[]; +} __attribute__ ((packed, aligned (__alignof__ (void *)))); + +typedef struct dwarf_fde fde; + +extern const fde * _Unwind_Find_FDE (void *, struct dwarf_eh_bases *); + void * _darwin10_Unwind_FindEnclosingFunction (void *pc) { @@ -10,5 +36,5 @@ _darwin10_Unwind_FindEnclosingFunction (void *pc) const struct dwarf_fde *fde = _Unwind_Find_FDE (pc-1, &bases); if (fde) return bases.func; - return NULL; + return (void *) 0; } diff --git a/libgcc/config/t-darwin b/libgcc/config/t-darwin index 299d26c..a3bb70c 100644 --- a/libgcc/config/t-darwin +++ b/libgcc/config/t-darwin @@ -1,15 +1,15 @@ # Set this as a minimum (unless overriden by arch t-files) since it's a # reasonable lowest common denominator that works for all our archs. 
-HOST_LIBGCC2_CFLAGS += -mmacosx-version-min=10.4 +HOST_LIBGCC2_CFLAGS += $(DARWIN_MIN_LIB_VERSION) crt3.o: $(srcdir)/config/darwin-crt3.c - $(crt_compile) -mmacosx-version-min=10.4 -c $< + $(crt_compile) $(DARWIN_MIN_CRT_VERSION) -c $< crttms.o: $(srcdir)/config/darwin-crt-tm.c - $(crt_compile) -mmacosx-version-min=10.4 -DSTART -c $< + $(crt_compile) $(DARWIN_MIN_CRT_VERSION) -DSTART -c $< crttme.o: $(srcdir)/config/darwin-crt-tm.c - $(crt_compile) -mmacosx-version-min=10.4 -DEND -c $< + $(crt_compile) $(DARWIN_MIN_CRT_VERSION) -DEND -c $< # Make emutls weak so that we can deal with -static-libgcc, override the # hidden visibility when this is present in libgcc_eh. @@ -25,6 +25,8 @@ libemutls_w.a: emutls_s.o $(RANLIB_FOR_TARGET) $@ # Patch to __Unwind_Find_Enclosing_Function for Darwin10. +# This needs to be built for darwin10, regardless of the current platform +# version. d10-uwfef.o: $(srcdir)/config/darwin10-unwind-find-enc-func.c libgcc_tm.h $(crt_compile) -mmacosx-version-min=10.6 -c $< diff --git a/libgcc/config/t-darwin-min-1 b/libgcc/config/t-darwin-min-1 new file mode 100644 index 0000000..8c2cf8a --- /dev/null +++ b/libgcc/config/t-darwin-min-1 @@ -0,0 +1,3 @@ +# Support building with -mmacosx-version-min back to 10.1. +DARWIN_MIN_LIB_VERSION = -mmacosx-version-min=10.4 +DARWIN_MIN_CRT_VERSION = -mmacosx-version-min=10.1 diff --git a/libgcc/config/t-darwin-min-5 b/libgcc/config/t-darwin-min-5 new file mode 100644 index 0000000..1381931 --- /dev/null +++ b/libgcc/config/t-darwin-min-5 @@ -0,0 +1,3 @@ +# Support building with -mmacosx-version-min back to 10.5. +DARWIN_MIN_LIB_VERSION = -mmacosx-version-min=10.5 +DARWIN_MIN_CRT_VERSION = -mmacosx-version-min=10.5 diff --git a/libgcc/config/t-darwin-min-8 b/libgcc/config/t-darwin-min-8 new file mode 100644 index 0000000..9efc9dc --- /dev/null +++ b/libgcc/config/t-darwin-min-8 @@ -0,0 +1,3 @@ +# Support building with -mmacosx-version-min back to 10.8. 
+DARWIN_MIN_LIB_VERSION = -mmacosx-version-min=10.8 +DARWIN_MIN_CRT_VERSION = -mmacosx-version-min=10.8 -- cgit v1.1 From 5df01f89b7f49c2750de4acf3a9b93a08210c627 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Sat, 20 May 2023 00:16:40 +0000 Subject: Daily bump. --- libgcc/ChangeLog | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'libgcc') diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index 78aa86c..1d4f3ab 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,16 @@ +2023-05-19 Iain Sandoe + + * config.host: Arrange to set min Darwin OS versions from + the configured host version. + * config/darwin10-unwind-find-enc-func.c: Do not use current + headers, but declare the necessary structures locally to the + versions in use for Mac OSX 10.6. + * config/t-darwin: Amend to handle configured min OS + versions. + * config/t-darwin-min-1: New. + * config/t-darwin-min-5: New. + * config/t-darwin-min-8: New. + 2023-05-15 Thomas Neumann * unwind-dw2-fde.c: Fix assert in non-atomic path. -- cgit v1.1 From 5cf60b6ba111f4169305c7832b063b000e9ec36a Mon Sep 17 00:00:00 2001 From: Thomas Neumann Date: Tue, 2 May 2023 16:21:09 +0200 Subject: release the sorted FDE array when deregistering a frame [PR109685] The atomic fastpath bypasses the code that releases the sort array which was lazily allocated during unwinding. We now check after deregistering if there is an array to free. libgcc/ChangeLog: PR libgcc/109685 * unwind-dw2-fde.c: Free sort array in atomic fast path. --- libgcc/unwind-dw2-fde.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'libgcc') diff --git a/libgcc/unwind-dw2-fde.c b/libgcc/unwind-dw2-fde.c index a5786bf..32b9e64 100644 --- a/libgcc/unwind-dw2-fde.c +++ b/libgcc/unwind-dw2-fde.c @@ -241,6 +241,12 @@ __deregister_frame_info_bases (const void *begin) // And remove ob = btree_remove (&registered_frames, range[0]); bool empty_table = (range[1] - range[0]) == 0; + + // Deallocate the sort array if any. 
+ if (ob && ob->s.b.sorted) + { + free (ob->u.sort); + } #else init_object_mutex_once (); __gthread_mutex_lock (&object_mutex); -- cgit v1.1 From 38e88d41f50d844f1404172657ef7e8372014ef6 Mon Sep 17 00:00:00 2001 From: Thomas Neumann Date: Wed, 10 May 2023 12:33:49 +0200 Subject: fix radix sort on 32bit platforms [PR109670] The radix sort uses two buffers, a1 for input and a2 for output. After every digit the role of the two buffers is swapped. When terminating the sort early the code made sure the output was in a2. However, when we run out of bits, as can happen on 32bit platforms, the sorted result was in a1, as we had just swapped a1 and a2. This patch fixes the problem by unconditionally having a1 as output after every loop iteration. This bug manifested itself only on 32bit platforms and even then only in some circumstances, as it needs frames where a swap is required due to differences in the top-most byte, which is affected by ASLR. The new logic was validated by exhaustive search over 32bit input values. libgcc/ChangeLog: PR libgcc/109670 * unwind-dw2-fde.c: Fix radix sort buffer management. --- libgcc/unwind-dw2-fde.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'libgcc') diff --git a/libgcc/unwind-dw2-fde.c b/libgcc/unwind-dw2-fde.c index 32b9e64..d7c4a46 100644 --- a/libgcc/unwind-dw2-fde.c +++ b/libgcc/unwind-dw2-fde.c @@ -634,8 +634,6 @@ fde_radixsort (struct object *ob, fde_extractor_t fde_extractor, // Stop if we are already sorted. if (!violations) { - // The sorted data is in a1 now. - a2 = a1; break; } @@ -670,9 +668,9 @@ fde_radixsort (struct object *ob, fde_extractor_t fde_extractor, #undef FANOUT #undef FANOUTBITS - // The data is in a2 now, move in place if needed. - if (a2 != v1->array) - memcpy (v1->array, a2, sizeof (const fde *) * n); + // The data is in a1 now, move in place if needed. 
+ if (a1 != v1->array) + memcpy (v1->array, a1, sizeof (const fde *) * n); } static inline void -- cgit v1.1 From 321cee7e29d1b9fc0b5e0b28f2bb8faa9eb6d3e4 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Sun, 4 Jun 2023 00:16:43 +0000 Subject: Daily bump. --- libgcc/ChangeLog | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'libgcc') diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index 1d4f3ab..66e1ee3 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,13 @@ +2023-06-03 Thomas Neumann + + PR libgcc/109670 + * unwind-dw2-fde.c: Fix radix sort buffer management. + +2023-06-03 Thomas Neumann + + PR libgcc/109685 + * unwind-dw2-fde.c: Free sort array in atomic fast path. + 2023-05-19 Iain Sandoe * config.host: Arrange to set min Darwin OS versions from -- cgit v1.1 From 29b74545531f6afbee9fc38c267524326dbfbedf Mon Sep 17 00:00:00 2001 From: YunQiang Su Date: Thu, 1 Jun 2023 10:14:24 +0800 Subject: MIPS: Add speculation_barrier support speculation_barrier for MIPS needs sync+jr.hb (r2+), so we implement __speculation_barrier in libgcc, like arm32 does. gcc/ChangeLog: * config/mips/mips-protos.h (mips_emit_speculation_barrier): New prototype. * config/mips/mips.cc (speculation_barrier_libfunc): New static variable. (mips_init_libfuncs): Initialize it. (mips_emit_speculation_barrier): New function. * config/mips/mips.md (speculation_barrier): Call mips_emit_speculation_barrier. libgcc/ChangeLog: * config/mips/lib1funcs.S: New file. define __speculation_barrier and include mips16.S. * config/mips/t-mips: define LIB1ASMSRC as mips/lib1funcs.S. define LIB1ASMFUNCS as _speculation_barrier. set version info for __speculation_barrier. * config/mips/libgcc-mips.ver: New file. * config/mips/t-mips16: don't define LIB1ASMSRC as mips16.S included in lib1funcs.S now. 
--- libgcc/config/mips/lib1funcs.S | 65 ++++++++++++++++++++++++++++++++++++++ libgcc/config/mips/libgcc-mips.ver | 21 ++++++++++++ libgcc/config/mips/t-mips | 7 ++++ libgcc/config/mips/t-mips16 | 3 +- 4 files changed, 94 insertions(+), 2 deletions(-) create mode 100644 libgcc/config/mips/lib1funcs.S create mode 100644 libgcc/config/mips/libgcc-mips.ver (limited to 'libgcc') diff --git a/libgcc/config/mips/lib1funcs.S b/libgcc/config/mips/lib1funcs.S new file mode 100644 index 0000000..97a3655 --- /dev/null +++ b/libgcc/config/mips/lib1funcs.S @@ -0,0 +1,65 @@ +/* Copyright (C) 2023 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +//#include "mips16.S" + +#ifdef L_speculation_barrier + +/* MIPS16e1 has no sync/jr.hb instructions, and MIPS16e2 lacks of jr.hb. + So, we use normal MIPS code here, just like what we do for __sync_*. */ + .set nomips16 + + .set noreorder + .globl __speculation_barrier + .ent __speculation_barrier + +__speculation_barrier: + .set push +#if __mips >= 2 + sync /* complementation barrier for memory. */ +#elif defined (__linux) + /* MIPS1 has no sync, while Linux can trap&emu sync. 
*/ + /* FIXME: Will somebody use linux/gcc for MIPS1/baremetal? */ + .word 0x0000000f +#endif + + +#if __mips_isa_rev >= 1 + /* Binutils claims that JR in R1 can do same as jr.hb. + R6 changes the encoding of jr.hb. */ + jr.hb $ra /* Jump with instruction hazard barrier. */ +#else + /* ssnop is actually available since R5500, + and it will be decoded as nop on earlier processors. + gas can only recognize it with -march=mips1 since 2.21. + MIPS1 to MIPSr1: R10000 have 7 stage pipeline, + so 8 ssnop is sufficient to block all speculation on all CPUs. */ + .rept 8 + .word 0x00000040 /* The encoding of ssnop. */ + .endr + /* jr.hb will be decoded as jr on earlier processors. */ + .word 0x03e00408 /* The encoding of jr.hb $ra. */ +#endif + .set pop + .end __speculation_barrier + + .set reorder +#endif diff --git a/libgcc/config/mips/libgcc-mips.ver b/libgcc/config/mips/libgcc-mips.ver new file mode 100644 index 0000000..68f8d2b --- /dev/null +++ b/libgcc/config/mips/libgcc-mips.ver @@ -0,0 +1,21 @@ +# Copyright (C) 2023 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . 
+ +GCC_14.0 { + __speculation_barrier +} diff --git a/libgcc/config/mips/t-mips b/libgcc/config/mips/t-mips index 4fb8e13..d05ef7c 100644 --- a/libgcc/config/mips/t-mips +++ b/libgcc/config/mips/t-mips @@ -7,3 +7,10 @@ softfp_truncations := softfp_exclude_libgcc2 := n LIB2ADD_ST += $(srcdir)/config/mips/lib2funcs.c + + +LIB1ASMSRC = mips/lib1funcs.S +LIB1ASMFUNCS = _speculation_barrier + +# Version these symbols if building libgcc.so. +SHLIB_MAPFILES += $(srcdir)/config/mips/libgcc-mips.ver diff --git a/libgcc/config/mips/t-mips16 b/libgcc/config/mips/t-mips16 index 2bad511..5fd9d60 100644 --- a/libgcc/config/mips/t-mips16 +++ b/libgcc/config/mips/t-mips16 @@ -16,8 +16,7 @@ # along with GCC; see the file COPYING3. If not see # . -LIB1ASMSRC = mips/mips16.S -LIB1ASMFUNCS = _m16addsf3 _m16subsf3 _m16mulsf3 _m16divsf3 \ +LIB1ASMFUNCS += _m16addsf3 _m16subsf3 _m16mulsf3 _m16divsf3 \ _m16eqsf2 _m16nesf2 _m16gtsf2 _m16gesf2 _m16lesf2 _m16ltsf2 \ _m16unordsf2 \ _m16fltsisf _m16fix_truncsfsi _m16fltunsisf \ -- cgit v1.1 From 83c3550ee96aa2d0c964bfb6d0f82428561479fd Mon Sep 17 00:00:00 2001 From: Kewen Lin Date: Sun, 4 Jun 2023 22:39:32 -0500 Subject: libgcc: Use initarray section type for .init_stack One of my workmates found there is a warning like: libgcc/config/rs6000/morestack.S:402: Warning: ignoring incorrect section type for .init_array.00000 when compiling libgcc/config/rs6000/morestack.S. Since commit r13-6545 touched that file recently, which was suspected to be responsible for this warning, I did some investigation and found this is a warning staying for a long time. For section .init_stack*, it's preferred to use section type SHT_INIT_ARRAY. So this patch is use "@init_array" to replace "@progbits". Although the warning is trivial, Segher suggested me to post this to fix it, in order to avoid any possible misunderstanding/confusion on the warning. 
As Alan confirmed, this doesn't require a premise check on if the existing binutils supports "@init_array" or not, "because if you want split-stack to work, you must link with gold, any version of binutils that has gold has an assembler that understands @init_array". (Thanks Alan!) libgcc/ChangeLog: * config/i386/morestack.S: Use @init_array rather than @progbits for section type of section .init_array. * config/rs6000/morestack.S: Likewise. * config/s390/morestack.S: Likewise. --- libgcc/config/i386/morestack.S | 2 +- libgcc/config/rs6000/morestack.S | 2 +- libgcc/config/s390/morestack.S | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'libgcc') diff --git a/libgcc/config/i386/morestack.S b/libgcc/config/i386/morestack.S index 7ae99b5..c822b71 100644 --- a/libgcc/config/i386/morestack.S +++ b/libgcc/config/i386/morestack.S @@ -850,7 +850,7 @@ __morestack_make_guard: # This is ELF specific. #if HAVE_INITFINI_ARRAY_SUPPORT - .section .init_array.00000,"aw",@progbits + .section .init_array.00000,"aw",@init_array #else .section .ctors.65535,"aw",@progbits #endif diff --git a/libgcc/config/rs6000/morestack.S b/libgcc/config/rs6000/morestack.S index f2fea6a..dd1e27c 100644 --- a/libgcc/config/rs6000/morestack.S +++ b/libgcc/config/rs6000/morestack.S @@ -399,7 +399,7 @@ ENTRY0(__morestack_make_guard) # Make __stack_split_initialize a high priority constructor. #if HAVE_INITFINI_ARRAY_SUPPORT - .section .init_array.00000,"aw",@progbits + .section .init_array.00000,"aw",@init_array #else .section .ctors.65535,"aw",@progbits #endif diff --git a/libgcc/config/s390/morestack.S b/libgcc/config/s390/morestack.S index 09a49bb..f52e7a6 100644 --- a/libgcc/config/s390/morestack.S +++ b/libgcc/config/s390/morestack.S @@ -597,7 +597,7 @@ __morestack_make_guard: # Make __stack_split_initialize a high priority constructor. 
#if HAVE_INITFINI_ARRAY_SUPPORT - .section .init_array.00000,"aw",@progbits + .section .init_array.00000,"aw",@init_array #else .section .ctors.65535,"aw",@progbits #endif -- cgit v1.1 From 14da764809d3bffe9336e72999312ba3fb019dc6 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Tue, 6 Jun 2023 00:17:33 +0000 Subject: Daily bump. --- libgcc/ChangeLog | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'libgcc') diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index 66e1ee3..f6ff5ce 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,21 @@ +2023-06-05 Kewen Lin + + * config/i386/morestack.S: Use @init_array rather than + @progbits for section type of section .init_array. + * config/rs6000/morestack.S: Likewise. + * config/s390/morestack.S: Likewise. + +2023-06-05 YunQiang Su + + * config/mips/lib1funcs.S: New file. + define __speculation_barrier and include mips16.S. + * config/mips/t-mips: define LIB1ASMSRC as mips/lib1funcs.S. + define LIB1ASMFUNCS as _speculation_barrier. + set version info for __speculation_barrier. + * config/mips/libgcc-mips.ver: New file. + * config/mips/t-mips16: don't define LIB1ASMSRC as mips16.S + included in lib1funcs.S now. + 2023-06-03 Thomas Neumann PR libgcc/109670 -- cgit v1.1 From 49310a993308492348119f4033e4db0bda4fe46a Mon Sep 17 00:00:00 2001 From: Florian Weimer Date: Tue, 6 Jun 2023 11:01:07 +0200 Subject: libgcc: Fix eh_frame fast path in find_fde_tail The eh_frame value is only used by linear_search_fdes, not the binary search directly in find_fde_tail, so the bug is not immediately apparent with most programs. Fixes commit e724b0480bfa5ec04f39be8c7290330b495c59de ("libgcc: Special-case BFD ld unwind table encodings in find_fde_tail"). libgcc/ PR libgcc/109712 * unwind-dw2-fde-dip.c (find_fde_tail): Correct fast path for parsing eh_frame. 
--- libgcc/unwind-dw2-fde-dip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'libgcc') diff --git a/libgcc/unwind-dw2-fde-dip.c b/libgcc/unwind-dw2-fde-dip.c index 6223f5f..4e0b880 100644 --- a/libgcc/unwind-dw2-fde-dip.c +++ b/libgcc/unwind-dw2-fde-dip.c @@ -403,8 +403,8 @@ find_fde_tail (_Unwind_Ptr pc, BFD ld generates. */ signed value __attribute__ ((mode (SI))); memcpy (&value, p, sizeof (value)); + eh_frame = p + value; p += sizeof (value); - dbase = value; /* No adjustment because pcrel has base 0. */ } else p = read_encoded_value_with_base (hdr->eh_frame_ptr_enc, -- cgit v1.1 From 8c5b136583494532dcf43c213a9fb4ed0d9f6ecd Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Thu, 8 Jun 2023 00:17:20 +0000 Subject: Daily bump. --- libgcc/ChangeLog | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'libgcc') diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index f6ff5ce..0668e26 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,9 @@ +2023-06-07 Florian Weimer + + PR libgcc/109712 + * unwind-dw2-fde-dip.c (find_fde_tail): Correct fast path for + parsing eh_frame. + 2023-06-05 Kewen Lin * config/i386/morestack.S: Use @init_array rather than -- cgit v1.1 From 1ff8ba48a2958b5917653e1bc2ddd5ff22097fe7 Mon Sep 17 00:00:00 2001 From: Andrew Stubbs Date: Fri, 16 Jun 2023 17:48:23 +0100 Subject: amdgcn: Delete inactive libfuncs The HImode libfuncs weren't called and trying to enable them fails because TARGET_PROMOTE_FUNCTION_MODE wants to widen the arguments but the signedness isn't known. libgcc/ChangeLog: * config/gcn/lib2-gcn.h (QItype, UQItype, HItype, UHItype): Delete. (__divhi3, __modhi3, __udivhi3, __umodhi3): Delete. * config/gcn/t-amdgcn: Don't build lib2-divmod-hi.c. * config/gcn/lib2-divmod-hi.c: Removed. 
--- libgcc/config/gcn/lib2-divmod-hi.c | 117 ------------------------------------- libgcc/config/gcn/lib2-gcn.h | 8 --- libgcc/config/gcn/t-amdgcn | 1 - 3 files changed, 126 deletions(-) delete mode 100644 libgcc/config/gcn/lib2-divmod-hi.c (limited to 'libgcc') diff --git a/libgcc/config/gcn/lib2-divmod-hi.c b/libgcc/config/gcn/lib2-divmod-hi.c deleted file mode 100644 index f4584aab..0000000 --- a/libgcc/config/gcn/lib2-divmod-hi.c +++ /dev/null @@ -1,117 +0,0 @@ -/* Copyright (C) 2012-2023 Free Software Foundation, Inc. - Contributed by Altera and Mentor Graphics, Inc. - -This file is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 3, or (at your option) any -later version. - -This file is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -General Public License for more details. - -Under Section 7 of GPL version 3, you are granted additional -permissions described in the GCC Runtime Library Exception, version -3.1, as published by the Free Software Foundation. - -You should have received a copy of the GNU General Public License and -a copy of the GCC Runtime Library Exception along with this program; -see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -. */ - -#include "lib2-gcn.h" - -/* 16-bit HI divide and modulo as used in gcn. 
*/ - -static UHItype -udivmodhi4 (UHItype num, UHItype den, word_type modwanted) -{ - UHItype bit = 1; - UHItype res = 0; - - while (den < num && bit && !(den & (1L<<15))) - { - den <<=1; - bit <<=1; - } - while (bit) - { - if (num >= den) - { - num -= den; - res |= bit; - } - bit >>=1; - den >>=1; - } - if (modwanted) - return num; - return res; -} - - -HItype -__divhi3 (HItype a, HItype b) -{ - word_type neg = 0; - HItype res; - - if (a < 0) - { - a = -a; - neg = !neg; - } - - if (b < 0) - { - b = -b; - neg = !neg; - } - - res = udivmodhi4 (a, b, 0); - - if (neg) - res = -res; - - return res; -} - - -HItype -__modhi3 (HItype a, HItype b) -{ - word_type neg = 0; - HItype res; - - if (a < 0) - { - a = -a; - neg = 1; - } - - if (b < 0) - b = -b; - - res = udivmodhi4 (a, b, 1); - - if (neg) - res = -res; - - return res; -} - - -UHItype -__udivhi3 (UHItype a, UHItype b) -{ - return udivmodhi4 (a, b, 0); -} - - -UHItype -__umodhi3 (UHItype a, UHItype b) -{ - return udivmodhi4 (a, b, 1); -} - diff --git a/libgcc/config/gcn/lib2-gcn.h b/libgcc/config/gcn/lib2-gcn.h index 645245b..67ad9ba 100644 --- a/libgcc/config/gcn/lib2-gcn.h +++ b/libgcc/config/gcn/lib2-gcn.h @@ -27,10 +27,6 @@ /* Types. 
*/ -typedef char QItype __attribute__ ((mode (QI))); -typedef unsigned char UQItype __attribute__ ((mode (QI))); -typedef short HItype __attribute__ ((mode (HI))); -typedef unsigned short UHItype __attribute__ ((mode (HI))); typedef int SItype __attribute__ ((mode (SI))); typedef unsigned int USItype __attribute__ ((mode (SI))); typedef int DItype __attribute__ ((mode (DI))); @@ -48,10 +44,6 @@ extern SItype __divsi3 (SItype, SItype); extern SItype __modsi3 (SItype, SItype); extern USItype __udivsi3 (USItype, USItype); extern USItype __umodsi3 (USItype, USItype); -extern HItype __divhi3 (HItype, HItype); -extern HItype __modhi3 (HItype, HItype); -extern UHItype __udivhi3 (UHItype, UHItype); -extern UHItype __umodhi3 (UHItype, UHItype); extern SItype __mulsi3 (SItype, SItype); #endif /* LIB2_GCN_H */ diff --git a/libgcc/config/gcn/t-amdgcn b/libgcc/config/gcn/t-amdgcn index 38bde54..e64953e 100644 --- a/libgcc/config/gcn/t-amdgcn +++ b/libgcc/config/gcn/t-amdgcn @@ -1,6 +1,5 @@ LIB2ADD += $(srcdir)/config/gcn/atomic.c \ $(srcdir)/config/gcn/lib2-divmod.c \ - $(srcdir)/config/gcn/lib2-divmod-hi.c \ $(srcdir)/config/gcn/lib2-divmod-di.c \ $(srcdir)/config/gcn/lib2-bswapti2.c \ $(srcdir)/config/gcn/unwind-gcn.c -- cgit v1.1 From d9d6774527bccc5ce0394851aa232f8abdaade4c Mon Sep 17 00:00:00 2001 From: Andrew Stubbs Date: Thu, 27 Apr 2023 15:34:28 +0100 Subject: amdgcn: implement vector div and mod libfuncs Also divmod, but only for scalar modes, for now (because there are no complex int vectors yet). gcc/ChangeLog: * config/gcn/gcn.cc (gcn_expand_divmod_libfunc): New function. (gcn_init_libfuncs): Add div and mod functions for all modes. Add placeholders for divmod functions. (TARGET_EXPAND_DIVMOD_LIBFUNC): Define. libgcc/ChangeLog: * config/gcn/lib2-divmod-di.c: Reimplement like lib2-divmod.c. * config/gcn/lib2-divmod.c: Likewise. * config/gcn/lib2-gcn.h: Add new types and prototypes for all the new vector libfuncs. * config/gcn/t-amdgcn: Add new files. 
* config/gcn/amdgcn_veclib.h: New file. * config/gcn/lib2-vec_divmod-di.c: New file. * config/gcn/lib2-vec_divmod-hi.c: New file. * config/gcn/lib2-vec_divmod-qi.c: New file. * config/gcn/lib2-vec_divmod.c: New file. gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/predcom-2.c: Avoid vectors on amdgcn. * gcc.dg/unroll-8.c: Likewise. * gcc.dg/vect/slp-26.c: Change expected results on amdgcn. * lib/target-supports.exp (check_effective_target_vect_int_mod): Add amdgcn. (check_effective_target_divmod): Likewise. * gcc.target/gcn/simd-math-3-16.c: New test. * gcc.target/gcn/simd-math-3-2.c: New test. * gcc.target/gcn/simd-math-3-32.c: New test. * gcc.target/gcn/simd-math-3-4.c: New test. * gcc.target/gcn/simd-math-3-8.c: New test. * gcc.target/gcn/simd-math-3-char-16.c: New test. * gcc.target/gcn/simd-math-3-char-2.c: New test. * gcc.target/gcn/simd-math-3-char-32.c: New test. * gcc.target/gcn/simd-math-3-char-4.c: New test. * gcc.target/gcn/simd-math-3-char-8.c: New test. * gcc.target/gcn/simd-math-3-char-run-16.c: New test. * gcc.target/gcn/simd-math-3-char-run-2.c: New test. * gcc.target/gcn/simd-math-3-char-run-32.c: New test. * gcc.target/gcn/simd-math-3-char-run-4.c: New test. * gcc.target/gcn/simd-math-3-char-run-8.c: New test. * gcc.target/gcn/simd-math-3-char-run.c: New test. * gcc.target/gcn/simd-math-3-char.c: New test. * gcc.target/gcn/simd-math-3-long-16.c: New test. * gcc.target/gcn/simd-math-3-long-2.c: New test. * gcc.target/gcn/simd-math-3-long-32.c: New test. * gcc.target/gcn/simd-math-3-long-4.c: New test. * gcc.target/gcn/simd-math-3-long-8.c: New test. * gcc.target/gcn/simd-math-3-long-run-16.c: New test. * gcc.target/gcn/simd-math-3-long-run-2.c: New test. * gcc.target/gcn/simd-math-3-long-run-32.c: New test. * gcc.target/gcn/simd-math-3-long-run-4.c: New test. * gcc.target/gcn/simd-math-3-long-run-8.c: New test. * gcc.target/gcn/simd-math-3-long-run.c: New test. * gcc.target/gcn/simd-math-3-long.c: New test.
* gcc.target/gcn/simd-math-3-run-16.c: New test. * gcc.target/gcn/simd-math-3-run-2.c: New test. * gcc.target/gcn/simd-math-3-run-32.c: New test. * gcc.target/gcn/simd-math-3-run-4.c: New test. * gcc.target/gcn/simd-math-3-run-8.c: New test. * gcc.target/gcn/simd-math-3-run.c: New test. * gcc.target/gcn/simd-math-3-short-16.c: New test. * gcc.target/gcn/simd-math-3-short-2.c: New test. * gcc.target/gcn/simd-math-3-short-32.c: New test. * gcc.target/gcn/simd-math-3-short-4.c: New test. * gcc.target/gcn/simd-math-3-short-8.c: New test. * gcc.target/gcn/simd-math-3-short-run-16.c: New test. * gcc.target/gcn/simd-math-3-short-run-2.c: New test. * gcc.target/gcn/simd-math-3-short-run-32.c: New test. * gcc.target/gcn/simd-math-3-short-run-4.c: New test. * gcc.target/gcn/simd-math-3-short-run-8.c: New test. * gcc.target/gcn/simd-math-3-short-run.c: New test. * gcc.target/gcn/simd-math-3-short.c: New test. * gcc.target/gcn/simd-math-3.c: New test. * gcc.target/gcn/simd-math-4-char-run.c: New test. * gcc.target/gcn/simd-math-4-char.c: New test. * gcc.target/gcn/simd-math-4-long-run.c: New test. * gcc.target/gcn/simd-math-4-long.c: New test. * gcc.target/gcn/simd-math-4-run.c: New test. * gcc.target/gcn/simd-math-4-short-run.c: New test. * gcc.target/gcn/simd-math-4-short.c: New test. * gcc.target/gcn/simd-math-4.c: New test. * gcc.target/gcn/simd-math-5-16.c: New test. * gcc.target/gcn/simd-math-5-32.c: New test. * gcc.target/gcn/simd-math-5-4.c: New test. * gcc.target/gcn/simd-math-5-8.c: New test. * gcc.target/gcn/simd-math-5-char-16.c: New test. * gcc.target/gcn/simd-math-5-char-32.c: New test. * gcc.target/gcn/simd-math-5-char-4.c: New test. * gcc.target/gcn/simd-math-5-char-8.c: New test. * gcc.target/gcn/simd-math-5-char-run-16.c: New test. * gcc.target/gcn/simd-math-5-char-run-32.c: New test. * gcc.target/gcn/simd-math-5-char-run-4.c: New test. * gcc.target/gcn/simd-math-5-char-run-8.c: New test. * gcc.target/gcn/simd-math-5-char-run.c: New test. 
* gcc.target/gcn/simd-math-5-char.c: New test. * gcc.target/gcn/simd-math-5-long-16.c: New test. * gcc.target/gcn/simd-math-5-long-32.c: New test. * gcc.target/gcn/simd-math-5-long-4.c: New test. * gcc.target/gcn/simd-math-5-long-8.c: New test. * gcc.target/gcn/simd-math-5-long-run-16.c: New test. * gcc.target/gcn/simd-math-5-long-run-32.c: New test. * gcc.target/gcn/simd-math-5-long-run-4.c: New test. * gcc.target/gcn/simd-math-5-long-run-8.c: New test. * gcc.target/gcn/simd-math-5-long-run.c: New test. * gcc.target/gcn/simd-math-5-long.c: New test. * gcc.target/gcn/simd-math-5-run-16.c: New test. * gcc.target/gcn/simd-math-5-run-32.c: New test. * gcc.target/gcn/simd-math-5-run-4.c: New test. * gcc.target/gcn/simd-math-5-run-8.c: New test. * gcc.target/gcn/simd-math-5-run.c: New test. * gcc.target/gcn/simd-math-5-short-16.c: New test. * gcc.target/gcn/simd-math-5-short-32.c: New test. * gcc.target/gcn/simd-math-5-short-4.c: New test. * gcc.target/gcn/simd-math-5-short-8.c: New test. * gcc.target/gcn/simd-math-5-short-run-16.c: New test. * gcc.target/gcn/simd-math-5-short-run-32.c: New test. * gcc.target/gcn/simd-math-5-short-run-4.c: New test. * gcc.target/gcn/simd-math-5-short-run-8.c: New test. * gcc.target/gcn/simd-math-5-short-run.c: New test. * gcc.target/gcn/simd-math-5-short.c: New test. * gcc.target/gcn/simd-math-5.c: New test. 
--- libgcc/config/gcn/amdgcn_veclib.h | 322 +++++++++++++++++++++++++++++++++ libgcc/config/gcn/lib2-divmod-di.c | 105 ++++++++++- libgcc/config/gcn/lib2-divmod.c | 82 +++++---- libgcc/config/gcn/lib2-gcn.h | 114 ++++++++++++ libgcc/config/gcn/lib2-vec_divmod-di.c | 118 ++++++++++++ libgcc/config/gcn/lib2-vec_divmod-hi.c | 118 ++++++++++++ libgcc/config/gcn/lib2-vec_divmod-qi.c | 118 ++++++++++++ libgcc/config/gcn/lib2-vec_divmod.c | 118 ++++++++++++ libgcc/config/gcn/t-amdgcn | 4 + 9 files changed, 1052 insertions(+), 47 deletions(-) create mode 100644 libgcc/config/gcn/amdgcn_veclib.h create mode 100644 libgcc/config/gcn/lib2-vec_divmod-di.c create mode 100644 libgcc/config/gcn/lib2-vec_divmod-hi.c create mode 100644 libgcc/config/gcn/lib2-vec_divmod-qi.c create mode 100644 libgcc/config/gcn/lib2-vec_divmod.c (limited to 'libgcc') diff --git a/libgcc/config/gcn/amdgcn_veclib.h b/libgcc/config/gcn/amdgcn_veclib.h new file mode 100644 index 0000000..15ea20b --- /dev/null +++ b/libgcc/config/gcn/amdgcn_veclib.h @@ -0,0 +1,322 @@ +/* Macro library used to help during conversion of scalar math functions to + vectorized SIMD equivalents on AMD GCN. + + Copyright (C) 2023 Free Software Foundation, Inc. + Contributed by Siemens. + + This file is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 3, or (at your option) any + later version. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +typedef union { + v2sf t_v2sf; + v4sf t_v4sf; + v8sf t_v8sf; + v16sf t_v16sf; + v32sf t_v32sf; + v64sf t_v64sf; + + v2df t_v2df; + v4df t_v4df; + v8df t_v8df; + v16df t_v16df; + v32df t_v32df; + v64df t_v64df; + + v64qi t_v64qi; + v64hi t_v64hi; + + v2si t_v2si; + v4si t_v4si; + v8si t_v8si; + v16si t_v16si; + v32si t_v32si; + v64si t_v64si; + + v64usi t_v64usi; + + v2di t_v2di; + v4di t_v4di; + v8di t_v8di; + v16di t_v16di; + v32di t_v32di; + v64di t_v64di; +} vector_union; + +/* Cast between vectors with a different number of elements, or type. */ + +#define VGPR_CAST(to_t, from) \ +({ \ + to_t __res; \ + __asm__ ("" : "=v"(__res) : "0"(from)); \ + __res; \ +}) + +#define PACK_SI_PAIR(low, high) \ +({ \ + v64udi __res; \ + asm ("v_mov_b32\t%L0, %1\n\t" \ + "v_mov_b32\t%H0, %2" \ + : "=&v"(__res) : "v0"(low), "v"(high), "e"(-1L)); \ + __res; \ + }) + +#define UNPACK_SI_LOW(to_t, pair) VGPR_CAST(to_t, pair) +#define UNPACK_SI_HIGH(to_t, pair) \ +({ \ + to_t __res; \ + asm ("v_mov_b32\t%0, %H1" : "=v"(__res) : "v"(pair), "e"(-1L)); \ + __res; \ + }) + +#define PACK_DI_PAIR(low, high) \ +({ \ + v64uti __res; \ + asm ("v_mov_b32\t%L0, %L1\n\t" \ + "v_mov_b32\t%H0, %H1\n\t" \ + "v_mov_b32\t%J0, %L2\n\t" \ + "v_mov_b32\t%K0, %H2" \ + : "=&v"(__res) : "v0"(low), "v"(high), "e"(-1L)); \ + __res; \ + }) + +#define UNPACK_DI_LOW(to_t, pair) VGPR_CAST(to_t, pair) +#define UNPACK_DI_HIGH(to_t, pair) \ +({ \ + to_t __res; \ + asm ("v_mov_b32\t%L0, %J1\n\t" \ + "v_mov_b32\t%H0, %K1" : "=v"(__res) : "v"(pair), "e"(-1L)); \ + __res; \ + }) + +#define NO_COND __mask + +/* Note - __mask is _not_ accounted for in VECTOR_MERGE! 
*/ +#define VECTOR_MERGE(vec1, vec2, cond) \ +({ \ + _Static_assert (__builtin_types_compatible_p (typeof (vec1), typeof (vec2))); \ + union { \ + typeof (vec1) val; \ + v64qi t_v64qi; \ + v64hi t_v64hi; \ + v64si t_v64si; \ + v64di t_v64di; \ + } __vec1, __vec2, __res; \ + __vec1.val = (vec1); \ + __vec2.val = (vec2); \ + __builtin_choose_expr ( \ + sizeof (vec1) == sizeof (v64si), \ + ({ \ + v64si __bitmask = __builtin_convertvector ((cond), v64si); \ + __res.t_v64si = (__vec1.t_v64si & __bitmask) \ + | (__vec2.t_v64si & ~__bitmask); \ + }), \ + __builtin_choose_expr ( \ + sizeof (vec1) == sizeof (v64hi), \ + ({ \ + v64hi __bitmask = __builtin_convertvector ((cond), v64hi); \ + __res.t_v64hi = (__vec1.t_v64hi & __bitmask) \ + | (__vec2.t_v64hi & ~__bitmask); \ + }), \ + __builtin_choose_expr ( \ + sizeof (vec1) == sizeof (v64qi), \ + ({ \ + v64qi __bitmask = __builtin_convertvector ((cond), v64qi); \ + __res.t_v64qi = (__vec1.t_v64qi & __bitmask) \ + | (__vec2.t_v64qi & ~__bitmask); \ + }), \ + ({ \ + v64di __bitmask = __builtin_convertvector ((cond), v64di); \ + __res.t_v64di = (__vec1.t_v64di & __bitmask) \ + | (__vec2.t_v64di & ~__bitmask); \ + })))); \ + __res.val; \ +}) + +#define VECTOR_COND_MOVE(var, val, cond) \ +do { \ + _Static_assert (__builtin_types_compatible_p (typeof (var), typeof (val))); \ + __auto_type __cond = __builtin_convertvector ((cond), typeof (__mask)); \ + var = VECTOR_MERGE ((val), var, __cond & __mask); \ +} while (0) + +#define VECTOR_IF(cond, cond_var) \ +{ \ + __auto_type cond_var = (cond); \ + __auto_type __inv_cond __attribute__((unused)) = ~cond_var; \ + if (!ALL_ZEROES_P (cond_var)) \ + { + +#define VECTOR_ELSEIF(cond, cond_var) \ + } \ + cond_var = __inv_cond & (cond); \ + __inv_cond &= ~(cond); \ + if (!ALL_ZEROES_P (cond_var)) \ + { + +#define VECTOR_ELSE(cond_var) \ + } \ + cond_var = __inv_cond; \ + if (!ALL_ZEROES_P (cond_var)) \ + { + +#define VECTOR_IF2(cond, cond_var, prev_cond_var) \ +{ \ + __auto_type cond_var = 
(cond) & __builtin_convertvector (prev_cond_var, typeof (cond)); \ + __auto_type __inv_cond __attribute__((unused)) = ~cond_var; \ + if (!ALL_ZEROES_P (cond_var)) \ + { + +#define VECTOR_ELSEIF2(cond, cond_var, prev_cond_var) \ + } \ + cond_var = (cond) & __inv_cond & __builtin_convertvector (prev_cond_var, typeof (cond)); \ + __inv_cond &= ~(cond); \ + if (!ALL_ZEROES_P (cond_var)) \ + { + +#define VECTOR_ELSE2(cond_var, prev_cond_var) \ + } \ + cond_var = __inv_cond & __builtin_convertvector (prev_cond_var, typeof (__inv_cond)); \ + if (!ALL_ZEROES_P (cond_var)) \ + { + + +#define VECTOR_ENDIF \ + } \ +} + +#define VECTOR_INIT_AUX(x, type) \ +({ \ + typeof (x) __e = (x); \ + type __tmp = { \ + __e, __e, __e, __e, __e, __e, __e, __e, \ + __e, __e, __e, __e, __e, __e, __e, __e, \ + __e, __e, __e, __e, __e, __e, __e, __e, \ + __e, __e, __e, __e, __e, __e, __e, __e, \ + __e, __e, __e, __e, __e, __e, __e, __e, \ + __e, __e, __e, __e, __e, __e, __e, __e, \ + __e, __e, __e, __e, __e, __e, __e, __e, \ + __e, __e, __e, __e, __e, __e, __e, __e }; \ + __tmp; \ +}) + +#define VECTOR_INIT(x) \ + (_Generic ((x), int: VECTOR_INIT_AUX ((x), v64si), \ + unsigned: VECTOR_INIT_AUX ((x), v64usi), \ + char: VECTOR_INIT_AUX ((x), v64qi), \ + unsigned char: VECTOR_INIT_AUX ((x), v64uqi), \ + short: VECTOR_INIT_AUX ((x), v64hi), \ + unsigned short: VECTOR_INIT_AUX ((x), v64uhi), \ + long: VECTOR_INIT_AUX ((x), v64di), \ + unsigned long: VECTOR_INIT_AUX ((x), v64udi), \ + float: VECTOR_INIT_AUX ((x), v64sf), \ + double: VECTOR_INIT_AUX ((x), v64df))) + + +#if defined (__GCN3__) || defined (__GCN5__) \ + || defined (__CDNA1__) || defined (__CDNA2__) +#define CDNA3_PLUS 0 +#else +#define CDNA3_PLUS 1 +#endif + +#define VECTOR_INIT_MASK(COUNT) \ +({ \ + MASKMODE __mask; \ + int count = (COUNT); \ + if (count == 64) \ + { \ + if (sizeof (MASKMODE) < 512 || CDNA3_PLUS) \ + asm ("v_mov%B0\t%0, -1" : "=v"(__mask) : "e"(-1L)); \ + else \ + asm ("v_mov_b32\t%L0, -1\n\t" \ + "v_mov_b32\t%H0, -1" : 
"=v"(__mask) : "e"(-1L)); \ + } \ + else \ + { \ + long bitmask = (count == 64 ? -1 : (1<= den) + { + num -= den; + res.pair.quot |= bit; + } + bit >>=1; + den >>=1; + } + res.pair.rem = num; + return res.ti; +} + +UTItype +__divmoddi4 (DItype a, DItype b) +{ + word_type nega = 0, negb = 0; + union pack res; + + if (a < 0) + { + a = -a; + nega = 1; + } + + if (b < 0) + { + b = -b; + negb = 1; + } + + res.ti = __udivmoddi4 (a, b); + + if (nega) + res.pair.rem = -res.pair.rem; + if (nega ^ negb) + res.pair.quot = -res.pair.quot; + + return res.ti; +} + + +DItype +__divdi3 (DItype a, DItype b) +{ + union pack u; + u.ti = __divmoddi4 (a, b); + return u.pair.quot; +} + +DItype +__moddi3 (DItype a, DItype b) +{ + union pack u; + u.ti = __divmoddi4 (a, b); + return u.pair.rem; +} + + +UDItype +__udivdi3 (UDItype a, UDItype b) +{ + union pack u; + u.ti = __udivmoddi4 (a, b); + return u.pair.quot; +} + +UDItype +__umoddi3 (UDItype a, UDItype b) +{ + union pack u; + u.ti = __udivmoddi4 (a, b); + return u.pair.rem; +} -#include "libgcc2.c" diff --git a/libgcc/config/gcn/lib2-divmod.c b/libgcc/config/gcn/lib2-divmod.c index c350f78..d701d1a 100644 --- a/libgcc/config/gcn/lib2-divmod.c +++ b/libgcc/config/gcn/lib2-divmod.c @@ -24,11 +24,20 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see /* 32-bit SI divide and modulo as used in gcn. 
*/ -static USItype -udivmodsi4 (USItype num, USItype den, word_type modwanted) +union pack { + UDItype di; + struct {SItype quot, rem;} pair; +}; +union upack { + UDItype di; + struct {USItype quot, rem;} pair; +}; + +UDItype +__udivmodsi4 (USItype num, USItype den) { USItype bit = 1; - USItype res = 0; + union upack res = {0}; while (den < num && bit && !(den & (1L<<31))) { @@ -40,78 +49,75 @@ udivmodsi4 (USItype num, USItype den, word_type modwanted) if (num >= den) { num -= den; - res |= bit; + res.pair.quot |= bit; } bit >>=1; den >>=1; } - if (modwanted) - return num; - return res; + res.pair.rem = num; + return res.di; } - -SItype -__divsi3 (SItype a, SItype b) +UDItype +__divmodsi4 (SItype a, SItype b) { - word_type neg = 0; - SItype res; + word_type nega = 0, negb = 0; + union pack res; if (a < 0) { a = -a; - neg = !neg; + nega = 1; } if (b < 0) { b = -b; - neg = !neg; + negb = 1; } - res = udivmodsi4 (a, b, 0); + res.di = __udivmodsi4 (a, b); - if (neg) - res = -res; + if (nega) + res.pair.rem = -res.pair.rem; + if (nega ^ negb) + res.pair.quot = -res.pair.quot; - return res; + return res.di; } SItype -__modsi3 (SItype a, SItype b) +__divsi3 (SItype a, SItype b) { - word_type neg = 0; - SItype res; - - if (a < 0) - { - a = -a; - neg = 1; - } - - if (b < 0) - b = -b; - - res = udivmodsi4 (a, b, 1); - - if (neg) - res = -res; + union pack u; + u.di = __divmodsi4 (a, b); + return u.pair.quot; +} - return res; +SItype +__modsi3 (SItype a, SItype b) +{ + union pack u; + u.di = __divmodsi4 (a, b); + return u.pair.rem; } USItype __udivsi3 (USItype a, USItype b) { - return udivmodsi4 (a, b, 0); + union pack u; + u.di = __udivmodsi4 (a, b); + return u.pair.quot; } USItype __umodsi3 (USItype a, USItype b) { - return udivmodsi4 (a, b, 1); + union pack u; + u.di = __udivmodsi4 (a, b); + return u.pair.rem; } diff --git a/libgcc/config/gcn/lib2-gcn.h b/libgcc/config/gcn/lib2-gcn.h index 67ad9ba..dc071c0 100644 --- a/libgcc/config/gcn/lib2-gcn.h +++ 
b/libgcc/config/gcn/lib2-gcn.h @@ -35,15 +35,129 @@ typedef int TItype __attribute__ ((mode (TI))); typedef unsigned int UTItype __attribute__ ((mode (TI))); typedef int word_type __attribute__ ((mode (__word__))); +typedef float v2sf __attribute__ ((vector_size (8))); +typedef float v4sf __attribute__ ((vector_size (16))); +typedef float v8sf __attribute__ ((vector_size (32))); +typedef float v16sf __attribute__ ((vector_size (64))); +typedef float v32sf __attribute__ ((vector_size (128))); +typedef float v64sf __attribute__ ((vector_size (256))); + +typedef double v2df __attribute__ ((vector_size (16))); +typedef double v4df __attribute__ ((vector_size (32))); +typedef double v8df __attribute__ ((vector_size (64))); +typedef double v16df __attribute__ ((vector_size (128))); +typedef double v32df __attribute__ ((vector_size (256))); +typedef double v64df __attribute__ ((vector_size (512))); + +typedef signed char v2qi __attribute__ ((vector_size (2))); +typedef signed char v4qi __attribute__ ((vector_size (4))); +typedef signed char v8qi __attribute__ ((vector_size (8))); +typedef signed char v16qi __attribute__ ((vector_size (16))); +typedef signed char v32qi __attribute__ ((vector_size (32))); +typedef signed char v64qi __attribute__ ((vector_size (64))); + +typedef unsigned char v2uqi __attribute__ ((vector_size (2))); +typedef unsigned char v4uqi __attribute__ ((vector_size (4))); +typedef unsigned char v8uqi __attribute__ ((vector_size (8))); +typedef unsigned char v16uqi __attribute__ ((vector_size (16))); +typedef unsigned char v32uqi __attribute__ ((vector_size (32))); +typedef unsigned char v64uqi __attribute__ ((vector_size (64))); + +typedef short v2hi __attribute__ ((vector_size (4))); +typedef short v4hi __attribute__ ((vector_size (8))); +typedef short v8hi __attribute__ ((vector_size (16))); +typedef short v16hi __attribute__ ((vector_size (32))); +typedef short v32hi __attribute__ ((vector_size (64))); +typedef short v64hi __attribute__ 
((vector_size (128))); + +typedef unsigned short v2uhi __attribute__ ((vector_size (4))); +typedef unsigned short v4uhi __attribute__ ((vector_size (8))); +typedef unsigned short v8uhi __attribute__ ((vector_size (16))); +typedef unsigned short v16uhi __attribute__ ((vector_size (32))); +typedef unsigned short v32uhi __attribute__ ((vector_size (64))); +typedef unsigned short v64uhi __attribute__ ((vector_size (128))); + +typedef int v2si __attribute__ ((vector_size (8))); +typedef int v4si __attribute__ ((vector_size (16))); +typedef int v8si __attribute__ ((vector_size (32))); +typedef int v16si __attribute__ ((vector_size (64))); +typedef int v32si __attribute__ ((vector_size (128))); +typedef int v64si __attribute__ ((vector_size (256))); + +typedef unsigned int v2usi __attribute__ ((vector_size (8))); +typedef unsigned int v4usi __attribute__ ((vector_size (16))); +typedef unsigned int v8usi __attribute__ ((vector_size (32))); +typedef unsigned int v16usi __attribute__ ((vector_size (64))); +typedef unsigned int v32usi __attribute__ ((vector_size (128))); +typedef unsigned int v64usi __attribute__ ((vector_size (256))); + +typedef long v2di __attribute__ ((vector_size (16))); +typedef long v4di __attribute__ ((vector_size (32))); +typedef long v8di __attribute__ ((vector_size (64))); +typedef long v16di __attribute__ ((vector_size (128))); +typedef long v32di __attribute__ ((vector_size (256))); +typedef long v64di __attribute__ ((vector_size (512))); + +typedef unsigned long v2udi __attribute__ ((vector_size (16))); +typedef unsigned long v4udi __attribute__ ((vector_size (32))); +typedef unsigned long v8udi __attribute__ ((vector_size (64))); +typedef unsigned long v16udi __attribute__ ((vector_size (128))); +typedef unsigned long v32udi __attribute__ ((vector_size (256))); +typedef unsigned long v64udi __attribute__ ((vector_size (512))); + +typedef UTItype v2uti __attribute__ ((vector_size (32))); +typedef UTItype v4uti __attribute__ ((vector_size (64))); 
+typedef UTItype v8uti __attribute__ ((vector_size (128))); +typedef UTItype v16uti __attribute__ ((vector_size (256))); +typedef UTItype v32uti __attribute__ ((vector_size (512))); +typedef UTItype v64uti __attribute__ ((vector_size (1024))); + /* Exported functions. */ extern DItype __divdi3 (DItype, DItype); extern DItype __moddi3 (DItype, DItype); +extern UTItype __divmoddi4 (DItype, DItype); extern UDItype __udivdi3 (UDItype, UDItype); extern UDItype __umoddi3 (UDItype, UDItype); +extern UTItype __udivmoddi4 (UDItype, UDItype); extern SItype __divsi3 (SItype, SItype); extern SItype __modsi3 (SItype, SItype); +extern UDItype __divmodsi4 (SItype, SItype); extern USItype __udivsi3 (USItype, USItype); extern USItype __umodsi3 (USItype, USItype); +extern UDItype __udivmodsi4 (USItype, USItype); extern SItype __mulsi3 (SItype, SItype); +#define VECTOR_PROTOTYPES(SIZE) \ + extern v##SIZE##qi __divv##SIZE##qi3 (v##SIZE##qi, v##SIZE##qi); \ + extern v##SIZE##qi __modv##SIZE##qi3 (v##SIZE##qi, v##SIZE##qi); \ + extern v##SIZE##udi __divmodv##SIZE##qi4 (v##SIZE##qi, v##SIZE##qi); \ + extern v##SIZE##uqi __udivv##SIZE##qi3 (v##SIZE##uqi, v##SIZE##uqi); \ + extern v##SIZE##uqi __umodv##SIZE##qi3 (v##SIZE##uqi, v##SIZE##uqi); \ + extern v##SIZE##udi __udivmodv##SIZE##qi4 (v##SIZE##uqi, v##SIZE##uqi); \ + extern v##SIZE##hi __divv##SIZE##hi3 (v##SIZE##hi, v##SIZE##hi); \ + extern v##SIZE##hi __modv##SIZE##hi3 (v##SIZE##hi, v##SIZE##hi); \ + extern v##SIZE##udi __divmodv##SIZE##hi4 (v##SIZE##hi, v##SIZE##hi); \ + extern v##SIZE##uhi __udivv##SIZE##hi3 (v##SIZE##uhi, v##SIZE##uhi); \ + extern v##SIZE##uhi __umodv##SIZE##hi3 (v##SIZE##uhi, v##SIZE##uhi); \ + extern v##SIZE##udi __udivmodv##SIZE##hi4 (v##SIZE##uhi, v##SIZE##uhi); \ + extern v##SIZE##si __divv##SIZE##si3 (v##SIZE##si, v##SIZE##si); \ + extern v##SIZE##si __modv##SIZE##si3 (v##SIZE##si, v##SIZE##si); \ + extern v##SIZE##udi __divmodv##SIZE##si4 (v##SIZE##si, v##SIZE##si); \ + extern v##SIZE##usi __udivv##SIZE##si3 
(v##SIZE##usi, v##SIZE##usi); \ + extern v##SIZE##usi __umodv##SIZE##si3 (v##SIZE##usi, v##SIZE##usi); \ + extern v##SIZE##udi __udivmodv##SIZE##si4 (v##SIZE##usi, v##SIZE##usi); \ + extern v##SIZE##di __divv##SIZE##di3 (v##SIZE##di, v##SIZE##di); \ + extern v##SIZE##di __modv##SIZE##di3 (v##SIZE##di, v##SIZE##di); \ + extern v##SIZE##uti __divmodv##SIZE##di4 (v##SIZE##di, v##SIZE##di); \ + extern v##SIZE##udi __udivv##SIZE##di3 (v##SIZE##udi, v##SIZE##udi); \ + extern v##SIZE##udi __umodv##SIZE##di3 (v##SIZE##udi, v##SIZE##udi); \ + extern v##SIZE##uti __udivmodv##SIZE##di4 (v##SIZE##udi, v##SIZE##udi); +VECTOR_PROTOTYPES (2) +VECTOR_PROTOTYPES (4) +VECTOR_PROTOTYPES (8) +VECTOR_PROTOTYPES (16) +VECTOR_PROTOTYPES (32) +VECTOR_PROTOTYPES (64) +#undef VECTOR_PROTOTYPES + #endif /* LIB2_GCN_H */ diff --git a/libgcc/config/gcn/lib2-vec_divmod-di.c b/libgcc/config/gcn/lib2-vec_divmod-di.c new file mode 100644 index 0000000..8f4a035 --- /dev/null +++ b/libgcc/config/gcn/lib2-vec_divmod-di.c @@ -0,0 +1,118 @@ +/* Copyright (C) 2012-2023 Free Software Foundation, Inc. + Contributed by Altera and Mentor Graphics, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. 
*/ + +#include "lib2-gcn.h" + +/* 64-bit V64SI divide and modulo as used in gcn. + This is a simple conversion from lib2-divmod.c. */ + +#define MASKMODE v64di +#include "amdgcn_veclib.h" + +static v64uti +__udivmodv64di4_aux (v64udi num, v64udi den, v64di __mask) +{ + v64udi bit = VECTOR_INIT (1UL); + v64udi res = VECTOR_INIT (0UL); + + VECTOR_WHILE ((den < num) & (bit != 0) & ((den & (1L<<31)) == 0), + cond, NO_COND) + VECTOR_COND_MOVE (den, den << 1, cond); + VECTOR_COND_MOVE (bit, bit << 1, cond); + VECTOR_ENDWHILE + VECTOR_WHILE (bit != 0, loopcond, NO_COND) + VECTOR_IF2 (num >= den, ifcond, loopcond) + VECTOR_COND_MOVE (num, num - den, ifcond); + VECTOR_COND_MOVE (res, res | bit, ifcond); + VECTOR_ENDIF + VECTOR_COND_MOVE (bit, bit >> 1, loopcond); + VECTOR_COND_MOVE (den, den >> 1, loopcond); + VECTOR_ENDWHILE + + return PACK_DI_PAIR (res, num); +} + +static v64uti +__divmodv64di4_aux (v64di a, v64di b, v64di __mask) +{ + v64di nega = VECTOR_INIT (0L); + v64di negb = VECTOR_INIT (0L); + + VECTOR_IF (a < 0, cond) + VECTOR_COND_MOVE (a, -a, cond); + nega = cond; + VECTOR_ENDIF + + VECTOR_IF (b < 0, cond) + VECTOR_COND_MOVE (b, -b, cond); + negb = cond; + VECTOR_ENDIF + + v64udi ua = __builtin_convertvector (a, v64udi); + v64udi ub = __builtin_convertvector (b, v64udi); + v64uti pair = __udivmodv64di4_aux (ua, ub, __mask); + + v64di quot = UNPACK_DI_LOW (v64di, pair); + v64di rem = UNPACK_DI_HIGH (v64di, pair); + VECTOR_COND_MOVE (quot, -quot, nega ^ negb); + VECTOR_COND_MOVE (rem, -rem, nega); + pair = PACK_DI_PAIR (quot, rem); + + return pair; +} + + +static inline v64di +__divv64di3_aux (v64di a, v64di b, v64di __mask) +{ + v64uti pair = __divmodv64di4_aux (a, b, __mask); + return UNPACK_DI_LOW (v64di, pair); +} + +static inline v64di +__modv64di3_aux (v64di a, v64di b, v64di __mask) +{ + v64uti pair = __divmodv64di4_aux (a, b, __mask); + return UNPACK_DI_HIGH (v64di, pair); +} + + +static inline v64udi +__udivv64di3_aux (v64udi a, v64udi b, v64di __mask) +{ 
+ v64uti pair = __udivmodv64di4_aux (a, b, __mask); + return UNPACK_DI_LOW (v64udi, pair); +} + +static inline v64udi +__umodv64di3_aux (v64udi a, v64udi b, v64di __mask) +{ + v64uti pair = __udivmodv64di4_aux (a, b, __mask); + return UNPACK_DI_HIGH (v64udi, pair); +} + +DEF_VARIANTS (__div, di3, di) +DEF_VARIANTS (__mod, di3, di) +DEF_VARIANTS_B (__divmod, di4, uti, di) +DEF_VARIANTS (__udiv, di3, udi) +DEF_VARIANTS (__umod, di3, udi) +DEF_VARIANTS_B (__udivmod, di4, uti, udi) diff --git a/libgcc/config/gcn/lib2-vec_divmod-hi.c b/libgcc/config/gcn/lib2-vec_divmod-hi.c new file mode 100644 index 0000000..175ddf8 --- /dev/null +++ b/libgcc/config/gcn/lib2-vec_divmod-hi.c @@ -0,0 +1,118 @@ +/* Copyright (C) 2012-2023 Free Software Foundation, Inc. + Contributed by Altera and Mentor Graphics, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#include "lib2-gcn.h" + +/* 16-bit V64HI divide and modulo as used in gcn. + This is a simple conversion from lib2-divmod.c. 
*/ + +#define MASKMODE v64hi +#include "amdgcn_veclib.h" + +static v64udi +__udivmodv64hi4_aux (v64uhi num, v64uhi den, v64hi __mask) +{ + v64uhi bit = VECTOR_INIT ((unsigned short)1U); + v64uhi res = VECTOR_INIT ((unsigned short)0U); + + VECTOR_WHILE ((den < num) & (bit != 0) & ((den & (1L<<15)) == 0), + cond, NO_COND) + VECTOR_COND_MOVE (den, den << 1, cond); + VECTOR_COND_MOVE (bit, bit << 1, cond); + VECTOR_ENDWHILE + VECTOR_WHILE (bit != 0, loopcond, NO_COND) + VECTOR_IF2 (num >= den, ifcond, loopcond) + VECTOR_COND_MOVE (num, num - den, ifcond); + VECTOR_COND_MOVE (res, res | bit, ifcond); + VECTOR_ENDIF + VECTOR_COND_MOVE (bit, bit >> 1, loopcond); + VECTOR_COND_MOVE (den, den >> 1, loopcond); + VECTOR_ENDWHILE + + return PACK_SI_PAIR (res, num); +} + +static v64udi +__divmodv64hi4_aux (v64hi a, v64hi b, v64hi __mask) +{ + v64hi nega = VECTOR_INIT ((short)0); + v64hi negb = VECTOR_INIT ((short)0); + + VECTOR_IF (a < 0, cond) + VECTOR_COND_MOVE (a, -a, cond); + nega = cond; + VECTOR_ENDIF + + VECTOR_IF (b < 0, cond) + VECTOR_COND_MOVE (b, -b, cond); + negb = cond; + VECTOR_ENDIF + + v64uhi ua = __builtin_convertvector (a, v64uhi); + v64uhi ub = __builtin_convertvector (b, v64uhi); + v64udi pair = __udivmodv64hi4_aux (ua, ub, __mask); + + v64hi quot = UNPACK_SI_LOW (v64hi, pair); + v64hi rem = UNPACK_SI_HIGH (v64hi, pair); + VECTOR_COND_MOVE (quot, -quot, nega ^ negb); + VECTOR_COND_MOVE (rem, -rem, nega); + pair = PACK_SI_PAIR (quot, rem); + + return pair; +} + + +static inline v64hi +__divv64hi3_aux (v64hi a, v64hi b, v64hi __mask) +{ + v64udi pair = __divmodv64hi4_aux (a, b, __mask); + return UNPACK_SI_LOW (v64hi, pair); +} + +static inline v64hi +__modv64hi3_aux (v64hi a, v64hi b, v64hi __mask) +{ + v64udi pair = __divmodv64hi4_aux (a, b, __mask); + return UNPACK_SI_HIGH (v64hi, pair); +} + + +static inline v64uhi +__udivv64hi3_aux (v64uhi a, v64uhi b, v64hi __mask) +{ + v64udi pair = __udivmodv64hi4_aux (a, b, __mask); + return UNPACK_SI_LOW (v64uhi, 
pair); +} + +static inline v64uhi +__umodv64hi3_aux (v64uhi a, v64uhi b, v64hi __mask) +{ + v64udi pair = __udivmodv64hi4_aux (a, b, __mask); + return UNPACK_SI_HIGH (v64uhi, pair); +} + +DEF_VARIANTS (__div, hi3, hi) +DEF_VARIANTS (__mod, hi3, hi) +DEF_VARIANTS_B (__divmod, hi4, udi, hi) +DEF_VARIANTS (__udiv, hi3, uhi) +DEF_VARIANTS (__umod, hi3, uhi) +DEF_VARIANTS_B (__udivmod, hi4, udi, uhi) diff --git a/libgcc/config/gcn/lib2-vec_divmod-qi.c b/libgcc/config/gcn/lib2-vec_divmod-qi.c new file mode 100644 index 0000000..ff6b5c2 --- /dev/null +++ b/libgcc/config/gcn/lib2-vec_divmod-qi.c @@ -0,0 +1,118 @@ +/* Copyright (C) 2012-2023 Free Software Foundation, Inc. + Contributed by Altera and Mentor Graphics, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#include "lib2-gcn.h" + +/* 8-bit V64QI divide and modulo as used in gcn. + This is a simple conversion from lib2-divmod.c. 
*/ + +#define MASKMODE v64qi +#include "amdgcn_veclib.h" + +static v64udi +__udivmodv64qi4_aux (v64uqi num, v64uqi den, v64qi __mask) +{ + v64uqi bit = VECTOR_INIT ((unsigned char)1U); + v64uqi res = VECTOR_INIT ((unsigned char)0U); + + VECTOR_WHILE ((den < num) & (bit != 0) & ((den & (1<<7)) == 0), + cond, NO_COND) + VECTOR_COND_MOVE (den, den << 1, cond); + VECTOR_COND_MOVE (bit, bit << 1, cond); + VECTOR_ENDWHILE + VECTOR_WHILE (bit != 0, loopcond, NO_COND) + VECTOR_IF2 (num >= den, ifcond, loopcond) + VECTOR_COND_MOVE (num, num - den, ifcond); + VECTOR_COND_MOVE (res, res | bit, ifcond); + VECTOR_ENDIF + VECTOR_COND_MOVE (bit, bit >> 1, loopcond); + VECTOR_COND_MOVE (den, den >> 1, loopcond); + VECTOR_ENDWHILE + + return PACK_SI_PAIR (res, num); +} + +static v64udi +__divmodv64qi4_aux (v64qi a, v64qi b, v64qi __mask) +{ + v64qi nega = VECTOR_INIT ((char)0); + v64qi negb = VECTOR_INIT ((char)0); + + VECTOR_IF (a < 0, cond) + VECTOR_COND_MOVE (a, -a, cond); + nega = cond; + VECTOR_ENDIF + + VECTOR_IF (b < 0, cond) + VECTOR_COND_MOVE (b, -b, cond); + negb = cond; + VECTOR_ENDIF + + v64uqi ua = __builtin_convertvector (a, v64uqi); + v64uqi ub = __builtin_convertvector (b, v64uqi); + v64udi pair = __udivmodv64qi4_aux (ua, ub, __mask); + + v64qi quot = UNPACK_SI_LOW (v64qi, pair); + v64qi rem = UNPACK_SI_HIGH (v64qi, pair); + VECTOR_COND_MOVE (quot, -quot, nega ^ negb); + VECTOR_COND_MOVE (rem, -rem, nega); + pair = PACK_SI_PAIR (quot, rem); + + return pair; +} + + +static inline v64qi +__divv64qi3_aux (v64qi a, v64qi b, v64qi __mask) +{ + v64udi pair = __divmodv64qi4_aux (a, b, __mask); + return UNPACK_SI_LOW (v64qi, pair); +} + +static inline v64qi +__modv64qi3_aux (v64qi a, v64qi b, v64qi __mask) +{ + v64udi pair = __divmodv64qi4_aux (a, b, __mask); + return UNPACK_SI_HIGH (v64qi, pair); +} + + +static inline v64uqi +__udivv64qi3_aux (v64uqi a, v64uqi b, v64qi __mask) +{ + v64udi pair = __udivmodv64qi4_aux (a, b, __mask); + return UNPACK_SI_LOW (v64uqi, pair); +} 
+ +static inline v64uqi +__umodv64qi3_aux (v64uqi a, v64uqi b, v64qi __mask) +{ + v64udi pair = __udivmodv64qi4_aux (a, b, __mask); + return UNPACK_SI_HIGH (v64uqi, pair); +} + +DEF_VARIANTS (__div, qi3, qi) +DEF_VARIANTS (__mod, qi3, qi) +DEF_VARIANTS_B (__divmod, qi4, udi, qi) +DEF_VARIANTS (__udiv, qi3, uqi) +DEF_VARIANTS (__umod, qi3, uqi) +DEF_VARIANTS_B (__udivmod, qi4, udi, uqi) diff --git a/libgcc/config/gcn/lib2-vec_divmod.c b/libgcc/config/gcn/lib2-vec_divmod.c new file mode 100644 index 0000000..e166766 --- /dev/null +++ b/libgcc/config/gcn/lib2-vec_divmod.c @@ -0,0 +1,118 @@ +/* Copyright (C) 2012-2023 Free Software Foundation, Inc. + Contributed by Altera and Mentor Graphics, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 3, or (at your option) any +later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#include "lib2-gcn.h" + +/* 32-bit V64SI divide and modulo as used in gcn. + This is a simple conversion from lib2-divmod.c. 
*/ + +#define MASKMODE v64si +#include "amdgcn_veclib.h" + +static v64udi +__udivmodv64si4_aux (v64usi num, v64usi den, v64si __mask) +{ + v64usi bit = VECTOR_INIT (1U); + v64usi res = VECTOR_INIT (0U); + + VECTOR_WHILE ((den < num) & (bit != 0) & ((den & (1L<<31)) == 0), + cond, NO_COND) + VECTOR_COND_MOVE (den, den << 1, cond); + VECTOR_COND_MOVE (bit, bit << 1, cond); + VECTOR_ENDWHILE + VECTOR_WHILE (bit != 0, loopcond, NO_COND) + VECTOR_IF2 (num >= den, ifcond, loopcond) + VECTOR_COND_MOVE (num, num - den, ifcond); + VECTOR_COND_MOVE (res, res | bit, ifcond); + VECTOR_ENDIF + VECTOR_COND_MOVE (bit, bit >> 1, loopcond); + VECTOR_COND_MOVE (den, den >> 1, loopcond); + VECTOR_ENDWHILE + + return PACK_SI_PAIR (res, num); +} + +static v64udi +__divmodv64si4_aux (v64si a, v64si b, v64si __mask) +{ + v64si nega = VECTOR_INIT (0); + v64si negb = VECTOR_INIT (0); + + VECTOR_IF (a < 0, cond) + VECTOR_COND_MOVE (a, -a, cond); + nega = cond; + VECTOR_ENDIF + + VECTOR_IF (b < 0, cond) + VECTOR_COND_MOVE (b, -b, cond); + negb = cond; + VECTOR_ENDIF + + v64usi ua = __builtin_convertvector (a, v64usi); + v64usi ub = __builtin_convertvector (b, v64usi); + v64udi pair = __udivmodv64si4_aux (ua, ub, __mask); + + v64si quot = UNPACK_SI_LOW (v64si, pair); + v64si rem = UNPACK_SI_HIGH (v64si, pair); + VECTOR_COND_MOVE (quot, -quot, nega ^ negb); + VECTOR_COND_MOVE (rem, -rem, nega); + pair = PACK_SI_PAIR (quot, rem); + + return pair; +} + + +static inline v64si +__divv64si3_aux (v64si a, v64si b, v64si __mask) +{ + v64udi pair = __divmodv64si4_aux (a, b, __mask); + return UNPACK_SI_LOW (v64si, pair); +} + +static inline v64si +__modv64si3_aux (v64si a, v64si b, v64si __mask) +{ + v64udi pair = __divmodv64si4_aux (a, b, __mask); + return UNPACK_SI_HIGH (v64si, pair); +} + + +static inline v64usi +__udivv64si3_aux (v64usi a, v64usi b, v64si __mask) +{ + v64udi pair = __udivmodv64si4_aux (a, b, __mask); + return UNPACK_SI_LOW (v64usi, pair); +} + +static inline v64usi 
+__umodv64si3_aux (v64usi a, v64usi b, v64si __mask) +{ + v64udi pair = __udivmodv64si4_aux (a, b, __mask); + return UNPACK_SI_HIGH (v64usi, pair); +} + +DEF_VARIANTS (__div, si3, si) +DEF_VARIANTS (__mod, si3, si) +DEF_VARIANTS_B (__divmod, si4, udi, si) +DEF_VARIANTS (__udiv, si3, usi) +DEF_VARIANTS (__umod, si3, usi) +DEF_VARIANTS_B (__udivmod, si4, udi, usi) diff --git a/libgcc/config/gcn/t-amdgcn b/libgcc/config/gcn/t-amdgcn index e64953e..d1d9a4f 100644 --- a/libgcc/config/gcn/t-amdgcn +++ b/libgcc/config/gcn/t-amdgcn @@ -1,6 +1,10 @@ LIB2ADD += $(srcdir)/config/gcn/atomic.c \ $(srcdir)/config/gcn/lib2-divmod.c \ $(srcdir)/config/gcn/lib2-divmod-di.c \ + $(srcdir)/config/gcn/lib2-vec_divmod.c \ + $(srcdir)/config/gcn/lib2-vec_divmod-qi.c \ + $(srcdir)/config/gcn/lib2-vec_divmod-hi.c \ + $(srcdir)/config/gcn/lib2-vec_divmod-di.c \ $(srcdir)/config/gcn/lib2-bswapti2.c \ $(srcdir)/config/gcn/unwind-gcn.c -- cgit v1.1 From e517d3f57145d4e745318ae674e7febad41cdc69 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Tue, 20 Jun 2023 00:17:14 +0000 Subject: Daily bump. --- libgcc/ChangeLog | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'libgcc') diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog index 0668e26..680a7b2 100644 --- a/libgcc/ChangeLog +++ b/libgcc/ChangeLog @@ -1,3 +1,23 @@ +2023-06-19 Andrew Stubbs + + * config/gcn/lib2-divmod-di.c: Reimplement like lib2-divmod.c. + * config/gcn/lib2-divmod.c: Likewise. + * config/gcn/lib2-gcn.h: Add new types and prototypes for all the + new vector libfuncs. + * config/gcn/t-amdgcn: Add new files. + * config/gcn/amdgcn_veclib.h: New file. + * config/gcn/lib2-vec_divmod-di.c: New file. + * config/gcn/lib2-vec_divmod-hi.c: New file. + * config/gcn/lib2-vec_divmod-qi.c: New file. + * config/gcn/lib2-vec_divmod.c: New file. + +2023-06-19 Andrew Stubbs + + * config/gcn/lib2-gcn.h (QItype, UQItype, HItype, UHItype): Delete. + (__divhi3, __modhi3, __udivhi3, __umodhi3): Delete. 
+ * config/gcn/t-amdgcn: Don't build lib2-divmod-hi.c. + * config/gcn/lib2-divmod-hi.c: Removed. + 2023-06-07 Florian Weimer PR libgcc/109712 -- cgit v1.1