author     Andreas Schwab <schwab@suse.de>    2025-06-05 10:24:46 +0200
committer  Andreas Schwab <schwab@suse.de>    2025-06-18 09:29:10 +0200
commit     eae5bb0f60205e6f709803cc6bba749daf5ece72 (patch)
tree       e51fd527b824ac0933b602a43e65a8266f8b73fb
parent     c274c3c07f216791d2065e653e418eb36c6dcaca (diff)
download   glibc-eae5bb0f60205e6f709803cc6bba749daf5ece72.zip
           glibc-eae5bb0f60205e6f709803cc6bba749daf5ece72.tar.gz
           glibc-eae5bb0f60205e6f709803cc6bba749daf5ece72.tar.bz2
powerpc: Remove assembler workarounds
Now that we require at least binutils 2.39, support for the POWER9 and POWER10 instructions can be assumed.
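
For reference, the removed workarounds hand-encode the newer instructions as raw machine words so that assemblers older than the versions named in the deleted TODO comments still accept the files. Below is a minimal C sketch (not part of the commit) that transcribes the encoding arithmetic of the LXVP and VEXTRACTBM macros deleted from memcmp.S and strlen.S; the sample operands are illustrative only. Each printed word is the .long value the macro expanded to, which a binutils >= 2.39 assembler should emit directly for the corresponding mnemonic.

#include <stdio.h>
#include <inttypes.h>

/* Transcription of the pre-2.35 POWER10 workaround macros removed by
   this commit; the shift amounts and constants come straight from the
   deleted #defines.  */
static uint32_t
lxvp_word (int xtp, int dq, int ra)
{
  return (6u << (32 - 6))                              /* primary opcode */
         | ((uint32_t) ((xtp - 32) >> 1) << (32 - 10)) /* Tp field */
         | (1u << (32 - 11))                           /* TX bit (VSRs 32..63) */
         | ((uint32_t) ra << (32 - 16))                /* base register */
         | (uint32_t) dq;                              /* displacement */
}

static uint32_t
vextractbm_word (int rt, int vrb)
{
  return (4u << (32 - 6))
         | ((uint32_t) rt << (32 - 11))
         | (8u << (32 - 16))
         | ((uint32_t) vrb << (32 - 21))
         | 1602u;                                      /* extended opcode */
}

int
main (void)
{
  /* Operand choices mirror typical call sites in the diff.  */
  printf ("lxvp 34,0(3)     -> .long 0x%08" PRIx32 "\n", lxvp_word (34, 0, 3));
  printf ("vextractbm 7,1   -> .long 0x%08" PRIx32 "\n", vextractbm_word (7, 1));
  return 0;
}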
-rw-r--r--  sysdeps/powerpc/powerpc64/le/power10/memcmp.S | 20
-rw-r--r--  sysdeps/powerpc/powerpc64/le/power10/strlen.S | 28
-rw-r--r--  sysdeps/powerpc/powerpc64/le/power9/strcmp.S  | 44
-rw-r--r--  sysdeps/powerpc/powerpc64/le/power9/strncmp.S | 43
4 files changed, 34 insertions(+), 101 deletions(-)
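
The POWER9 files remove an analogous trio of macros that predate assembler support for vcmpnezb, vctzlsbb and vextubrx. One detail worth noting: the VCMPNEZB base word 0x10000507 already has the record (Rc) bit set, so the macro always assembled the CR6-setting form; that is why the replacement is the dot form vcmpnezb. and the surrounding bne cr6/beq cr6 branches are unchanged. A C sketch of the removed formulas, in the same illustrative style as above (sample operands taken from the call sites in the diff):

#include <stdio.h>
#include <inttypes.h>

/* Transcription of the pre-2.27 POWER9 workaround macros removed from
   strcmp.S and strncmp.S; base constants come from the deleted #defines.  */
static uint32_t
vcmpnezb_dot_word (int t, int a, int b)
{
  /* The Rc bit is already set in 0x10000507, giving the CR6-setting
     "vcmpnezb." form.  */
  return 0x10000507u
         | ((uint32_t) t << (32 - 11))
         | ((uint32_t) a << (32 - 16))
         | ((uint32_t) b << (32 - 21));
}

static uint32_t
vctzlsbb_word (int r, int v)
{
  return 0x10010602u
         | ((uint32_t) r << (32 - 11))
         | ((uint32_t) v << (32 - 21));
}

static uint32_t
vextubrx_word (int t, int a, int b)
{
  return 0x1000070du
         | ((uint32_t) t << (32 - 11))
         | ((uint32_t) a << (32 - 16))
         | ((uint32_t) b << (32 - 21));
}

int
main (void)
{
  printf ("vcmpnezb. v7,v5,v4 -> .long 0x%08" PRIx32 "\n",
          vcmpnezb_dot_word (7, 5, 4));
  printf ("vctzlsbb r6,v7     -> .long 0x%08" PRIx32 "\n",
          vctzlsbb_word (6, 7));
  printf ("vextubrx r5,r6,v4  -> .long 0x%08" PRIx32 "\n",
          vextubrx_word (5, 6, 4));
  return 0;
}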
diff --git a/sysdeps/powerpc/powerpc64/le/power10/memcmp.S b/sysdeps/powerpc/powerpc64/le/power10/memcmp.S
index f32dc38..9c1a41b 100644
--- a/sysdeps/powerpc/powerpc64/le/power10/memcmp.S
+++ b/sysdeps/powerpc/powerpc64/le/power10/memcmp.S
@@ -18,26 +18,10 @@
#include <sysdep.h>
-/* TODO: Replace macros by the actual instructions when minimum binutils becomes
- >= 2.35. This is used to keep compatibility with older versions. */
-#define VEXTRACTBM(rt,vrb) \
- .long(((4)<<(32-6)) \
- | ((rt)<<(32-11)) \
- | ((8)<<(32-16)) \
- | ((vrb)<<(32-21)) \
- | 1602)
-
-#define LXVP(xtp,dq,ra) \
- .long(((6)<<(32-6)) \
- | ((((xtp)-32)>>1)<<(32-10)) \
- | ((1)<<(32-11)) \
- | ((ra)<<(32-16)) \
- | dq)
-
/* Compare 32 bytes. */
#define COMPARE_32(vr1,vr2,offset,tail_1,tail_2)\
- LXVP(32+vr1,offset,r3); \
- LXVP(32+vr2,offset,r4); \
+ lxvp 32+vr1,offset(r3); \
+ lxvp 32+vr2,offset(r4); \
vcmpneb. v5,vr1+1,vr2+1; \
bne cr6,L(tail_2); \
vcmpneb. v4,vr1,vr2; \
diff --git a/sysdeps/powerpc/powerpc64/le/power10/strlen.S b/sysdeps/powerpc/powerpc64/le/power10/strlen.S
index 4985a92..74f572c 100644
--- a/sysdeps/powerpc/powerpc64/le/power10/strlen.S
+++ b/sysdeps/powerpc/powerpc64/le/power10/strlen.S
@@ -63,22 +63,6 @@
blr
#endif /* USE_AS_RAWMEMCHR */
-/* TODO: Replace macros by the actual instructions when minimum binutils becomes
- >= 2.35. This is used to keep compatibility with older versions. */
-#define VEXTRACTBM(rt,vrb) \
- .long(((4)<<(32-6)) \
- | ((rt)<<(32-11)) \
- | ((8)<<(32-16)) \
- | ((vrb)<<(32-21)) \
- | 1602)
-
-#define LXVP(xtp,dq,ra) \
- .long(((6)<<(32-6)) \
- | ((((xtp)-32)>>1)<<(32-10)) \
- | ((1)<<(32-11)) \
- | ((ra)<<(32-16)) \
- | dq)
-
#define CHECK16(vreg,offset,addr,label) \
lxv vreg+32,offset(addr); \
vcmpequb. vreg,vreg,v18; \
@@ -88,8 +72,8 @@
of bytes already checked. */
#define CHECK64(offset,addr,label) \
li r6,offset; \
- LXVP(v4+32,offset,addr); \
- LXVP(v6+32,offset+32,addr); \
+ lxvp v4+32,offset(addr); \
+ lxvp v6+32,offset+32(addr); \
RAWMEMCHR_SUBTRACT_VECTORS; \
vminub v14,v4,v5; \
vminub v15,v6,v7; \
@@ -234,10 +218,10 @@ L(tail_64b):
add r5,r5,r6
/* Extract first bit of each byte. */
- VEXTRACTBM(r7,v1)
- VEXTRACTBM(r8,v2)
- VEXTRACTBM(r9,v3)
- VEXTRACTBM(r10,v4)
+ vextractbm r7,v1
+ vextractbm r8,v2
+ vextractbm r9,v3
+ vextractbm r10,v4
/* Shift each value into their corresponding position. */
sldi r8,r8,16
diff --git a/sysdeps/powerpc/powerpc64/le/power9/strcmp.S b/sysdeps/powerpc/powerpc64/le/power9/strcmp.S
index 83b21c6..f0cde81 100644
--- a/sysdeps/powerpc/powerpc64/le/power9/strcmp.S
+++ b/sysdeps/powerpc/powerpc64/le/power9/strcmp.S
@@ -28,21 +28,6 @@
The implementation uses unaligned doubleword access for first 32 bytes
as in POWER8 patch and uses vectorised loops after that. */
-/* TODO: Change this to actual instructions when minimum binutils is upgraded
- to 2.27. Macros are defined below for these newer instructions in order
- to maintain compatibility. */
-#define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21)))
-
-#define VEXTUBRX(t,a,b) .long (0x1000070d \
- | ((t)<<(32-11)) \
- | ((a)<<(32-16)) \
- | ((b)<<(32-21)) )
-
-#define VCMPNEZB(t,a,b) .long (0x10000507 \
- | ((t)<<(32-11)) \
- | ((a)<<(32-16)) \
- | ((b)<<(32-21)) )
-
/* Get 16 bytes for unaligned case.
reg1: Vector to hold next 16 bytes.
reg2: Address to read from.
@@ -61,10 +46,7 @@
2: \
vperm reg1, v9, reg1, reg3;
-/* TODO: change this to .machine power9 when the minimum required binutils
- allows it. */
-
- .machine power7
+ .machine power9
ENTRY_TOCLESS (STRCMP, 4)
li r0, 0
@@ -116,7 +98,7 @@ L(align):
/* Both s1 and s2 are unaligned. */
GET16BYTES(v4, r7, v10)
GET16BYTES(v5, r4, v6)
- VCMPNEZB(v7, v5, v4)
+ vcmpnezb. v7, v5, v4
beq cr6, L(match)
b L(different)
@@ -136,28 +118,28 @@ L(match):
L(s1_align):
lvx v4, r7, r0
GET16BYTES(v5, r4, v6)
- VCMPNEZB(v7, v5, v4)
+ vcmpnezb. v7, v5, v4
addi r7, r7, 16
addi r4, r4, 16
bne cr6, L(different)
lvx v4, r7, r0
GET16BYTES(v5, r4, v6)
- VCMPNEZB(v7, v5, v4)
+ vcmpnezb. v7, v5, v4
addi r7, r7, 16
addi r4, r4, 16
bne cr6, L(different)
lvx v4, r7, r0
GET16BYTES(v5, r4, v6)
- VCMPNEZB(v7, v5, v4)
+ vcmpnezb. v7, v5, v4
addi r7, r7, 16
addi r4, r4, 16
bne cr6, L(different)
lvx v4, r7, r0
GET16BYTES(v5, r4, v6)
- VCMPNEZB(v7, v5, v4)
+ vcmpnezb. v7, v5, v4
addi r7, r7, 16
addi r4, r4, 16
beq cr6, L(s1_align)
@@ -167,37 +149,37 @@ L(s1_align):
L(aligned):
lvx v4, 0, r7
lvx v5, 0, r4
- VCMPNEZB(v7, v5, v4)
+ vcmpnezb. v7, v5, v4
addi r7, r7, 16
addi r4, r4, 16
bne cr6, L(different)
lvx v4, 0, r7
lvx v5, 0, r4
- VCMPNEZB(v7, v5, v4)
+ vcmpnezb. v7, v5, v4
addi r7, r7, 16
addi r4, r4, 16
bne cr6, L(different)
lvx v4, 0, r7
lvx v5, 0, r4
- VCMPNEZB(v7, v5, v4)
+ vcmpnezb. v7, v5, v4
addi r7, r7, 16
addi r4, r4, 16
bne cr6, L(different)
lvx v4, 0, r7
lvx v5, 0, r4
- VCMPNEZB(v7, v5, v4)
+ vcmpnezb. v7, v5, v4
addi r7, r7, 16
addi r4, r4, 16
beq cr6, L(aligned)
/* Calculate and return the difference. */
L(different):
- VCTZLSBB(r6, v7)
- VEXTUBRX(r5, r6, v4)
- VEXTUBRX(r4, r6, v5)
+ vctzlsbb r6, v7
+ vextubrx r5, r6, v4
+ vextubrx r4, r6, v5
subf r3, r4, r5
extsw r3, r3
blr
diff --git a/sysdeps/powerpc/powerpc64/le/power9/strncmp.S b/sysdeps/powerpc/powerpc64/le/power9/strncmp.S
index 60c74ab..5a25f94 100644
--- a/sysdeps/powerpc/powerpc64/le/power9/strncmp.S
+++ b/sysdeps/powerpc/powerpc64/le/power9/strncmp.S
@@ -29,21 +29,6 @@
# define STRNCMP strncmp
#endif
-/* TODO: Change this to actual instructions when minimum binutils is upgraded
- to 2.27. Macros are defined below for these newer instructions in order
- to maintain compatibility. */
-#define VCTZLSBB(r,v) .long (0x10010602 | ((r)<<(32-11)) | ((v)<<(32-21)))
-
-#define VEXTUBRX(t,a,b) .long (0x1000070d \
- | ((t)<<(32-11)) \
- | ((a)<<(32-16)) \
- | ((b)<<(32-21)) )
-
-#define VCMPNEZB(t,a,b) .long (0x10000507 \
- | ((t)<<(32-11)) \
- | ((a)<<(32-16)) \
- | ((b)<<(32-21)) )
-
/* Get 16 bytes for unaligned case.
reg1: Vector to hold next 16 bytes.
reg2: Address to read from.
@@ -64,9 +49,7 @@
2: \
vperm reg1, v9, reg1, reg3;
-/* TODO: change this to .machine power9 when minimum binutils
- is upgraded to 2.27. */
- .machine power7
+ .machine power9
ENTRY_TOCLESS (STRNCMP, 4)
/* Check if size is 0. */
cmpdi cr0, r5, 0
@@ -163,7 +146,7 @@ L(align):
clrldi r6, r3, 60
subfic r11, r6, 16
GET16BYTES(v4, r3, v10)
- VCMPNEZB(v7, v5, v4)
+ vcmpnezb. v7, v5, v4
beq cr6, L(match)
b L(different)
@@ -186,7 +169,7 @@ L(match):
L(s1_align):
lvx v4, 0, r3
GET16BYTES(v5, r4, v6)
- VCMPNEZB(v7, v5, v4)
+ vcmpnezb. v7, v5, v4
bne cr6, L(different)
cmpldi cr7, r5, 16
ble cr7, L(ret0)
@@ -196,7 +179,7 @@ L(s1_align):
lvx v4, 0, r3
GET16BYTES(v5, r4, v6)
- VCMPNEZB(v7, v5, v4)
+ vcmpnezb. v7, v5, v4
bne cr6, L(different)
cmpldi cr7, r5, 16
ble cr7, L(ret0)
@@ -206,7 +189,7 @@ L(s1_align):
lvx v4, 0, r3
GET16BYTES(v5, r4, v6)
- VCMPNEZB(v7, v5, v4)
+ vcmpnezb. v7, v5, v4
bne cr6, L(different)
cmpldi cr7, r5, 16
ble cr7, L(ret0)
@@ -216,7 +199,7 @@ L(s1_align):
lvx v4, 0, r3
GET16BYTES(v5, r4, v6)
- VCMPNEZB(v7, v5, v4)
+ vcmpnezb. v7, v5, v4
bne cr6, L(different)
cmpldi cr7, r5, 16
ble cr7, L(ret0)
@@ -228,7 +211,7 @@ L(s1_align):
L(aligned):
lvx v4, 0, r3
lvx v5, 0, r4
- VCMPNEZB(v7, v5, v4)
+ vcmpnezb. v7, v5, v4
bne cr6, L(different)
cmpldi cr7, r5, 16
ble cr7, L(ret0)
@@ -238,7 +221,7 @@ L(aligned):
lvx v4, 0, r3
lvx v5, 0, r4
- VCMPNEZB(v7, v5, v4)
+ vcmpnezb. v7, v5, v4
bne cr6, L(different)
cmpldi cr7, r5, 16
ble cr7, L(ret0)
@@ -248,7 +231,7 @@ L(aligned):
lvx v4, 0, r3
lvx v5, 0, r4
- VCMPNEZB(v7, v5, v4)
+ vcmpnezb. v7, v5, v4
bne cr6, L(different)
cmpldi cr7, r5, 16
ble cr7, L(ret0)
@@ -258,7 +241,7 @@ L(aligned):
lvx v4, 0, r3
lvx v5, 0, r4
- VCMPNEZB(v7, v5, v4)
+ vcmpnezb. v7, v5, v4
bne cr6, L(different)
cmpldi cr7, r5, 16
ble cr7, L(ret0)
@@ -268,11 +251,11 @@ L(aligned):
b L(aligned)
/* Calculate and return the difference. */
L(different):
- VCTZLSBB(r6, v7)
+ vctzlsbb r6, v7
cmplw cr7, r5, r6
ble cr7, L(ret0)
- VEXTUBRX(r5, r6, v4)
- VEXTUBRX(r4, r6, v5)
+ vextubrx r5, r6, v4
+ vextubrx r4, r6, v5
subf r3, r4, r5
extsw r3, r3
blr