aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStefan Liebler <stli@linux.vnet.ibm.com>2017-06-27 17:09:42 +0200
committerStefan Liebler <stli@linux.vnet.ibm.com>2017-06-27 17:09:42 +0200
commit23ea69a9d6e9ab28c66a232b767a800b04eaa938 (patch)
tree539d557f45ce4924699329d64b04b6562bcb7dd8
parentf21f59124d8a4d22b20e7a926462937d89e8988e (diff)
downloadglibc-23ea69a9d6e9ab28c66a232b767a800b04eaa938.zip
glibc-23ea69a9d6e9ab28c66a232b767a800b04eaa938.tar.gz
glibc-23ea69a9d6e9ab28c66a232b767a800b04eaa938.tar.bz2
S390: Use cu41 instruction for converting from utf32 to utf8.
This patch adds an ifunc variant to use the cu41 instruction on arch12 CPUs. This new ifunc variant can be built if binutils support z13 vector instructions. At runtime, HWCAP_S390_VXE decides if we can use the cu41 instruction. ChangeLog: * sysdeps/s390/utf8-utf32-z9.c (__to_utf8_loop_vx_cu): Use vector and cu41 instruction. * sysdeps/s390/multiarch/utf8-utf32-z9.c: Add __to_utf8_loop_vx_cu in ifunc resolver.
-rw-r--r--ChangeLog7
-rw-r--r--sysdeps/s390/multiarch/utf8-utf32-z9.c8
-rw-r--r--sysdeps/s390/utf8-utf32-z9.c112
3 files changed, 124 insertions, 3 deletions
diff --git a/ChangeLog b/ChangeLog
index 039366a..dedaa3c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,12 @@
2017-06-27 Stefan Liebler <stli@linux.vnet.ibm.com>
+ * sysdeps/s390/utf8-utf32-z9.c (__to_utf8_loop_vx_cu):
+ Use vector and cu41 instruction.
+ * sysdeps/s390/multiarch/utf8-utf32-z9.c: Add __to_utf8_loop_vx_cu
+ in ifunc resolver.
+
+2017-06-27 Stefan Liebler <stli@linux.vnet.ibm.com>
+
* sysdeps/s390/dl-procinfo.c (_dl_s390_cap_flags):
Add vxd, vxe, gs flag.
* sysdeps/s390/dl-procinfo.h: Add HWCAP_S390_VXD, HWCAP_S390_VXE,
diff --git a/sysdeps/s390/multiarch/utf8-utf32-z9.c b/sysdeps/s390/multiarch/utf8-utf32-z9.c
index faf1f46..0c6d9e9 100644
--- a/sysdeps/s390/multiarch/utf8-utf32-z9.c
+++ b/sysdeps/s390/multiarch/utf8-utf32-z9.c
@@ -41,8 +41,10 @@ s390_libc_ifunc_expr (FROM_LOOP_DEFAULT, FROM_LOOP,
: FROM_LOOP_DEFAULT);
s390_libc_ifunc_expr (TO_LOOP_DEFAULT, TO_LOOP,
- (HAVE_TO_VX && (hwcap & HWCAP_S390_VX))
- ? TO_LOOP_VX
- : TO_LOOP_DEFAULT);
+ (HAVE_TO_VX_CU && (hwcap & HWCAP_S390_VXE)) /* First choice: vector + cu41 variant, if it was built and hwcap reports VXE. */
+ ? TO_LOOP_VX_CU
+ : (HAVE_TO_VX && (hwcap & HWCAP_S390_VX)) /* Second choice: plain vector variant, if it was built and hwcap reports VX. */
+ ? TO_LOOP_VX
+ : TO_LOOP_DEFAULT); /* Otherwise the default implementation. */
#include <iconv/skeleton.c>
diff --git a/sysdeps/s390/utf8-utf32-z9.c b/sysdeps/s390/utf8-utf32-z9.c
index 57572fa..3810b65 100644
--- a/sysdeps/s390/utf8-utf32-z9.c
+++ b/sysdeps/s390/utf8-utf32-z9.c
@@ -52,9 +52,11 @@
#if defined HAVE_S390_VX_ASM_SUPPORT && defined USE_MULTIARCH
# define HAVE_FROM_VX 1
# define HAVE_TO_VX 1
+# define HAVE_TO_VX_CU 1 /* Build the BODY_TO_VX_CU loop (vector instructions plus cu41). */
#else
# define HAVE_FROM_VX 0
# define HAVE_TO_VX 0
+# define HAVE_TO_VX_CU 0 /* Variant not built; TO_LOOP_VX_CU is then defined as NULL. */
#endif
#if defined HAVE_S390_VX_GCC_SUPPORT
@@ -863,6 +865,116 @@ gconv_end (struct __gconv_step *data)
# define TO_LOOP_VX NULL
#endif /* HAVE_TO_VX != 1 */
+#if HAVE_TO_VX_CU == 1
+#define BODY_TO_VX_CU /* Loop body for UTF-32 -> UTF-8: vector code for runs of chars <= 0x7f, cu41 for the rest. */ \
+ { \
+ register const unsigned char* pInput asm ("8") = inptr; /* Input address and length are pinned to GPRs 8 and 9, ... */ \
+ register size_t inlen asm ("9") = inend - inptr; \
+ register unsigned char* pOutput asm ("10") = outptr; /* ... output address and length to GPRs 10 and 11. */ \
+ register size_t outlen asm ("11") = outend - outptr; \
+ unsigned long tmp, tmp2; /* Scratch registers for the asm block (R_TMP and R_I). */ \
+ asm volatile (".machine push\n\t" /* Undone by .machine pop at the end of the block. */ \
+ ".machine \"z13\"\n\t" \
+ ".machinemode \"zarch_nohighgprs\"\n\t" \
+ " vleif %%v20,127,0\n\t" /* element 0: 127 */ \
+ " vzero %%v21\n\t" \
+ " vleih %%v21,8192,0\n\t" /* element 0: > */ \
+ " vleih %%v21,-8192,2\n\t" /* element 1: =<> */ \
+ CONVERT_32BIT_SIZE_T ([R_INLEN]) /* NOTE(review): macro defined outside this hunk; presumably prepares the size_t register for the 64-bit compares on 31-bit builds - confirm. */ \
+ CONVERT_32BIT_SIZE_T ([R_OUTLEN]) \
+ /* Loop which handles UTF-32 chars <= 0x7f. */ \
+ "0: clgijl %[R_INLEN],64,20f\n\t" /* Fewer than 64 input bytes left: go to cu41. */ \
+ " clgijl %[R_OUTLEN],16,20f\n\t" /* Fewer than 16 output bytes left: go to cu41. */ \
+ "1: vlm %%v16,%%v19,0(%[R_IN])\n\t" /* Load 64 bytes = 16 UTF-32 chars into v16-v19. */ \
+ " lghi %[R_TMP],0\n\t" /* Counts the chars in the vectors preceding the one with a hit. */ \
+ /* Shorten to byte values. */ \
+ " vpkf %%v23,%%v16,%%v17\n\t" \
+ " vpkf %%v24,%%v18,%%v19\n\t" \
+ " vpkh %%v23,%%v23,%%v24\n\t" /* v23 now holds the 16 packed bytes. */ \
+ /* Checking for values > 0x7f. */ \
+ " vstrcfs %%v22,%%v16,%%v20,%%v21\n\t" \
+ " jno 10f\n\t" /* Hit in v16. */ \
+ " vstrcfs %%v22,%%v17,%%v20,%%v21\n\t" \
+ " jno 11f\n\t" /* Hit in v17. */ \
+ " vstrcfs %%v22,%%v18,%%v20,%%v21\n\t" \
+ " jno 12f\n\t" /* Hit in v18. */ \
+ " vstrcfs %%v22,%%v19,%%v20,%%v21\n\t" \
+ " jno 13f\n\t" /* Hit in v19. */ \
+ /* Store 16bytes to outptr. */ \
+ " vst %%v23,0(%[R_OUT])\n\t" \
+ " aghi %[R_INLEN],-64\n\t" /* 64 input bytes consumed, ... */ \
+ " aghi %[R_OUTLEN],-16\n\t" /* ... 16 output bytes produced. */ \
+ " la %[R_IN],64(%[R_IN])\n\t" \
+ " la %[R_OUT],16(%[R_OUT])\n\t" \
+ " clgijl %[R_INLEN],64,20f\n\t" /* Same space checks as at label 0 before the next round. */ \
+ " clgijl %[R_OUTLEN],16,20f\n\t" \
+ " j 1b\n\t" \
+ /* Found a value > 0x7f. The labels fall through, adding 4 chars for each vector register preceding the hit. */ \
+ "13: ahi %[R_TMP],4\n\t" \
+ "12: ahi %[R_TMP],4\n\t" \
+ "11: ahi %[R_TMP],4\n\t" \
+ "10: vlgvb %[R_I],%%v22,7\n\t" /* Byte element 7 of the vstrcfs result; used below as the byte index of the hit. */ \
+ " srlg %[R_I],%[R_I],2\n\t" /* Divide by 4: byte index -> char index within the vector. */ \
+ " agr %[R_I],%[R_TMP]\n\t" /* Plus chars in the preceding vectors = chars <= 0x7f before the hit. */ \
+ " je 20f\n\t" /* Sum is zero: nothing to store, go straight to cu41. */ \
+ /* Store characters before invalid one... */ \
+ " slgr %[R_OUTLEN],%[R_I]\n\t" /* One output byte per stored char. */ \
+ "15: aghi %[R_I],-1\n\t" /* vstl is given count - 1; NOTE(review): presumably it takes the highest byte index to store - confirm. */ \
+ " vstl %%v23,%[R_I],0(%[R_OUT])\n\t" \
+ /* ... and update pointers. */ \
+ " aghi %[R_I],1\n\t" /* Restore the char count. */ \
+ " la %[R_OUT],0(%[R_I],%[R_OUT])\n\t" \
+ " sllg %[R_I],%[R_I],2\n\t" /* Chars -> input bytes (4 per UTF-32 char). */ \
+ " la %[R_IN],0(%[R_I],%[R_IN])\n\t" \
+ " slgr %[R_INLEN],%[R_I]\n\t" \
+ /* Handle multibyte utf8-char with convert instruction. Only the two address registers are named; NOTE(review): cu41 presumably takes the lengths from the paired odd GPRs 9 and 11 - confirm. */ \
+ "20: cu41 %[R_OUT],%[R_IN]\n\t" \
+ " jo 0b\n\t" /* cc == 3: Try vector implementation again. */ \
+ " lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1. */ \
+ " lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2. */ \
+ ".machine pop" \
+ : /* outputs */ [R_IN] "+a" (pInput) \
+ , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput) \
+ , [R_OUTLEN] "+d" (outlen), [R_TMP] "=d" (tmp) \
+ , [R_I] "=a" (tmp2) \
+ , [R_RES] "+d" (result) /* Only rewritten by the two lochi instructions above. */ \
+ : /* inputs */ \
+ [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT) \
+ , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT) \
+ : /* clobber list */ "memory", "cc" \
+ ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
+ ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
+ ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \
+ ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23") \
+ ASM_CLOBBER_VR ("v24") \
+ ); \
+ inptr = pInput; /* Write the advanced positions back to the loop variables. */ \
+ outptr = pOutput; \
+ \
+ if (__glibc_likely (inptr == inend) /* All input consumed, or the output buffer is full. */ \
+ || result == __GCONV_FULL_OUTPUT) \
+ break; \
+ if (inptr + 4 > inend) /* Fewer than 4 input bytes left. */ \
+ { \
+ result = __GCONV_INCOMPLETE_INPUT; \
+ break; \
+ } \
+ STANDARD_TO_LOOP_ERR_HANDLER (4); /* NOTE(review): macro defined outside this hunk; presumably handles the offending 4-byte char at inptr - confirm. */ \
+ }
+
+/* Generate loop-function with hardware vector and utf-convert instructions. */
+# define MIN_NEEDED_INPUT MIN_NEEDED_TO
+# define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
+# define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
+# define TO_LOOP_VX_CU __to_utf8_loop_vx_cu /* Function name selected by the ifunc resolver. */
+# define LOOPFCT TO_LOOP_VX_CU
+# define BODY BODY_TO_VX_CU
+# define LOOP_NEED_FLAGS
+# include <iconv/loop.c>
+#else
+# define TO_LOOP_VX_CU NULL /* Variant not built; the resolver guards its use with HAVE_TO_VX_CU. */
+#endif /* HAVE_TO_VX_CU != 1 */
+
/* This file also exists in sysdeps/s390/multiarch/ which
generates ifunc resolvers for FROM/TO_LOOP functions
and includes iconv/skeleton.c afterwards. */