diff options
author | Feng Xue <feng.xue@amperecomputing.com> | 2019-08-14 10:48:05 +0800 |
---|---|---|
committer | Feng Xue <feng.xue@amperecomputing.com> | 2019-08-14 10:58:21 +0800 |
commit | b68fabfbbc5a4178338e167f5517787b76eb5962 (patch) | |
tree | e1b15af9e5b6397b16dc9021437d96ca5e6037f7 | |
parent | c3ce62cc0bd6e8a33629e2aabb7783a322e9189c (diff) | |
download | glibc-b68fabfbbc5a4178338e167f5517787b76eb5962.zip glibc-b68fabfbbc5a4178338e167f5517787b76eb5962.tar.gz glibc-b68fabfbbc5a4178338e167f5517787b76eb5962.tar.bz2 |
aarch64: Disable using DC ZVA in emag memset
* sysdeps/aarch64/multiarch/memset_base64.S (DC_ZVA_THRESHOLD):
Disable DC ZVA code if this macro is defined as zero.
* sysdeps/aarch64/multiarch/memset_emag.S (DC_ZVA_THRESHOLD):
Change to zero to disable using DC ZVA.
-rw-r--r-- | ChangeLog | 7 | ||||
-rw-r--r-- | sysdeps/aarch64/multiarch/memset_base64.S | 12 | ||||
-rw-r--r-- | sysdeps/aarch64/multiarch/memset_emag.S | 12 |
3 files changed, 24 insertions, 7 deletions
```diff
@@ -1,3 +1,10 @@
+2019-08-14 Feng Xue <fxue@os.amperecomputing.com>
+
+	* sysdeps/aarch64/multiarch/memset_base64.S (DC_ZVA_THRESHOLD):
+	Disable DC ZVA code if this macro is defined as zero.
+	* sysdeps/aarch64/multiarch/memset_emag.S (DC_ZVA_THRESHOLD):
+	Change to zero to disable using DC ZVA.
+
 2019-08-13 Joseph Myers <joseph@codesourcery.com>
 
 	* bits/libc-header-start.h (__GLIBC_USE_IEC_60559_FUNCS_EXT):
diff --git a/sysdeps/aarch64/multiarch/memset_base64.S b/sysdeps/aarch64/multiarch/memset_base64.S
index 9a62325..c0cccba 100644
--- a/sysdeps/aarch64/multiarch/memset_base64.S
+++ b/sysdeps/aarch64/multiarch/memset_base64.S
@@ -23,6 +23,7 @@
 # define MEMSET __memset_base64
 #endif
 
+/* To disable DC ZVA, set this threshold to 0. */
 #ifndef DC_ZVA_THRESHOLD
 # define DC_ZVA_THRESHOLD 512
 #endif
@@ -91,11 +92,12 @@ L(set96):
 	.p2align 4
 L(set_long):
 	stp	val, val, [dstin]
+	bic	dst, dstin, 15
+#if DC_ZVA_THRESHOLD
 	cmp	count, DC_ZVA_THRESHOLD
 	ccmp	val, 0, 0, cs
-	bic	dst, dstin, 15
 	b.eq	L(zva_64)
-
+#endif
 	/* Small-size or non-zero memset does not use DC ZVA. */
 	sub	count, dstend, dst
 
@@ -105,7 +107,11 @@ L(set_long):
 	 * count is less than 33 bytes, so as to bypass 2 unneccesary stps.
 	 */
 	sub	count, count, 64+16+1
+
+#if DC_ZVA_THRESHOLD
+	/* Align loop on 16-byte boundary, this might be friendly to i-cache. */
 	nop
+#endif
 
 1:	stp	val, val, [dst, 16]
 	stp	val, val, [dst, 32]
@@ -121,6 +127,7 @@ L(set_long):
 	stp	val, val, [dstend, -16]
 	ret
 
+#if DC_ZVA_THRESHOLD
 	.p2align 3
 L(zva_64):
 	stp	val, val, [dst, 16]
@@ -173,6 +180,7 @@ L(zva_64):
 1:	stp	val, val, [dstend, -32]
 	stp	val, val, [dstend, -16]
 	ret
+#endif
 
 END (MEMSET)
 libc_hidden_builtin_def (MEMSET)
diff --git a/sysdeps/aarch64/multiarch/memset_emag.S b/sysdeps/aarch64/multiarch/memset_emag.S
index 1c1fabc..c2aed62 100644
--- a/sysdeps/aarch64/multiarch/memset_emag.S
+++ b/sysdeps/aarch64/multiarch/memset_emag.S
@@ -21,12 +21,14 @@
 # define MEMSET __memset_emag
 
 /*
- * Using dc zva to zero memory does not produce better performance if
+ * Using DC ZVA to zero memory does not produce better performance if
  * memory size is not very large, especially when there are multiple
- * processes/threads contending memory/cache. Here we use a somewhat
- * large threshold to trigger usage of dc zva.
-*/
-# define DC_ZVA_THRESHOLD 1024
+ * processes/threads contending memory/cache. Here we set threshold to
+ * zero to disable using DC ZVA, which is good for multi-process/thread
+ * workloads.
+ */
+
+# define DC_ZVA_THRESHOLD 0
 
 # include "./memset_base64.S"
 #endif
```