aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSunil K Pandey <skpgkp2@gmail.com>2022-03-07 10:47:10 -0800
committerSunil K Pandey <skpgkp2@gmail.com>2022-03-07 21:14:10 -0800
commit5e837461dcbbe25153db3b8437ac4c0030292b51 (patch)
tree61854571d05b6cc95708bb4cd122448a5cbeafda
parent994266f5019560f26e8d07be7fdf8621903339a1 (diff)
downloadglibc-5e837461dcbbe25153db3b8437ac4c0030292b51.zip
glibc-5e837461dcbbe25153db3b8437ac4c0030292b51.tar.gz
glibc-5e837461dcbbe25153db3b8437ac4c0030292b51.tar.bz2
x86_64: Fix svml_s_cbrtf16_core_avx512.S code formatting
This commit contains the following formatting changes: 1. Instructions are preceded by a tab. 2. Instructions less than 8 characters in length have a tab between them and the first operand. 3. Instructions greater than 7 characters in length have a space between them and the first operand. 4. Tabs after `#define`d names and their values. 5. 8 spaces at the beginning of a line replaced by a tab. 6. Indent comments with code. 7. Remove redundant .text section. 8. 1 space between line content and line comment. 9. Space after all commas. Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
-rw-r--r--sysdeps/x86_64/fpu/multiarch/svml_s_cbrtf16_core_avx512.S377
1 files changed, 188 insertions, 189 deletions
diff --git a/sysdeps/x86_64/fpu/multiarch/svml_s_cbrtf16_core_avx512.S b/sysdeps/x86_64/fpu/multiarch/svml_s_cbrtf16_core_avx512.S
index 9cf7918..ce10cf1 100644
--- a/sysdeps/x86_64/fpu/multiarch/svml_s_cbrtf16_core_avx512.S
+++ b/sysdeps/x86_64/fpu/multiarch/svml_s_cbrtf16_core_avx512.S
@@ -31,205 +31,204 @@
/* Offsets for data table __svml_scbrt_data_internal_avx512
*/
-#define etbl_H 0
-#define etbl_L 64
-#define cbrt_tbl_H 128
-#define BiasL 256
-#define SZero 320
-#define OneThird 384
-#define Bias3 448
-#define Three 512
-#define One 576
-#define poly_coeff3 640
-#define poly_coeff2 704
-#define poly_coeff1 768
+#define etbl_H 0
+#define etbl_L 64
+#define cbrt_tbl_H 128
+#define BiasL 256
+#define SZero 320
+#define OneThird 384
+#define Bias3 448
+#define Three 512
+#define One 576
+#define poly_coeff3 640
+#define poly_coeff2 704
+#define poly_coeff1 768
#include <sysdep.h>
- .text
- .section .text.exex512,"ax",@progbits
+ .section .text.exex512, "ax", @progbits
ENTRY(_ZGVeN16v_cbrtf_skx)
- vgetmantps $0, {sae}, %zmm0, %zmm8
-
-/* GetExp(x) */
- vgetexpps {sae}, %zmm0, %zmm1
- vmovups BiasL+__svml_scbrt_data_internal_avx512(%rip), %zmm2
-
-/* exponent/3 */
- vmovups OneThird+__svml_scbrt_data_internal_avx512(%rip), %zmm3
- vmovups Bias3+__svml_scbrt_data_internal_avx512(%rip), %zmm4
- vmovups One+__svml_scbrt_data_internal_avx512(%rip), %zmm15
-
-/* exponent%3 (to be used as index) */
- vmovups Three+__svml_scbrt_data_internal_avx512(%rip), %zmm5
-
-/* polynomial */
- vmovups poly_coeff3+__svml_scbrt_data_internal_avx512(%rip), %zmm11
- vmovups poly_coeff1+__svml_scbrt_data_internal_avx512(%rip), %zmm14
-
-/* Table lookup */
- vmovups cbrt_tbl_H+__svml_scbrt_data_internal_avx512(%rip), %zmm12
-
-/* DblRcp ~ 1/Mantissa */
- vrcp14ps %zmm8, %zmm7
- vaddps {rn-sae}, %zmm2, %zmm1, %zmm6
- vandps SZero+__svml_scbrt_data_internal_avx512(%rip), %zmm0, %zmm0
-
-/* round DblRcp to 3 fractional bits (RN mode, no Precision exception) */
- vrndscaleps $88, {sae}, %zmm7, %zmm9
- vfmsub231ps {rn-sae}, %zmm6, %zmm3, %zmm4
- vmovups poly_coeff2+__svml_scbrt_data_internal_avx512(%rip), %zmm7
-
-/* Reduced argument: R = DblRcp*Mantissa - 1 */
- vfmsub231ps {rn-sae}, %zmm9, %zmm8, %zmm15
- vrndscaleps $9, {sae}, %zmm4, %zmm13
-
-/* Prepare table index */
- vpsrld $19, %zmm9, %zmm10
- vfmadd231ps {rn-sae}, %zmm15, %zmm11, %zmm7
- vfnmadd231ps {rn-sae}, %zmm13, %zmm5, %zmm6
- vpermt2ps cbrt_tbl_H+64+__svml_scbrt_data_internal_avx512(%rip), %zmm10, %zmm12
- vfmadd213ps {rn-sae}, %zmm14, %zmm15, %zmm7
- vscalefps {rn-sae}, %zmm13, %zmm12, %zmm2
-
-/* Table lookup: 2^(exponent%3) */
- vpermps __svml_scbrt_data_internal_avx512(%rip), %zmm6, %zmm1
- vpermps etbl_L+__svml_scbrt_data_internal_avx512(%rip), %zmm6, %zmm6
-
-/* Sh*R */
- vmulps {rn-sae}, %zmm15, %zmm1, %zmm14
-
-/* Sl + (Sh*R)*Poly */
- vfmadd213ps {rn-sae}, %zmm6, %zmm7, %zmm14
-
-/*
- * branch-free
- * scaled_Th*(Sh+Sl+Sh*R*Poly)
- */
- vaddps {rn-sae}, %zmm1, %zmm14, %zmm15
- vmulps {rn-sae}, %zmm2, %zmm15, %zmm3
- vorps %zmm0, %zmm3, %zmm0
- ret
+ vgetmantps $0, {sae}, %zmm0, %zmm8
+
+ /* GetExp(x) */
+ vgetexpps {sae}, %zmm0, %zmm1
+ vmovups BiasL+__svml_scbrt_data_internal_avx512(%rip), %zmm2
+
+ /* exponent/3 */
+ vmovups OneThird+__svml_scbrt_data_internal_avx512(%rip), %zmm3
+ vmovups Bias3+__svml_scbrt_data_internal_avx512(%rip), %zmm4
+ vmovups One+__svml_scbrt_data_internal_avx512(%rip), %zmm15
+
+ /* exponent%3 (to be used as index) */
+ vmovups Three+__svml_scbrt_data_internal_avx512(%rip), %zmm5
+
+ /* polynomial */
+ vmovups poly_coeff3+__svml_scbrt_data_internal_avx512(%rip), %zmm11
+ vmovups poly_coeff1+__svml_scbrt_data_internal_avx512(%rip), %zmm14
+
+ /* Table lookup */
+ vmovups cbrt_tbl_H+__svml_scbrt_data_internal_avx512(%rip), %zmm12
+
+ /* DblRcp ~ 1/Mantissa */
+ vrcp14ps %zmm8, %zmm7
+ vaddps {rn-sae}, %zmm2, %zmm1, %zmm6
+ vandps SZero+__svml_scbrt_data_internal_avx512(%rip), %zmm0, %zmm0
+
+ /* round DblRcp to 3 fractional bits (RN mode, no Precision exception) */
+ vrndscaleps $88, {sae}, %zmm7, %zmm9
+ vfmsub231ps {rn-sae}, %zmm6, %zmm3, %zmm4
+ vmovups poly_coeff2+__svml_scbrt_data_internal_avx512(%rip), %zmm7
+
+ /* Reduced argument: R = DblRcp*Mantissa - 1 */
+ vfmsub231ps {rn-sae}, %zmm9, %zmm8, %zmm15
+ vrndscaleps $9, {sae}, %zmm4, %zmm13
+
+ /* Prepare table index */
+ vpsrld $19, %zmm9, %zmm10
+ vfmadd231ps {rn-sae}, %zmm15, %zmm11, %zmm7
+ vfnmadd231ps {rn-sae}, %zmm13, %zmm5, %zmm6
+ vpermt2ps cbrt_tbl_H+64+__svml_scbrt_data_internal_avx512(%rip), %zmm10, %zmm12
+ vfmadd213ps {rn-sae}, %zmm14, %zmm15, %zmm7
+ vscalefps {rn-sae}, %zmm13, %zmm12, %zmm2
+
+ /* Table lookup: 2^(exponent%3) */
+ vpermps __svml_scbrt_data_internal_avx512(%rip), %zmm6, %zmm1
+ vpermps etbl_L+__svml_scbrt_data_internal_avx512(%rip), %zmm6, %zmm6
+
+ /* Sh*R */
+ vmulps {rn-sae}, %zmm15, %zmm1, %zmm14
+
+ /* Sl + (Sh*R)*Poly */
+ vfmadd213ps {rn-sae}, %zmm6, %zmm7, %zmm14
+
+ /*
+ * branch-free
+ * scaled_Th*(Sh+Sl+Sh*R*Poly)
+ */
+ vaddps {rn-sae}, %zmm1, %zmm14, %zmm15
+ vmulps {rn-sae}, %zmm2, %zmm15, %zmm3
+ vorps %zmm0, %zmm3, %zmm0
+ ret
END(_ZGVeN16v_cbrtf_skx)
- .section .rodata, "a"
- .align 64
+ .section .rodata, "a"
+ .align 64
#ifdef __svml_scbrt_data_internal_avx512_typedef
typedef unsigned int VUINT32;
typedef struct {
- __declspec(align(64)) VUINT32 etbl_H[16][1];
- __declspec(align(64)) VUINT32 etbl_L[16][1];
- __declspec(align(64)) VUINT32 cbrt_tbl_H[32][1];
- __declspec(align(64)) VUINT32 BiasL[16][1];
- __declspec(align(64)) VUINT32 SZero[16][1];
- __declspec(align(64)) VUINT32 OneThird[16][1];
- __declspec(align(64)) VUINT32 Bias3[16][1];
- __declspec(align(64)) VUINT32 Three[16][1];
- __declspec(align(64)) VUINT32 One[16][1];
- __declspec(align(64)) VUINT32 poly_coeff3[16][1];
- __declspec(align(64)) VUINT32 poly_coeff2[16][1];
- __declspec(align(64)) VUINT32 poly_coeff1[16][1];
- } __svml_scbrt_data_internal_avx512;
+ __declspec(align(64)) VUINT32 etbl_H[16][1];
+ __declspec(align(64)) VUINT32 etbl_L[16][1];
+ __declspec(align(64)) VUINT32 cbrt_tbl_H[32][1];
+ __declspec(align(64)) VUINT32 BiasL[16][1];
+ __declspec(align(64)) VUINT32 SZero[16][1];
+ __declspec(align(64)) VUINT32 OneThird[16][1];
+ __declspec(align(64)) VUINT32 Bias3[16][1];
+ __declspec(align(64)) VUINT32 Three[16][1];
+ __declspec(align(64)) VUINT32 One[16][1];
+ __declspec(align(64)) VUINT32 poly_coeff3[16][1];
+ __declspec(align(64)) VUINT32 poly_coeff2[16][1];
+ __declspec(align(64)) VUINT32 poly_coeff1[16][1];
+} __svml_scbrt_data_internal_avx512;
#endif
__svml_scbrt_data_internal_avx512:
- /*== etbl_H ==*/
- .long 0x3f800000
- .long 0x3fa14518
- .long 0x3fcb2ff5
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- /*== etbl_L ==*/
- .align 64
- .long 0x00000000
- .long 0xb2ce51af
- .long 0x32a7adc8
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- /*== cbrt_tbl_H ==*/
- .align 64
- .long 0x3fa14518
- .long 0x3f9e0b2b
- .long 0x3f9b0f9b
- .long 0x3f984a9a
- .long 0x3f95b5af
- .long 0x3f934b6c
- .long 0x3f910737
- .long 0x3f8ee526
- .long 0x3f8ce1da
- .long 0x3f8afa6a
- .long 0x3f892c4e
- .long 0x3f87754e
- .long 0x3f85d377
- .long 0x3f844510
- .long 0x3f82c892
- .long 0x3f815c9f
- .long 0x3f800000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- .long 0x00000000
- /*== BiasL ==*/
- .align 64
- .long 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000
- /*== Zero ==*/
- .align 64
- .long 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000
- /*== OneThird ==*/
- .align 64
- .long 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab
- /*== Bias3 ==*/
- .align 64
- .long 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000
- /*== Three ==*/
- .align 64
- .long 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000
- /*==One ==*/
- .align 64
- .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
- /*== poly_coeff3 ==*/
- .align 64
- .long 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c
- /*== poly_coeff2 ==*/
- .align 64
- .long 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363
- /*== poly_coeff1 ==*/
- .align 64
- .long 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa
- .align 64
- .type __svml_scbrt_data_internal_avx512,@object
- .size __svml_scbrt_data_internal_avx512,.-__svml_scbrt_data_internal_avx512
+ /* etbl_H */
+ .long 0x3f800000
+ .long 0x3fa14518
+ .long 0x3fcb2ff5
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ /* etbl_L */
+ .align 64
+ .long 0x00000000
+ .long 0xb2ce51af
+ .long 0x32a7adc8
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ /* cbrt_tbl_H */
+ .align 64
+ .long 0x3fa14518
+ .long 0x3f9e0b2b
+ .long 0x3f9b0f9b
+ .long 0x3f984a9a
+ .long 0x3f95b5af
+ .long 0x3f934b6c
+ .long 0x3f910737
+ .long 0x3f8ee526
+ .long 0x3f8ce1da
+ .long 0x3f8afa6a
+ .long 0x3f892c4e
+ .long 0x3f87754e
+ .long 0x3f85d377
+ .long 0x3f844510
+ .long 0x3f82c892
+ .long 0x3f815c9f
+ .long 0x3f800000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ .long 0x00000000
+ /* BiasL */
+ .align 64
+ .long 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000, 0x4b400000
+ /* Zero */
+ .align 64
+ .long 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000
+ /* OneThird */
+ .align 64
+ .long 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab, 0x3eaaaaab
+ /* Bias3 */
+ .align 64
+ .long 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000, 0x4a800000
+ /* Three */
+ .align 64
+ .long 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000, 0x40400000
+ /* One */
+ .align 64
+ .long 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000
+ /* poly_coeff3 */
+ .align 64
+ .long 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c, 0x3d7d057c
+ /* poly_coeff2 */
+ .align 64
+ .long 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363, 0xbde3a363
+ /* poly_coeff1 */
+ .align 64
+ .long 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa, 0x3eaaaaaa
+ .align 64
+ .type __svml_scbrt_data_internal_avx512, @object
+ .size __svml_scbrt_data_internal_avx512, .-__svml_scbrt_data_internal_avx512