aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
author James Greenhalgh <james.greenhalgh@arm.com> 2016-11-24 18:19:29 +0000
committer James Greenhalgh <jgreenhalgh@gcc.gnu.org> 2016-11-24 18:19:29 +0000
commit 11e554b3b5528e98f2576a06e8ad4d51fbed0d56 (patch)
tree e2fbac9f7d1ee9d902dbaa4a4d85016d6e28e1b7 /gcc
parent 2e5f8203d26bec30f790dbde5058e224521b27cb (diff)
download gcc-11e554b3b5528e98f2576a06e8ad4d51fbed0d56.zip
gcc-11e554b3b5528e98f2576a06e8ad4d51fbed0d56.tar.gz
gcc-11e554b3b5528e98f2576a06e8ad4d51fbed0d56.tar.bz2
[Patch AArch64 13/17] Enable _Float16 for AArch64
gcc/

* config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Update
__FLT_EVAL_METHOD__ and __FLT_EVAL_METHOD_C99__ when we switch
architecture levels.
* config/aarch64/aarch64.c (aarch64_promoted_type): Only promote
the aarch64_fp16_type_node, not all HFmode types.
(aarch64_libgcc_floating_mode_supported_p): Support HFmode.
(aarch64_scalar_mode_supported_p): Likewise.
(aarch64_excess_precision): New.
(TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P): Define.
(TARGET_SCALAR_MODE_SUPPORTED_P): Likewise.
(TARGET_C_EXCESS_PRECISION): Likewise.

gcc/testsuite/

* gcc.target/aarch64/_Float16_1.c: New.
* gcc.target/aarch64/_Float16_2.c: Likewise.
* gcc.target/aarch64/_Float16_3.c: Likewise.

From-SVN: r242845
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 14
-rw-r--r-- gcc/config/aarch64/aarch64-c.c 10
-rw-r--r-- gcc/config/aarch64/aarch64.c 71
-rw-r--r-- gcc/testsuite/ChangeLog 6
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/_Float16_1.c 47
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/_Float16_2.c 47
-rw-r--r-- gcc/testsuite/gcc.target/aarch64/_Float16_3.c 46
7 files changed, 239 insertions, 2 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b088f7c..7fb4826 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,19 @@
2016-11-24 James Greenhalgh <james.greenhalgh@arm.com>
+ * config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Update
+ __FLT_EVAL_METHOD__ and __FLT_EVAL_METHOD_C99__ when we switch
+ architecture levels.
+ * config/aarch64/aarch64.c (aarch64_promoted_type): Only promote
+ the aarch64_fp16_type_node, not all HFmode types.
+ (aarch64_libgcc_floating_mode_supported_p): Support HFmode.
+ (aarch64_scalar_mode_supported_p): Likewise.
+ (aarch64_excess_precision): New.
+ (TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P): Define.
+ (TARGET_SCALAR_MODE_SUPPORTED_P): Likewise.
+ (TARGET_C_EXCESS_PRECISION): Likewise.
+
+2016-11-24 James Greenhalgh <james.greenhalgh@arm.com>
+
* config/aarch64/aarch64-c.c (aarch64_scalar_mode_supported_p): New.
(TARGET_SCALAR_MODE_SUPPORTED_P): Define.
diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c
index 422e322..320b912 100644
--- a/gcc/config/aarch64/aarch64-c.c
+++ b/gcc/config/aarch64/aarch64-c.c
@@ -133,6 +133,16 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
aarch64_def_or_undef (TARGET_CRYPTO, "__ARM_FEATURE_CRYPTO", pfile);
aarch64_def_or_undef (TARGET_SIMD_RDMA, "__ARM_FEATURE_QRDMX", pfile);
+
+ /* Not for ACLE, but required to keep "float.h" correct if we switch
+ target between implementations that do or do not support ARMv8.2-A
+ 16-bit floating-point extensions. */
+ cpp_undef (pfile, "__FLT_EVAL_METHOD__");
+ builtin_define_with_int_value ("__FLT_EVAL_METHOD__",
+ c_flt_eval_method (true));
+ cpp_undef (pfile, "__FLT_EVAL_METHOD_C99__");
+ builtin_define_with_int_value ("__FLT_EVAL_METHOD_C99__",
+ c_flt_eval_method (false));
}
/* Implement TARGET_CPU_CPP_BUILTINS. */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index f0e10d7..68a3380 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14240,12 +14240,20 @@ aarch64_vec_fpconst_pow_of_2 (rtx x)
return firstval;
}
-/* Implement TARGET_PROMOTED_TYPE to promote __fp16 to float. */
+/* Implement TARGET_PROMOTED_TYPE to promote 16-bit floating point types
+ to float.
+
+ __fp16 always promotes through this hook.
+ _Float16 may promote if FLT_EVAL_METHOD is 0 rather than 16, but we do
+ that through the generic excess precision logic rather than here. */
+
static tree
aarch64_promoted_type (const_tree t)
{
- if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
+ if (SCALAR_FLOAT_TYPE_P (t)
+ && TYPE_MAIN_VARIANT (t) == aarch64_fp16_type_node)
return float_type_node;
+
return NULL_TREE;
}
@@ -14265,6 +14273,17 @@ aarch64_optab_supported_p (int op, machine_mode mode1, machine_mode,
}
}
+/* Implement TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P - return TRUE
+ if MODE is HFmode, and punt to the generic implementation otherwise. */
+
+static bool
+aarch64_libgcc_floating_mode_supported_p (machine_mode mode)
+{
+ return (mode == HFmode
+ ? true
+ : default_libgcc_floating_mode_supported_p (mode));
+}
+
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P - return TRUE
if MODE is HFmode, and punt to the generic implementation otherwise. */
@@ -14276,6 +14295,47 @@ aarch64_scalar_mode_supported_p (machine_mode mode)
: default_scalar_mode_supported_p (mode));
}
+/* Set the value of FLT_EVAL_METHOD.
+ ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
+
+ 0: evaluate all operations and constants, whose semantic type has at
+ most the range and precision of type float, to the range and
+ precision of float; evaluate all other operations and constants to
+ the range and precision of the semantic type;
+
+ N, where _FloatN is a supported interchange floating type
+ evaluate all operations and constants, whose semantic type has at
+ most the range and precision of _FloatN type, to the range and
+ precision of the _FloatN type; evaluate all other operations and
+ constants to the range and precision of the semantic type;
+
+ If we have the ARMv8.2-A extensions then we support _Float16 in native
+ precision, so we should set this to 16. Otherwise, we support the type,
+ but want to evaluate expressions in float precision, so set this to
+ 0. */
+
+static enum flt_eval_method
+aarch64_excess_precision (enum excess_precision_type type)
+{
+ switch (type)
+ {
+ case EXCESS_PRECISION_TYPE_FAST:
+ case EXCESS_PRECISION_TYPE_STANDARD:
+ /* We can calculate either in 16-bit range and precision or
+ 32-bit range and precision. Make that decision based on whether
+ we have native support for the ARMv8.2-A 16-bit floating-point
+ instructions or not. */
+ return (TARGET_FP_F16INST
+ ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
+ : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
+ case EXCESS_PRECISION_TYPE_IMPLICIT:
+ return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
+ default:
+ gcc_unreachable ();
+ }
+ return FLT_EVAL_METHOD_UNPREDICTABLE;
+}
+
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST aarch64_address_cost
@@ -14354,6 +14414,9 @@ aarch64_scalar_mode_supported_p (machine_mode mode)
#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
+#undef TARGET_C_EXCESS_PRECISION
+#define TARGET_C_EXCESS_PRECISION aarch64_excess_precision
+
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN aarch64_expand_builtin
@@ -14410,6 +14473,10 @@ aarch64_scalar_mode_supported_p (machine_mode mode)
#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE aarch64_libgcc_cmp_return_mode
+#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
+#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
+aarch64_libgcc_floating_mode_supported_p
+
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 12a17c9..194e85f 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,11 @@
2016-11-24 James Greenhalgh <james.greenhalgh@arm.com>
+ * gcc.target/aarch64/_Float16_1.c: New.
+ * gcc.target/aarch64/_Float16_2.c: Likewise.
+ * gcc.target/aarch64/_Float16_3.c: Likewise.
+
+2016-11-24 James Greenhalgh <james.greenhalgh@arm.com>
+
* gcc.target/aarch64/floatdihf2_1.c: New.
2016-11-24 Kyrylo Tkachov <kyrylo.tkachov@arm.com>
diff --git a/gcc/testsuite/gcc.target/aarch64/_Float16_1.c b/gcc/testsuite/gcc.target/aarch64/_Float16_1.c
new file mode 100644
index 0000000..320f154
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/_Float16_1.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.2-a+nofp16" } */
+
+#pragma GCC target ("arch=armv8.2-a+nofp16")
+
+_Float16
+foo_v8 (_Float16 x, _Float16 y, unsigned int *eval)
+{
+ *eval = __FLT_EVAL_METHOD__;
+ return x * x + y;
+}
+
+__fp16
+bar_v8 (__fp16 x, __fp16 y, unsigned int *eval)
+{
+ *eval = __FLT_EVAL_METHOD__;
+ return x * x + y;
+}
+
+#pragma GCC target ("arch=armv8.2-a+fp16")
+
+_Float16
+foo_v82 (_Float16 x, _Float16 y, unsigned int *eval)
+{
+ *eval = __FLT_EVAL_METHOD__;
+ return x * x + y;
+}
+
+__fp16
+bar_v82 (__fp16 x, __fp16 y, unsigned int *eval)
+{
+ *eval = __FLT_EVAL_METHOD__;
+ return x * x + y;
+}
+
+/* Test that we merge to FMA operations. This indicates that we are not
+ making extraneous conversions between modes. */
+
+/* Three FMA operations in 32-bit precision, from foo_v8, bar_v8, bar_v82. */
+/* { dg-final { scan-assembler-times "fmadd\ts\[0-9\]\+" 3 } } */
+
+/* One FMA operation in 16-bit precision, from foo_v82. */
+/* { dg-final { scan-assembler-times "fmadd\th\[0-9\]\+" 1 } } */
+
+/* Test that we are resetting the __FLT_EVAL_METHOD__. */
+/* { dg-final { scan-assembler-times "mov\tw\[0-9\]\+, 16" 2 } } */
+/* { dg-final { scan-assembler-times "str\twzr" 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/_Float16_2.c b/gcc/testsuite/gcc.target/aarch64/_Float16_2.c
new file mode 100644
index 0000000..8b2aa1e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/_Float16_2.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.2-a+nofp16 -fpermitted-flt-eval-methods=c11" } */
+
+#pragma GCC target ("arch=armv8.2-a+nofp16")
+
+_Float16
+foo_v8 (_Float16 x, _Float16 y, unsigned int *eval)
+{
+ *eval = __FLT_EVAL_METHOD__;
+ return x * x + y;
+}
+
+__fp16
+bar_v8 (__fp16 x, __fp16 y, unsigned int *eval)
+{
+ *eval = __FLT_EVAL_METHOD__;
+ return x * x + y;
+}
+
+#pragma GCC target ("arch=armv8.2-a+fp16")
+
+_Float16
+foo_v82 (_Float16 x, _Float16 y, unsigned int *eval)
+{
+ *eval = __FLT_EVAL_METHOD__;
+ return x * x + y;
+}
+
+__fp16
+bar_v82 (__fp16 x, __fp16 y, unsigned int *eval)
+{
+ *eval = __FLT_EVAL_METHOD__;
+ return x * x + y;
+}
+
+/* Test that we merge to FMA operations. This indicates that we are not
+ making extraneous conversions between modes. */
+
+/* Three FMA operations in 32-bit precision, from foo_v8, bar_v8, bar_v82. */
+/* { dg-final { scan-assembler-times "fmadd\ts\[0-9\]\+" 3 } } */
+
+/* One FMA operation in 16-bit precision, from foo_v82. */
+/* { dg-final { scan-assembler-times "fmadd\th\[0-9\]\+" 1 } } */
+
+/* Test that in -fpermitted-flt-eval-methods=c11 we don't set the
+ __FLT_EVAL_METHOD__ to anything other than 0. */
+/* { dg-final { scan-assembler-times "str\twzr" 4 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/_Float16_3.c b/gcc/testsuite/gcc.target/aarch64/_Float16_3.c
new file mode 100644
index 0000000..2d20250
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/_Float16_3.c
@@ -0,0 +1,46 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=armv8.2-a+nofp16 -std=c11 -ffp-contract=fast" } */
+
+#pragma GCC target ("arch=armv8.2-a+nofp16")
+
+_Float16
+foo_v8 (_Float16 x, _Float16 y, unsigned int *eval)
+{
+ *eval = __FLT_EVAL_METHOD__;
+ return x * x + y;
+}
+
+__fp16
+bar_v8 (__fp16 x, __fp16 y, unsigned int *eval)
+{
+ *eval = __FLT_EVAL_METHOD__;
+ return x * x + y;
+}
+
+#pragma GCC target ("arch=armv8.2-a+fp16")
+
+_Float16
+foo_v82 (_Float16 x, _Float16 y, unsigned int *eval)
+{
+ *eval = __FLT_EVAL_METHOD__;
+ return x * x + y;
+}
+
+__fp16
+bar_v82 (__fp16 x, __fp16 y, unsigned int *eval)
+{
+ *eval = __FLT_EVAL_METHOD__;
+ return x * x + y;
+}
+
+/* Test that we merge to FMA operations. This indicates that we are not
+ making extraneous conversions between modes. */
+
+/* Three FMA operations in 32-bit precision, from foo_v8, bar_v8, bar_v82. */
+/* { dg-final { scan-assembler-times "fmadd\ts\[0-9\]\+" 3 } } */
+
+/* One FMA operation in 16-bit precision, from foo_v82. */
+/* { dg-final { scan-assembler-times "fmadd\th\[0-9\]\+" 1 } } */
+
+/* Test that in C11 mode, we don't reset __FLT_EVAL_METHOD__. */
+/* { dg-final { scan-assembler-times "str\twzr" 4 } } */