diff options
author | Kwok Cheung Yeung <kcy@codesourcery.com> | 2022-09-09 13:10:07 +0000 |
---|---|---|
committer | Kwok Cheung Yeung <kcy@codesourcery.com> | 2022-09-09 13:10:07 +0000 |
commit | eff73c104a3db882f3bc7f567f322e40470c7571 (patch) | |
tree | e4f565ec2874b77b22fa9e5166a5f2b1cf3d895c /gcc | |
parent | a8b0b13da7379feb31950a9d2ad74b98a29c547f (diff) | |
download | gcc-eff73c104a3db882f3bc7f567f322e40470c7571.zip gcc-eff73c104a3db882f3bc7f567f322e40470c7571.tar.gz gcc-eff73c104a3db882f3bc7f567f322e40470c7571.tar.bz2 |
amdgcn: Add support for additional natively supported floating-point operations
This adds support for the following natively supported floating-point
operations, in scalar and vectorized modes:
floor, ceil, exp2*, log2*, sin*, cos*, ldexp, frexp
* These operations are single-precision float only and are only enabled
when -funsafe-math-optimizations is in effect (due to potential numerical
precision issues).
2022-09-09 Kwok Cheung Yeung <kcy@codesourcery.com>
gcc/
* config/gcn/gcn-builtins.def (FABSVF, LDEXPVF, LDEXPV, FREXPVF_EXP,
FREXPVF_MANT, FREXPV_EXP, FREXPV_MANT): Add new builtins.
* config/gcn/gcn-protos.h (gcn_dconst1over2pi): New prototype.
* config/gcn/gcn-valu.md (MATH_UNOP_1OR2REG, MATH_UNOP_1REG,
MATH_UNOP_TRIG): New iterators.
(math_unop): New attributes.
(<math_unop><mode>2, <math_unop><mode>2<exec>,
<math_unop><mode>2, <math_unop><mode>2<exec>,
*<math_unop><mode>2_insn, *<math_unop><mode>2<exec>_insn,
ldexp<mode>3, ldexp<mode>3<exec>,
frexp<mode>_exp2, frexp<mode>_mant2,
frexp<mode>_exp2<exec>, frexp<mode>_mant2<exec>): New instructions.
(<math_unop><mode>2, <math_unop><mode>2<exec>): New expanders.
* config/gcn/gcn.cc (init_ext_gcn_constants): Update definition of
dconst1over2pi.
(gcn_dconst1over2pi): New.
(gcn_builtin_type_index): Add entry for v64df type.
(v64df_type_node): New.
(gcn_init_builtin_types): Initialize v64df_type_node.
(gcn_expand_builtin_1): Expand new builtins to instructions.
(print_operand): Fix assembler output for 1/(2*PI) constant.
* config/gcn/gcn.md (unspec): Add new entries.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/gcn/gcn-builtins.def | 35 | ||||
-rw-r--r-- | gcc/config/gcn/gcn-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/gcn/gcn-valu.md | 181 | ||||
-rw-r--r-- | gcc/config/gcn/gcn.cc | 114 | ||||
-rw-r--r-- | gcc/config/gcn/gcn.md | 4 |
5 files changed, 332 insertions, 3 deletions
diff --git a/gcc/config/gcn/gcn-builtins.def b/gcc/config/gcn/gcn-builtins.def index 54e4ea4..2769190 100644 --- a/gcc/config/gcn/gcn-builtins.def +++ b/gcc/config/gcn/gcn-builtins.def @@ -59,6 +59,41 @@ DEF_BUILTIN (SQRTF, 3 /*CODE_FOR_sqrtf */, _A2 (GCN_BTI_SF, GCN_BTI_SF), gcn_expand_builtin_1) +DEF_BUILTIN (FABSVF, 3 /*CODE_FOR_fabsvf */, + "fabsvf", B_INSN, + _A2 (GCN_BTI_V64SF, GCN_BTI_V64SF), + gcn_expand_builtin_1) + +DEF_BUILTIN (LDEXPVF, 3 /*CODE_FOR_ldexpvf */, + "ldexpvf", B_INSN, + _A3 (GCN_BTI_V64SF, GCN_BTI_V64SF, GCN_BTI_V64SI), + gcn_expand_builtin_1) + +DEF_BUILTIN (LDEXPV, 3 /*CODE_FOR_ldexpv */, + "ldexpv", B_INSN, + _A3 (GCN_BTI_V64DF, GCN_BTI_V64DF, GCN_BTI_V64SI), + gcn_expand_builtin_1) + +DEF_BUILTIN (FREXPVF_EXP, 3 /*CODE_FOR_frexpvf_exp */, + "frexpvf_exp", B_INSN, + _A2 (GCN_BTI_V64SI, GCN_BTI_V64SF), + gcn_expand_builtin_1) + +DEF_BUILTIN (FREXPVF_MANT, 3 /*CODE_FOR_frexpvf_mant */, + "frexpvf_mant", B_INSN, + _A2 (GCN_BTI_V64SF, GCN_BTI_V64SF), + gcn_expand_builtin_1) + +DEF_BUILTIN (FREXPV_EXP, 3 /*CODE_FOR_frexpv_exp */, + "frexpv_exp", B_INSN, + _A2 (GCN_BTI_V64SI, GCN_BTI_V64DF), + gcn_expand_builtin_1) + +DEF_BUILTIN (FREXPV_MANT, 3 /*CODE_FOR_frexpv_mant */, + "frexpv_mant", B_INSN, + _A2 (GCN_BTI_V64DF, GCN_BTI_V64DF), + gcn_expand_builtin_1) + DEF_BUILTIN (CMP_SWAP, -1, "cmp_swap", B_INSN, _A4 (GCN_BTI_UINT, GCN_BTI_VOIDPTR, GCN_BTI_UINT, GCN_BTI_UINT), diff --git a/gcc/config/gcn/gcn-protos.h b/gcc/config/gcn/gcn-protos.h index 38197b9..ca80460 100644 --- a/gcc/config/gcn/gcn-protos.h +++ b/gcc/config/gcn/gcn-protos.h @@ -54,6 +54,7 @@ extern int gcn_hard_regno_nregs (int regno, machine_mode mode); extern void gcn_hsa_declare_function_name (FILE *file, const char *name, tree decl); extern HOST_WIDE_INT gcn_initial_elimination_offset (int, int); +extern REAL_VALUE_TYPE gcn_dconst1over2pi (void); extern bool gcn_inline_constant64_p (rtx, bool); extern bool gcn_inline_constant_p (rtx); extern int gcn_inline_fp_constant_p (rtx, 
bool); diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index 8c33ae0..3bfdf82 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -2290,6 +2290,187 @@ [(set_attr "type" "vop1") (set_attr "length" "8")]) +; These FP unops have f64, f32 and f16 versions. +(define_int_iterator MATH_UNOP_1OR2REG + [UNSPEC_FLOOR UNSPEC_CEIL]) + +; These FP unops only have f16/f32 versions. +(define_int_iterator MATH_UNOP_1REG + [UNSPEC_EXP2 UNSPEC_LOG2]) + +(define_int_iterator MATH_UNOP_TRIG + [UNSPEC_SIN UNSPEC_COS]) + +(define_int_attr math_unop + [(UNSPEC_FLOOR "floor") + (UNSPEC_CEIL "ceil") + (UNSPEC_EXP2 "exp2") + (UNSPEC_LOG2 "log2") + (UNSPEC_SIN "sin") + (UNSPEC_COS "cos")]) + +(define_insn "<math_unop><mode>2" + [(set (match_operand:FP 0 "register_operand" "= v") + (unspec:FP + [(match_operand:FP 1 "gcn_alu_operand" "vSvB")] + MATH_UNOP_1OR2REG))] + "" + "v_<math_unop>%i0\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "<math_unop><mode>2<exec>" + [(set (match_operand:V_FP 0 "register_operand" "= v") + (unspec:V_FP + [(match_operand:V_FP 1 "gcn_alu_operand" "vSvB")] + MATH_UNOP_1OR2REG))] + "" + "v_<math_unop>%i0\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "<math_unop><mode>2" + [(set (match_operand:FP_1REG 0 "register_operand" "= v") + (unspec:FP_1REG + [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")] + MATH_UNOP_1REG))] + "flag_unsafe_math_optimizations" + "v_<math_unop>%i0\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "<math_unop><mode>2<exec>" + [(set (match_operand:V_FP_1REG 0 "register_operand" "= v") + (unspec:V_FP_1REG + [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")] + MATH_UNOP_1REG))] + "flag_unsafe_math_optimizations" + "v_<math_unop>%i0\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "*<math_unop><mode>2_insn" + [(set (match_operand:FP_1REG 0 "register_operand" "= v") + 
(unspec:FP_1REG + [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")] + MATH_UNOP_TRIG))] + "flag_unsafe_math_optimizations" + "v_<math_unop>%i0\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "*<math_unop><mode>2<exec>_insn" + [(set (match_operand:V_FP_1REG 0 "register_operand" "= v") + (unspec:V_FP_1REG + [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")] + MATH_UNOP_TRIG))] + "flag_unsafe_math_optimizations" + "v_<math_unop>%i0\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +; Trigonometric functions need their input scaled by 1/(2*PI) first. + +(define_expand "<math_unop><mode>2" + [(set (match_dup 2) + (mult:FP_1REG + (match_dup 3) + (match_operand:FP_1REG 1 "gcn_alu_operand"))) + (set (match_operand:FP_1REG 0 "register_operand") + (unspec:FP_1REG + [(match_dup 2)] + MATH_UNOP_TRIG))] + "flag_unsafe_math_optimizations" + { + operands[2] = gen_reg_rtx (<MODE>mode); + operands[3] = const_double_from_real_value (gcn_dconst1over2pi (), + <MODE>mode); + }) + +(define_expand "<math_unop><mode>2<exec>" + [(set (match_dup 2) + (mult:V_FP_1REG + (match_dup 3) + (match_operand:V_FP_1REG 1 "gcn_alu_operand"))) + (set (match_operand:V_FP_1REG 0 "register_operand") + (unspec:V_FP_1REG + [(match_dup 2)] + MATH_UNOP_TRIG))] + "flag_unsafe_math_optimizations" + { + operands[2] = gen_reg_rtx (<MODE>mode); + operands[3] = + gcn_vec_constant (<MODE>mode, + const_double_from_real_value (gcn_dconst1over2pi (), + <SCALAR_MODE>mode)); + }) + +; Implement ldexp pattern + +(define_insn "ldexp<mode>3" + [(set (match_operand:FP 0 "register_operand" "=v") + (unspec:FP + [(match_operand:FP 1 "gcn_alu_operand" "vB") + (match_operand:SI 2 "gcn_alu_operand" "vSvA")] + UNSPEC_LDEXP))] + "" + "v_ldexp%i0\t%0, %1, %2" + [(set_attr "type" "vop3a") + (set_attr "length" "8")]) + +(define_insn "ldexp<mode>3<exec>" + [(set (match_operand:V_FP 0 "register_operand" "=v") + (unspec:V_FP + [(match_operand:V_FP 1 "gcn_alu_operand" "vB") + 
(match_operand:V64SI 2 "gcn_alu_operand" "vSvA")] + UNSPEC_LDEXP))] + "" + "v_ldexp%i0\t%0, %1, %2" + [(set_attr "type" "vop3a") + (set_attr "length" "8")]) + +; Implement frexp patterns + +(define_insn "frexp<mode>_exp2" + [(set (match_operand:SI 0 "register_operand" "=v") + (unspec:SI + [(match_operand:FP 1 "gcn_alu_operand" "vB")] + UNSPEC_FREXP_EXP))] + "" + "v_frexp_exp_i32%i1\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "frexp<mode>_mant2" + [(set (match_operand:FP 0 "register_operand" "=v") + (unspec:FP + [(match_operand:FP 1 "gcn_alu_operand" "vB")] + UNSPEC_FREXP_MANT))] + "" + "v_frexp_mant%i1\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "frexp<mode>_exp2<exec>" + [(set (match_operand:V64SI 0 "register_operand" "=v") + (unspec:V64SI + [(match_operand:V_FP 1 "gcn_alu_operand" "vB")] + UNSPEC_FREXP_EXP))] + "" + "v_frexp_exp_i32%i1\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "frexp<mode>_mant2<exec>" + [(set (match_operand:V_FP 0 "register_operand" "=v") + (unspec:V_FP + [(match_operand:V_FP 1 "gcn_alu_operand" "vB")] + UNSPEC_FREXP_MANT))] + "" + "v_frexp_mant%i1\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + ;; }}} ;; {{{ FP fused multiply and add diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index 8266755..eb822e2 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -779,12 +779,20 @@ init_ext_gcn_constants (void) /* FIXME: this constant probably does not match what hardware really loads. Reality check it eventually. 
*/ real_from_string (&dconst1over2pi, - "0.1591549430918953357663423455968866839"); + "0.15915494309189532"); real_convert (&dconst1over2pi, SFmode, &dconst1over2pi); ext_gcn_constants_init = 1; } +REAL_VALUE_TYPE +gcn_dconst1over2pi (void) +{ + if (!ext_gcn_constants_init) + init_ext_gcn_constants (); + return dconst1over2pi; +} + /* Return non-zero if X is a constant that can appear as an inline operand. This is 0, 0.5, -0.5, 1, -1, 2, -2, 4,-4, 1/(2*pi) Or a vector of those. @@ -3605,6 +3613,7 @@ enum gcn_builtin_type_index GCN_BTI_SF, GCN_BTI_V64SI, GCN_BTI_V64SF, + GCN_BTI_V64DF, GCN_BTI_V64PTR, GCN_BTI_SIPTR, GCN_BTI_SFPTR, @@ -3621,6 +3630,7 @@ static GTY(()) tree gcn_builtin_types[GCN_BTI_MAX]; #define sf_type_node (gcn_builtin_types[GCN_BTI_SF]) #define v64si_type_node (gcn_builtin_types[GCN_BTI_V64SI]) #define v64sf_type_node (gcn_builtin_types[GCN_BTI_V64SF]) +#define v64df_type_node (gcn_builtin_types[GCN_BTI_V64DF]) #define v64ptr_type_node (gcn_builtin_types[GCN_BTI_V64PTR]) #define siptr_type_node (gcn_builtin_types[GCN_BTI_SIPTR]) #define sfptr_type_node (gcn_builtin_types[GCN_BTI_SFPTR]) @@ -3710,6 +3720,7 @@ gcn_init_builtin_types (void) sf_type_node = float32_type_node; v64si_type_node = build_vector_type (intSI_type_node, 64); v64sf_type_node = build_vector_type (float_type_node, 64); + v64df_type_node = build_vector_type (double_type_node, 64); v64ptr_type_node = build_vector_type (unsigned_intDI_type_node /*build_pointer_type (integer_type_node) */ @@ -3977,6 +3988,105 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ , emit_insn (gen_sqrtsf2 (target, arg)); return target; } + case GCN_BUILTIN_FABSVF: + { + if (ignore) + return target; + rtx exec = gcn_full_exec_reg (); + rtx arg = force_reg (V64SFmode, + expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, + V64SFmode, + EXPAND_NORMAL)); + emit_insn (gen_absv64sf2_exec + (target, arg, gcn_gen_undef (V64SFmode), exec)); + return target; + } + case GCN_BUILTIN_LDEXPVF: + { + if 
(ignore) + return target; + rtx exec = gcn_full_exec_reg (); + rtx arg1 = force_reg (V64SFmode, + expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, + V64SFmode, + EXPAND_NORMAL)); + rtx arg2 = force_reg (V64SImode, + expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, + V64SImode, + EXPAND_NORMAL)); + emit_insn (gen_ldexpv64sf3_exec + (target, arg1, arg2, gcn_gen_undef (V64SFmode), exec)); + return target; + } + case GCN_BUILTIN_LDEXPV: + { + if (ignore) + return target; + rtx exec = gcn_full_exec_reg (); + rtx arg1 = force_reg (V64DFmode, + expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, + V64SFmode, + EXPAND_NORMAL)); + rtx arg2 = force_reg (V64SImode, + expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, + V64SImode, + EXPAND_NORMAL)); + emit_insn (gen_ldexpv64df3_exec + (target, arg1, arg2, gcn_gen_undef (V64DFmode), exec)); + return target; + } + case GCN_BUILTIN_FREXPVF_EXP: + { + if (ignore) + return target; + rtx exec = gcn_full_exec_reg (); + rtx arg = force_reg (V64SFmode, + expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, + V64SFmode, + EXPAND_NORMAL)); + emit_insn (gen_frexpv64sf_exp2_exec + (target, arg, gcn_gen_undef (V64SImode), exec)); + return target; + } + case GCN_BUILTIN_FREXPVF_MANT: + { + if (ignore) + return target; + rtx exec = gcn_full_exec_reg (); + rtx arg = force_reg (V64SFmode, + expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, + V64SFmode, + EXPAND_NORMAL)); + emit_insn (gen_frexpv64sf_mant2_exec + (target, arg, gcn_gen_undef (V64SFmode), exec)); + return target; + } + case GCN_BUILTIN_FREXPV_EXP: + { + if (ignore) + return target; + rtx exec = gcn_full_exec_reg (); + rtx arg = force_reg (V64DFmode, + expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, + V64DFmode, + EXPAND_NORMAL)); + emit_insn (gen_frexpv64df_exp2_exec + (target, arg, gcn_gen_undef (V64SImode), exec)); + return target; + } + case GCN_BUILTIN_FREXPV_MANT: + { + if (ignore) + return target; + rtx exec = gcn_full_exec_reg (); + rtx arg = force_reg (V64DFmode, + expand_expr (CALL_EXPR_ARG 
(exp, 0), NULL_RTX, + V64DFmode, + EXPAND_NORMAL)); + emit_insn (gen_frexpv64df_mant2_exec + (target, arg, gcn_gen_undef (V64DFmode), exec)); + return target; + } case GCN_BUILTIN_OMP_DIM_SIZE: { if (ignore) @@ -6476,7 +6586,7 @@ print_operand (FILE *file, rtx x, int code) str = "-4.0"; break; case 248: - str = "1/pi"; + str = "0.15915494"; break; default: rtx ix = simplify_gen_subreg (GET_MODE (x) == DFmode diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index 7805e86..a3c9523 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -82,7 +82,9 @@ UNSPEC_GATHER UNSPEC_SCATTER UNSPEC_RCP - UNSPEC_FLBIT_INT]) + UNSPEC_FLBIT_INT + UNSPEC_FLOOR UNSPEC_CEIL UNSPEC_SIN UNSPEC_COS UNSPEC_EXP2 UNSPEC_LOG2 + UNSPEC_LDEXP UNSPEC_FREXP_EXP UNSPEC_FREXP_MANT]) ;; }}} ;; {{{ Attributes |