diff options
author | Sandra Loosemore <sandra@codesourcery.com> | 2009-06-18 08:24:10 -0400 |
---|---|---|
committer | Sandra Loosemore <sandra@gcc.gnu.org> | 2009-06-18 08:24:10 -0400 |
commit | 0fd8c3ad1ef97e592cb302d764b9e69f85cb7d6f (patch) | |
tree | c59b4303d74346dd274c9504a6302946ba670574 /gcc/config/arm | |
parent | 2c2f70e1f266b6582398248096feccaceb2dd70c (diff) | |
download | gcc-0fd8c3ad1ef97e592cb302d764b9e69f85cb7d6f.zip gcc-0fd8c3ad1ef97e592cb302d764b9e69f85cb7d6f.tar.gz gcc-0fd8c3ad1ef97e592cb302d764b9e69f85cb7d6f.tar.bz2 |
extend.texi (Half-Precision): New section.
2009-06-18 Sandra Loosemore <sandra@codesourcery.com>
gcc/
* doc/extend.texi (Half-Precision): New section.
* doc/invoke.texi (Option Summary): List -mfp16-format.
(ARM Options): List neon-fp16 as -mfpu value. Document -mfp16-format.
* config/arm/arm.opt (mfp16-format=): New.
* config/arm/arm.c: Include intl.h.
(TARGET_INVALID_PARAMETER_TYPE): Redefine.
(TARGET_INVALID_RETURN_TYPE): Redefine.
(TARGET_PROMOTED_TYPE): Redefine.
(TARGET_CONVERT_TO_TYPE): Redefine.
(arm_fp16_format): Define.
(all_fpus): Add entry for neon-fp16.
(fp_model_for_fpu): Likewise.
(struct fp16_format): Declare.
(all_fp16_formats): Define.
(arm_init_libfuncs): Add entries for HFmode conversions and arithmetic
functions.
(arm_override_options): Set arm_fp16_format. Call sorry for fp16
and no ldrh.
(arm_legitimate_index_p): Treat HFmode like HImode.
(thumb1_legitimate_address_p): Make it recognize HFmode constants.
(coproc_secondary_reload_class): Special-case HFmode.
(arm_print_operand): Add 'z' specifier for vld1.16/vst1.16.
(arm_hard_regno_mode_ok): Allow HFmode values in VFP registers.
(arm_init_fp16_builtins): New.
(arm_init_builtins): Call it.
(arm_invalid_parameter_type): New.
(arm_invalid_return_type): New.
(arm_promoted_type): New.
(arm_convert_to_type): New.
(arm_file_start): Deal with neon-fp16 as fpu_name. Emit tag for fp16
format.
(arm_emit_fp16_const): New function.
(arm_mangle_type): Mangle __fp16 as "Dh".
* config/arm/arm.h (TARGET_VFPD32): Make it know about
FPUTYPE_NEON_FP16.
(TARGET_NEON_FP16): New.
(TARGET_NEON): Make it know about FPUTYPE_NEON_FP16.
(enum fputype): Add FPUTYPE_NEON_FP16.
(enum arm_fp16_format_type): Declare.
(arm_fp16_format): Declare.
(LARGEST_EXPONENT_IS_NORMAL): Define.
* config/arm/arm-protos.h (arm_emit_fp16_const): Declare.
* config/arm/arm-modes.def (HFmode): Define.
* config/arm/vfp.md (*movhf_vfp): New.
(extendhfsf2): New.
(truncsfhf2): New.
* config/arm/arm.md (fpu): Add neon_fp16.
(floatsihf2, floatdihf2): New.
(fix_trunchfsi2, fix_trunchfdi2): New.
(truncdfhf2): New.
(extendhfdf2): New.
(movhf): New.
(*arm32_movhf): New.
(*thumb1_movhf): New.
(consttable_2): Add check for HFmode constants.
(consttable_4): Handle HFmode constants.
From-SVN: r148654
Diffstat (limited to 'gcc/config/arm')
-rw-r--r-- | gcc/config/arm/arm-modes.def | 5 | ||||
-rw-r--r-- | gcc/config/arm/arm-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/arm/arm.c | 255 | ||||
-rw-r--r-- | gcc/config/arm/arm.h | 28 | ||||
-rw-r--r-- | gcc/config/arm/arm.md | 209 | ||||
-rw-r--r-- | gcc/config/arm/arm.opt | 4 | ||||
-rw-r--r-- | gcc/config/arm/vfp.md | 73 |
7 files changed, 549 insertions, 26 deletions
diff --git a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def index 73b5b4d..813ce8e 100644 --- a/gcc/config/arm/arm-modes.def +++ b/gcc/config/arm/arm-modes.def @@ -25,6 +25,11 @@ FIXME What format is this? */ FLOAT_MODE (XF, 12, 0); +/* Half-precision floating point */ +FLOAT_MODE (HF, 2, 0); +ADJUST_FLOAT_FORMAT (HF, ((arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) + ? &arm_half_format : &ieee_half_format)); + /* CCFPEmode should be used with floating inequalities, CCFPmode should be used with floating equalities. CC_NOOVmode should be used with SImode integer equalities. diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 857d575..b911a97 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -140,6 +140,7 @@ extern void arm_final_prescan_insn (rtx); extern int arm_debugger_arg_offset (int, rtx); extern bool arm_is_long_call_p (tree); extern int arm_emit_vector_const (FILE *, rtx); +extern void arm_emit_fp16_const (rtx c); extern const char * arm_output_load_gr (rtx *); extern const char *vfp_output_fstmd (rtx *); extern void arm_set_return_address (rtx, rtx); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 3809270..e6b2edb 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -53,6 +53,7 @@ #include "debug.h" #include "langhooks.h" #include "df.h" +#include "intl.h" /* Forward definitions of types. */ typedef struct minipool_node Mnode; @@ -200,6 +201,10 @@ static bool arm_tls_symbol_p (rtx x); static int arm_issue_rate (void); static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; static bool arm_allocate_stack_slots_for_args (void); +static const char *arm_invalid_parameter_type (const_tree t); +static const char *arm_invalid_return_type (const_tree t); +static tree arm_promoted_type (const_tree t); +static tree arm_convert_to_type (tree type, tree expr); /* Initialize the GCC target structure. 
*/ @@ -407,6 +412,18 @@ static bool arm_allocate_stack_slots_for_args (void); #undef TARGET_LEGITIMATE_ADDRESS_P #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p +#undef TARGET_INVALID_PARAMETER_TYPE +#define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type + +#undef TARGET_INVALID_RETURN_TYPE +#define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type + +#undef TARGET_PROMOTED_TYPE +#define TARGET_PROMOTED_TYPE arm_promoted_type + +#undef TARGET_CONVERT_TO_TYPE +#define TARGET_CONVERT_TO_TYPE arm_convert_to_type + struct gcc_target targetm = TARGET_INITIALIZER; /* Obstack for minipool constant handling. */ @@ -440,6 +457,9 @@ enum fputype arm_fpu_tune; /* Whether to use floating point hardware. */ enum float_abi_type arm_float_abi; +/* Which __fp16 format to use. */ +enum arm_fp16_format_type arm_fp16_format; + /* Which ABI to use. */ enum arm_abi_type arm_abi; @@ -719,15 +739,16 @@ struct fpu_desc static const struct fpu_desc all_fpus[] = { - {"fpa", FPUTYPE_FPA}, - {"fpe2", FPUTYPE_FPA_EMU2}, - {"fpe3", FPUTYPE_FPA_EMU2}, - {"maverick", FPUTYPE_MAVERICK}, - {"vfp", FPUTYPE_VFP}, - {"vfp3", FPUTYPE_VFP3}, - {"vfpv3", FPUTYPE_VFP3}, - {"vfpv3-d16", FPUTYPE_VFP3D16}, - {"neon", FPUTYPE_NEON} + {"fpa", FPUTYPE_FPA}, + {"fpe2", FPUTYPE_FPA_EMU2}, + {"fpe3", FPUTYPE_FPA_EMU2}, + {"maverick", FPUTYPE_MAVERICK}, + {"vfp", FPUTYPE_VFP}, + {"vfp3", FPUTYPE_VFP3}, + {"vfpv3", FPUTYPE_VFP3}, + {"vfpv3-d16", FPUTYPE_VFP3D16}, + {"neon", FPUTYPE_NEON}, + {"neon-fp16", FPUTYPE_NEON_FP16} }; @@ -745,7 +766,8 @@ static const enum arm_fp_model fp_model_for_fpu[] = ARM_FP_MODEL_VFP, /* FPUTYPE_VFP */ ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3D16 */ ARM_FP_MODEL_VFP, /* FPUTYPE_VFP3 */ - ARM_FP_MODEL_VFP /* FPUTYPE_NEON */ + ARM_FP_MODEL_VFP, /* FPUTYPE_NEON */ + ARM_FP_MODEL_VFP /* FPUTYPE_NEON_FP16 */ }; @@ -766,6 +788,23 @@ static const struct float_abi all_float_abis[] = }; +struct fp16_format +{ + const char *name; + enum arm_fp16_format_type fp16_format_type; +}; + 
+ +/* Available values for -mfp16-format=. */ + +static const struct fp16_format all_fp16_formats[] = +{ + {"none", ARM_FP16_FORMAT_NONE}, + {"ieee", ARM_FP16_FORMAT_IEEE}, + {"alternative", ARM_FP16_FORMAT_ALTERNATIVE} +}; + + struct abi_name { const char *name; @@ -923,6 +962,44 @@ arm_init_libfuncs (void) set_optab_libfunc (umod_optab, DImode, NULL); set_optab_libfunc (smod_optab, SImode, NULL); set_optab_libfunc (umod_optab, SImode, NULL); + + /* Half-precision float operations. The compiler handles all operations + with NULL libfuncs by converting the SFmode. */ + switch (arm_fp16_format) + { + case ARM_FP16_FORMAT_IEEE: + case ARM_FP16_FORMAT_ALTERNATIVE: + + /* Conversions. */ + set_conv_libfunc (trunc_optab, HFmode, SFmode, + (arm_fp16_format == ARM_FP16_FORMAT_IEEE + ? "__gnu_f2h_ieee" + : "__gnu_f2h_alternative")); + set_conv_libfunc (sext_optab, SFmode, HFmode, + (arm_fp16_format == ARM_FP16_FORMAT_IEEE + ? "__gnu_h2f_ieee" + : "__gnu_h2f_alternative")); + + /* Arithmetic. */ + set_optab_libfunc (add_optab, HFmode, NULL); + set_optab_libfunc (sdiv_optab, HFmode, NULL); + set_optab_libfunc (smul_optab, HFmode, NULL); + set_optab_libfunc (neg_optab, HFmode, NULL); + set_optab_libfunc (sub_optab, HFmode, NULL); + + /* Comparisons. */ + set_optab_libfunc (eq_optab, HFmode, NULL); + set_optab_libfunc (ne_optab, HFmode, NULL); + set_optab_libfunc (lt_optab, HFmode, NULL); + set_optab_libfunc (le_optab, HFmode, NULL); + set_optab_libfunc (ge_optab, HFmode, NULL); + set_optab_libfunc (gt_optab, HFmode, NULL); + set_optab_libfunc (unord_optab, HFmode, NULL); + break; + + default: + break; + } } /* On AAPCS systems, this is the "struct __va_list". 
*/ @@ -1294,6 +1371,23 @@ arm_override_options (void) tune_flags = all_cores[(int)arm_tune].flags; + if (target_fp16_format_name) + { + for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++) + { + if (streq (all_fp16_formats[i].name, target_fp16_format_name)) + { + arm_fp16_format = all_fp16_formats[i].fp16_format_type; + break; + } + } + if (i == ARRAY_SIZE (all_fp16_formats)) + error ("invalid __fp16 format option: -mfp16-format=%s", + target_fp16_format_name); + } + else + arm_fp16_format = ARM_FP16_FORMAT_NONE; + if (target_abi_name) { for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++) @@ -1525,6 +1619,10 @@ arm_override_options (void) if (TARGET_THUMB2 && TARGET_IWMMXT) sorry ("Thumb-2 iWMMXt"); + /* __fp16 support currently assumes the core has ldrh. */ + if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE) + sorry ("__fp16 and no ldrh"); + /* If soft-float is specified then don't use FPU. */ if (TARGET_SOFT_FLOAT) arm_fpu_arch = FPUTYPE_NONE; @@ -4173,6 +4271,7 @@ arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer, if (GET_MODE_SIZE (mode) <= 4 && ! (arm_arch4 && (mode == HImode + || mode == HFmode || (mode == QImode && outer == SIGN_EXTEND)))) { if (code == MULT) @@ -4201,13 +4300,15 @@ arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer, load. */ if (arm_arch4) { - if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode)) + if (mode == HImode + || mode == HFmode + || (outer == SIGN_EXTEND && mode == QImode)) range = 256; else range = 4096; } else - range = (mode == HImode) ? 4095 : 4096; + range = (mode == HImode || mode == HFmode) ? 4095 : 4096; return (code == CONST_INT && INTVAL (index) < range @@ -4380,7 +4481,8 @@ thumb1_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p) return 1; /* This is PC relative data after arm_reorg runs. 
*/ - else if (GET_MODE_SIZE (mode) >= 4 && reload_completed + else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode) + && reload_completed && (GET_CODE (x) == LABEL_REF || (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS @@ -7121,6 +7223,13 @@ arm_eliminable_register (rtx x) enum reg_class coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb) { + if (mode == HFmode) + { + if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2)) + return NO_REGS; + return GENERAL_REGS; + } + if (TARGET_NEON && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) @@ -13926,6 +14035,31 @@ arm_print_operand (FILE *stream, rtx x, int code) } return; + /* Register specifier for vld1.16/vst1.16. Translate the S register + number into a D register number and element index. */ + case 'z': + { + int mode = GET_MODE (x); + int regno; + + if (GET_MODE_SIZE (mode) != 2 || GET_CODE (x) != REG) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = REGNO (x); + if (!VFP_REGNO_OK_FOR_SINGLE (regno)) + { + output_operand_lossage ("invalid operand for code '%c'", code); + return; + } + + regno = regno - FIRST_VFP_REGNUM; + fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0)); + } + return; + default: if (x == 0) { @@ -14723,6 +14857,12 @@ arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode) if (mode == DFmode) return VFP_REGNO_OK_FOR_DOUBLE (regno); + /* VFP registers can hold HFmode values, but there is no point in + putting them there unless we have the NEON extensions for + loading/storing them, too. 
*/ + if (mode == HFmode) + return TARGET_NEON_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno); + if (TARGET_NEON) return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno)) || (VALID_NEON_QREG_MODE (mode) @@ -16209,6 +16349,15 @@ arm_init_neon_builtins (void) } static void +arm_init_fp16_builtins (void) +{ + tree fp16_type = make_node (REAL_TYPE); + TYPE_PRECISION (fp16_type) = 16; + layout_type (fp16_type); + (*lang_hooks.types.register_builtin_type) (fp16_type, "__fp16"); +} + +static void arm_init_builtins (void) { arm_init_tls_builtins (); @@ -16218,6 +16367,56 @@ arm_init_builtins (void) if (TARGET_NEON) arm_init_neon_builtins (); + + if (arm_fp16_format) + arm_init_fp16_builtins (); +} + +/* Implement TARGET_INVALID_PARAMETER_TYPE. */ + +static const char * +arm_invalid_parameter_type (const_tree t) +{ + if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) + return N_("function parameters cannot have __fp16 type"); + return NULL; +} + +/* Implement TARGET_INVALID_PARAMETER_TYPE. */ + +static const char * +arm_invalid_return_type (const_tree t) +{ + if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) + return N_("functions cannot return __fp16 type"); + return NULL; +} + +/* Implement TARGET_PROMOTED_TYPE. */ + +static tree +arm_promoted_type (const_tree t) +{ + if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16) + return float_type_node; + return NULL_TREE; +} + +/* Implement TARGET_CONVERT_TO_TYPE. + Specifically, this hook implements the peculiarity of the ARM + half-precision floating-point C semantics that requires conversions between + __fp16 to or from double to do an intermediate conversion to float. 
*/ + +static tree +arm_convert_to_type (tree type, tree expr) +{ + tree fromtype = TREE_TYPE (expr); + if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type)) + return NULL_TREE; + if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32) + || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32)) + return convert (type, convert (float_type_node, expr)); + return NULL_TREE; } /* Errors in the source file can cause expand_expr to return const0_rtx @@ -18413,6 +18612,10 @@ arm_file_start (void) fpu_name = "neon"; set_float_abi_attributes = 1; break; + case FPUTYPE_NEON_FP16: + fpu_name = "neon-fp16"; + set_float_abi_attributes = 1; + break; default: abort(); } @@ -18466,6 +18669,11 @@ arm_file_start (void) val = 6; asm_fprintf (asm_out_file, "\t.eabi_attribute 30, %d\n", val); + /* Tag_ABI_FP_16bit_format. */ + if (arm_fp16_format) + asm_fprintf (asm_out_file, "\t.eabi_attribute 38, %d\n", + (int)arm_fp16_format); + if (arm_lang_output_object_attributes_hook) arm_lang_output_object_attributes_hook(); } @@ -18695,6 +18903,23 @@ arm_emit_vector_const (FILE *file, rtx x) return 1; } +/* Emit a fp16 constant appropriately padded to occupy a 4-byte word. + HFmode constant pool entries are actually loaded with ldr. */ +void +arm_emit_fp16_const (rtx c) +{ + REAL_VALUE_TYPE r; + long bits; + + REAL_VALUE_FROM_CONST_DOUBLE (r, c); + bits = real_to_target (NULL, &r, HFmode); + if (WORDS_BIG_ENDIAN) + assemble_zeros (2); + assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1); + if (!WORDS_BIG_ENDIAN) + assemble_zeros (2); +} + const char * arm_output_load_gr (rtx *operands) { @@ -19724,6 +19949,10 @@ arm_mangle_type (const_tree type) return "St9__va_list"; } + /* Half-precision float. 
*/ + if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16) + return "Dh"; + if (TREE_CODE (type) != VECTOR_TYPE) return NULL; diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index ee0eee6..2632a19 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -215,20 +215,25 @@ extern void (*arm_lang_output_object_attributes_hook)(void); /* FPU is has the full VFPv3/NEON register file of 32 D registers. */ #define TARGET_VFPD32 (arm_fp_model == ARM_FP_MODEL_VFP \ && (arm_fpu_arch == FPUTYPE_VFP3 \ - || arm_fpu_arch == FPUTYPE_NEON)) + || arm_fpu_arch == FPUTYPE_NEON \ + || arm_fpu_arch == FPUTYPE_NEON_FP16)) /* FPU supports VFPv3 instructions. */ #define TARGET_VFP3 (arm_fp_model == ARM_FP_MODEL_VFP \ && (arm_fpu_arch == FPUTYPE_VFP3D16 \ || TARGET_VFPD32)) +/* FPU supports NEON/VFP half-precision floating-point. */ +#define TARGET_NEON_FP16 (arm_fpu_arch == FPUTYPE_NEON_FP16) + /* FPU supports Neon instructions. The setting of this macro gets revealed via __ARM_NEON__ so we add extra guards upon TARGET_32BIT and TARGET_HARD_FLOAT to ensure that NEON instructions are available. */ #define TARGET_NEON (TARGET_32BIT && TARGET_HARD_FLOAT \ && arm_fp_model == ARM_FP_MODEL_VFP \ - && arm_fpu_arch == FPUTYPE_NEON) + && (arm_fpu_arch == FPUTYPE_NEON \ + || arm_fpu_arch == FPUTYPE_NEON_FP16)) /* "DSP" multiply instructions, eg. SMULxy. */ #define TARGET_DSP_MULTIPLY \ @@ -308,7 +313,9 @@ enum fputype /* VFPv3. */ FPUTYPE_VFP3, /* Neon. */ - FPUTYPE_NEON + FPUTYPE_NEON, + /* Neon with half-precision float extensions. */ + FPUTYPE_NEON_FP16 }; /* Recast the floating point class to be the floating point attribute. */ @@ -333,6 +340,21 @@ extern enum float_abi_type arm_float_abi; #define TARGET_DEFAULT_FLOAT_ABI ARM_FLOAT_ABI_SOFT #endif +/* Which __fp16 format to use. + The enumeration values correspond to the numbering for the + Tag_ABI_FP_16bit_format attribute. 
+ */ +enum arm_fp16_format_type +{ + ARM_FP16_FORMAT_NONE = 0, + ARM_FP16_FORMAT_IEEE = 1, + ARM_FP16_FORMAT_ALTERNATIVE = 2 +}; + +extern enum arm_fp16_format_type arm_fp16_format; +#define LARGEST_EXPONENT_IS_NORMAL(bits) \ + ((bits) == 16 && arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE) + /* Which ABI to use. */ enum arm_abi_type { diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 40e41c5..fc2ce3c9 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -158,7 +158,7 @@ ; Floating Point Unit. If we only have floating point emulation, then there ; is no point in scheduling the floating point insns. (Well, for best ; performance we should try and group them together). -(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp,vfpv3d16,vfpv3,neon" +(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp,vfpv3d16,vfpv3,neon,neon_fp16" (const (symbol_ref "arm_fpu_attr"))) ; LENGTH of an instruction (in bytes) @@ -3734,6 +3734,34 @@ ;; Fixed <--> Floating conversion insns +(define_expand "floatsihf2" + [(set (match_operand:HF 0 "general_operand" "") + (float:HF (match_operand:SI 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1 = gen_reg_rtx (SFmode); + expand_float (op1, operands[1], 0); + op1 = convert_to_mode (HFmode, op1, 0); + emit_move_insn (operands[0], op1); + DONE; + }" +) + +(define_expand "floatdihf2" + [(set (match_operand:HF 0 "general_operand" "") + (float:HF (match_operand:DI 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1 = gen_reg_rtx (SFmode); + expand_float (op1, operands[1], 0); + op1 = convert_to_mode (HFmode, op1, 0); + emit_move_insn (operands[0], op1); + DONE; + }" +) + (define_expand "floatsisf2" [(set (match_operand:SF 0 "s_register_operand" "") (float:SF (match_operand:SI 1 "s_register_operand" "")))] @@ -3758,6 +3786,30 @@ } ") +(define_expand "fix_trunchfsi2" + [(set (match_operand:SI 0 "general_operand" "") + (fix:SI (fix:HF (match_operand:HF 1 "general_operand" ""))))] + "TARGET_EITHER" + " + 
{ + rtx op1 = convert_to_mode (SFmode, operands[1], 0); + expand_fix (operands[0], op1, 0); + DONE; + }" +) + +(define_expand "fix_trunchfdi2" + [(set (match_operand:DI 0 "general_operand" "") + (fix:DI (fix:HF (match_operand:HF 1 "general_operand" ""))))] + "TARGET_EITHER" + " + { + rtx op1 = convert_to_mode (SFmode, operands[1], 0); + expand_fix (operands[0], op1, 0); + DONE; + }" +) + (define_expand "fix_truncsfsi2" [(set (match_operand:SI 0 "s_register_operand" "") (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" ""))))] @@ -3797,6 +3849,22 @@ "TARGET_32BIT && TARGET_HARD_FLOAT" "" ) + +/* DFmode -> HFmode conversions have to go through SFmode. */ +(define_expand "truncdfhf2" + [(set (match_operand:HF 0 "general_operand" "") + (float_truncate:HF + (match_operand:DF 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1; + op1 = convert_to_mode (SFmode, operands[1], 0); + op1 = convert_to_mode (HFmode, op1, 0); + emit_move_insn (operands[0], op1); + DONE; + }" +) ;; Zero and sign extension instructions. @@ -4660,6 +4728,21 @@ "TARGET_32BIT && TARGET_HARD_FLOAT" "" ) + +/* HFmode -> DFmode conversions have to go through SFmode. 
*/ +(define_expand "extendhfdf2" + [(set (match_operand:DF 0 "general_operand" "") + (float_extend:DF (match_operand:HF 1 "general_operand" "")))] + "TARGET_EITHER" + " + { + rtx op1; + op1 = convert_to_mode (SFmode, operands[1], 0); + op1 = convert_to_mode (DFmode, op1, 0); + emit_insn (gen_movdf (operands[0], op1)); + DONE; + }" +) ;; Move insns (including loads and stores) @@ -5808,6 +5891,107 @@ (set_attr "pool_range" "*,32,*,*,*,*")] ) +;; HFmode moves +(define_expand "movhf" + [(set (match_operand:HF 0 "general_operand" "") + (match_operand:HF 1 "general_operand" ""))] + "TARGET_EITHER" + " + if (TARGET_32BIT) + { + if (GET_CODE (operands[0]) == MEM) + operands[1] = force_reg (HFmode, operands[1]); + } + else /* TARGET_THUMB1 */ + { + if (can_create_pseudo_p ()) + { + if (GET_CODE (operands[0]) != REG) + operands[1] = force_reg (HFmode, operands[1]); + } + } + " +) + +(define_insn "*arm32_movhf" + [(set (match_operand:HF 0 "nonimmediate_operand" "=r,m,r,r") + (match_operand:HF 1 "general_operand" " m,r,r,F"))] + "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_NEON_FP16) + && ( s_register_operand (operands[0], HFmode) + || s_register_operand (operands[1], HFmode))" + "* + switch (which_alternative) + { + case 0: /* ARM register from memory */ + return \"ldr%(h%)\\t%0, %1\\t%@ __fp16\"; + case 1: /* memory from ARM register */ + return \"str%(h%)\\t%1, %0\\t%@ __fp16\"; + case 2: /* ARM register from ARM register */ + return \"mov%?\\t%0, %1\\t%@ __fp16\"; + case 3: /* ARM register from constant */ + { + REAL_VALUE_TYPE r; + long bits; + rtx ops[4]; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + bits = real_to_target (NULL, &r, HFmode); + ops[0] = operands[0]; + ops[1] = GEN_INT (bits); + ops[2] = GEN_INT (bits & 0xff00); + ops[3] = GEN_INT (bits & 0x00ff); + + if (arm_arch_thumb2) + output_asm_insn (\"movw%?\\t%0, %1\", ops); + else + output_asm_insn (\"mov%?\\t%0, %2\;orr%?\\t%0, %0, %3\", ops); + return \"\"; + } + default: + gcc_unreachable (); + } + " 
+ [(set_attr "conds" "unconditional") + (set_attr "type" "load1,store1,*,*") + (set_attr "length" "4,4,4,8") + (set_attr "predicable" "yes") + ] +) + +(define_insn "*thumb1_movhf" + [(set (match_operand:HF 0 "nonimmediate_operand" "=l,l,m,*r,*h") + (match_operand:HF 1 "general_operand" "l,mF,l,*h,*r"))] + "TARGET_THUMB1 + && ( s_register_operand (operands[0], HFmode) + || s_register_operand (operands[1], HFmode))" + "* + switch (which_alternative) + { + case 1: + { + rtx addr; + gcc_assert (GET_CODE(operands[1]) == MEM); + addr = XEXP (operands[1], 0); + if (GET_CODE (addr) == LABEL_REF + || (GET_CODE (addr) == CONST + && GET_CODE (XEXP (addr, 0)) == PLUS + && GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF + && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)) + { + /* Constant pool entry. */ + return \"ldr\\t%0, %1\"; + } + return \"ldrh\\t%0, %1\"; + } + case 2: return \"strh\\t%1, %0\"; + default: return \"mov\\t%0, %1\"; + } + " + [(set_attr "length" "2") + (set_attr "type" "*,load1,store1,*,*") + (set_attr "pool_range" "*,1020,*,*,*")] +) + (define_expand "movsf" [(set (match_operand:SF 0 "general_operand" "") (match_operand:SF 1 "general_operand" ""))] @@ -10674,6 +10858,7 @@ "TARGET_THUMB1" "* making_const_table = TRUE; + gcc_assert (GET_MODE_CLASS (GET_MODE (operands[0])) != MODE_FLOAT); assemble_integer (operands[0], 2, BITS_PER_WORD, 1); assemble_zeros (2); return \"\"; @@ -10686,19 +10871,23 @@ "TARGET_EITHER" "* { + rtx x = operands[0]; making_const_table = TRUE; - switch (GET_MODE_CLASS (GET_MODE (operands[0]))) + switch (GET_MODE_CLASS (GET_MODE (x))) { case MODE_FLOAT: - { - REAL_VALUE_TYPE r; - REAL_VALUE_FROM_CONST_DOUBLE (r, operands[0]); - assemble_real (r, GET_MODE (operands[0]), BITS_PER_WORD); - break; - } + if (GET_MODE (x) == HFmode) + arm_emit_fp16_const (x); + else + { + REAL_VALUE_TYPE r; + REAL_VALUE_FROM_CONST_DOUBLE (r, x); + assemble_real (r, GET_MODE (x), BITS_PER_WORD); + } + break; default: - assemble_integer (operands[0], 4, 
BITS_PER_WORD, 1); - mark_symbol_refs_as_used (operands[0]); + assemble_integer (x, 4, BITS_PER_WORD, 1); + mark_symbol_refs_as_used (x); break; } return \"\"; diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt index 6aca395..a39bb3a 100644 --- a/gcc/config/arm/arm.opt +++ b/gcc/config/arm/arm.opt @@ -78,6 +78,10 @@ Specify if floating point hardware should be used mfp= Target RejectNegative Joined Undocumented Var(target_fpe_name) +mfp16-format= +Target RejectNegative Joined Var(target_fp16_format_name) +Specify the __fp16 floating-point format + ;; Now ignored. mfpe Target RejectNegative Mask(FPE) Undocumented diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index 737f81c..eb18864 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -185,6 +185,61 @@ (set_attr "neg_pool_range" "*, 0,*,*,*,*,1008,*")] ) +;; HFmode moves +(define_insn "*movhf_vfp" + [(set (match_operand:HF 0 "nonimmediate_operand" "= t,Um,r,m,t,r,t,r,r") + (match_operand:HF 1 "general_operand" " Um, t,m,r,t,r,r,t,F"))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_NEON_FP16 + && ( s_register_operand (operands[0], HFmode) + || s_register_operand (operands[1], HFmode))" + "* + switch (which_alternative) + { + case 0: /* S register from memory */ + return \"vld1.16\\t{%z0}, %A1\"; + case 1: /* memory from S register */ + return \"vst1.16\\t{%z1}, %A0\"; + case 2: /* ARM register from memory */ + return \"ldrh\\t%0, %1\\t%@ __fp16\"; + case 3: /* memory from ARM register */ + return \"strh\\t%1, %0\\t%@ __fp16\"; + case 4: /* S register from S register */ + return \"fcpys\\t%0, %1\"; + case 5: /* ARM register from ARM register */ + return \"mov\\t%0, %1\\t%@ __fp16\"; + case 6: /* S register from ARM register */ + return \"fmsr\\t%0, %1\"; + case 7: /* ARM register from S register */ + return \"fmrs\\t%0, %1\"; + case 8: /* ARM register from constant */ + { + REAL_VALUE_TYPE r; + long bits; + rtx ops[4]; + + REAL_VALUE_FROM_CONST_DOUBLE (r, operands[1]); + bits = 
real_to_target (NULL, &r, HFmode); + ops[0] = operands[0]; + ops[1] = GEN_INT (bits); + ops[2] = GEN_INT (bits & 0xff00); + ops[3] = GEN_INT (bits & 0x00ff); + + if (arm_arch_thumb2) + output_asm_insn (\"movw\\t%0, %1\", ops); + else + output_asm_insn (\"mov\\t%0, %2\;orr\\t%0, %0, %3\", ops); + return \"\"; + } + default: + gcc_unreachable (); + } + " + [(set_attr "conds" "unconditional") + (set_attr "type" "*,*,load1,store1,fcpys,*,r_2_f,f_2_r,*") + (set_attr "neon_type" "neon_vld1_1_2_regs,neon_vst1_1_2_regs_vst2_2_regs,*,*,*,*,*,*,*") + (set_attr "length" "4,4,4,4,4,4,4,4,8")] +) + ;; SFmode moves ;; Disparage the w<->r cases because reloading an invalid address is @@ -736,6 +791,24 @@ (set_attr "type" "f_cvt")] ) +(define_insn "extendhfsf2" + [(set (match_operand:SF 0 "s_register_operand" "=t") + (float_extend:SF (match_operand:HF 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_NEON_FP16" + "vcvtb%?.f32.f16\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] +) + +(define_insn "truncsfhf2" + [(set (match_operand:HF 0 "s_register_operand" "=t") + (float_truncate:HF (match_operand:SF 1 "s_register_operand" "t")))] + "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_NEON_FP16" + "vcvtb%?.f16.f32\\t%0, %1" + [(set_attr "predicable" "yes") + (set_attr "type" "f_cvt")] +) + (define_insn "*truncsisf2_vfp" [(set (match_operand:SI 0 "s_register_operand" "=t") (fix:SI (fix:SF (match_operand:SF 1 "s_register_operand" "t"))))] |