author     Stam Markianos-Wright <stam.markianos-wright@arm.com>  2020-01-10 19:23:41 +0000
committer  Stam Markianos-Wright <stammark@gcc.gnu.org>  2020-01-10 19:23:41 +0000
commit     abbe1ed27355178223cd099fb73227f392416ea6
tree       48d44a41894a46d1897b6b55956de9f942fa6171
parent     337ea6b216afd412b85f3fda78a36467ffe4a817
config.gcc: Add arm_bf16.h.
2020-01-10 Stam Markianos-Wright <stam.markianos-wright@arm.com>
* config.gcc: Add arm_bf16.h.
* config/aarch64/aarch64-builtins.c
(aarch64_simd_builtin_std_type): Add BFmode.
(aarch64_init_simd_builtin_types): Define element types for vector
types.
(aarch64_init_bf16_types): New function.
(aarch64_general_init_builtins): Add aarch64_init_bf16_types function call.
* config/aarch64/aarch64-modes.def: Add BFmode and V4BF, V8BF vector
modes.
* config/aarch64/aarch64-simd-builtin-types.def: Add BF SIMD types.
* config/aarch64/aarch64-simd.md: Add BF vector types to NEON move
patterns.
* config/aarch64/aarch64.h (AARCH64_VALID_SIMD_DREG_MODE): Add V4BF.
(AARCH64_VALID_SIMD_QREG_MODE): Add V8BF.
* config/aarch64/aarch64.c
(aarch64_classify_vector_mode): Add support for BF types.
(aarch64_gimplify_va_arg_expr): Add support for BF types.
(aarch64_vq_mode): Add support for BF types.
(aarch64_simd_container_mode): Add support for BF types.
(aarch64_mangle_type): Add support for BF scalar type.
* config/aarch64/aarch64.md: Add BFmode to movhf pattern.
* config/aarch64/arm_bf16.h: New file.
* config/aarch64/arm_neon.h: Add arm_bf16.h and Bfloat vector types.
* config/aarch64/iterators.md: Add BF types to mode attributes.
(HFBF, GPF_TF_F16_MOV, VDMOV, VQMOV, VQMOV_NO2E, VALL_F16MOV): New.
2020-01-10 Stam Markianos-Wright <stam.markianos-wright@arm.com>
* g++.dg/abi/mangle-neon-aarch64.C: Add Bfloat SIMD types to test.
* g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C: New test.
* gcc.target/aarch64/bfloat16_scalar_1.c: New test.
* gcc.target/aarch64/bfloat16_scalar_2.c: New test.
* gcc.target/aarch64/bfloat16_scalar_3.c: New test.
* gcc.target/aarch64/bfloat16_scalar_4.c: New test.
* gcc.target/aarch64/bfloat16_simd_1.c: New test.
* gcc.target/aarch64/bfloat16_simd_2.c: New test.
* gcc.target/aarch64/bfloat16_simd_3.c: New test.
From-SVN: r280129
22 files changed, 829 insertions(+), 46 deletions(-)
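In user code the patch surfaces as one new scalar type and two new vector types. The sketch below is illustrative only and is not part of the commit: it assumes a compiler containing this patch and a bf16-enabled target (for example -march=armv8.2-a+bf16), and the function names are invented for the example. At this stage the types support only moves, loads and stores; no arithmetic or conversion intrinsics are defined yet.

    #include <arm_neon.h>   /* After this patch, arm_neon.h also pulls in arm_bf16.h.  */

    /* bfloat16_t is a typedef for the new __bf16 scalar type.  */
    bfloat16_t copy_scalar (bfloat16_t x)
    {
      volatile bfloat16_t t = x;  /* Forces a genuine BFmode store and load.  */
      return t;
    }

    /* 64-bit (V4BF, D register) and 128-bit (V8BF, Q register) vectors.  */
    bfloat16x4_t copy_dreg (bfloat16x4_t v) { return v; }
    bfloat16x8_t copy_qreg (bfloat16x8_t v) { return v; }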
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 75c87718..0f10afc 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,31 @@
+2020-01-10  Stam Markianos-Wright  <stam.markianos-wright@arm.com>
+
+	* config.gcc: Add arm_bf16.h.
+	* config/aarch64/aarch64-builtins.c
+	(aarch64_simd_builtin_std_type): Add BFmode.
+	(aarch64_init_simd_builtin_types): Define element types for vector
+	types.
+	(aarch64_init_bf16_types): New function.
+	(aarch64_general_init_builtins): Add aarch64_init_bf16_types function call.
+	* config/aarch64/aarch64-modes.def: Add BFmode and V4BF, V8BF vector
+	modes.
+	* config/aarch64/aarch64-simd-builtin-types.def: Add BF SIMD types.
+	* config/aarch64/aarch64-simd.md: Add BF vector types to NEON move
+	patterns.
+	* config/aarch64/aarch64.h (AARCH64_VALID_SIMD_DREG_MODE): Add V4BF.
+	(AARCH64_VALID_SIMD_QREG_MODE): Add V8BF.
+	* config/aarch64/aarch64.c
+	(aarch64_classify_vector_mode): Add support for BF types.
+	(aarch64_gimplify_va_arg_expr): Add support for BF types.
+	(aarch64_vq_mode): Add support for BF types.
+	(aarch64_simd_container_mode): Add support for BF types.
+	(aarch64_mangle_type): Add support for BF scalar type.
+	* config/aarch64/aarch64.md: Add BFmode to movhf pattern.
+	* config/aarch64/arm_bf16.h: New file.
+	* config/aarch64/arm_neon.h: Add arm_bf16.h and Bfloat vector types.
+	* config/aarch64/iterators.md: Add BF types to mode attributes.
+	(HFBF, GPF_TF_F16_MOV, VDMOV, VQMOV, VQMOV_NO2E, VALL_F16MOV): New.
+
 2020-01-10  Jason Merrill  <jason@redhat.com>
 
 	PR c++/93173 - incorrect tree sharing.
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 2c57c24..30bea51 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -315,7 +315,7 @@ m32c*-*-*)
 	;;
 aarch64*-*-*)
 	cpu_type=aarch64
-	extra_headers="arm_fp16.h arm_neon.h arm_acle.h arm_sve.h"
+	extra_headers="arm_fp16.h arm_neon.h arm_bf16.h arm_acle.h arm_sve.h"
 	c_target_objs="aarch64-c.o"
 	cxx_target_objs="aarch64-c.o"
 	d_target_objs="aarch64-d.o"
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 33d4c31..f0e0461 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -68,6 +68,9 @@
 #define hi_UP    E_HImode
 #define hf_UP    E_HFmode
 #define qi_UP    E_QImode
+#define bf_UP    E_BFmode
+#define v4bf_UP  E_V4BFmode
+#define v8bf_UP  E_V8BFmode
 #define UP(X) X##_UP
 
 #define SIMD_MAX_BUILTIN_ARGS 5
@@ -568,6 +571,10 @@ static tree aarch64_simd_intXI_type_node = NULL_TREE;
 tree aarch64_fp16_type_node = NULL_TREE;
 tree aarch64_fp16_ptr_type_node = NULL_TREE;
 
+/* Back-end node type for brain float (bfloat) types.  */
+tree aarch64_bf16_type_node = NULL_TREE;
+tree aarch64_bf16_ptr_type_node = NULL_TREE;
+
 /* Wrapper around add_builtin_function.  NAME is the name of the built-in
    function, TYPE is the function type, and CODE is the function subcode
    (relative to AARCH64_BUILTIN_GENERAL).  */
@@ -659,6 +666,8 @@ aarch64_simd_builtin_std_type (machine_mode mode,
       return float_type_node;
     case E_DFmode:
       return double_type_node;
+    case E_BFmode:
+      return aarch64_bf16_type_node;
     default:
       gcc_unreachable ();
     }
@@ -750,6 +759,10 @@ aarch64_init_simd_builtin_types (void)
   aarch64_simd_types[Float64x1_t].eltype = double_type_node;
   aarch64_simd_types[Float64x2_t].eltype = double_type_node;
 
+  /* Init Bfloat vector types with underlying __bf16 type.  */
+  aarch64_simd_types[Bfloat16x4_t].eltype = aarch64_bf16_type_node;
+  aarch64_simd_types[Bfloat16x8_t].eltype = aarch64_bf16_type_node;
+
   for (i = 0; i < nelts; i++)
     {
       tree eltype = aarch64_simd_types[i].eltype;
@@ -1059,6 +1072,19 @@ aarch64_init_fp16_types (void)
   aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node);
 }
 
+/* Initialize the backend REAL_TYPE type supporting bfloat types.  */
+static void
+aarch64_init_bf16_types (void)
+{
+  aarch64_bf16_type_node = make_node (REAL_TYPE);
+  TYPE_PRECISION (aarch64_bf16_type_node) = 16;
+  SET_TYPE_MODE (aarch64_bf16_type_node, BFmode);
+  layout_type (aarch64_bf16_type_node);
+
+  lang_hooks.types.register_builtin_type (aarch64_bf16_type_node, "__bf16");
+  aarch64_bf16_ptr_type_node = build_pointer_type (aarch64_bf16_type_node);
+}
+
 /* Pointer authentication builtins that will become NOP on legacy platform.
    Currently, these builtins are for internal use only (libgcc EH unwinder).  */
 
@@ -1214,6 +1240,8 @@ aarch64_general_init_builtins (void)
 
   aarch64_init_fp16_types ();
 
+  aarch64_init_bf16_types ();
+
   if (TARGET_SIMD)
     aarch64_init_simd_builtins ();
diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index 6cd8ed0..1eeb8d8 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -69,6 +69,13 @@ VECTOR_MODES (FLOAT, 16);     /*            V4SF V2DF.  */
 VECTOR_MODE (FLOAT, DF, 1);   /*                 V1DF.  */
 VECTOR_MODE (FLOAT, HF, 2);   /*                 V2HF.  */
 
+/* Bfloat16 modes.  */
+FLOAT_MODE (BF, 2, 0);
+ADJUST_FLOAT_FORMAT (BF, &arm_bfloat_half_format);
+
+VECTOR_MODE (FLOAT, BF, 4);   /*                 V4BF.  */
+VECTOR_MODE (FLOAT, BF, 8);   /*                 V8BF.  */
+
 /* Oct Int: 256-bit integer mode needed for 32-byte vector arguments.  */
 INT_MODE (OI, 32);
diff --git a/gcc/config/aarch64/aarch64-simd-builtin-types.def b/gcc/config/aarch64/aarch64-simd-builtin-types.def
index 76d4d13..e885755 100644
--- a/gcc/config/aarch64/aarch64-simd-builtin-types.def
+++ b/gcc/config/aarch64/aarch64-simd-builtin-types.def
@@ -50,3 +50,5 @@
   ENTRY (Float32x4_t, V4SF, none, 13)
   ENTRY (Float64x1_t, V1DF, none, 13)
   ENTRY (Float64x2_t, V2DF, none, 13)
+  ENTRY (Bfloat16x4_t, V4BF, none, 14)
+  ENTRY (Bfloat16x8_t, V8BF, none, 14)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index b82d7b6..2989096 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -19,8 +19,8 @@
 ;; <http://www.gnu.org/licenses/>.
 
 (define_expand "mov<mode>"
-  [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
-	(match_operand:VALL_F16 1 "general_operand"))]
+  [(set (match_operand:VALL_F16MOV 0 "nonimmediate_operand")
+	(match_operand:VALL_F16MOV 1 "general_operand"))]
   "TARGET_SIMD"
   "
   /* Force the operand into a register if it is not an
@@ -101,10 +101,10 @@
   [(set_attr "type" "neon_dup<q>")]
 )
 
-(define_insn "*aarch64_simd_mov<VD:mode>"
-  [(set (match_operand:VD 0 "nonimmediate_operand"
+(define_insn "*aarch64_simd_mov<VDMOV:mode>"
+  [(set (match_operand:VDMOV 0 "nonimmediate_operand"
	  "=w, m, m, w, ?r, ?w, ?r, w")
-	(match_operand:VD 1 "general_operand"
+	(match_operand:VDMOV 1 "general_operand"
	  "m, Dz, w, w, w, r, r, Dn"))]
   "TARGET_SIMD
    && (register_operand (operands[0], <MODE>mode)
@@ -129,10 +129,10 @@
		     mov_reg, neon_move<q>")]
 )
 
-(define_insn "*aarch64_simd_mov<VQ:mode>"
-  [(set (match_operand:VQ 0 "nonimmediate_operand"
+(define_insn "*aarch64_simd_mov<VQMOV:mode>"
+  [(set (match_operand:VQMOV 0 "nonimmediate_operand"
	  "=w, Umn, m, w, ?r, ?w, ?r, w")
-	(match_operand:VQ 1 "general_operand"
+	(match_operand:VQMOV 1 "general_operand"
	  "m, Dz, w, w, w, r, r, Dn"))]
   "TARGET_SIMD
    && (register_operand (operands[0], <MODE>mode)
@@ -234,8 +234,8 @@
 
 (define_split
-  [(set (match_operand:VQ 0 "register_operand" "")
-	(match_operand:VQ 1 "register_operand" ""))]
+  [(set (match_operand:VQMOV 0 "register_operand" "")
+	(match_operand:VQMOV 1 "register_operand" ""))]
   "TARGET_SIMD && reload_completed
    && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
@@ -246,8 +246,8 @@
 })
 
 (define_split
-  [(set (match_operand:VQ 0 "register_operand" "")
-	(match_operand:VQ 1 "register_operand" ""))]
+  [(set (match_operand:VQMOV 0 "register_operand" "")
+	(match_operand:VQMOV 1 "register_operand" ""))]
   "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
      || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
@@ -258,8 +258,8 @@
 })
 
 (define_expand "@aarch64_split_simd_mov<mode>"
-  [(set (match_operand:VQ 0)
-	(match_operand:VQ 1))]
+  [(set (match_operand:VQMOV 0)
+	(match_operand:VQMOV 1))]
   "TARGET_SIMD"
   {
     rtx dst = operands[0];
@@ -295,8 +295,8 @@
 (define_insn "aarch64_simd_mov_from_<mode>low"
   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
	(vec_select:<VHALF>
-	  (match_operand:VQ 1 "register_operand" "w")
-	  (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
+	  (match_operand:VQMOV 1 "register_operand" "w")
+	  (match_operand:VQMOV 2 "vect_par_cnst_lo_half" "")))]
   "TARGET_SIMD && reload_completed"
   "umov\t%0, %1.d[0]"
   [(set_attr "type" "neon_to_gp<q>")
@@ -306,8 +306,8 @@
 (define_insn "aarch64_simd_mov_from_<mode>high"
   [(set (match_operand:<VHALF> 0 "register_operand" "=r")
	(vec_select:<VHALF>
-	  (match_operand:VQ 1 "register_operand" "w")
-	  (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
+	  (match_operand:VQMOV 1 "register_operand" "w")
+	  (match_operand:VQMOV 2 "vect_par_cnst_hi_half" "")))]
   "TARGET_SIMD && reload_completed"
   "umov\t%0, %1.d[1]"
   [(set_attr "type" "neon_to_gp<q>")
@@ -1471,8 +1471,8 @@
 ;; On big-endian this is { zeroes, operand }
 
 (define_insn "move_lo_quad_internal_<mode>"
-  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
-	(vec_concat:VQ_NO2E
+  [(set (match_operand:VQMOV_NO2E 0 "register_operand" "=w,w,w")
+	(vec_concat:VQMOV_NO2E
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")
	  (vec_duplicate:<VHALF> (const_int 0))))]
   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
@@ -1501,8 +1501,8 @@
 )
 
 (define_insn "move_lo_quad_internal_be_<mode>"
-  [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
-	(vec_concat:VQ_NO2E
+  [(set (match_operand:VQMOV_NO2E 0 "register_operand" "=w,w,w")
+	(vec_concat:VQMOV_NO2E
	  (vec_duplicate:<VHALF> (const_int 0))
	  (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
   "TARGET_SIMD && BYTES_BIG_ENDIAN"
@@ -1531,8 +1531,8 @@
 )
 
 (define_expand "move_lo_quad_<mode>"
-  [(match_operand:VQ 0 "register_operand")
-   (match_operand:VQ 1 "register_operand")]
+  [(match_operand:VQMOV 0 "register_operand")
+   (match_operand:VQMOV 1 "register_operand")]
   "TARGET_SIMD"
 {
   if (BYTES_BIG_ENDIAN)
@@ -1549,11 +1549,11 @@
 ;; For big-endian this is { operand1, operand2 }
 
 (define_insn "aarch64_simd_move_hi_quad_<mode>"
-  [(set (match_operand:VQ 0 "register_operand" "+w,w")
-        (vec_concat:VQ
+  [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
+        (vec_concat:VQMOV
          (vec_select:<VHALF>
                (match_dup 0)
-               (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
+               (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))
	  (match_operand:<VHALF> 1 "register_operand" "w,r")))]
   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
   "@
@@ -1563,12 +1563,12 @@
 )
 
 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
-  [(set (match_operand:VQ 0 "register_operand" "+w,w")
-        (vec_concat:VQ
+  [(set (match_operand:VQMOV 0 "register_operand" "+w,w")
+        (vec_concat:VQMOV
	  (match_operand:<VHALF> 1 "register_operand" "w,r")
          (vec_select:<VHALF>
                (match_dup 0)
-               (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
+               (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))))]
   "TARGET_SIMD && BYTES_BIG_ENDIAN"
   "@
    ins\\t%0.d[1], %1.d[0]
@@ -1577,7 +1577,7 @@
 )
 
 (define_expand "move_hi_quad_<mode>"
-  [(match_operand:VQ 0 "register_operand")
+  [(match_operand:VQMOV 0 "register_operand")
   (match_operand:<VHALF> 1 "register_operand")]
  "TARGET_SIMD"
 {
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 4288aaa..47eb0bb 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1693,6 +1693,7 @@ aarch64_classify_vector_mode (machine_mode mode)
     case E_V2SImode:
     /* ...E_V1DImode doesn't exist.  */
     case E_V4HFmode:
+    case E_V4BFmode:
     case E_V2SFmode:
     case E_V1DFmode:
     /* 128-bit Advanced SIMD vectors.  */
@@ -1701,6 +1702,7 @@
     case E_V4SImode:
     case E_V2DImode:
     case E_V8HFmode:
+    case E_V8BFmode:
     case E_V4SFmode:
     case E_V2DFmode:
       return TARGET_SIMD ? VEC_ADVSIMD : 0;
@@ -15596,6 +15598,10 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
	  field_t = aarch64_fp16_type_node;
	  field_ptr_t = aarch64_fp16_ptr_type_node;
	  break;
+	case E_BFmode:
+	  field_t = aarch64_bf16_type_node;
+	  field_ptr_t = aarch64_bf16_ptr_type_node;
+	  break;
	case E_V2SImode:
	case E_V4SImode:
	  {
@@ -16109,6 +16115,8 @@ aarch64_vq_mode (scalar_mode mode)
       return V4SFmode;
     case E_HFmode:
       return V8HFmode;
+    case E_BFmode:
+      return V8BFmode;
     case E_SImode:
       return V4SImode;
     case E_HImode:
@@ -16144,6 +16152,8 @@ aarch64_simd_container_mode (scalar_mode mode, poly_int64 width)
	  return V2SFmode;
	case E_HFmode:
	  return V4HFmode;
+	case E_BFmode:
+	  return V4BFmode;
	case E_SImode:
	  return V2SImode;
	case E_HImode:
@@ -16258,9 +16268,14 @@ aarch64_mangle_type (const_tree type)
   if (lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
     return "St9__va_list";
 
-  /* Half-precision float.  */
+  /* Half-precision floating point types.  */
   if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
-    return "Dh";
+    {
+      if (TYPE_MODE (type) == BFmode)
+	return "u6__bf16";
+      else
+	return "Dh";
+    }
 
   /* Mangle AArch64-specific internal types.  TYPE_NAME is non-NULL_TREE for
      builtin types.  */
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index af5b00c..eb1eca4 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -1136,13 +1136,13 @@ extern enum aarch64_code_model aarch64_cmodel;
 #define AARCH64_VALID_SIMD_DREG_MODE(MODE) \
   ((MODE) == V2SImode || (MODE) == V4HImode || (MODE) == V8QImode \
    || (MODE) == V2SFmode || (MODE) == V4HFmode || (MODE) == DImode \
-   || (MODE) == DFmode)
+   || (MODE) == DFmode || (MODE) == V4BFmode)
 
 /* Modes valid for AdvSIMD Q registers.  */
 #define AARCH64_VALID_SIMD_QREG_MODE(MODE) \
   ((MODE) == V4SImode || (MODE) == V8HImode || (MODE) == V16QImode \
    || (MODE) == V4SFmode || (MODE) == V8HFmode || (MODE) == V2DImode \
-   || (MODE) == V2DFmode)
+   || (MODE) == V2DFmode || (MODE) == V8BFmode)
 
 #define ENDIAN_LANE_N(NUNITS, N) \
   (BYTES_BIG_ENDIAN ? NUNITS - 1 - N : N)
@@ -1190,6 +1190,11 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
 extern tree aarch64_fp16_type_node;
 extern tree aarch64_fp16_ptr_type_node;
 
+/* This type is the user-visible __bf16, and a pointer to that type.  Defined
+   in aarch64-builtins.c.  */
+extern tree aarch64_bf16_type_node;
+extern tree aarch64_bf16_ptr_type_node;
+
 /* The generic unwind code in libgcc does not initialize the frame pointer.
    So in order to unwind a function using a frame pointer, the very first
    function that is unwound must save the frame pointer.  That way the frame
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index a144e24..c0b7010 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1310,8 +1310,8 @@
 })
 
 (define_expand "mov<mode>"
-  [(set (match_operand:GPF_TF_F16 0 "nonimmediate_operand")
-	(match_operand:GPF_TF_F16 1 "general_operand"))]
+  [(set (match_operand:GPF_TF_F16_MOV 0 "nonimmediate_operand")
+	(match_operand:GPF_TF_F16_MOV 1 "general_operand"))]
   ""
   {
     if (!TARGET_FLOAT)
@@ -1327,11 +1327,11 @@
   }
 )
 
-(define_insn "*movhf_aarch64"
-  [(set (match_operand:HF 0 "nonimmediate_operand" "=w,w  , w,?r,w,w  ,w  ,w,m,r,m ,r")
-	(match_operand:HF 1 "general_operand"      "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))]
-  "TARGET_FLOAT && (register_operand (operands[0], HFmode)
-    || aarch64_reg_or_fp_zero (operands[1], HFmode))"
+(define_insn "*mov<mode>_aarch64"
+  [(set (match_operand:HFBF 0 "nonimmediate_operand" "=w,w  , w,?r,w,w  ,w  ,w,m,r,m ,r")
+	(match_operand:HFBF 1 "general_operand"      "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))]
+  "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
+    || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
   "@
    movi\\t%0.4h, #0
    fmov\\t%h0, %w1
diff --git a/gcc/config/aarch64/arm_bf16.h b/gcc/config/aarch64/arm_bf16.h
new file mode 100644
index 0000000..3759c0d
--- /dev/null
+++ b/gcc/config/aarch64/arm_bf16.h
@@ -0,0 +1,32 @@
+/* Arm BF16 intrinsics include file.
+
+   Copyright (C) 2019-2020 Free Software Foundation, Inc.
+   Contributed by Arm.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _AARCH64_BF16_H_
+#define _AARCH64_BF16_H_
+
+typedef __bf16 bfloat16_t;
+
+#endif
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index c742534..eaba156 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -73,6 +73,9 @@ typedef __fp16 float16_t;
 typedef float float32_t;
 typedef double float64_t;
 
+typedef __Bfloat16x4_t bfloat16x4_t;
+typedef __Bfloat16x8_t bfloat16x8_t;
+
 typedef struct int8x8x2_t
 {
   int8x8_t val[2];
@@ -34606,6 +34609,8 @@ vrnd64xq_f64 (float64x2_t __a)
 
 #pragma GCC pop_options
 
+#include "arm_bf16.h"
+
 #undef __aarch64_vget_lane_any
 
 #undef __aarch64_vdup_lane_any
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 058c6bc..b9843b8 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -57,9 +57,16 @@
 ;; Iterator for all scalar floating point modes (HF, SF, DF)
 (define_mode_iterator GPF_HF [HF SF DF])
 
+;; Iterator for all 16-bit scalar floating point modes (HF, BF)
+(define_mode_iterator HFBF [HF BF])
+
 ;; Iterator for all scalar floating point modes (HF, SF, DF and TF)
 (define_mode_iterator GPF_TF_F16 [HF SF DF TF])
 
+;; Iterator for all scalar floating point modes suitable for moving, including
+;; special BF type (HF, SF, DF, TF and BF)
+(define_mode_iterator GPF_TF_F16_MOV [HF BF SF DF TF])
+
 ;; Double vector modes.
 (define_mode_iterator VDF [V2SF V4HF])
@@ -79,6 +86,9 @@
 ;; Double vector modes.
 (define_mode_iterator VD [V8QI V4HI V4HF V2SI V2SF])
 
+;; Double vector modes suitable for moving.  Includes BFmode.
+(define_mode_iterator VDMOV [V8QI V4HI V4HF V4BF V2SI V2SF])
+
 ;; All modes stored in registers d0-d31.
 (define_mode_iterator DREG [V8QI V4HI V4HF V2SI V2SF DF])
@@ -97,6 +107,12 @@
 ;; Copy of the above.
 (define_mode_iterator VQ2 [V16QI V8HI V4SI V2DI V8HF V4SF V2DF])
 
+;; Quad vector modes suitable for moving.  Includes BFmode.
+(define_mode_iterator VQMOV [V16QI V8HI V4SI V2DI V8HF V8BF V4SF V2DF])
+
+;; VQMOV without 2-element modes.
+(define_mode_iterator VQMOV_NO2E [V16QI V8HI V4SI V8HF V8BF V4SF])
+
 ;; Quad integer vector modes.
 (define_mode_iterator VQ_I [V16QI V8HI V4SI V2DI])
@@ -160,6 +176,11 @@
 (define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
				V4HF V8HF V2SF V4SF V2DF])
 
+;; All Advanced SIMD modes suitable for moving, loading, and storing,
+;; including special Bfloat vector types.
+(define_mode_iterator VALL_F16MOV [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
+				   V4HF V8HF V4BF V8BF V2SF V4SF V2DF])
+
 ;; The VALL_F16 modes except the 128-bit 2-element ones.
 (define_mode_iterator VALL_F16_NO_V2Q [V8QI V16QI V4HI V8HI V2SI V4SI
				V4HF V8HF V2SF V4SF])
@@ -873,6 +894,7 @@
			  (V2SI "2") (V4SI "4")
				     (V2DI "2")
			  (V4HF "4") (V8HF "8")
+			  (V4BF "4") (V8BF "8")
			  (V2SF "2") (V4SF "4")
			  (V1DF "1") (V2DF "2")
			  (DI "1") (DF "1")])
@@ -1013,7 +1035,8 @@
		       (V8HF "16b") (V2SF "8b")
		       (V4SF "16b") (V2DF "16b")
		       (DI "8b") (DF "8b")
-		       (SI "8b") (SF "8b")])
+		       (SI "8b") (SF "8b")
+		       (V4BF "8b") (V8BF "16b")])
 
 ;; Define element mode for each vector mode.
 (define_mode_attr VEL [(V8QI "QI") (V16QI "QI")
@@ -1093,12 +1116,13 @@
			(V2SI "SI") (V4SI "V2SI")
			(V2DI "DI") (V2SF "SF")
			(V4SF "V2SF") (V4HF "V2HF")
-			(V8HF "V4HF") (V2DF "DF")])
+			(V8HF "V4HF") (V2DF "DF")
+			(V8BF "V4BF")])
 
 ;; Half modes of all vector modes, in lower-case.
 (define_mode_attr Vhalf [(V8QI "v4qi") (V16QI "v8qi")
			 (V4HI "v2hi") (V8HI "v4hi")
-			 (V8HF "v4hf")
+			 (V8HF "v4hf") (V8BF "v4bf")
			 (V2SI "si") (V4SI "v2si")
			 (V2DI "di") (V2SF "sf")
			 (V4SF "v2sf") (V2DF "df")])
@@ -1404,6 +1428,7 @@
		     (V2SI "") (V4SI "_q")
		     (DI "") (V2DI "_q")
		     (V4HF "") (V8HF "_q")
+		     (V4BF "") (V8BF "_q")
		     (V2SF "") (V4SF "_q")
			       (V2DF "_q")
		     (QI "") (HI "") (SI "") (DI "") (HF "") (SF "") (DF "")])
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 948eb74..45a04f1 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,15 @@
+2020-01-10  Stam Markianos-Wright  <stam.markianos-wright@arm.com>
+
+	* g++.dg/abi/mangle-neon-aarch64.C: Add Bfloat SIMD types to test.
+	* g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C: New test.
+	* gcc.target/aarch64/bfloat16_scalar_1.c: New test.
+	* gcc.target/aarch64/bfloat16_scalar_2.c: New test.
+	* gcc.target/aarch64/bfloat16_scalar_3.c: New test.
+	* gcc.target/aarch64/bfloat16_scalar_4.c: New test.
+	* gcc.target/aarch64/bfloat16_simd_1.c: New test.
+	* gcc.target/aarch64/bfloat16_simd_2.c: New test.
+	* gcc.target/aarch64/bfloat16_simd_3.c: New test.
+
 2020-01-10  Richard Sandiford  <richard.sandiford@arm.com>
 
	* gcc.target/aarch64/sve/struct_vect_1.c (N): Protect with #ifndef.
diff --git a/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C b/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C
index 5740c02..50c1452 100644
--- a/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C
+++ b/gcc/testsuite/g++.dg/abi/mangle-neon-aarch64.C
@@ -14,6 +14,7 @@ void f4 (uint16x4_t a) {}
 void f5 (uint32x2_t a) {}
 void f23 (uint64x1_t a) {}
 void f61 (float16x4_t a) {}
+void f62 (bfloat16x4_t a) {}
 void f6 (float32x2_t a) {}
 void f7 (poly8x8_t a) {}
 void f8 (poly16x4_t a) {}
@@ -27,6 +28,7 @@ void f14 (uint16x8_t a) {}
 void f15 (uint32x4_t a) {}
 void f16 (uint64x2_t a) {}
 void f171 (float16x8_t a) {}
+void f172 (bfloat16x8_t a) {}
 void f17 (float32x4_t a) {}
 void f18 (float64x2_t a) {}
 void f19 (poly8x16_t a) {}
@@ -45,6 +47,7 @@ void g1 (int8x16_t, int8x16_t) {}
 // { dg-final { scan-assembler "_Z2f512__Uint32x2_t:" } }
 // { dg-final { scan-assembler "_Z3f2312__Uint64x1_t:" } }
 // { dg-final { scan-assembler "_Z3f6113__Float16x4_t:" } }
+// { dg-final { scan-assembler "_Z3f6214__Bfloat16x4_t:" } }
 // { dg-final { scan-assembler "_Z2f613__Float32x2_t:" } }
 // { dg-final { scan-assembler "_Z2f711__Poly8x8_t:" } }
 // { dg-final { scan-assembler "_Z2f812__Poly16x4_t:" } }
@@ -57,6 +60,7 @@
 // { dg-final { scan-assembler "_Z3f1512__Uint32x4_t:" } }
 // { dg-final { scan-assembler "_Z3f1612__Uint64x2_t:" } }
 // { dg-final { scan-assembler "_Z4f17113__Float16x8_t:" } }
+// { dg-final { scan-assembler "_Z4f17214__Bfloat16x8_t:" } }
 // { dg-final { scan-assembler "_Z3f1713__Float32x4_t:" } }
 // { dg-final { scan-assembler "_Z3f1813__Float64x2_t:" } }
 // { dg-final { scan-assembler "_Z3f1912__Poly8x16_t:" } }
diff --git a/gcc/testsuite/g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C b/gcc/testsuite/g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C
new file mode 100644
index 0000000..5426a18
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/arm-bf16/bf16-mangle-aarch64-1.C
@@ -0,0 +1,13 @@
+/* { dg-do compile { target aarch64*-*-* } } */
+
+/* Test mangling */
+
+/* { dg-final { scan-assembler "\t.global\t_Z1fPu6__bf16" } } */
+void f (__bf16 *x) { }
+
+/* { dg-final { scan-assembler "\t.global\t_Z1gPu6__bf16S_" } } */
+void g (__bf16 *x, __bf16 *y) { }
+
+/* { dg-final { scan-assembler "\t.global\t_ZN1SIu6__bf16u6__bf16E1iE" } } */
+template <typename T, typename U> struct S { static int i; };
+template <> int S<__bf16, __bf16>::i = 3;
diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_1.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_1.c
new file mode 100644
index 0000000..ef43766
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_1.c
@@ -0,0 +1,102 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-O3 --save-temps -std=gnu90" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_bf16.h>
+
+/*
+**stacktest1:
+**	sub	sp, sp, #16
+**	str	h0, \[sp, 14\]
+**	ldr	h0, \[sp, 14\]
+**	add	sp, sp, 16
+**	ret
+*/
+bfloat16_t stacktest1 (bfloat16_t __a)
+{
+  volatile bfloat16_t b = __a;
+  return b;
+}
+
+/*
+**bfloat_mov_ww:
+**	mov	v1.h\[0\], v2.h\[0\]
+**	ret
+*/
+void bfloat_mov_ww (void)
+{
+  register bfloat16_t x asm ("h2");
+  register bfloat16_t y asm ("h1");
+  asm volatile ("" : "=w" (x));
+  y = x;
+  asm volatile ("" :: "w" (y));
+}
+
+/*
+**bfloat_mov_rw:
+**	dup	v1.4h, w1
+**	ret
+*/
+void bfloat_mov_rw (void)
+{
+  register bfloat16_t x asm ("w1");
+  register bfloat16_t y asm ("h1");
+  asm volatile ("" : "=r" (x));
+  y = x;
+  asm volatile ("" :: "w" (y));
+}
+
+/*
+**bfloat_mov_wr:
+**	umov	w1, v1.h\[0\]
+**	ret
+*/
+void bfloat_mov_wr (void)
+{
+  register bfloat16_t x asm ("h1");
+  register bfloat16_t y asm ("w1");
+  asm volatile ("" : "=w" (x));
+  y = x;
+  asm volatile ("" :: "r" (y));
+}
+
+/*
+**bfloat_mov_rr:
+**	mov	w1, w2
+**	ret
+*/
+void bfloat_mov_rr (void)
+{
+  register bfloat16_t x asm ("w2");
+  register bfloat16_t y asm ("w1");
+  asm volatile ("" : "=r" (x));
+  y = x;
+  asm volatile ("" :: "r" (y));
+}
+
+/*
+**bfloat_mov_rm:
+**	strh	w2, \[x0\]
+**	ret
+*/
+void bfloat_mov_rm (bfloat16_t *ptr)
+{
+  register bfloat16_t x asm ("w2");
+  asm volatile ("" : "=r" (x));
+  *ptr = x;
+}
+
+/*
+**bfloat_mov_mr:
+**	ldrh	w2, \[x0\]
+**	ret
+*/
+void bfloat_mov_mr (bfloat16_t *ptr)
+{
+  register bfloat16_t y asm ("w2");
+  y = *ptr;
+  asm volatile ("" :: "r" (y));
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_2.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_2.c
new file mode 100644
index 0000000..df8e751
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_2.c
@@ -0,0 +1,106 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-additional-options "-march=armv8.2-a -O3 --save-temps -std=gnu90" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_bf16.h>
+
+#pragma GCC push_options
+#pragma GCC target ("+bf16")
+
+/*
+**stacktest1:
+**	sub	sp, sp, #16
+**	str	h0, \[sp, 14\]
+**	ldr	h0, \[sp, 14\]
+**	add	sp, sp, 16
+**	ret
+*/
+bfloat16_t stacktest1 (bfloat16_t __a)
+{
+  volatile bfloat16_t b = __a;
+  return b;
+}
+
+/*
+**bfloat_mov_ww:
+**	mov	v1.h\[0\], v2.h\[0\]
+**	ret
+*/
+void bfloat_mov_ww (void)
+{
+  register bfloat16_t x asm ("h2");
+  register bfloat16_t y asm ("h1");
+  asm volatile ("" : "=w" (x));
+  y = x;
+  asm volatile ("" :: "w" (y));
+}
+
+/*
+**bfloat_mov_rw:
+**	dup	v1.4h, w1
+**	ret
+*/
+void bfloat_mov_rw (void)
+{
+  register bfloat16_t x asm ("w1");
+  register bfloat16_t y asm ("h1");
+  asm volatile ("" : "=r" (x));
+  y = x;
+  asm volatile ("" :: "w" (y));
+}
+
+/*
+**bfloat_mov_wr:
+**	umov	w1, v1.h\[0\]
+**	ret
+*/
+void bfloat_mov_wr (void)
+{
+  register bfloat16_t x asm ("h1");
+  register bfloat16_t y asm ("w1");
+  asm volatile ("" : "=w" (x));
+  y = x;
+  asm volatile ("" :: "r" (y));
+}
+
+/*
+**bfloat_mov_rr:
+**	mov	w1, w2
+**	ret
+*/
+void bfloat_mov_rr (void)
+{
+  register bfloat16_t x asm ("w2");
+  register bfloat16_t y asm ("w1");
+  asm volatile ("" : "=r" (x));
+  y = x;
+  asm volatile ("" :: "r" (y));
+}
+
+/*
+**bfloat_mov_rm:
+**	strh	w2, \[x0\]
+**	ret
+*/
+void bfloat_mov_rm (bfloat16_t *ptr)
+{
+  register bfloat16_t x asm ("w2");
+  asm volatile ("" : "=r" (x));
+  *ptr = x;
+}
+
+/*
+**bfloat_mov_mr:
+**	ldrh	w2, \[x0\]
+**	ret
+*/
+void bfloat_mov_mr (bfloat16_t *ptr)
+{
+  register bfloat16_t y asm ("w2");
+  y = *ptr;
+  asm volatile ("" :: "r" (y));
+}
+
+#pragma GCC pop_options
+
diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_3.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_3.c
new file mode 100644
index 0000000..5d7a431
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_3.c
@@ -0,0 +1,101 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-additional-options "-march=armv8.2-a -O3 --save-temps -std=gnu90" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_bf16.h>
+
+/*
+**stacktest1:
+**	sub	sp, sp, #16
+**	str	h0, \[sp, 14\]
+**	ldr	h0, \[sp, 14\]
+**	add	sp, sp, 16
+**	ret
+*/
+bfloat16_t stacktest1 (bfloat16_t __a)
+{
+  volatile bfloat16_t b = __a;
+  return b;
+}
+
+/*
+**bfloat_mov_ww:
+**	mov	v1.h\[0\], v2.h\[0\]
+**	ret
+*/
+void bfloat_mov_ww (void)
+{
+  register bfloat16_t x asm ("h2");
+  register bfloat16_t y asm ("h1");
+  asm volatile ("" : "=w" (x));
+  y = x;
+  asm volatile ("" :: "w" (y));
+}
+
+/*
+**bfloat_mov_rw:
+**	dup	v1.4h, w1
+**	ret
+*/
+void bfloat_mov_rw (void)
+{
+  register bfloat16_t x asm ("w1");
+  register bfloat16_t y asm ("h1");
+  asm volatile ("" : "=r" (x));
+  y = x;
+  asm volatile ("" :: "w" (y));
+}
+
+/*
+**bfloat_mov_wr:
+**	umov	w1, v1.h\[0\]
+**	ret
+*/
+void bfloat_mov_wr (void)
+{
+  register bfloat16_t x asm ("h1");
+  register bfloat16_t y asm ("w1");
+  asm volatile ("" : "=w" (x));
+  y = x;
+  asm volatile ("" :: "r" (y));
+}
+
+/*
+**bfloat_mov_rr:
+**	mov	w1, w2
+**	ret
+*/
+void bfloat_mov_rr (void)
+{
+  register bfloat16_t x asm ("w2");
+  register bfloat16_t y asm ("w1");
+  asm volatile ("" : "=r" (x));
+  y = x;
+  asm volatile ("" :: "r" (y));
+}
+
+/*
+**bfloat_mov_rm:
+**	strh	w2, \[x0\]
+**	ret
+*/
+void bfloat_mov_rm (bfloat16_t *ptr)
+{
+  register bfloat16_t x asm ("w2");
+  asm volatile ("" : "=r" (x));
+  *ptr = x;
+}
+
+/*
+**bfloat_mov_mr:
+**	ldrh	w2, \[x0\]
+**	ret
+*/
+void bfloat_mov_mr (bfloat16_t *ptr)
+{
+  register bfloat16_t y asm ("w2");
+  y = *ptr;
+  asm volatile ("" :: "r" (y));
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_4.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_4.c
new file mode 100644
index 0000000..b812011
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_scalar_4.c
@@ -0,0 +1,16 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-std=c99 -pedantic-errors -O3 --save-temps" } */
+
+#include <arm_bf16.h>
+
+_Complex bfloat16_t stacktest1 (_Complex bfloat16_t __a)
+{
+  volatile _Complex bfloat16_t b = __a;
+  return b;
+}
+
+/* { dg-error {ISO C does not support plain 'complex' meaning 'double complex'} "" { target *-*-* } 8 } */
+/* { dg-error {expected '=', ',', ';', 'asm' or '__attribute__' before 'stacktest1'} "" { target *-*-* } 8 } */
+
diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_1.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_1.c
new file mode 100644
index 0000000..6cad557
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_1.c
@@ -0,0 +1,93 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-add-options arm_v8_2a_bf16_neon } */
+/* { dg-additional-options "-O3 --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_neon.h>
+
+/*
+**stacktest1:
+**	sub	sp, sp, #16
+**	str	h0, \[sp, 14\]
+**	ldr	h0, \[sp, 14\]
+**	add	sp, sp, 16
+**	ret
+*/
+bfloat16_t stacktest1 (bfloat16_t __a)
+{
+  volatile bfloat16_t b = __a;
+  return b;
+}
+
+/*
+**stacktest2:
+**	sub	sp, sp, #16
+**	str	d0, \[sp, 8\]
+**	ldr	d0, \[sp, 8\]
+**	add	sp, sp, 16
+**	ret
+*/
+bfloat16x4_t stacktest2 (bfloat16x4_t __a)
+{
+  volatile bfloat16x4_t b = __a;
+  return b;
+}
+
+/*
+**stacktest3:
+**	sub	sp, sp, #16
+**	str	q0, \[sp\]
+**	ldr	q0, \[sp\]
+**	add	sp, sp, 16
+**	ret
+*/
+bfloat16x8_t stacktest3 (bfloat16x8_t __a)
+{
+  volatile bfloat16x8_t b = __a;
+  return b;
+}
+
+/* Test compilation of __attribute__ vectors of 8, 16, 32, etc. BFloats.  */
+typedef bfloat16_t v8bf __attribute__((vector_size(16)));
+typedef bfloat16_t v16bf __attribute__((vector_size(32)));
+typedef bfloat16_t v32bf __attribute__((vector_size(64)));
+typedef bfloat16_t v64bf __attribute__((vector_size(128)));
+typedef bfloat16_t v128bf __attribute__((vector_size(256)));
+
+v8bf stacktest4 (v8bf __a)
+{
+  volatile v8bf b = __a;
+  return b;
+}
+
+v16bf stacktest5 (v16bf __a)
+{
+  volatile v16bf b = __a;
+  return b;
+}
+
+v32bf stacktest6 (v32bf __a)
+{
+  volatile v32bf b = __a;
+  return b;
+}
+
+v64bf stacktest7 (v64bf __a)
+{
+  volatile v64bf b = __a;
+  return b;
+}
+
+v128bf stacktest8 (v128bf __a)
+{
+  volatile v128bf b = __a;
+  return b;
+}
+
+/* Test use of constant values to assign values to vectors.  */
+
+typedef bfloat16_t v2bf __attribute__((vector_size(4)));
+v2bf c2 (void) { return (v2bf) 0x12345678; }
+
+bfloat16x4_t c3 (void) { return (bfloat16x4_t) 0x1234567812345678; }
diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_2.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_2.c
new file mode 100644
index 0000000..3891dcf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_2.c
@@ -0,0 +1,97 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-additional-options "-march=armv8.2-a -O3 --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_neon.h>
+
+#pragma GCC push_options
+#pragma GCC target ("+bf16")
+
+/*
+**stacktest1:
+**	sub	sp, sp, #16
+**	str	h0, \[sp, 14\]
+**	ldr	h0, \[sp, 14\]
+**	add	sp, sp, 16
+**	ret
+*/
+bfloat16_t stacktest1 (bfloat16_t __a)
+{
+  volatile bfloat16_t b = __a;
+  return b;
+}
+
+/*
+**stacktest2:
+**	sub	sp, sp, #16
+**	str	d0, \[sp, 8\]
+**	ldr	d0, \[sp, 8\]
+**	add	sp, sp, 16
+**	ret
+*/
+bfloat16x4_t stacktest2 (bfloat16x4_t __a)
+{
+  volatile bfloat16x4_t b = __a;
+  return b;
+}
+
+/*
+**stacktest3:
+**	sub	sp, sp, #16
+**	str	q0, \[sp\]
+**	ldr	q0, \[sp\]
+**	add	sp, sp, 16
+**	ret
+*/
+bfloat16x8_t stacktest3 (bfloat16x8_t __a)
+{
+  volatile bfloat16x8_t b = __a;
+  return b;
+}
+
+/* Test compilation of __attribute__ vectors of 8, 16, 32, etc. BFloats.  */
+typedef bfloat16_t v8bf __attribute__((vector_size(16)));
+typedef bfloat16_t v16bf __attribute__((vector_size(32)));
+typedef bfloat16_t v32bf __attribute__((vector_size(64)));
+typedef bfloat16_t v64bf __attribute__((vector_size(128)));
+typedef bfloat16_t v128bf __attribute__((vector_size(256)));
+
+v8bf stacktest4 (v8bf __a)
+{
+  volatile v8bf b = __a;
+  return b;
+}
+
+v16bf stacktest5 (v16bf __a)
+{
+  volatile v16bf b = __a;
+  return b;
+}
+
+v32bf stacktest6 (v32bf __a)
+{
+  volatile v32bf b = __a;
+  return b;
+}
+
+v64bf stacktest7 (v64bf __a)
+{
+  volatile v64bf b = __a;
+  return b;
+}
+
+v128bf stacktest8 (v128bf __a)
+{
+  volatile v128bf b = __a;
+  return b;
+}
+
+/* Test use of constant values to assign values to vectors.  */
+
+typedef bfloat16_t v2bf __attribute__((vector_size(4)));
+v2bf c2 (void) { return (v2bf) 0x12345678; }
+
+bfloat16x4_t c3 (void) { return (bfloat16x4_t) 0x1234567812345678; }
+
+#pragma GCC pop_options
diff --git a/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_3.c b/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_3.c
new file mode 100644
index 0000000..b35f5e5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bfloat16_simd_3.c
@@ -0,0 +1,92 @@
+/* { dg-do assemble { target { aarch64*-*-* } } } */
+/* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
+/* { dg-additional-options "-march=armv8.2-a -O3 --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_neon.h>
+
+/*
+**stacktest1:
+**	sub	sp, sp, #16
+**	str	h0, \[sp, 14\]
+**	ldr	h0, \[sp, 14\]
+**	add	sp, sp, 16
+**	ret
+*/
+bfloat16_t stacktest1 (bfloat16_t __a)
+{
+  volatile bfloat16_t b = __a;
+  return b;
+}
+
+/*
+**stacktest2:
+**	sub	sp, sp, #16
+**	str	d0, \[sp, 8\]
+**	ldr	d0, \[sp, 8\]
+**	add	sp, sp, 16
+**	ret
+*/
+bfloat16x4_t stacktest2 (bfloat16x4_t __a)
+{
+  volatile bfloat16x4_t b = __a;
+  return b;
+}
+
+/*
+**stacktest3:
+**	sub	sp, sp, #16
+**	str	q0, \[sp\]
+**	ldr	q0, \[sp\]
+**	add	sp, sp, 16
+**	ret
+*/
+bfloat16x8_t stacktest3 (bfloat16x8_t __a)
+{
+  volatile bfloat16x8_t b = __a;
+  return b;
+}
+
+/* Test compilation of __attribute__ vectors of 8, 16, 32, etc. BFloats.  */
+typedef bfloat16_t v8bf __attribute__((vector_size(16)));
+typedef bfloat16_t v16bf __attribute__((vector_size(32)));
+typedef bfloat16_t v32bf __attribute__((vector_size(64)));
+typedef bfloat16_t v64bf __attribute__((vector_size(128)));
+typedef bfloat16_t v128bf __attribute__((vector_size(256)));
+
+v8bf stacktest4 (v8bf __a)
+{
+  volatile v8bf b = __a;
+  return b;
+}
+
+v16bf stacktest5 (v16bf __a)
+{
+  volatile v16bf b = __a;
+  return b;
+}
+
+v32bf stacktest6 (v32bf __a)
+{
+  volatile v32bf b = __a;
+  return b;
+}
+
+v64bf stacktest7 (v64bf __a)
+{
+  volatile v64bf b = __a;
+  return b;
+}
+
+v128bf stacktest8 (v128bf __a)
+{
+  volatile v128bf b = __a;
+  return b;
+}
+
+/* Test use of constant values to assign values to vectors.  */
+
+typedef bfloat16_t v2bf __attribute__((vector_size(4)));
+v2bf c2 (void) { return (v2bf) 0x12345678; }
+
+bfloat16x4_t c3 (void) { return (bfloat16x4_t) 0x1234567812345678; }
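A closing note on the format and mangling: arm_bfloat_half_format, which BFmode is given above via ADJUST_FLOAT_FORMAT, is the usual bfloat16 layout, i.e. the top 16 bits of an IEEE binary32 value (1 sign bit, 8 exponent bits, 7 explicit mantissa bits). That distinct format is why aarch64_mangle_type emits the vendor-extended string "u6__bf16" for __bf16 rather than "Dh", which stays reserved for IEEE half precision. The helper below merely illustrates that bit layout; it is not code from the patch, and it uses plain truncation rather than any particular rounding mode.

    #include <stdint.h>
    #include <string.h>

    /* Illustrative only: the bfloat16 bit pattern of a float, obtained by
       truncating away the low 16 mantissa bits.  */
    static uint16_t
    float_to_bf16_bits (float f)
    {
      uint32_t u;
      memcpy (&u, &f, sizeof u);     /* Reinterpret the float's bits.  */
      return (uint16_t) (u >> 16);   /* Keep sign, exponent, top 7 mantissa bits.  */
    }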