diff options
-rw-r--r-- | gcc/ChangeLog | 172 | ||||
-rw-r--r-- | gcc/config/rs6000/altivec.h | 8 | ||||
-rw-r--r-- | gcc/config/rs6000/altivec.md | 102 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000-builtin.def | 56 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000-c.c | 106 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000-modes.def | 1 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000-protos.h | 2 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 232 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.h | 11 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.md | 21 | ||||
-rw-r--r-- | gcc/config/rs6000/vector.md | 11 | ||||
-rw-r--r-- | gcc/config/rs6000/vsx.md | 30 | ||||
-rw-r--r-- | gcc/doc/extend.texi | 45 | ||||
-rw-r--r-- | gcc/testsuite/ChangeLog | 13 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/p8vector-int128-1.c | 85 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/p8vector-int128-2.c | 177 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/timode_off.c | 8 |
17 files changed, 1029 insertions, 51 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index de18eda..c199901 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,175 @@ +2014-03-12 Michael Meissner <meissner@linux.vnet.ibm.com> + + * config/rs6000/vector.md (VEC_L): Add V1TI mode to vector types. + (VEC_M): Likewise. + (VEC_N): Likewise. + (VEC_R): Likewise. + (VEC_base): Likewise. + (mov<MODE>, VEC_M modes): If we are loading TImode into VSX + registers, we need to swap double words in little endian mode. + + * config/rs6000/rs6000-modes.def (V1TImode): Add new vector mode + to be a container mode for 128-bit integer operations added in ISA + 2.07. Unlike TImode and PTImode, the preferred register set is + the Altivec/VMX registers for the 128-bit operations. + + * config/rs6000/rs6000-protos.h (rs6000_move_128bit_ok_p): Add + declarations. + (rs6000_split_128bit_ok_p): Likewise. + + * config/rs6000/rs6000-builtin.def (BU_P8V_AV_3): Add new support + macros for creating ISA 2.07 normal and overloaded builtin + functions with 3 arguments. + (BU_P8V_OVERLOAD_3): Likewise. + (VPERM_1TI): Add support for V1TImode in 128-bit vector operations + for use as overloaded functions. + (VPERM_1TI_UNS): Likewise. + (VSEL_1TI): Likewise. + (VSEL_1TI_UNS): Likewise. + (ST_INTERNAL_1ti): Likewise. + (LD_INTERNAL_1ti): Likewise. + (XXSEL_1TI): Likewise. + (XXSEL_1TI_UNS): Likewise. + (VPERM_1TI): Likewise. + (VPERM_1TI_UNS): Likewise. + (XXPERMDI_1TI): Likewise. + (SET_1TI): Likewise. + (LXVD2X_V1TI): Likewise. + (STXVD2X_V1TI): Likewise. + (VEC_INIT_V1TI): Likewise. + (VEC_SET_V1TI): Likewise. + (VEC_EXT_V1TI): Likewise. + (EQV_V1TI): Likewise. + (NAND_V1TI): Likewise. + (ORC_V1TI): Likewise. + (VADDCUQ): Add support for 128-bit integer arithmetic instructions + added in ISA 2.07. Add both normal 'altivec' builtins, and the + overloaded builtin. + (VADDUQM): Likewise. + (VSUBCUQ): Likewise. + (VADDEUQM): Likewise. + (VADDECUQ): Likewise. + (VSUBEUQM): Likewise. + (VSUBECUQ): Likewise. 
+ + * config/rs6000/rs6000-c.c (__int128_type): New static to hold + __int128_t and __uint128_t types. + (__uint128_type): Likewise. + (altivec_categorize_keyword): Add support for vector __int128_t, + vector __uint128_t, vector __int128, and vector unsigned __int128 + as a container type for TImode operations that need to be done in + VSX/Altivec registers. + (rs6000_macro_to_expand): Likewise. + (altivec_overloaded_builtins): Add ISA 2.07 overloaded functions + to support 128-bit integer instructions vaddcuq, vadduqm, + vaddecuq, vaddeuqm, vsubcuq, vsubuqm, vsubecuq, vsubeuqm. + (altivec_resolve_overloaded_builtin): Add support for V1TImode. + + * config/rs6000/rs6000.c (rs6000_hard_regno_mode_ok): Add support + for V1TImode, and set up preferences to use VSX/Altivec + registers. Setup VSX reload handlers. + (rs6000_debug_reg_global): Likewise. + (rs6000_init_hard_regno_mode_ok): Likewise. + (rs6000_preferred_simd_mode): Likewise. + (vspltis_constant): Do not allow V1TImode as easy altivec + constants. + (easy_altivec_constant): Likewise. + (output_vec_const_move): Likewise. + (rs6000_expand_vector_set): Convert V1TImode set and extract to + simple move. + (rs6000_expand_vector_extract): Likewise. + (reg_offset_addressing_ok_p): Setup V1TImode to use VSX reg+reg + addressing. + (rs6000_const_vec): Add support for V1TImode. + (rs6000_emit_le_vsx_load): Swap double words when loading or + storing TImode/V1TImode. + (rs6000_emit_le_vsx_store): Likewise. + (rs6000_emit_le_vsx_move): Likewise. + (rs6000_emit_move): Add support for V1TImode. + (altivec_expand_ld_builtin): Likewise. + (altivec_expand_st_builtin): Likewise. + (altivec_expand_vec_init_builtin): Likewise. + (altivec_expand_builtin): Likewise. + (rs6000_init_builtins): Add support for V1TImode type. Add + support for ISA 2.07 128-bit integer builtins. Define type names + for the VSX/Altivec vector types. + (altivec_init_builtins): Add support for overloaded vector + functions with V1TImode type. 
+ (rs6000_preferred_reload_class): Prefer Altivec registers for + V1TImode. + (rs6000_move_128bit_ok_p): Move 128-bit move/split validation to + external function. + (rs6000_split_128bit_ok_p): Likewise. + (rs6000_handle_altivec_attribute): Create V1TImode from vector + __int128_t and vector __uint128_t. + + * config/rs6000/vsx.md (VSX_L): Add V1TImode to vector iterators + and mode attributes. + (VSX_M): Likewise. + (VSX_M2): Likewise. + (VSm): Likewise. + (VSs): Likewise. + (VSr): Likewise. + (VSv): Likewise. + (VS_scalar): Likewise. + (VS_double): Likewise. + (vsx_set_v1ti): New builtin function to create V1TImode from + TImode. + + * config/rs6000/rs6000.h (TARGET_VADDUQM): New macro to say + whether we support the ISA 2.07 128-bit integer arithmetic + instructions. + (ALTIVEC_OR_VSX_VECTOR_MODE): Add V1TImode. + (enum rs6000_builtin_type_index): Add fields to hold V1TImode + and TImode types for use with the builtin functions. + (V1TI_type_node): Likewise. + (unsigned_V1TI_type_node): Likewise. + (intTI_type_internal_node): Likewise. + (uintTI_type_internal_node): Likewise. + + * config/rs6000/altivec.md (UNSPEC_VADDCUQ): New unspecs for ISA + 2.07 128-bit builtin functions. + (UNSPEC_VADDEUQM): Likewise. + (UNSPEC_VADDECUQ): Likewise. + (UNSPEC_VSUBCUQ): Likewise. + (UNSPEC_VSUBEUQM): Likewise. + (UNSPEC_VSUBECUQ): Likewise. + (VM): Add V1TImode to vector mode iterators. + (VM2): Likewise. + (VI_unit): Likewise. + (altivec_vadduqm): Add ISA 2.07 128-bit binary builtins. + (altivec_vaddcuq): Likewise. + (altivec_vsubuqm): Likewise. + (altivec_vsubcuq): Likewise. + (altivec_vaddeuqm): Likewise. + (altivec_vaddecuq): Likewise. + (altivec_vsubeuqm): Likewise. + (altivec_vsubecuq): Likewise. + + * config/rs6000/rs6000.md (FMOVE128_GPR): Add V1TImode to vector + mode iterators. + (BOOL_128): Likewise. + (BOOL_REGS_OUTPUT): Likewise. + (BOOL_REGS_OP1): Likewise. + (BOOL_REGS_OP2): Likewise. + (BOOL_REGS_UNARY): Likewise. + (BOOL_REGS_AND_CR0): Likewise. 
+ + * config/rs6000/altivec.h (vec_vaddcuq): Add support for ISA 2.07 + 128-bit integer builtin support. + (vec_vadduqm): Likewise. + (vec_vaddecuq): Likewise. + (vec_vaddeuqm): Likewise. + (vec_vsubecuq): Likewise. + (vec_vsubeuqm): Likewise. + (vec_vsubcuq): Likewise. + (vec_vsubuqm): Likewise. + + * doc/extend.texi (PowerPC AltiVec/VSX Built-in Functions): + Document vec_vaddcuq, vec_vadduqm, vec_vaddecuq, vec_vaddeuqm, + vec_vsubecuq, vec_vsubeuqm, vec_vsubcuq, vec_vsubuqm builtins adding + 128-bit integer add/subtract to ISA 2.07. + 2014-03-12 Joern Rennecke <joern.rennecke@embecosm.com> * config/arc/arc.c (arc_predicate_delay_insns): diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h index 3bbd300..49c250c 100644 --- a/gcc/config/rs6000/altivec.h +++ b/gcc/config/rs6000/altivec.h @@ -326,12 +326,18 @@ #define vec_eqv __builtin_vec_eqv #define vec_nand __builtin_vec_nand #define vec_orc __builtin_vec_orc +#define vec_vaddcuq __builtin_vec_vaddcuq #define vec_vaddudm __builtin_vec_vaddudm +#define vec_vadduqm __builtin_vec_vadduqm #define vec_vclz __builtin_vec_vclz #define vec_vclzb __builtin_vec_vclzb #define vec_vclzd __builtin_vec_vclzd #define vec_vclzh __builtin_vec_vclzh #define vec_vclzw __builtin_vec_vclzw +#define vec_vaddecuq __builtin_vec_vaddecuq +#define vec_vaddeuqm __builtin_vec_vaddeuqm +#define vec_vsubecuq __builtin_vec_vsubecuq +#define vec_vsubeuqm __builtin_vec_vsubeuqm #define vec_vgbbd __builtin_vec_vgbbd #define vec_vmaxsd __builtin_vec_vmaxsd #define vec_vmaxud __builtin_vec_vmaxud @@ -352,7 +358,9 @@ #define vec_vsld __builtin_vec_vsld #define vec_vsrad __builtin_vec_vsrad #define vec_vsrd __builtin_vec_vsrd +#define vec_vsubcuq __builtin_vec_vsubcuq #define vec_vsubudm __builtin_vec_vsubudm +#define vec_vsubuqm __builtin_vec_vsubuqm #define vec_vupkhsw __builtin_vec_vupkhsw #define vec_vupklsw __builtin_vec_vupklsw #endif diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 0835779..faa88d0 
100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -136,6 +136,12 @@ UNSPEC_VMRGL_DIRECT UNSPEC_VSPLT_DIRECT UNSPEC_VSUMSWS_DIRECT + UNSPEC_VADDCUQ + UNSPEC_VADDEUQM + UNSPEC_VADDECUQ + UNSPEC_VSUBCUQ + UNSPEC_VSUBEUQM + UNSPEC_VSUBECUQ ]) (define_c_enum "unspecv" @@ -158,17 +164,18 @@ (define_mode_iterator V [V4SI V8HI V16QI V4SF]) ;; Vec modes for move/logical/permute ops, include vector types for move not ;; otherwise handled by altivec (v2df, v2di, ti) -(define_mode_iterator VM [V4SI V8HI V16QI V4SF V2DF V2DI TI]) +(define_mode_iterator VM [V4SI V8HI V16QI V4SF V2DF V2DI V1TI TI]) ;; Like VM, except don't do TImode -(define_mode_iterator VM2 [V4SI V8HI V16QI V4SF V2DF V2DI]) +(define_mode_iterator VM2 [V4SI V8HI V16QI V4SF V2DF V2DI V1TI]) (define_mode_attr VI_char [(V2DI "d") (V4SI "w") (V8HI "h") (V16QI "b")]) (define_mode_attr VI_scalar [(V2DI "DI") (V4SI "SI") (V8HI "HI") (V16QI "QI")]) (define_mode_attr VI_unit [(V16QI "VECTOR_UNIT_ALTIVEC_P (V16QImode)") (V8HI "VECTOR_UNIT_ALTIVEC_P (V8HImode)") (V4SI "VECTOR_UNIT_ALTIVEC_P (V4SImode)") - (V2DI "VECTOR_UNIT_P8_VECTOR_P (V2DImode)")]) + (V2DI "VECTOR_UNIT_P8_VECTOR_P (V2DImode)") + (V1TI "VECTOR_UNIT_ALTIVEC_P (V1TImode)")]) ;; Vector pack/unpack (define_mode_iterator VP [V2DI V4SI V8HI]) @@ -3226,3 +3233,92 @@ "vgbbd %0,%1" [(set_attr "length" "4") (set_attr "type" "vecsimple")]) + + +;; 128-bit binary integer arithmetic +;; We have a special container type (V1TImode) to allow operations using the +;; ISA 2.07 128-bit binary support to target the VMX/altivec registers without +;; having to worry about the register allocator deciding GPRs are better. 
+ +(define_insn "altivec_vadduqm" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (plus:V1TI (match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v")))] + "TARGET_VADDUQM" + "vadduqm %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vaddcuq" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v")] + UNSPEC_VADDCUQ))] + "TARGET_VADDUQM" + "vaddcuq %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vsubuqm" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (minus:V1TI (match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v")))] + "TARGET_VADDUQM" + "vsubuqm %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vsubcuq" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v")] + UNSPEC_VSUBCUQ))] + "TARGET_VADDUQM" + "vsubcuq %0,%1,%2" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vaddeuqm" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v") + (match_operand:V1TI 3 "register_operand" "v")] + UNSPEC_VADDEUQM))] + "TARGET_VADDUQM" + "vaddeuqm %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vaddecuq" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v") + (match_operand:V1TI 3 "register_operand" "v")] + UNSPEC_VADDECUQ))] + "TARGET_VADDUQM" + "vaddecuq %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + 
+(define_insn "altivec_vsubeuqm" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v") + (match_operand:V1TI 3 "register_operand" "v")] + UNSPEC_VSUBEUQM))] + "TARGET_VADDUQM" + "vsubeuqm %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + +(define_insn "altivec_vsubecuq" + [(set (match_operand:V1TI 0 "register_operand" "=v") + (unspec:V1TI [(match_operand:V1TI 1 "register_operand" "v") + (match_operand:V1TI 2 "register_operand" "v") + (match_operand:V1TI 3 "register_operand" "v")] + UNSPEC_VSUBECUQ))] + "TARGET_VADDUQM" + "vsubecuq %0,%1,%2,%3" + [(set_attr "length" "4") + (set_attr "type" "vecsimple")]) + diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index 46df66b..9226035 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -325,6 +325,14 @@ | RS6000_BTC_BINARY), \ CODE_FOR_ ## ICODE) /* ICODE */ +#define BU_P8V_AV_3(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_3 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_altivec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + #define BU_P8V_AV_P(ENUM, NAME, ATTR, ICODE) \ RS6000_BUILTIN_P (P8V_BUILTIN_ ## ENUM, /* ENUM */ \ "__builtin_altivec_" NAME, /* NAME */ \ @@ -359,6 +367,14 @@ | RS6000_BTC_BINARY), \ CODE_FOR_nothing) /* ICODE */ +#define BU_P8V_OVERLOAD_3(ENUM, NAME) \ + RS6000_BUILTIN_3 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \ + "__builtin_vec_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_nothing) /* ICODE */ + /* Crypto convenience macros. 
*/ #define BU_CRYPTO_1(ENUM, NAME, ATTR, ICODE) \ RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ @@ -571,12 +587,14 @@ BU_ALTIVEC_3 (VMSUMSHM, "vmsumshm", CONST, altivec_vmsumshm) BU_ALTIVEC_3 (VMSUMUHS, "vmsumuhs", SAT, altivec_vmsumuhs) BU_ALTIVEC_3 (VMSUMSHS, "vmsumshs", SAT, altivec_vmsumshs) BU_ALTIVEC_3 (VNMSUBFP, "vnmsubfp", FP, nfmsv4sf4) +BU_ALTIVEC_3 (VPERM_1TI, "vperm_1ti", CONST, altivec_vperm_v1ti) BU_ALTIVEC_3 (VPERM_2DF, "vperm_2df", CONST, altivec_vperm_v2df) BU_ALTIVEC_3 (VPERM_2DI, "vperm_2di", CONST, altivec_vperm_v2di) BU_ALTIVEC_3 (VPERM_4SF, "vperm_4sf", CONST, altivec_vperm_v4sf) BU_ALTIVEC_3 (VPERM_4SI, "vperm_4si", CONST, altivec_vperm_v4si) BU_ALTIVEC_3 (VPERM_8HI, "vperm_8hi", CONST, altivec_vperm_v8hi) BU_ALTIVEC_3 (VPERM_16QI, "vperm_16qi", CONST, altivec_vperm_v16qi_uns) +BU_ALTIVEC_3 (VPERM_1TI_UNS, "vperm_1ti_uns", CONST, altivec_vperm_v1ti_uns) BU_ALTIVEC_3 (VPERM_2DI_UNS, "vperm_2di_uns", CONST, altivec_vperm_v2di_uns) BU_ALTIVEC_3 (VPERM_4SI_UNS, "vperm_4si_uns", CONST, altivec_vperm_v4si_uns) BU_ALTIVEC_3 (VPERM_8HI_UNS, "vperm_8hi_uns", CONST, altivec_vperm_v8hi_uns) @@ -587,10 +605,12 @@ BU_ALTIVEC_3 (VSEL_8HI, "vsel_8hi", CONST, vector_select_v8hi) BU_ALTIVEC_3 (VSEL_16QI, "vsel_16qi", CONST, vector_select_v16qi) BU_ALTIVEC_3 (VSEL_2DF, "vsel_2df", CONST, vector_select_v2df) BU_ALTIVEC_3 (VSEL_2DI, "vsel_2di", CONST, vector_select_v2di) +BU_ALTIVEC_3 (VSEL_1TI, "vsel_1ti", CONST, vector_select_v1ti) BU_ALTIVEC_3 (VSEL_4SI_UNS, "vsel_4si_uns", CONST, vector_select_v4si_uns) BU_ALTIVEC_3 (VSEL_8HI_UNS, "vsel_8hi_uns", CONST, vector_select_v8hi_uns) BU_ALTIVEC_3 (VSEL_16QI_UNS, "vsel_16qi_uns", CONST, vector_select_v16qi_uns) BU_ALTIVEC_3 (VSEL_2DI_UNS, "vsel_2di_uns", CONST, vector_select_v2di_uns) +BU_ALTIVEC_3 (VSEL_1TI_UNS, "vsel_1ti_uns", CONST, vector_select_v1ti_uns) BU_ALTIVEC_3 (VSLDOI_16QI, "vsldoi_16qi", CONST, altivec_vsldoi_v16qi) BU_ALTIVEC_3 (VSLDOI_8HI, "vsldoi_8hi", CONST, altivec_vsldoi_v8hi) 
BU_ALTIVEC_3 (VSLDOI_4SI, "vsldoi_4si", CONST, altivec_vsldoi_v4si) @@ -783,6 +803,8 @@ BU_ALTIVEC_X (ST_INTERNAL_2df, "st_internal_4sf", MEM) BU_ALTIVEC_X (LD_INTERNAL_2df, "ld_internal_2df", MEM) BU_ALTIVEC_X (ST_INTERNAL_2di, "st_internal_2di", MEM) BU_ALTIVEC_X (LD_INTERNAL_2di, "ld_internal_2di", MEM) +BU_ALTIVEC_X (ST_INTERNAL_1ti, "st_internal_1ti", MEM) +BU_ALTIVEC_X (LD_INTERNAL_1ti, "ld_internal_1ti", MEM) BU_ALTIVEC_X (MTVSCR, "mtvscr", MISC) BU_ALTIVEC_X (MFVSCR, "mfvscr", MISC) BU_ALTIVEC_X (DSSALL, "dssall", MISC) @@ -1085,34 +1107,40 @@ BU_VSX_3 (XVMSUBDP, "xvmsubdp", CONST, fmsv2df4) BU_VSX_3 (XVNMADDDP, "xvnmadddp", CONST, nfmav2df4) BU_VSX_3 (XVNMSUBDP, "xvnmsubdp", CONST, nfmsv2df4) +BU_VSX_3 (XXSEL_1TI, "xxsel_1ti", CONST, vector_select_v1ti) BU_VSX_3 (XXSEL_2DI, "xxsel_2di", CONST, vector_select_v2di) BU_VSX_3 (XXSEL_2DF, "xxsel_2df", CONST, vector_select_v2df) BU_VSX_3 (XXSEL_4SF, "xxsel_4sf", CONST, vector_select_v4sf) BU_VSX_3 (XXSEL_4SI, "xxsel_4si", CONST, vector_select_v4si) BU_VSX_3 (XXSEL_8HI, "xxsel_8hi", CONST, vector_select_v8hi) BU_VSX_3 (XXSEL_16QI, "xxsel_16qi", CONST, vector_select_v16qi) +BU_VSX_3 (XXSEL_1TI_UNS, "xxsel_1ti_uns", CONST, vector_select_v1ti_uns) BU_VSX_3 (XXSEL_2DI_UNS, "xxsel_2di_uns", CONST, vector_select_v2di_uns) BU_VSX_3 (XXSEL_4SI_UNS, "xxsel_4si_uns", CONST, vector_select_v4si_uns) BU_VSX_3 (XXSEL_8HI_UNS, "xxsel_8hi_uns", CONST, vector_select_v8hi_uns) BU_VSX_3 (XXSEL_16QI_UNS, "xxsel_16qi_uns", CONST, vector_select_v16qi_uns) +BU_VSX_3 (VPERM_1TI, "vperm_1ti", CONST, altivec_vperm_v1ti) BU_VSX_3 (VPERM_2DI, "vperm_2di", CONST, altivec_vperm_v2di) BU_VSX_3 (VPERM_2DF, "vperm_2df", CONST, altivec_vperm_v2df) BU_VSX_3 (VPERM_4SF, "vperm_4sf", CONST, altivec_vperm_v4sf) BU_VSX_3 (VPERM_4SI, "vperm_4si", CONST, altivec_vperm_v4si) BU_VSX_3 (VPERM_8HI, "vperm_8hi", CONST, altivec_vperm_v8hi) BU_VSX_3 (VPERM_16QI, "vperm_16qi", CONST, altivec_vperm_v16qi) +BU_VSX_3 (VPERM_1TI_UNS, "vperm_1ti_uns", CONST, 
altivec_vperm_v1ti_uns) BU_VSX_3 (VPERM_2DI_UNS, "vperm_2di_uns", CONST, altivec_vperm_v2di_uns) BU_VSX_3 (VPERM_4SI_UNS, "vperm_4si_uns", CONST, altivec_vperm_v4si_uns) BU_VSX_3 (VPERM_8HI_UNS, "vperm_8hi_uns", CONST, altivec_vperm_v8hi_uns) BU_VSX_3 (VPERM_16QI_UNS, "vperm_16qi_uns", CONST, altivec_vperm_v16qi_uns) +BU_VSX_3 (XXPERMDI_1TI, "xxpermdi_1ti", CONST, vsx_xxpermdi_v1ti) BU_VSX_3 (XXPERMDI_2DF, "xxpermdi_2df", CONST, vsx_xxpermdi_v2df) BU_VSX_3 (XXPERMDI_2DI, "xxpermdi_2di", CONST, vsx_xxpermdi_v2di) BU_VSX_3 (XXPERMDI_4SF, "xxpermdi_4sf", CONST, vsx_xxpermdi_v4sf) BU_VSX_3 (XXPERMDI_4SI, "xxpermdi_4si", CONST, vsx_xxpermdi_v4si) BU_VSX_3 (XXPERMDI_8HI, "xxpermdi_8hi", CONST, vsx_xxpermdi_v8hi) BU_VSX_3 (XXPERMDI_16QI, "xxpermdi_16qi", CONST, vsx_xxpermdi_v16qi) +BU_VSX_3 (SET_1TI, "set_1ti", CONST, vsx_set_v1ti) BU_VSX_3 (SET_2DF, "set_2df", CONST, vsx_set_v2df) BU_VSX_3 (SET_2DI, "set_2di", CONST, vsx_set_v2di) BU_VSX_3 (XXSLDWI_2DI, "xxsldwi_2di", CONST, vsx_xxsldwi_v2di) @@ -1247,6 +1275,7 @@ BU_VSX_P (XVCMPGTDP_P, "xvcmpgtdp_p", CONST, vector_gt_v2df_p) /* VSX builtins that are handled as special cases. 
*/ BU_VSX_X (LXSDX, "lxsdx", MEM) +BU_VSX_X (LXVD2X_V1TI, "lxvd2x_v1ti", MEM) BU_VSX_X (LXVD2X_V2DF, "lxvd2x_v2df", MEM) BU_VSX_X (LXVD2X_V2DI, "lxvd2x_v2di", MEM) BU_VSX_X (LXVDSX, "lxvdsx", MEM) @@ -1255,6 +1284,7 @@ BU_VSX_X (LXVW4X_V4SI, "lxvw4x_v4si", MEM) BU_VSX_X (LXVW4X_V8HI, "lxvw4x_v8hi", MEM) BU_VSX_X (LXVW4X_V16QI, "lxvw4x_v16qi", MEM) BU_VSX_X (STXSDX, "stxsdx", MEM) +BU_VSX_X (STXVD2X_V1TI, "stxsdx_v1ti", MEM) BU_VSX_X (STXVD2X_V2DF, "stxsdx_v2df", MEM) BU_VSX_X (STXVD2X_V2DI, "stxsdx_v2di", MEM) BU_VSX_X (STXVW4X_V4SF, "stxsdx_v4sf", MEM) @@ -1285,10 +1315,13 @@ BU_VSX_X (XSNMADDMDP, "xsnmaddmdp", FP) BU_VSX_X (XSNMSUBADP, "xsnmsubadp", FP) BU_VSX_X (XSNMSUBMDP, "xsnmsubmdp", FP) BU_VSX_X (XSSUBDP, "xssubdp", FP) +BU_VSX_X (VEC_INIT_V1TI, "vec_init_v1ti", CONST) BU_VSX_X (VEC_INIT_V2DF, "vec_init_v2df", CONST) BU_VSX_X (VEC_INIT_V2DI, "vec_init_v2di", CONST) +BU_VSX_X (VEC_SET_V1TI, "vec_set_v1ti", CONST) BU_VSX_X (VEC_SET_V2DF, "vec_set_v2df", CONST) BU_VSX_X (VEC_SET_V2DI, "vec_set_v2di", CONST) +BU_VSX_X (VEC_EXT_V1TI, "vec_ext_v1ti", CONST) BU_VSX_X (VEC_EXT_V2DF, "vec_ext_v2df", CONST) BU_VSX_X (VEC_EXT_V2DI, "vec_ext_v2di", CONST) @@ -1332,7 +1365,9 @@ BU_P8V_AV_1 (VPOPCNTD, "vpopcntd", CONST, popcountv2di2) BU_P8V_AV_1 (VGBBD, "vgbbd", CONST, p8v_vgbbd) /* 2 argument altivec instructions added in ISA 2.07. 
*/ +BU_P8V_AV_2 (VADDCUQ, "vaddcuq", CONST, altivec_vaddcuq) BU_P8V_AV_2 (VADDUDM, "vaddudm", CONST, addv2di3) +BU_P8V_AV_2 (VADDUQM, "vadduqm", CONST, altivec_vadduqm) BU_P8V_AV_2 (VMINSD, "vminsd", CONST, sminv2di3) BU_P8V_AV_2 (VMAXSD, "vmaxsd", CONST, smaxv2di3) BU_P8V_AV_2 (VMINUD, "vminud", CONST, uminv2di3) @@ -1347,12 +1382,15 @@ BU_P8V_AV_2 (VRLD, "vrld", CONST, vrotlv2di3) BU_P8V_AV_2 (VSLD, "vsld", CONST, vashlv2di3) BU_P8V_AV_2 (VSRD, "vsrd", CONST, vlshrv2di3) BU_P8V_AV_2 (VSRAD, "vsrad", CONST, vashrv2di3) +BU_P8V_AV_2 (VSUBCUQ, "vsubcuq", CONST, altivec_vsubcuq) BU_P8V_AV_2 (VSUBUDM, "vsubudm", CONST, subv2di3) +BU_P8V_AV_2 (VSUBUQM, "vsubuqm", CONST, altivec_vsubuqm) BU_P8V_AV_2 (EQV_V16QI, "eqv_v16qi", CONST, eqvv16qi3) BU_P8V_AV_2 (EQV_V8HI, "eqv_v8hi", CONST, eqvv8hi3) BU_P8V_AV_2 (EQV_V4SI, "eqv_v4si", CONST, eqvv4si3) BU_P8V_AV_2 (EQV_V2DI, "eqv_v2di", CONST, eqvv2di3) +BU_P8V_AV_2 (EQV_V1TI, "eqv_v1ti", CONST, eqvv1ti3) BU_P8V_AV_2 (EQV_V4SF, "eqv_v4sf", CONST, eqvv4sf3) BU_P8V_AV_2 (EQV_V2DF, "eqv_v2df", CONST, eqvv2df3) @@ -1360,6 +1398,7 @@ BU_P8V_AV_2 (NAND_V16QI, "nand_v16qi", CONST, nandv16qi3) BU_P8V_AV_2 (NAND_V8HI, "nand_v8hi", CONST, nandv8hi3) BU_P8V_AV_2 (NAND_V4SI, "nand_v4si", CONST, nandv4si3) BU_P8V_AV_2 (NAND_V2DI, "nand_v2di", CONST, nandv2di3) +BU_P8V_AV_2 (NAND_V1TI, "nand_v1ti", CONST, nandv1ti3) BU_P8V_AV_2 (NAND_V4SF, "nand_v4sf", CONST, nandv4sf3) BU_P8V_AV_2 (NAND_V2DF, "nand_v2df", CONST, nandv2df3) @@ -1367,9 +1406,16 @@ BU_P8V_AV_2 (ORC_V16QI, "orc_v16qi", CONST, orcv16qi3) BU_P8V_AV_2 (ORC_V8HI, "orc_v8hi", CONST, orcv8hi3) BU_P8V_AV_2 (ORC_V4SI, "orc_v4si", CONST, orcv4si3) BU_P8V_AV_2 (ORC_V2DI, "orc_v2di", CONST, orcv2di3) +BU_P8V_AV_2 (ORC_V1TI, "orc_v1ti", CONST, orcv1ti3) BU_P8V_AV_2 (ORC_V4SF, "orc_v4sf", CONST, orcv4sf3) BU_P8V_AV_2 (ORC_V2DF, "orc_v2df", CONST, orcv2df3) +/* 3 argument altivec instructions added in ISA 2.07. 
*/ +BU_P8V_AV_3 (VADDEUQM, "vaddeuqm", CONST, altivec_vaddeuqm) +BU_P8V_AV_3 (VADDECUQ, "vaddecuq", CONST, altivec_vaddecuq) +BU_P8V_AV_3 (VSUBEUQM, "vsubeuqm", CONST, altivec_vsubeuqm) +BU_P8V_AV_3 (VSUBECUQ, "vsubecuq", CONST, altivec_vsubecuq) + /* Vector comparison instructions added in ISA 2.07. */ BU_P8V_AV_2 (VCMPEQUD, "vcmpequd", CONST, vector_eqv2di) BU_P8V_AV_2 (VCMPGTSD, "vcmpgtsd", CONST, vector_gtv2di) @@ -1399,7 +1445,9 @@ BU_P8V_OVERLOAD_1 (VGBBD, "vgbbd") BU_P8V_OVERLOAD_2 (EQV, "eqv") BU_P8V_OVERLOAD_2 (NAND, "nand") BU_P8V_OVERLOAD_2 (ORC, "orc") +BU_P8V_OVERLOAD_2 (VADDCUQ, "vaddcuq") BU_P8V_OVERLOAD_2 (VADDUDM, "vaddudm") +BU_P8V_OVERLOAD_2 (VADDUQM, "vadduqm") BU_P8V_OVERLOAD_2 (VMAXSD, "vmaxsd") BU_P8V_OVERLOAD_2 (VMAXUD, "vmaxud") BU_P8V_OVERLOAD_2 (VMINSD, "vminsd") @@ -1414,7 +1462,15 @@ BU_P8V_OVERLOAD_2 (VRLD, "vrld") BU_P8V_OVERLOAD_2 (VSLD, "vsld") BU_P8V_OVERLOAD_2 (VSRAD, "vsrad") BU_P8V_OVERLOAD_2 (VSRD, "vsrd") +BU_P8V_OVERLOAD_2 (VSUBCUQ, "vsubcuq") BU_P8V_OVERLOAD_2 (VSUBUDM, "vsubudm") +BU_P8V_OVERLOAD_2 (VSUBUQM, "vsubuqm") + +/* ISA 2.07 vector overloaded 3 argument functions. */ +BU_P8V_OVERLOAD_3 (VADDECUQ, "vaddecuq") +BU_P8V_OVERLOAD_3 (VADDEUQM, "vaddeuqm") +BU_P8V_OVERLOAD_3 (VSUBECUQ, "vsubecuq") +BU_P8V_OVERLOAD_3 (VSUBEUQM, "vsubeuqm") /* 1 argument crypto functions. */ diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c index 73edd2b..0f1dafc 100644 --- a/gcc/config/rs6000/rs6000-c.c +++ b/gcc/config/rs6000/rs6000-c.c @@ -92,6 +92,8 @@ static GTY(()) tree pixel_keyword; static GTY(()) tree __bool_keyword; static GTY(()) tree bool_keyword; static GTY(()) tree _Bool_keyword; +static GTY(()) tree __int128_type; +static GTY(()) tree __uint128_type; /* Preserved across calls. 
*/ static tree expand_bool_pixel; @@ -124,9 +126,10 @@ altivec_categorize_keyword (const cpp_token *tok) static void init_vector_keywords (void) { - /* Keywords without two leading underscores are context-sensitive, - and hence implemented as conditional macros, controlled by the - rs6000_macro_to_expand() function below. */ + /* Keywords without two leading underscores are context-sensitive, and hence + implemented as conditional macros, controlled by the + rs6000_macro_to_expand() function below. If we have ISA 2.07 64-bit + support, record the __int128_t and __uint128_t types. */ __vector_keyword = get_identifier ("__vector"); C_CPP_HASHNODE (__vector_keyword)->flags |= NODE_CONDITIONAL; @@ -148,6 +151,12 @@ init_vector_keywords (void) _Bool_keyword = get_identifier ("_Bool"); C_CPP_HASHNODE (_Bool_keyword)->flags |= NODE_CONDITIONAL; + + if (TARGET_VADDUQM) + { + __int128_type = get_identifier ("__int128_t"); + __uint128_type = get_identifier ("__uint128_t"); + } } /* Called to decide whether a conditional macro should be expanded. @@ -223,7 +232,8 @@ rs6000_macro_to_expand (cpp_reader *pfile, const cpp_token *tok) || rid_code == RID_SHORT || rid_code == RID_SIGNED || rid_code == RID_INT || rid_code == RID_CHAR || rid_code == RID_FLOAT - || (rid_code == RID_DOUBLE && TARGET_VSX)) + || (rid_code == RID_DOUBLE && TARGET_VSX) + || (rid_code == RID_INT128 && TARGET_VADDUQM)) { expand_this = C_CPP_HASHNODE (__vector_keyword); /* If the next keyword is bool or pixel, it @@ -250,6 +260,13 @@ rs6000_macro_to_expand (cpp_reader *pfile, const cpp_token *tok) expand_bool_pixel = __bool_keyword; } } + + /* Support vector __int128_t, but we don't need to worry about bool + or pixel on this type. 
*/ + else if (TARGET_VADDUQM + && (ident == C_CPP_HASHNODE (__int128_type) + || ident == C_CPP_HASHNODE (__uint128_type))) + expand_this = C_CPP_HASHNODE (__vector_keyword); } } else if (expand_bool_pixel && ident == C_CPP_HASHNODE (__pixel_keyword)) @@ -706,6 +723,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_ADD, VSX_BUILTIN_XVADDDP, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUQM, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUQM, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, { ALTIVEC_BUILTIN_VEC_VADDFP, ALTIVEC_BUILTIN_VADDFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_VADDUWM, ALTIVEC_BUILTIN_VADDUWM, @@ -2327,6 +2349,11 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_SUB, VSX_BUILTIN_XVSUBDP, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUQM, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUQM, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, { ALTIVEC_BUILTIN_VEC_VSUBFP, ALTIVEC_BUILTIN_VSUBFP, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 }, { ALTIVEC_BUILTIN_VEC_VSUBUWM, ALTIVEC_BUILTIN_VSUBUWM, @@ -3726,6 +3753,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 }, + { P8V_BUILTIN_VEC_VADDCUQ, P8V_BUILTIN_VADDCUQ, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { P8V_BUILTIN_VEC_VADDCUQ, P8V_BUILTIN_VADDCUQ, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + 
RS6000_BTI_unsigned_V1TI, 0 }, + { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, @@ -3739,6 +3772,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VADDUQM, P8V_BUILTIN_VADDUQM, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { P8V_BUILTIN_VEC_VADDUQM, P8V_BUILTIN_VADDUQM, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, + { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 }, { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB, @@ -3781,6 +3820,30 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD, RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 }, + { P8V_BUILTIN_VEC_VADDECUQ, P8V_BUILTIN_VADDECUQ, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI }, + { P8V_BUILTIN_VEC_VADDECUQ, P8V_BUILTIN_VADDECUQ, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, + + { P8V_BUILTIN_VEC_VADDEUQM, P8V_BUILTIN_VADDEUQM, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI }, + { P8V_BUILTIN_VEC_VADDEUQM, P8V_BUILTIN_VADDEUQM, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, + + { P8V_BUILTIN_VEC_VSUBECUQ, P8V_BUILTIN_VSUBECUQ, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI }, + { P8V_BUILTIN_VEC_VSUBECUQ, P8V_BUILTIN_VSUBECUQ, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, + + { P8V_BUILTIN_VEC_VSUBEUQM, P8V_BUILTIN_VSUBEUQM, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI }, + { 
P8V_BUILTIN_VEC_VSUBEUQM, P8V_BUILTIN_VSUBEUQM, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI }, + { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD, @@ -3900,6 +3963,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRD, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBCUQ, P8V_BUILTIN_VSUBCUQ, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { P8V_BUILTIN_VEC_VSUBCUQ, P8V_BUILTIN_VSUBCUQ, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, + { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 }, { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, @@ -3913,6 +3982,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = { { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 }, + { P8V_BUILTIN_VEC_VSUBUQM, P8V_BUILTIN_VSUBUQM, + RS6000_BTI_V1TI, RS6000_BTI_V1TI, RS6000_BTI_V1TI, 0 }, + { P8V_BUILTIN_VEC_VSUBUQM, P8V_BUILTIN_VSUBUQM, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V1TI, + RS6000_BTI_unsigned_V1TI, 0 }, + { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW, RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 }, { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW, @@ -4145,6 +4220,10 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, unsigned_p = TYPE_UNSIGNED (type); switch (TYPE_MODE (type)) { + case TImode: + type = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node); + size = 1; + break; case DImode: type = (unsigned_p ? 
unsigned_V2DI_type_node : V2DI_type_node); size = 2; @@ -4232,6 +4311,14 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, if (call) return build_call_expr (call, 2, arg1, arg2); } + else if (mode == V1TImode && VECTOR_MEM_VSX_P (mode) + && TREE_CODE (arg2) == INTEGER_CST + && TREE_INT_CST_HIGH (arg2) == 0 + && TREE_INT_CST_LOW (arg2) == 0) + { + tree call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V1TI]; + return build_call_expr (call, 2, arg1, arg2); + } /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2). */ arg1_inner_type = TREE_TYPE (arg1_type); @@ -4331,6 +4418,17 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, if (call) return build_call_expr (call, 3, arg1, arg0, arg2); } + else if (mode == V1TImode && VECTOR_UNIT_VSX_P (mode) + && TREE_CODE (arg2) == INTEGER_CST + && TREE_INT_CST_HIGH (arg2) == 0 + && TREE_INT_CST_LOW (arg2) == 0) + { + tree call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V1TI]; + + /* Note, __builtin_vec_insert_<xxx> has vector and scalar types + reversed. */ + return build_call_expr (call, 3, arg1, arg0, arg2); + } /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2) = arg0. 
*/ arg1_inner_type = TREE_TYPE (arg1_type); diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def index 3de7184..30a4dd3 100644 --- a/gcc/config/rs6000/rs6000-modes.def +++ b/gcc/config/rs6000/rs6000-modes.def @@ -38,6 +38,7 @@ VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */ VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */ VECTOR_MODES (INT, 32); /* V32QI V16HI V8SI V4DI */ VECTOR_MODE (INT, DI, 1); +VECTOR_MODE (INT, TI, 1); VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ VECTOR_MODES (FLOAT, 32); /* V16HF V8SF V4DF */ diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 3f971f0..69bb263 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -51,6 +51,8 @@ extern rtx find_addr_reg (rtx); extern rtx gen_easy_altivec_constant (rtx); extern const char *output_vec_const_move (rtx *); extern const char *rs6000_output_move_128bit (rtx *); +extern bool rs6000_move_128bit_ok_p (rtx []); +extern bool rs6000_split_128bit_ok_p (rtx []); extern void rs6000_expand_vector_init (rtx, rtx); extern void paired_expand_vector_init (rtx, rtx); extern void rs6000_expand_vector_set (rtx, rtx, int); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index d9e4fc5..036a2af 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -1725,7 +1725,8 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode) && (VECTOR_MEM_VSX_P (mode) || (TARGET_VSX_SCALAR_FLOAT && mode == SFmode) || (TARGET_VSX_SCALAR_DOUBLE && (mode == DFmode || mode == DImode)) - || (TARGET_VSX_TIMODE && mode == TImode))) + || (TARGET_VSX_TIMODE && mode == TImode) + || (TARGET_VADDUQM && mode == V1TImode))) { if (FP_REGNO_P (regno)) return FP_REGNO_P (last_regno); @@ -1776,7 +1777,8 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode) /* AltiVec only in AldyVec registers. 
*/ if (ALTIVEC_REGNO_P (regno)) - return VECTOR_MEM_ALTIVEC_OR_VSX_P (mode); + return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) + || mode == V1TImode); /* ...but GPRs can hold SIMD data on the SPE in one register. */ if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode)) @@ -1971,10 +1973,12 @@ rs6000_debug_reg_global (void) V8HImode, V4SImode, V2DImode, + V1TImode, V32QImode, V16HImode, V8SImode, V4DImode, + V2TImode, V2SFmode, V4SFmode, V2DFmode, @@ -2553,6 +2557,11 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) rs6000_vector_unit[V2DImode] = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE; rs6000_vector_align[V2DImode] = align64; + + rs6000_vector_mem[V1TImode] = VECTOR_VSX; + rs6000_vector_unit[V1TImode] + = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE; + rs6000_vector_align[V1TImode] = 128; } /* DFmode, see if we want to use the VSX unit. */ @@ -2676,6 +2685,8 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load; reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store; reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load; + reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store; + reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load; reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store; reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load; reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store; @@ -2704,6 +2715,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) if (TARGET_POWERPC64) { reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti; + reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti; reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df; reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di; reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf; @@ -2713,6 +2725,7 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) 
reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf; reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti; + reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti; reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df; reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di; reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf; @@ -2739,6 +2752,8 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p) reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load; reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store; reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load; + reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store; + reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load; reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store; reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load; reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store; @@ -4250,6 +4265,8 @@ rs6000_preferred_simd_mode (enum machine_mode mode) { case SFmode: return V4SFmode; + case TImode: + return V1TImode; case DImode: return V2DImode; case SImode: @@ -5011,7 +5028,7 @@ vspltis_constant (rtx op, unsigned step, unsigned copies) HOST_WIDE_INT splat_val; HOST_WIDE_INT msb_val; - if (mode == V2DImode || mode == V2DFmode) + if (mode == V2DImode || mode == V2DFmode || mode == V1TImode) return false; nunits = GET_MODE_NUNITS (mode); @@ -5090,7 +5107,7 @@ easy_altivec_constant (rtx op, enum machine_mode mode) if (mode == V2DFmode) return zero_constant (op, mode); - if (mode == V2DImode) + else if (mode == V2DImode) { /* In case the compiler is built 32-bit, CONST_DOUBLE constants are not easy. */ @@ -5108,6 +5125,10 @@ easy_altivec_constant (rtx op, enum machine_mode mode) return false; } + /* V1TImode is a special container for TImode. Ignore for now. */ + else if (mode == V1TImode) + return false; + /* Start with a vspltisw. 
*/ step = GET_MODE_NUNITS (mode) / 4; copies = 1; @@ -5189,7 +5210,7 @@ output_vec_const_move (rtx *operands) if (zero_constant (vec, mode)) return "xxlxor %x0,%x0,%x0"; - if (mode == V2DImode + if ((mode == V2DImode || mode == V1TImode) && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1 && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1) return "vspltisw %0,-1"; @@ -5575,6 +5596,13 @@ rs6000_expand_vector_set (rtx target, rtx val, int elt) return; } + /* Simplify setting single element vectors like V1TImode. */ + if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0) + { + emit_move_insn (target, gen_lowpart (mode, val)); + return; + } + /* Load single variable value. */ mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode)); emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val); @@ -5635,6 +5663,10 @@ rs6000_expand_vector_extract (rtx target, rtx vec, int elt) { default: break; + case V1TImode: + gcc_assert (elt == 0 && inner_mode == TImode); + emit_move_insn (target, gen_lowpart (TImode, vec)); + break; case V2DFmode: emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt))); return; @@ -6076,6 +6108,7 @@ reg_offset_addressing_ok_p (enum machine_mode mode) case V4SImode: case V2DFmode: case V2DImode: + case V1TImode: case TImode: /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. While TImode is not a vector mode, if we want to use the VSX registers to @@ -7960,6 +7993,9 @@ rs6000_const_vec (enum machine_mode mode) switch (mode) { + case V1TImode: + subparts = 1; + break; case V2DFmode: case V2DImode: subparts = 2; @@ -8003,9 +8039,20 @@ rs6000_gen_le_vsx_permute (rtx source, enum machine_mode mode) void rs6000_emit_le_vsx_load (rtx dest, rtx source, enum machine_mode mode) { - rtx tmp = can_create_pseudo_p () ? 
gen_reg_rtx_and_attrs (dest) : dest; - rtx permute_mem = rs6000_gen_le_vsx_permute (source, mode); - rtx permute_reg = rs6000_gen_le_vsx_permute (tmp, mode); + rtx tmp, permute_mem, permute_reg; + + /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode, + V1TImode). */ + if (mode == TImode || mode == V1TImode) + { + mode = V2DImode; + dest = gen_lowpart (V2DImode, dest); + source = adjust_address (source, V2DImode, 0); + } + + tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest; + permute_mem = rs6000_gen_le_vsx_permute (source, mode); + permute_reg = rs6000_gen_le_vsx_permute (tmp, mode); emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_mem)); emit_insn (gen_rtx_SET (VOIDmode, dest, permute_reg)); } @@ -8016,9 +8063,20 @@ rs6000_emit_le_vsx_load (rtx dest, rtx source, enum machine_mode mode) void rs6000_emit_le_vsx_store (rtx dest, rtx source, enum machine_mode mode) { - rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source; - rtx permute_src = rs6000_gen_le_vsx_permute (source, mode); - rtx permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode); + rtx tmp, permute_src, permute_tmp; + + /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode, + V1TImode). */ + if (mode == TImode || mode == V1TImode) + { + mode = V2DImode; + dest = adjust_address (dest, V2DImode, 0); + source = gen_lowpart (V2DImode, source); + } + + tmp = can_create_pseudo_p () ? 
gen_reg_rtx_and_attrs (source) : source; + permute_src = rs6000_gen_le_vsx_permute (source, mode); + permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode); emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_src)); emit_insn (gen_rtx_SET (VOIDmode, dest, permute_tmp)); } @@ -8034,7 +8092,6 @@ rs6000_emit_le_vsx_move (rtx dest, rtx source, enum machine_mode mode) { gcc_assert (!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (mode) - && mode != TImode && !gpr_or_gpr_p (dest, source) && (MEM_P (source) ^ MEM_P (dest))); @@ -8318,6 +8375,7 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode) case V1DImode: case V2DFmode: case V2DImode: + case V1TImode: if (CONSTANT_P (operands[1]) && !easy_vector_constant (operands[1], mode)) operands[1] = force_const_mem (mode, operands[1]); @@ -12403,6 +12461,8 @@ altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp) break; case ALTIVEC_BUILTIN_LD_INTERNAL_2di: icode = CODE_FOR_vector_altivec_load_v2di; + case ALTIVEC_BUILTIN_LD_INTERNAL_1ti: + icode = CODE_FOR_vector_altivec_load_v1ti; break; default: *expandedp = false; @@ -12462,6 +12522,8 @@ altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, break; case ALTIVEC_BUILTIN_ST_INTERNAL_2di: icode = CODE_FOR_vector_altivec_store_v2di; + case ALTIVEC_BUILTIN_ST_INTERNAL_1ti: + icode = CODE_FOR_vector_altivec_store_v1ti; break; default: *expandedp = false; @@ -12554,21 +12616,33 @@ altivec_expand_vec_init_builtin (tree type, tree exp, rtx target) enum machine_mode tmode = TYPE_MODE (type); enum machine_mode inner_mode = GET_MODE_INNER (tmode); int i, n_elt = GET_MODE_NUNITS (tmode); - rtvec v = rtvec_alloc (n_elt); gcc_assert (VECTOR_MODE_P (tmode)); gcc_assert (n_elt == call_expr_nargs (exp)); - for (i = 0; i < n_elt; ++i) + if (!target || !register_operand (target, tmode)) + target = gen_reg_rtx (tmode); + + /* If we have a vector compromised of a single element, such as V1TImode, do + the initialization directly. 
*/ + if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode)) { - rtx x = expand_normal (CALL_EXPR_ARG (exp, i)); - RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x); + rtx x = expand_normal (CALL_EXPR_ARG (exp, 0)); + emit_move_insn (target, gen_lowpart (tmode, x)); } + else + { + rtvec v = rtvec_alloc (n_elt); - if (!target || !register_operand (target, tmode)) - target = gen_reg_rtx (tmode); + for (i = 0; i < n_elt; ++i) + { + rtx x = expand_normal (CALL_EXPR_ARG (exp, i)); + RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x); + } + + rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v)); + } - rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v)); return target; } @@ -12733,6 +12807,8 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) case ALTIVEC_BUILTIN_STVRXL: return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp); + case VSX_BUILTIN_STXVD2X_V1TI: + return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp); case VSX_BUILTIN_STXVD2X_V2DF: return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp); case VSX_BUILTIN_STXVD2X_V2DI: @@ -12813,6 +12889,7 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) case ALTIVEC_BUILTIN_VEC_INIT_V4SF: case VSX_BUILTIN_VEC_INIT_V2DF: case VSX_BUILTIN_VEC_INIT_V2DI: + case VSX_BUILTIN_VEC_INIT_V1TI: return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target); case ALTIVEC_BUILTIN_VEC_SET_V4SI: @@ -12821,6 +12898,7 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) case ALTIVEC_BUILTIN_VEC_SET_V4SF: case VSX_BUILTIN_VEC_SET_V2DF: case VSX_BUILTIN_VEC_SET_V2DI: + case VSX_BUILTIN_VEC_SET_V1TI: return altivec_expand_vec_set_builtin (exp); case ALTIVEC_BUILTIN_VEC_EXT_V4SI: @@ -12829,6 +12907,7 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) case ALTIVEC_BUILTIN_VEC_EXT_V4SF: case VSX_BUILTIN_VEC_EXT_V2DF: case VSX_BUILTIN_VEC_EXT_V2DI: + case VSX_BUILTIN_VEC_EXT_V1TI: return altivec_expand_vec_ext_builtin (exp, 
target); default: @@ -12916,6 +12995,9 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) case ALTIVEC_BUILTIN_LVRXL: return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl, exp, target, true); + case VSX_BUILTIN_LXVD2X_V1TI: + return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti, + exp, target, false); case VSX_BUILTIN_LXVD2X_V2DF: return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df, exp, target, false); @@ -13640,6 +13722,14 @@ rs6000_init_builtins (void) opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node); opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4); + /* We use V1TI mode as a special container to hold __int128_t items that + must live in VSX registers. */ + if (intTI_type_node) + { + V1TI_type_node = build_vector_type (intTI_type_node, 1); + unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1); + } + /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...' types, especially in C++ land. Similarly, 'vector pixel' is distinct from 'vector unsigned short'. 
*/ @@ -13662,6 +13752,8 @@ rs6000_init_builtins (void) uintSI_type_internal_node = unsigned_intSI_type_node; intDI_type_internal_node = intDI_type_node; uintDI_type_internal_node = unsigned_intDI_type_node; + intTI_type_internal_node = intTI_type_node; + uintTI_type_internal_node = unsigned_intTI_type_node; float_type_internal_node = float_type_node; double_type_internal_node = double_type_node; void_type_internal_node = void_type_node; @@ -13674,8 +13766,12 @@ rs6000_init_builtins (void) builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node; builtin_mode_to_type[DImode][0] = intDI_type_node; builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node; + builtin_mode_to_type[TImode][0] = intTI_type_node; + builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node; builtin_mode_to_type[SFmode][0] = float_type_node; builtin_mode_to_type[DFmode][0] = double_type_node; + builtin_mode_to_type[V1TImode][0] = V1TI_type_node; + builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node; builtin_mode_to_type[V2SImode][0] = V2SI_type_node; builtin_mode_to_type[V2SFmode][0] = V2SF_type_node; builtin_mode_to_type[V2DImode][0] = V2DI_type_node; @@ -13744,14 +13840,41 @@ rs6000_init_builtins (void) tdecl = add_builtin_type ("__vector double", V2DF_type_node); TYPE_NAME (V2DF_type_node) = tdecl; - tdecl = add_builtin_type ("__vector long", V2DI_type_node); - TYPE_NAME (V2DI_type_node) = tdecl; + if (TARGET_POWERPC64) + { + tdecl = add_builtin_type ("__vector long", V2DI_type_node); + TYPE_NAME (V2DI_type_node) = tdecl; + + tdecl = add_builtin_type ("__vector unsigned long", + unsigned_V2DI_type_node); + TYPE_NAME (unsigned_V2DI_type_node) = tdecl; - tdecl = add_builtin_type ("__vector unsigned long", unsigned_V2DI_type_node); - TYPE_NAME (unsigned_V2DI_type_node) = tdecl; + tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node); + TYPE_NAME (bool_V2DI_type_node) = tdecl; + } + else + { + tdecl = add_builtin_type ("__vector long long", V2DI_type_node); + 
TYPE_NAME (V2DI_type_node) = tdecl; - tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node); - TYPE_NAME (bool_V2DI_type_node) = tdecl; + tdecl = add_builtin_type ("__vector unsigned long long", + unsigned_V2DI_type_node); + TYPE_NAME (unsigned_V2DI_type_node) = tdecl; + + tdecl = add_builtin_type ("__vector __bool long long", + bool_V2DI_type_node); + TYPE_NAME (bool_V2DI_type_node) = tdecl; + } + + if (V1TI_type_node) + { + tdecl = add_builtin_type ("__vector __int128", V1TI_type_node); + TYPE_NAME (V1TI_type_node) = tdecl; + + tdecl = add_builtin_type ("__vector unsigned __int128", + unsigned_V1TI_type_node); + TYPE_NAME (unsigned_V1TI_type_node) = tdecl; + } /* Paired and SPE builtins are only available if you build a compiler with the appropriate options, so only create those builtins with the @@ -14560,6 +14683,34 @@ altivec_init_builtins (void) ftype = build_function_type_list (intDI_type_node, V2DI_type_node, integer_type_node, NULL_TREE); def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI); + + + if (V1TI_type_node) + { + tree v1ti_ftype_long_pcvoid + = build_function_type_list (V1TI_type_node, + long_integer_type_node, pcvoid_type_node, + NULL_TREE); + tree void_ftype_v1ti_long_pvoid + = build_function_type_list (void_type_node, + V1TI_type_node, long_integer_type_node, + pvoid_type_node, NULL_TREE); + def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid, + VSX_BUILTIN_LXVD2X_V1TI); + def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid, + VSX_BUILTIN_STXVD2X_V1TI); + ftype = build_function_type_list (V1TI_type_node, intTI_type_node, + NULL_TREE, NULL_TREE); + def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI); + ftype = build_function_type_list (V1TI_type_node, V1TI_type_node, + intTI_type_node, + integer_type_node, NULL_TREE); + def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI); + ftype = build_function_type_list (intTI_type_node, 
V1TI_type_node, + integer_type_node, NULL_TREE); + def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI); + } + } static void @@ -16772,7 +16923,8 @@ rs6000_preferred_reload_class (rtx x, enum reg_class rclass) if (GET_MODE_SIZE (mode) <= 8) return FLOAT_REGS; - if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)) + if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode) + || mode == V1TImode) return ALTIVEC_REGS; return rclass; @@ -17209,6 +17361,31 @@ rs6000_output_move_128bit (rtx operands[]) gcc_unreachable (); } +/* Validate a 128-bit move. */ +bool +rs6000_move_128bit_ok_p (rtx operands[]) +{ + enum machine_mode mode = GET_MODE (operands[0]); + return (gpc_reg_operand (operands[0], mode) + || gpc_reg_operand (operands[1], mode)); +} + +/* Return true if a 128-bit move needs to be split. */ +bool +rs6000_split_128bit_ok_p (rtx operands[]) +{ + if (!reload_completed) + return false; + + if (!gpr_or_gpr_p (operands[0], operands[1])) + return false; + + if (quad_load_store_p (operands[0], operands[1])) + return false; + + return true; +} + /* Given a comparison operation, return the bit number in CCR to test. We know this is a valid comparison. @@ -27796,6 +27973,9 @@ rs6000_handle_altivec_attribute (tree *node, unsigned_p = TYPE_UNSIGNED (type); switch (mode) { + case TImode: + result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node); + break; case DImode: result = (unsigned_p ? 
unsigned_V2DI_type_node : V2DI_type_node); break; diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 5e30879..a6afb6c 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -529,6 +529,7 @@ extern int rs6000_vector_align[]; #define TARGET_XSCVDPSPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR) #define TARGET_XSCVSPDPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR) +#define TARGET_VADDUQM (TARGET_P8_VECTOR && TARGET_POWERPC64) /* Byte/char syncs were added as phased in for ISA 2.06B, but are not present in power7, so conditionalize them on p8 features. TImode syncs need quad @@ -1194,7 +1195,7 @@ enum data_align { align_abi, align_opt, align_both }; #define ALTIVEC_OR_VSX_VECTOR_MODE(MODE) \ (ALTIVEC_VECTOR_MODE (MODE) || VSX_VECTOR_MODE (MODE) \ - || (MODE) == V2DImode) + || (MODE) == V2DImode || (MODE) == V1TImode) #define SPE_VECTOR_MODE(MODE) \ ((MODE) == V4HImode \ @@ -2577,6 +2578,7 @@ enum rs6000_builtin_type_index RS6000_BTI_opaque_p_V2SI, RS6000_BTI_opaque_V4SI, RS6000_BTI_V16QI, + RS6000_BTI_V1TI, RS6000_BTI_V2SI, RS6000_BTI_V2SF, RS6000_BTI_V2DI, @@ -2586,6 +2588,7 @@ enum rs6000_builtin_type_index RS6000_BTI_V4SF, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V16QI, + RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, @@ -2611,6 +2614,8 @@ enum rs6000_builtin_type_index RS6000_BTI_UINTSI, /* unsigned_intSI_type_node */ RS6000_BTI_INTDI, /* intDI_type_node */ RS6000_BTI_UINTDI, /* unsigned_intDI_type_node */ + RS6000_BTI_INTTI, /* intTI_type_node */ + RS6000_BTI_UINTTI, /* unsigned_intTI_type_node */ RS6000_BTI_float, /* float_type_node */ RS6000_BTI_double, /* double_type_node */ RS6000_BTI_void, /* void_type_node */ @@ -2623,6 +2628,7 @@ enum rs6000_builtin_type_index #define opaque_p_V2SI_type_node (rs6000_builtin_types[RS6000_BTI_opaque_p_V2SI]) #define opaque_V4SI_type_node (rs6000_builtin_types[RS6000_BTI_opaque_V4SI]) #define V16QI_type_node 
(rs6000_builtin_types[RS6000_BTI_V16QI]) +#define V1TI_type_node (rs6000_builtin_types[RS6000_BTI_V1TI]) #define V2DI_type_node (rs6000_builtin_types[RS6000_BTI_V2DI]) #define V2DF_type_node (rs6000_builtin_types[RS6000_BTI_V2DF]) #define V2SI_type_node (rs6000_builtin_types[RS6000_BTI_V2SI]) @@ -2632,6 +2638,7 @@ enum rs6000_builtin_type_index #define V4SF_type_node (rs6000_builtin_types[RS6000_BTI_V4SF]) #define V8HI_type_node (rs6000_builtin_types[RS6000_BTI_V8HI]) #define unsigned_V16QI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V16QI]) +#define unsigned_V1TI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V1TI]) #define unsigned_V8HI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V8HI]) #define unsigned_V4SI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V4SI]) #define unsigned_V2DI_type_node (rs6000_builtin_types[RS6000_BTI_unsigned_V2DI]) @@ -2658,6 +2665,8 @@ enum rs6000_builtin_type_index #define uintSI_type_internal_node (rs6000_builtin_types[RS6000_BTI_UINTSI]) #define intDI_type_internal_node (rs6000_builtin_types[RS6000_BTI_INTDI]) #define uintDI_type_internal_node (rs6000_builtin_types[RS6000_BTI_UINTDI]) +#define intTI_type_internal_node (rs6000_builtin_types[RS6000_BTI_INTTI]) +#define uintTI_type_internal_node (rs6000_builtin_types[RS6000_BTI_UINTTI]) #define float_type_internal_node (rs6000_builtin_types[RS6000_BTI_float]) #define double_type_internal_node (rs6000_builtin_types[RS6000_BTI_double]) #define void_type_internal_node (rs6000_builtin_types[RS6000_BTI_void]) diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index b0d44c9..4bab959 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -288,7 +288,8 @@ (V4SI "") (V4SF "") (V2DI "") - (V2DF "")]) + (V2DF "") + (V1TI "")]) ; Whether a floating point move is ok, don't allow SD without hardware FP (define_mode_attr fmove_ok [(SF "") @@ -412,7 +413,8 @@ (V4SI "TARGET_ALTIVEC") (V4SF "TARGET_ALTIVEC") (V2DI "TARGET_ALTIVEC") - 
(V2DF "TARGET_ALTIVEC")]) + (V2DF "TARGET_ALTIVEC") + (V1TI "TARGET_ALTIVEC")]) ;; For the GPRs we use 3 constraints for register outputs, two that are the ;; same as the output register, and a third where the output register is an @@ -428,7 +430,8 @@ (V4SI "wa,v,&?r,?r,?r") (V4SF "wa,v,&?r,?r,?r") (V2DI "wa,v,&?r,?r,?r") - (V2DF "wa,v,&?r,?r,?r")]) + (V2DF "wa,v,&?r,?r,?r") + (V1TI "wa,v,&?r,?r,?r")]) ;; Mode attribute for boolean operation register constraints for operand1 (define_mode_attr BOOL_REGS_OP1 [(TI "r,0,r,wa,v") @@ -438,7 +441,8 @@ (V4SI "wa,v,r,0,r") (V4SF "wa,v,r,0,r") (V2DI "wa,v,r,0,r") - (V2DF "wa,v,r,0,r")]) + (V2DF "wa,v,r,0,r") + (V1TI "wa,v,r,0,r")]) ;; Mode attribute for boolean operation register constraints for operand2 (define_mode_attr BOOL_REGS_OP2 [(TI "r,r,0,wa,v") @@ -448,7 +452,8 @@ (V4SI "wa,v,r,r,0") (V4SF "wa,v,r,r,0") (V2DI "wa,v,r,r,0") - (V2DF "wa,v,r,r,0")]) + (V2DF "wa,v,r,r,0") + (V1TI "wa,v,r,r,0")]) ;; Mode attribute for boolean operation register constraints for operand1 ;; for one_cmpl. To simplify things, we repeat the constraint where 0 @@ -460,7 +465,8 @@ (V4SI "wa,v,r,0,0") (V4SF "wa,v,r,0,0") (V2DI "wa,v,r,0,0") - (V2DF "wa,v,r,0,0")]) + (V2DF "wa,v,r,0,0") + (V1TI "wa,v,r,0,0")]) ;; Mode attribute for the clobber of CC0 for AND expansion. ;; For the 128-bit types, we never do AND immediate, but we need to @@ -472,7 +478,8 @@ (V4SI "X,X,X,X,X") (V4SF "X,X,X,X,X") (V2DI "X,X,X,X,X") - (V2DF "X,X,X,X,X")]) + (V2DF "X,X,X,X,X") + (V1TI "X,X,X,X,X")]) ;; Start with fixed-point load and store insns. 
Here we put only the more diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index 13cb20e..edbb831 100644 --- a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -36,13 +36,13 @@ (define_mode_iterator VEC_K [V16QI V8HI V4SI V4SF]) ;; Vector logical modes -(define_mode_iterator VEC_L [V16QI V8HI V4SI V2DI V4SF V2DF TI]) +(define_mode_iterator VEC_L [V16QI V8HI V4SI V2DI V4SF V2DF V1TI TI]) ;; Vector modes for moves. Don't do TImode here. -(define_mode_iterator VEC_M [V16QI V8HI V4SI V2DI V4SF V2DF]) +(define_mode_iterator VEC_M [V16QI V8HI V4SI V2DI V4SF V2DF V1TI]) ;; Vector modes for types that don't need a realignment under VSX -(define_mode_iterator VEC_N [V4SI V4SF V2DI V2DF]) +(define_mode_iterator VEC_N [V4SI V4SF V2DI V2DF V1TI]) ;; Vector comparison modes (define_mode_iterator VEC_C [V16QI V8HI V4SI V2DI V4SF V2DF]) @@ -54,7 +54,8 @@ (define_mode_iterator VEC_64 [V2DI V2DF]) ;; Vector reload iterator -(define_mode_iterator VEC_R [V16QI V8HI V4SI V2DI V4SF V2DF SF SD SI DF DD DI TI]) +(define_mode_iterator VEC_R [V16QI V8HI V4SI V2DI V4SF V2DF V1TI + SF SD SI DF DD DI TI]) ;; Base type from vector mode (define_mode_attr VEC_base [(V16QI "QI") @@ -63,6 +64,7 @@ (V2DI "DI") (V4SF "SF") (V2DF "DF") + (V1TI "TI") (TI "TI")]) ;; Same size integer type for floating point data @@ -107,7 +109,6 @@ } if (!BYTES_BIG_ENDIAN && VECTOR_MEM_VSX_P (<MODE>mode) - && <MODE>mode != TImode && !gpr_or_gpr_p (operands[0], operands[1]) && (memory_operand (operands[0], <MODE>mode) ^ memory_operand (operands[1], <MODE>mode))) diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index d7450a4..93c8c3b 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -34,11 +34,11 @@ (define_mode_iterator VSX_F [V4SF V2DF]) ;; Iterator for logical types supported by VSX -(define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF TI]) +(define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF V1TI TI]) ;; Iterator for memory 
move. Handle TImode specially to allow ;; it to use gprs as well as vsx registers. -(define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF]) +(define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF V1TI]) (define_mode_iterator VSX_M2 [V16QI V8HI @@ -46,6 +46,7 @@ V2DI V4SF V2DF + V1TI (TI "TARGET_VSX_TIMODE")]) ;; Map into the appropriate load/store name based on the type @@ -56,6 +57,7 @@ (V2DF "vd2") (V2DI "vd2") (DF "d") + (V1TI "vd2") (TI "vd2")]) ;; Map into the appropriate suffix based on the type @@ -67,6 +69,7 @@ (V2DI "dp") (DF "dp") (SF "sp") + (V1TI "dp") (TI "dp")]) ;; Map the register class used @@ -78,6 +81,7 @@ (V2DF "wd") (DF "ws") (SF "d") + (V1TI "v") (TI "wt")]) ;; Map the register class used for float<->int conversions @@ -123,6 +127,7 @@ (V4SF "v") (V2DI "v") (V2DF "v") + (V1TI "v") (DF "s")]) ;; Appropriate type for add ops (and other simple FP ops) @@ -180,7 +185,8 @@ (V2DF "vecdouble")]) ;; Map the scalar mode for a vector type -(define_mode_attr VS_scalar [(V2DF "DF") +(define_mode_attr VS_scalar [(V1TI "TI") + (V2DF "DF") (V2DI "DI") (V4SF "SF") (V4SI "SI") @@ -191,7 +197,8 @@ (define_mode_attr VS_double [(V4SI "V8SI") (V4SF "V8SF") (V2DI "V4DI") - (V2DF "V4DF")]) + (V2DF "V4DF") + (V1TI "V2TI")]) ;; Constants for creating unspecs (define_c_enum "unspec" @@ -1489,6 +1496,21 @@ "stxvd2x %x1,%y0" [(set_attr "type" "vecstore")]) +;; Convert a TImode value into V1TImode +(define_expand "vsx_set_v1ti" + [(match_operand:V1TI 0 "nonimmediate_operand" "") + (match_operand:V1TI 1 "nonimmediate_operand" "") + (match_operand:TI 2 "input_operand" "") + (match_operand:QI 3 "u5bit_cint_operand" "")] + "VECTOR_MEM_VSX_P (V1TImode)" +{ + if (operands[3] != const0_rtx) + gcc_unreachable (); + + emit_move_insn (operands[0], gen_lowpart (V1TImode, operands[1])); + DONE; +}) + ;; Set the element of a V2DI/VD2F mode (define_insn "vsx_set_<mode>" [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?wa") diff --git a/gcc/doc/extend.texi 
b/gcc/doc/extend.texi index a969fb4..986cc94 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -15127,6 +15127,51 @@ vector long long vec_vupklsw (vector int); vector unsigned long long vec_vupklsw (vector int); @end smallexample +If the ISA 2.07 additions to the vector/scalar (power8-vector) +instruction set is available, the following additional functions are +available for 64-bit targets. New vector types +(@var{vector __int128_t} and @var{vector __uint128_t}) are available +to hold the @var{__int128_t} and @var{__uint128_t} types to use these +builtins. + +The normal vector extract, and set operations work on +@var{vector __int128_t} and @var{vector __uint128_t} types, +but the index value must be 0. + +@smallexample +vector __int128_t vec_vaddcuq (vector __int128_t, vector __int128_t); +vector __uint128_t vec_vaddcuq (vector __uint128_t, vector __uint128_t); + +vector __int128_t vec_vadduqm (vector __int128_t, vector __int128_t); +vector __uint128_t vec_vadduqm (vector __uint128_t, vector __uint128_t); + +vector __int128_t vec_vaddecuq (vector __int128_t, vector __int128_t, + vector __int128_t); +vector __uint128_t vec_vaddecuq (vector __uint128_t, vector __uint128_t, + vector __uint128_t); + +vector __int128_t vec_vaddeuqm (vector __int128_t, vector __int128_t, + vector __int128_t); +vector __uint128_t vec_vaddeuqm (vector __uint128_t, vector __uint128_t, + vector __uint128_t); + +vector __int128_t vec_vsubecuq (vector __int128_t, vector __int128_t, + vector __int128_t); +vector __uint128_t vec_vsubecuq (vector __uint128_t, vector __uint128_t, + vector __uint128_t); + +vector __int128_t vec_vsubeuqm (vector __int128_t, vector __int128_t, + vector __int128_t); +vector __uint128_t vec_vsubeuqm (vector __uint128_t, vector __uint128_t, + vector __uint128_t); + +vector __int128_t vec_vsubcuq (vector __int128_t, vector __int128_t); +vector __uint128_t vec_vsubcuq (vector __uint128_t, vector __uint128_t); + +__int128_t vec_vsubuqm (__int128_t, 
__int128_t); +__uint128_t vec_vsubuqm (__uint128_t, __uint128_t); +@end smallexample + If the cryptographic instructions are enabled (@option{-mcrypto} or @option{-mcpu=power8}), the following builtins are enabled. diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6fa6c67..3891070 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,16 @@ +2014-03-12 Michael Meissner <meissner@linux.vnet.ibm.com> + + * gcc.target/powerpc/p8vector-int128-1.c: New test to test ISA + 2.07 128-bit arithmetic. + * gcc.target/powerpc/p8vector-int128-2.c: Likewise. + + * gcc.target/powerpc/timode_off.c: Restrict cpu type to power5, + because when TImode is allowed in VSX registers, the allowable + address modes for TImode are just a single indirect address in + order for the value to be loaded and stored in either GPR or VSX + registers. This affects the generated code, and it would cause + this test to fail when such an option is used. + 2014-03-12 Marcus Shawcroft <marcus.shawcroft@arm.com> * lib/profopt.exp (profopt-execute): Use $testcase in diff --git a/gcc/testsuite/gcc.target/powerpc/p8vector-int128-1.c b/gcc/testsuite/gcc.target/powerpc/p8vector-int128-1.c new file mode 100644 index 0000000..86bde32 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/p8vector-int128-1.c @@ -0,0 +1,85 @@ +/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-mcpu=power8 -O3 -mvsx-timode" } */ + +#include <altivec.h> + +#ifndef TYPE +#define TYPE vector __int128_t +#endif + +TYPE +do_addcuq (TYPE p, TYPE q) +{ + return __builtin_vec_vaddcuq (p, q); +} + +TYPE +do_adduqm (TYPE p, TYPE q) +{ + return __builtin_vec_add (p, q); +} + +TYPE +do_addeuqm (TYPE p, TYPE q, TYPE r) +{ + return __builtin_vec_vaddeuqm (p, q, r); +} + +TYPE +do_addecuq (TYPE p, TYPE q, TYPE r) +{ + return __builtin_vec_vaddecuq (p, q, r); +} + 
+TYPE +do_subeuqm (TYPE p, TYPE q, TYPE r) +{ + return __builtin_vec_vsubeuqm (p, q, r); +} + +TYPE +do_subecuq (TYPE p, TYPE q, TYPE r) +{ + return __builtin_vec_vsubecuq (p, q, r); +} + +TYPE +do_subcuq (TYPE p, TYPE q) +{ + return __builtin_vec_vsubcuq (p, q); +} + +TYPE +do_subuqm (TYPE p, TYPE q) +{ + return __builtin_vec_vsubuqm (p, q); +} + +TYPE +do_zero (void) +{ + return (TYPE) { 0 }; +} + +TYPE +do_minus_one (void) +{ + return (TYPE) { -1 }; +} + +/* { dg-final { scan-assembler "vaddcuq" } } */ +/* { dg-final { scan-assembler "vadduqm" } } */ +/* { dg-final { scan-assembler "vaddecuq" } } */ +/* { dg-final { scan-assembler "vaddeuqm" } } */ +/* { dg-final { scan-assembler "vsubecuq" } } */ +/* { dg-final { scan-assembler "vsubeuqm" } } */ +/* { dg-final { scan-assembler "vsubcuq" } } */ +/* { dg-final { scan-assembler "vsubuqm" } } */ +/* { dg-final { scan-assembler-not "mtvsrd" } } */ +/* { dg-final { scan-assembler-not "mfvsrd" } } */ +/* { dg-final { scan-assembler-not "ori 2,2,0" } } */ +/* { dg-final { scan-assembler-not "xxpermdi" } } */ +/* { dg-final { scan-assembler-not "stxvd2x" } } */ +/* { dg-final { scan-assembler-not "stxvw4x" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/p8vector-int128-2.c b/gcc/testsuite/gcc.target/powerpc/p8vector-int128-2.c new file mode 100644 index 0000000..1064894 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/p8vector-int128-2.c @@ -0,0 +1,177 @@ +/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */ +/* { dg-require-effective-target p8vector_hw } */ +/* { dg-options "-mcpu=power8 -O2" } */ + +#include <stddef.h> +#include <stdlib.h> +#include <altivec.h> + +#ifdef DEBUG +#include <stdio.h> +#define UNUSED + +#ifdef __LITTLE_ENDIAN__ +#define HI_WORD 1 +#define LO_WORD 0 +#else +#define HI_WORD 0 +#define LO_WORD 1 +#endif + +#else +#define UNUSED 
__attribute__((__unused__)) +#endif + +#ifndef S_TYPE +#define S_TYPE __uint128_t +#endif + +#ifndef V_TYPE +#define V_TYPE vector S_TYPE +#endif + +static int compare (S_TYPE, V_TYPE, const char *, const char *) + __attribute__((__noinline__)); + +static int +compare (S_TYPE scalar, + V_TYPE vect, + const char *nl UNUSED, + const char *which UNUSED) +{ + unsigned long scalar_lo = (unsigned long) scalar; + unsigned long scalar_hi = (unsigned long) (scalar >> 64); + unsigned long vect_lo; + unsigned long vect_hi; + vector long long tmp; + int ret; + + __asm__ ("mfvsrd %0,%x3\n\t" + "xxpermdi %x2,%x3,%x3,3\n\t" + "mfvsrd %1,%x2" + : "=r" (vect_hi), + "=r" (vect_lo), + "=wa" (tmp) + : "wa" (vect)); + + ret = (scalar_lo != vect_lo) || (scalar_hi != vect_hi); + +#ifdef DEBUG + printf ("%s%s: 0x%.16lx %.16lx %s 0x%.16lx %.16lx\n", + nl, which, + scalar_hi, scalar_lo, + (ret) ? "!=" : "==", + vect_hi, vect_lo); + + fflush (stdout); +#endif + + return ret; +} + +static void convert_via_mem (V_TYPE *, S_TYPE *) + __attribute__((__noinline__)); + +static void +convert_via_mem (V_TYPE *v, S_TYPE *s) +{ + *v = (V_TYPE) { *s }; + __asm__ volatile ("nop" + : "+m" (*s), "+m" (*v) + : + : "memory"); + +} + + +/* Check if vadduqm returns the same values as normal 128-bit add. */ + +/* Values to add together. 
*/ +const static struct { + unsigned long hi_1; + unsigned long lo_1; + unsigned long hi_2; + unsigned long lo_2; +} values[] = { + { 0x0000000000000000UL, 0xfffffffffffffffeUL, + 0x0000000000000000UL, 0x0000000000000002UL }, + { 0x0000000000000000UL, 0x0000000000000002UL, + 0x0000000000000000UL, 0xfffffffffffffffeUL }, + { 0xffffffffffffffffUL, 0xfffffffffffffffeUL, + 0x0000000000000000UL, 0x0000000000000002UL }, + { 0xfffffffffffffff2UL, 0xffffffffffffffffUL, + 0x0000000000000002UL, 0x0000000000000000UL }, + { 0x7fffffffffffffffUL, 0xfffffffffffffffeUL, + 0x0000000000000000UL, 0x0000000000000002UL }, + { 0x7ffffffffffffff2UL, 0xffffffffffffffffUL, + 0x0000000000000002UL, 0x0000000000000000UL }, +}; + +int +main (void) +{ + int reg_errors = 0; + int mem_errors = 0; + size_t i; + const char *nl = ""; + + for (i = 0; i < sizeof (values) / sizeof (values[0]); i++) + { + S_TYPE s_reg_res, s_reg_in1, s_reg_in2, s_mem_res, s_mem_in1, s_mem_in2; + V_TYPE v_reg_res, v_reg_in1, v_reg_in2, v_mem_res, v_mem_in1, v_mem_in2; + + s_reg_in1 = ((((S_TYPE)values[i].hi_1 << 64)) + ((S_TYPE)values[i].lo_1)); + reg_errors += compare (s_reg_in1, (V_TYPE) { s_reg_in1 }, nl, "reg, in1"); + + s_reg_in2 = ((((S_TYPE)values[i].hi_2 << 64)) + ((S_TYPE)values[i].lo_2)); + reg_errors += compare (s_reg_in2, (V_TYPE) { s_reg_in2 }, "", "reg, in2"); + + s_reg_res = s_reg_in1 + s_reg_in2; + + v_reg_in1 = (V_TYPE) { s_reg_in1 }; + v_reg_in2 = (V_TYPE) { s_reg_in2 }; + v_reg_res = vec_vadduqm (v_reg_in1, v_reg_in2); + reg_errors += compare (s_reg_res, v_reg_res, "", "reg, res"); + + s_mem_in1 = s_reg_in1; + convert_via_mem (&v_mem_in1, &s_mem_in1); + mem_errors += compare (s_mem_in1, (V_TYPE) { s_mem_in1 }, "\n", "mem, in1"); + + s_mem_in2 = s_reg_in2; + convert_via_mem (&v_mem_in2, &s_mem_in2); + mem_errors += compare (s_mem_in2, (V_TYPE) { s_mem_in2 }, "", "mem, in2"); + + s_mem_res = s_mem_in1 + s_mem_in2; + v_mem_res = vec_vadduqm (v_mem_in1, v_mem_in2); + mem_errors += compare (s_mem_res, 
v_mem_res, "", "mem, res"); + + nl = "\n"; + } + +#ifdef DEBUG + putchar ('\n'); + + if (!reg_errors) + fputs ("no errors found on register operations\n", stdout); + else + printf ("%d error%s found on register operations\n", + reg_errors, + (reg_errors == 1) ? "" : "s"); + + if (!mem_errors) + fputs ("no errors found on memory operations\n", stdout); + else + printf ("%d error%s found on memory operations\n", + mem_errors, + (mem_errors == 1) ? "" : "s"); + + fflush (stdout); +#endif + + if ((reg_errors + mem_errors) != 0) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/powerpc/timode_off.c b/gcc/testsuite/gcc.target/powerpc/timode_off.c index a8ca68e..c169e50 100644 --- a/gcc/testsuite/gcc.target/powerpc/timode_off.c +++ b/gcc/testsuite/gcc.target/powerpc/timode_off.c @@ -1,5 +1,5 @@ /* { dg-do assemble { target { lp64 } } } */ -/* { dg-options "-O2 -fno-align-functions -mtraceback=no -save-temps" } */ +/* { dg-options "-O2 -fno-align-functions -mtraceback=no -save-temps -mcpu=power5" } */ typedef int TImode __attribute__ ((mode (TI))); @@ -46,6 +46,12 @@ TImode r19 (void *x) { return *(TImode *) (x + 32749); } TImode r20 (void *x) { return *(TImode *) (x + 32748); } /* test should really be == 616, see pr54110 */ +/* When TImode is allowed in VSX registers, the allowable address modes for + TImode are just a single indirect address in order for the value to be loaded + and stored in either GPR or VSX registers. This affects the generated code, + and it would cause this test to fail when such an option is used. Fall + back to power5 to test the code. */ + /* { dg-final { object-size text <= 700 } } */ /* { dg-final { scan-assembler-not "(st|l)fd" } } */ /* { dg-final { cleanup-saved-temps "timode_off" } } */ |