author    | Martin Liska <mliska@suse.cz> | 2022-03-06 16:28:20 +0100
committer | Martin Liska <mliska@suse.cz> | 2022-03-06 16:28:20 +0100
commit    | 1a576b7ac609d4225a3833e3a9c84a9439d0e45e (patch)
tree      | 7172f94eacd2615a13f0579119f3cd5f42fdcd8e /gcc/config
parent    | f015156d6662c3ce367c5834eb109a0a77b56f39 (diff)
parent    | 98cd717fca9f21625b9c79c9231c2e909d1d93a3 (diff)
Merge branch 'master' into devel/sphinx
Diffstat (limited to 'gcc/config')
100 files changed, 10195 insertions, 7789 deletions
diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index 976bf9b..8f39922 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -47,10 +47,10 @@ ADJUST_FLOAT_FORMAT (HF, &ieee_half_format);
 /* Vector modes. */
-VECTOR_BOOL_MODE (VNx16BI, 16, 2);
-VECTOR_BOOL_MODE (VNx8BI, 8, 2);
-VECTOR_BOOL_MODE (VNx4BI, 4, 2);
-VECTOR_BOOL_MODE (VNx2BI, 2, 2);
+VECTOR_BOOL_MODE (VNx16BI, 16, BI, 2);
+VECTOR_BOOL_MODE (VNx8BI, 8, BI, 2);
+VECTOR_BOOL_MODE (VNx4BI, 4, BI, 2);
+VECTOR_BOOL_MODE (VNx2BI, 2, BI, 2);
 ADJUST_NUNITS (VNx16BI, aarch64_sve_vg * 8);
 ADJUST_NUNITS (VNx8BI, aarch64_sve_vg * 4);
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 2636853..d0e78d6 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -925,8 +925,6 @@ bool aarch64_split_128bit_move_p (rtx, rtx);
 bool aarch64_mov128_immediate (rtx);
-void aarch64_split_simd_combine (rtx, rtx, rtx);
-
 void aarch64_split_simd_move (rtx, rtx);
 /* Check for a legitimate floating point constant for FMOV. */
@@ -941,6 +939,7 @@ bool aarch64_legitimate_address_p (machine_mode, rtx, bool,
 aarch64_addr_query_type = ADDR_QUERY_M);
 machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx);
 rtx aarch64_gen_compare_reg (RTX_CODE, rtx, rtx);
+bool aarch64_maxmin_plus_const (rtx_code, rtx *, bool);
 rtx aarch64_load_tp (rtx);
 void aarch64_expand_compare_and_swap (rtx op[]);
@@ -1000,6 +999,7 @@ void aarch64_atomic_assign_expand_fenv (tree *, tree *, tree *);
 int aarch64_ccmp_mode_to_code (machine_mode mode);
 bool extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset);
+bool aarch64_mergeable_load_pair_p (machine_mode, rtx, rtx);
 bool aarch64_operands_ok_for_ldpstp (rtx *, bool, machine_mode);
 bool aarch64_operands_adjust_ok_for_ldpstp (rtx *, bool, machine_mode);
 void aarch64_swap_ldrstr_operands (rtx *, bool);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 71c429f..1873342 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -19,8 +19,8 @@ ;; <http://www.gnu.org/licenses/>.
(define_expand "mov<mode>" - [(set (match_operand:VALL_F16MOV 0 "nonimmediate_operand") - (match_operand:VALL_F16MOV 1 "general_operand"))] + [(set (match_operand:VALL_F16 0 "nonimmediate_operand") + (match_operand:VALL_F16 1 "general_operand"))] "TARGET_SIMD" " /* Force the operand into a register if it is not an @@ -50,8 +50,8 @@ ) (define_expand "movmisalign<mode>" - [(set (match_operand:VALL 0 "nonimmediate_operand") - (match_operand:VALL 1 "general_operand"))] + [(set (match_operand:VALL_F16 0 "nonimmediate_operand") + (match_operand:VALL_F16 1 "general_operand"))] "TARGET_SIMD && !STRICT_ALIGNMENT" { /* This pattern is not permitted to fail during expansion: if both arguments @@ -272,7 +272,7 @@ (define_expand "@aarch64_split_simd_mov<mode>" [(set (match_operand:VQMOV 0) - (match_operand:VQMOV 1))] + (match_operand:VQMOV 1))] "TARGET_SIMD" { rtx dst = operands[0]; @@ -280,23 +280,22 @@ if (GP_REGNUM_P (REGNO (src))) { - rtx src_low_part = gen_lowpart (<VHALF>mode, src); - rtx src_high_part = gen_highpart (<VHALF>mode, src); + rtx src_low_part = gen_lowpart (<VHALF>mode, src); + rtx src_high_part = gen_highpart (<VHALF>mode, src); + rtx dst_low_part = gen_lowpart (<VHALF>mode, dst); - emit_insn - (gen_move_lo_quad_<mode> (dst, src_low_part)); - emit_insn - (gen_move_hi_quad_<mode> (dst, src_high_part)); + emit_move_insn (dst_low_part, src_low_part); + emit_insn (gen_aarch64_combine<Vhalf> (dst, dst_low_part, + src_high_part)); } - else { - rtx dst_low_part = gen_lowpart (<VHALF>mode, dst); - rtx dst_high_part = gen_highpart (<VHALF>mode, dst); + rtx dst_low_part = gen_lowpart (<VHALF>mode, dst); + rtx dst_high_part = gen_highpart (<VHALF>mode, dst); rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false); rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); - emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo)); - emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi)); + emit_insn (gen_aarch64_get_half<mode> (dst_low_part, src, lo)); + emit_insn (gen_aarch64_get_half<mode> (dst_high_part, src, hi)); } DONE; } @@ -556,17 +555,17 @@ ;; remainder. Because of this, expand early. 
(define_expand "cml<fcmac1><conj_op><mode>4" [(set (match_operand:VHSDF 0 "register_operand") - (plus:VHSDF (match_operand:VHSDF 1 "register_operand") - (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand") - (match_operand:VHSDF 3 "register_operand")] - FCMLA_OP)))] + (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand") + (match_operand:VHSDF 2 "register_operand")] + FCMLA_OP) + (match_operand:VHSDF 3 "register_operand")))] "TARGET_COMPLEX && !BYTES_BIG_ENDIAN" { rtx tmp = gen_reg_rtx (<MODE>mode); - emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[1], - operands[3], operands[2])); + emit_insn (gen_aarch64_fcmla<rotsplit1><mode> (tmp, operands[3], + operands[2], operands[1])); emit_insn (gen_aarch64_fcmla<rotsplit2><mode> (operands[0], tmp, - operands[3], operands[2])); + operands[2], operands[1])); DONE; }) @@ -1039,7 +1038,7 @@ [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w") (vec_merge:VALL_F16 (vec_duplicate:VALL_F16 - (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv")) + (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand" "w,?r,Utv")) (match_operand:VALL_F16 3 "register_operand" "0,0,0") (match_operand:SI 2 "immediate_operand" "i,i,i")))] "TARGET_SIMD" @@ -1378,7 +1377,7 @@ (define_expand "vec_set<mode>" [(match_operand:VALL_F16 0 "register_operand") - (match_operand:<VEL> 1 "register_operand") + (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand") (match_operand:SI 2 "immediate_operand")] "TARGET_SIMD" { @@ -1580,102 +1579,6 @@ ;; What that means, is that the RTL descriptions of the below patterns ;; need to change depending on endianness. -;; Move to the low architectural bits of the register. -;; On little-endian this is { operand, zeroes } -;; On big-endian this is { zeroes, operand } - -(define_insn "move_lo_quad_internal_<mode>" - [(set (match_operand:VQMOV 0 "register_operand" "=w,w,w") - (vec_concat:VQMOV - (match_operand:<VHALF> 1 "register_operand" "w,r,r") - (match_operand:<VHALF> 2 "aarch64_simd_or_scalar_imm_zero")))] - "TARGET_SIMD && !BYTES_BIG_ENDIAN" - "@ - dup\\t%d0, %1.d[0] - fmov\\t%d0, %1 - dup\\t%d0, %1" - [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>") - (set_attr "length" "4") - (set_attr "arch" "simd,fp,simd")] -) - -(define_insn "move_lo_quad_internal_be_<mode>" - [(set (match_operand:VQMOV 0 "register_operand" "=w,w,w") - (vec_concat:VQMOV - (match_operand:<VHALF> 2 "aarch64_simd_or_scalar_imm_zero") - (match_operand:<VHALF> 1 "register_operand" "w,r,r")))] - "TARGET_SIMD && BYTES_BIG_ENDIAN" - "@ - dup\\t%d0, %1.d[0] - fmov\\t%d0, %1 - dup\\t%d0, %1" - [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>") - (set_attr "length" "4") - (set_attr "arch" "simd,fp,simd")] -) - -(define_expand "move_lo_quad_<mode>" - [(match_operand:VQMOV 0 "register_operand") - (match_operand:<VHALF> 1 "register_operand")] - "TARGET_SIMD" -{ - rtx zs = CONST0_RTX (<VHALF>mode); - if (BYTES_BIG_ENDIAN) - emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1], zs)); - else - emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1], zs)); - DONE; -} -) - -;; Move operand1 to the high architectural bits of the register, keeping -;; the low architectural bits of operand2. 
-;; For little-endian this is { operand2, operand1 } -;; For big-endian this is { operand1, operand2 } - -(define_insn "aarch64_simd_move_hi_quad_<mode>" - [(set (match_operand:VQMOV 0 "register_operand" "+w,w") - (vec_concat:VQMOV - (vec_select:<VHALF> - (match_dup 0) - (match_operand:VQMOV 2 "vect_par_cnst_lo_half" "")) - (match_operand:<VHALF> 1 "register_operand" "w,r")))] - "TARGET_SIMD && !BYTES_BIG_ENDIAN" - "@ - ins\\t%0.d[1], %1.d[0] - ins\\t%0.d[1], %1" - [(set_attr "type" "neon_ins")] -) - -(define_insn "aarch64_simd_move_hi_quad_be_<mode>" - [(set (match_operand:VQMOV 0 "register_operand" "+w,w") - (vec_concat:VQMOV - (match_operand:<VHALF> 1 "register_operand" "w,r") - (vec_select:<VHALF> - (match_dup 0) - (match_operand:VQMOV 2 "vect_par_cnst_lo_half" ""))))] - "TARGET_SIMD && BYTES_BIG_ENDIAN" - "@ - ins\\t%0.d[1], %1.d[0] - ins\\t%0.d[1], %1" - [(set_attr "type" "neon_ins")] -) - -(define_expand "move_hi_quad_<mode>" - [(match_operand:VQMOV 0 "register_operand") - (match_operand:<VHALF> 1 "register_operand")] - "TARGET_SIMD" -{ - rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false); - if (BYTES_BIG_ENDIAN) - emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0], - operands[1], p)); - else - emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0], - operands[1], p)); - DONE; -}) - ;; Narrowing operations. (define_insn "aarch64_xtn<mode>_insn_le" @@ -1776,16 +1679,12 @@ (define_expand "vec_pack_trunc_<mode>" [(match_operand:<VNARROWD> 0 "register_operand") - (match_operand:VDN 1 "register_operand") - (match_operand:VDN 2 "register_operand")] + (match_operand:VDN 1 "general_operand") + (match_operand:VDN 2 "general_operand")] "TARGET_SIMD" { rtx tempreg = gen_reg_rtx (<VDBL>mode); - int lo = BYTES_BIG_ENDIAN ? 2 : 1; - int hi = BYTES_BIG_ENDIAN ? 1 : 2; - - emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo])); - emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi])); + emit_insn (gen_aarch64_vec_concat<mode> (tempreg, operands[1], operands[2])); emit_insn (gen_trunc<Vdbl><Vnarrowd>2 (operands[0], tempreg)); DONE; }) @@ -3435,20 +3334,13 @@ (define_expand "vec_pack_trunc_df" [(set (match_operand:V2SF 0 "register_operand") - (vec_concat:V2SF - (float_truncate:SF - (match_operand:DF 1 "register_operand")) - (float_truncate:SF - (match_operand:DF 2 "register_operand")) - ))] + (vec_concat:V2SF + (float_truncate:SF (match_operand:DF 1 "general_operand")) + (float_truncate:SF (match_operand:DF 2 "general_operand"))))] "TARGET_SIMD" { rtx tmp = gen_reg_rtx (V2SFmode); - int lo = BYTES_BIG_ENDIAN ? 2 : 1; - int hi = BYTES_BIG_ENDIAN ? 
1 : 2; - - emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo])); - emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi])); + emit_insn (gen_aarch64_vec_concatdf (tmp, operands[1], operands[2])); emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp)); DONE; } @@ -4351,92 +4243,175 @@ (define_insn "load_pair_lanes<mode>" [(set (match_operand:<VDBL> 0 "register_operand" "=w") (vec_concat:<VDBL> - (match_operand:VDC 1 "memory_operand" "Utq") - (match_operand:VDC 2 "memory_operand" "m")))] - "TARGET_SIMD && !STRICT_ALIGNMENT - && rtx_equal_p (XEXP (operands[2], 0), - plus_constant (Pmode, - XEXP (operands[1], 0), - GET_MODE_SIZE (<MODE>mode)))" - "ldr\\t%q0, %1" - [(set_attr "type" "neon_load1_1reg_q")] + (match_operand:VDCSIF 1 "memory_operand" "Utq") + (match_operand:VDCSIF 2 "memory_operand" "m")))] + "TARGET_SIMD + && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2])" + "ldr\\t%<single_dtype>0, %1" + [(set_attr "type" "neon_load1_1reg<dblq>")] ) +;; This STP pattern is a partial duplicate of the general vec_concat patterns +;; below. The reason for having both of them is that the alternatives of +;; the later patterns do not have consistent register preferences: the STP +;; alternatives have no preference between GPRs and FPRs (and if anything, +;; the GPR form is more natural for scalar integers) whereas the other +;; alternatives *require* an FPR for operand 1 and prefer one for operand 2. +;; +;; Using "*" to hide the STP alternatives from the RA penalizes cases in +;; which the destination was always memory. On the other hand, expressing +;; the true preferences makes GPRs seem more palatable than they really are +;; for register destinations. +;; +;; Despite that, we do still want the general form to have STP alternatives, +;; in order to handle cases where a register destination is spilled. +;; +;; The best compromise therefore seemed to be to have a dedicated STP +;; pattern to catch cases in which the destination was always memory. +;; This dedicated pattern must come first. + (define_insn "store_pair_lanes<mode>" [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn") (vec_concat:<VDBL> - (match_operand:VDC 1 "register_operand" "w, r") - (match_operand:VDC 2 "register_operand" "w, r")))] + (match_operand:VDCSIF 1 "register_operand" "w, r") + (match_operand:VDCSIF 2 "register_operand" "w, r")))] "TARGET_SIMD" "@ - stp\\t%d1, %d2, %y0 - stp\\t%x1, %x2, %y0" + stp\t%<single_type>1, %<single_type>2, %y0 + stp\t%<single_wx>1, %<single_wx>2, %y0" [(set_attr "type" "neon_stp, store_16")] ) +;; Form a vector whose least significant half comes from operand 1 and whose +;; most significant half comes from operand 2. The register alternatives +;; tie the least significant half to the same register as the destination, +;; so that only the other half needs to be handled explicitly. For the +;; reasons given above, the STP alternatives use ? for constraints that +;; the register alternatives either don't accept or themselves disparage. 
+ +(define_insn "*aarch64_combine_internal<mode>" + [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, Umn, Umn") + (vec_concat:<VDBL> + (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, ?w, ?r") + (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, w, ?r")))] + "TARGET_SIMD + && !BYTES_BIG_ENDIAN + && (register_operand (operands[0], <VDBL>mode) + || register_operand (operands[2], <MODE>mode))" + "@ + ins\t%0.<single_type>[1], %2.<single_type>[0] + ins\t%0.<single_type>[1], %<single_wx>2 + ld1\t{%0.<single_type>}[1], %2 + stp\t%<single_type>1, %<single_type>2, %y0 + stp\t%<single_wx>1, %<single_wx>2, %y0" + [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, neon_load1_one_lane<dblq>, neon_stp, store_16")] +) + +(define_insn "*aarch64_combine_internal_be<mode>" + [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, Umn, Umn") + (vec_concat:<VDBL> + (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, ?w, ?r") + (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, ?w, ?r")))] + "TARGET_SIMD + && BYTES_BIG_ENDIAN + && (register_operand (operands[0], <VDBL>mode) + || register_operand (operands[2], <MODE>mode))" + "@ + ins\t%0.<single_type>[1], %2.<single_type>[0] + ins\t%0.<single_type>[1], %<single_wx>2 + ld1\t{%0.<single_type>}[1], %2 + stp\t%<single_type>2, %<single_type>1, %y0 + stp\t%<single_wx>2, %<single_wx>1, %y0" + [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, neon_load1_one_lane<dblq>, neon_stp, store_16")] +) + ;; In this insn, operand 1 should be low, and operand 2 the high part of the ;; dest vector. -(define_insn "@aarch64_combinez<mode>" +(define_insn "*aarch64_combinez<mode>" [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w") (vec_concat:<VDBL> - (match_operand:VDC 1 "general_operand" "w,?r,m") - (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))] + (match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m") + (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")))] "TARGET_SIMD && !BYTES_BIG_ENDIAN" "@ - mov\\t%0.8b, %1.8b - fmov\t%d0, %1 - ldr\\t%d0, %1" + fmov\\t%<single_type>0, %<single_type>1 + fmov\t%<single_type>0, %<single_wx>1 + ldr\\t%<single_type>0, %1" [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg") (set_attr "arch" "simd,fp,simd")] ) -(define_insn "@aarch64_combinez_be<mode>" +(define_insn "*aarch64_combinez_be<mode>" [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w") (vec_concat:<VDBL> - (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero") - (match_operand:VDC 1 "general_operand" "w,?r,m")))] + (match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero") + (match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m")))] "TARGET_SIMD && BYTES_BIG_ENDIAN" "@ - mov\\t%0.8b, %1.8b - fmov\t%d0, %1 - ldr\\t%d0, %1" + fmov\\t%<single_type>0, %<single_type>1 + fmov\t%<single_type>0, %<single_wx>1 + ldr\\t%<single_type>0, %1" [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg") (set_attr "arch" "simd,fp,simd")] ) -(define_expand "aarch64_combine<mode>" - [(match_operand:<VDBL> 0 "register_operand") - (match_operand:VDC 1 "register_operand") - (match_operand:VDC 2 "aarch64_simd_reg_or_zero")] +;; Form a vector whose first half (in array order) comes from operand 1 +;; and whose second half (in array order) comes from operand 2. +;; This operand order follows the RTL vec_concat operation. 
+(define_expand "@aarch64_vec_concat<mode>" + [(set (match_operand:<VDBL> 0 "register_operand") + (vec_concat:<VDBL> + (match_operand:VDCSIF 1 "general_operand") + (match_operand:VDCSIF 2 "general_operand")))] "TARGET_SIMD" { - if (operands[2] == CONST0_RTX (<MODE>mode)) + int lo = BYTES_BIG_ENDIAN ? 2 : 1; + int hi = BYTES_BIG_ENDIAN ? 1 : 2; + + if (MEM_P (operands[1]) + && MEM_P (operands[2]) + && aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2])) + /* Use load_pair_lanes<mode>. */ + ; + else if (operands[hi] == CONST0_RTX (<MODE>mode)) { - if (BYTES_BIG_ENDIAN) - emit_insn (gen_aarch64_combinez_be<mode> (operands[0], operands[1], - operands[2])); - else - emit_insn (gen_aarch64_combinez<mode> (operands[0], operands[1], - operands[2])); + /* Use *aarch64_combinez<mode>. */ + if (!nonimmediate_operand (operands[lo], <MODE>mode)) + operands[lo] = force_reg (<MODE>mode, operands[lo]); } else - aarch64_split_simd_combine (operands[0], operands[1], operands[2]); - DONE; -} -) + { + /* Use *aarch64_combine_general<mode>. */ + operands[lo] = force_reg (<MODE>mode, operands[lo]); + if (!aarch64_simd_nonimmediate_operand (operands[hi], <MODE>mode)) + { + if (MEM_P (operands[hi])) + { + rtx addr = force_reg (Pmode, XEXP (operands[hi], 0)); + operands[hi] = replace_equiv_address (operands[hi], addr); + } + else + operands[hi] = force_reg (<MODE>mode, operands[hi]); + } + } +}) -(define_expand "@aarch64_simd_combine<mode>" +;; Form a vector whose least significant half comes from operand 1 and whose +;; most significant half comes from operand 2. This operand order follows +;; arm_neon.h vcombine* intrinsics. +(define_expand "aarch64_combine<mode>" [(match_operand:<VDBL> 0 "register_operand") - (match_operand:VDC 1 "register_operand") - (match_operand:VDC 2 "register_operand")] + (match_operand:VDC 1 "general_operand") + (match_operand:VDC 2 "general_operand")] "TARGET_SIMD" - { - emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1])); - emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2])); - DONE; - } -[(set_attr "type" "multiple")] +{ + if (BYTES_BIG_ENDIAN) + std::swap (operands[1], operands[2]); + emit_insn (gen_aarch64_vec_concat<mode> (operands[0], operands[1], + operands[2])); + DONE; +} ) ;; <su><addsub>l<q>. diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index bd22fe5..bd60e65 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -7278,11 +7278,11 @@ rtx tmp = gen_reg_rtx (<MODE>mode); emit_insn (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, operands[4], - operands[3], operands[2], - operands[1], operands[5])); + operands[2], operands[1], + operands[3], operands[5])); emit_insn (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], operands[4], - operands[3], operands[2], + operands[2], operands[1], tmp, operands[5])); DONE; }) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 296145e..dbeaaf4 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -80,6 +80,7 @@ #include "fractional-cost.h" #include "rtlanal.h" #include "tree-dfa.h" +#include "asan.h" /* This file should be included last. 
*/ #include "target-def.h" @@ -3781,6 +3782,110 @@ aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y, return aarch64_gen_compare_reg (code, x, y); } +/* Consider the operation: + + OPERANDS[0] = CODE (OPERANDS[1], OPERANDS[2]) + OPERANDS[3] + + where: + + - CODE is [SU]MAX or [SU]MIN + - OPERANDS[2] and OPERANDS[3] are constant integers + - OPERANDS[3] is a positive or negative shifted 12-bit immediate + - all operands have mode MODE + + Decide whether it is possible to implement the operation using: + + SUBS <tmp>, OPERANDS[1], -OPERANDS[3] + or + ADDS <tmp>, OPERANDS[1], OPERANDS[3] + + followed by: + + <insn> OPERANDS[0], <tmp>, [wx]zr, <cond> + + where <insn> is one of CSEL, CSINV or CSINC. Return true if so. + If GENERATE_P is true, also update OPERANDS as follows: + + OPERANDS[4] = -OPERANDS[3] + OPERANDS[5] = the rtl condition representing <cond> + OPERANDS[6] = <tmp> + OPERANDS[7] = 0 for CSEL, -1 for CSINV or 1 for CSINC. */ +bool +aarch64_maxmin_plus_const (rtx_code code, rtx *operands, bool generate_p) +{ + signop sgn = (code == UMAX || code == UMIN ? UNSIGNED : SIGNED); + rtx dst = operands[0]; + rtx maxmin_op = operands[2]; + rtx add_op = operands[3]; + machine_mode mode = GET_MODE (dst); + + /* max (x, y) - z == (x >= y + 1 ? x : y) - z + == (x >= y ? x : y) - z + == (x > y ? x : y) - z + == (x > y - 1 ? x : y) - z + + min (x, y) - z == (x <= y - 1 ? x : y) - z + == (x <= y ? x : y) - z + == (x < y ? x : y) - z + == (x < y + 1 ? x : y) - z + + Check whether z is in { y - 1, y, y + 1 } and pick the form(s) for + which x is compared with z. Set DIFF to y - z. Thus the supported + combinations are as follows, with DIFF being the value after the ":": + + max (x, y) - z == x >= y + 1 ? x - (y + 1) : -1 [z == y + 1] + == x >= y ? x - y : 0 [z == y] + == x > y ? x - y : 0 [z == y] + == x > y - 1 ? x - (y - 1) : 1 [z == y - 1] + + min (x, y) - z == x <= y - 1 ? x - (y - 1) : 1 [z == y - 1] + == x <= y ? x - y : 0 [z == y] + == x < y ? x - y : 0 [z == y] + == x < y + 1 ? x - (y + 1) : -1 [z == y + 1]. */ + auto maxmin_val = rtx_mode_t (maxmin_op, mode); + auto add_val = rtx_mode_t (add_op, mode); + auto sub_val = wi::neg (add_val); + auto diff = wi::sub (maxmin_val, sub_val); + if (!(diff == 0 + || (diff == 1 && wi::gt_p (maxmin_val, sub_val, sgn)) + || (diff == -1 && wi::lt_p (maxmin_val, sub_val, sgn)))) + return false; + + if (!generate_p) + return true; + + rtx_code cmp; + switch (code) + { + case SMAX: + cmp = diff == 1 ? GT : GE; + break; + case UMAX: + cmp = diff == 1 ? GTU : GEU; + break; + case SMIN: + cmp = diff == -1 ? LT : LE; + break; + case UMIN: + cmp = diff == -1 ? LTU : LEU; + break; + default: + gcc_unreachable (); + } + rtx cc = gen_rtx_REG (CCmode, CC_REGNUM); + + operands[4] = immed_wide_int_const (sub_val, mode); + operands[5] = gen_rtx_fmt_ee (cmp, VOIDmode, cc, const0_rtx); + if (can_create_pseudo_p ()) + operands[6] = gen_reg_rtx (mode); + else + operands[6] = dst; + operands[7] = immed_wide_int_const (diff, mode); + + return true; +} + + /* Build the SYMBOL_REF for __tls_get_addr. */ static GTY(()) rtx tls_get_addr_libfunc; @@ -4239,23 +4344,6 @@ aarch64_split_128bit_move_p (rtx dst, rtx src) return true; } -/* Split a complex SIMD combine. 
*/ - -void -aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2) -{ - machine_mode src_mode = GET_MODE (src1); - machine_mode dst_mode = GET_MODE (dst); - - gcc_assert (VECTOR_MODE_P (dst_mode)); - gcc_assert (register_operand (dst, dst_mode) - && register_operand (src1, src_mode) - && register_operand (src2, src_mode)); - - emit_insn (gen_aarch64_simd_combine (src_mode, dst, src1, src2)); - return; -} - /* Split a complex SIMD move. */ void @@ -7460,8 +7548,8 @@ aarch64_layout_frame (void) #define SLOT_NOT_REQUIRED (-2) #define SLOT_REQUIRED (-1) - frame.wb_candidate1 = INVALID_REGNUM; - frame.wb_candidate2 = INVALID_REGNUM; + frame.wb_push_candidate1 = INVALID_REGNUM; + frame.wb_push_candidate2 = INVALID_REGNUM; frame.spare_pred_reg = INVALID_REGNUM; /* First mark all the registers that really need to be saved... */ @@ -7576,9 +7664,9 @@ aarch64_layout_frame (void) { /* FP and LR are placed in the linkage record. */ frame.reg_offset[R29_REGNUM] = offset; - frame.wb_candidate1 = R29_REGNUM; + frame.wb_push_candidate1 = R29_REGNUM; frame.reg_offset[R30_REGNUM] = offset + UNITS_PER_WORD; - frame.wb_candidate2 = R30_REGNUM; + frame.wb_push_candidate2 = R30_REGNUM; offset += 2 * UNITS_PER_WORD; } @@ -7586,10 +7674,10 @@ aarch64_layout_frame (void) if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED)) { frame.reg_offset[regno] = offset; - if (frame.wb_candidate1 == INVALID_REGNUM) - frame.wb_candidate1 = regno; - else if (frame.wb_candidate2 == INVALID_REGNUM) - frame.wb_candidate2 = regno; + if (frame.wb_push_candidate1 == INVALID_REGNUM) + frame.wb_push_candidate1 = regno; + else if (frame.wb_push_candidate2 == INVALID_REGNUM) + frame.wb_push_candidate2 = regno; offset += UNITS_PER_WORD; } @@ -7612,11 +7700,11 @@ aarch64_layout_frame (void) } frame.reg_offset[regno] = offset; - if (frame.wb_candidate1 == INVALID_REGNUM) - frame.wb_candidate1 = regno; - else if (frame.wb_candidate2 == INVALID_REGNUM - && frame.wb_candidate1 >= V0_REGNUM) - frame.wb_candidate2 = regno; + if (frame.wb_push_candidate1 == INVALID_REGNUM) + frame.wb_push_candidate1 = regno; + else if (frame.wb_push_candidate2 == INVALID_REGNUM + && frame.wb_push_candidate1 >= V0_REGNUM) + frame.wb_push_candidate2 = regno; offset += vector_save_size; } @@ -7647,10 +7735,38 @@ aarch64_layout_frame (void) frame.sve_callee_adjust = 0; frame.callee_offset = 0; + frame.wb_pop_candidate1 = frame.wb_push_candidate1; + frame.wb_pop_candidate2 = frame.wb_push_candidate2; + + /* Shadow call stack only deals with functions where the LR is pushed + onto the stack and without specifying the "no_sanitize" attribute + with the argument "shadow-call-stack". */ + frame.is_scs_enabled + = (!crtl->calls_eh_return + && sanitize_flags_p (SANITIZE_SHADOW_CALL_STACK) + && known_ge (cfun->machine->frame.reg_offset[LR_REGNUM], 0)); + + /* When shadow call stack is enabled, the scs_pop in the epilogue will + restore x30, and we don't need to pop x30 again in the traditional + way. Pop candidates record the registers that need to be popped + eventually. */ + if (frame.is_scs_enabled) + { + if (frame.wb_pop_candidate2 == R30_REGNUM) + frame.wb_pop_candidate2 = INVALID_REGNUM; + else if (frame.wb_pop_candidate1 == R30_REGNUM) + frame.wb_pop_candidate1 = INVALID_REGNUM; + } + + /* If candidate2 is INVALID_REGNUM, we need to adjust max_push_offset to + 256 to ensure that the offset meets the requirements of emit_move_insn. 
+ Similarly, if candidate1 is INVALID_REGNUM, we need to set + max_push_offset to 0, because no registers are popped at this time, + so callee_adjust cannot be adjusted. */ HOST_WIDE_INT max_push_offset = 0; - if (frame.wb_candidate2 != INVALID_REGNUM) + if (frame.wb_pop_candidate2 != INVALID_REGNUM) max_push_offset = 512; - else if (frame.wb_candidate1 != INVALID_REGNUM) + else if (frame.wb_pop_candidate1 != INVALID_REGNUM) max_push_offset = 256; HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset; @@ -7740,8 +7856,8 @@ aarch64_layout_frame (void) { /* We've decided not to associate any register saves with the initial stack allocation. */ - frame.wb_candidate1 = INVALID_REGNUM; - frame.wb_candidate2 = INVALID_REGNUM; + frame.wb_pop_candidate1 = frame.wb_push_candidate1 = INVALID_REGNUM; + frame.wb_pop_candidate2 = frame.wb_push_candidate2 = INVALID_REGNUM; } frame.laid_out = true; @@ -8054,8 +8170,8 @@ aarch64_save_callee_saves (poly_int64 start_offset, bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno); if (skip_wb - && (regno == cfun->machine->frame.wb_candidate1 - || regno == cfun->machine->frame.wb_candidate2)) + && (regno == cfun->machine->frame.wb_push_candidate1 + || regno == cfun->machine->frame.wb_push_candidate2)) continue; if (cfun->machine->reg_is_wrapped_separately[regno]) @@ -8165,8 +8281,8 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start, rtx reg, mem; if (skip_wb - && (regno == cfun->machine->frame.wb_candidate1 - || regno == cfun->machine->frame.wb_candidate2)) + && (regno == cfun->machine->frame.wb_pop_candidate1 + || regno == cfun->machine->frame.wb_pop_candidate2)) continue; machine_mode mode = aarch64_reg_save_mode (regno); @@ -8337,8 +8453,8 @@ aarch64_get_separate_components (void) if (cfun->machine->frame.spare_pred_reg != INVALID_REGNUM) bitmap_clear_bit (components, cfun->machine->frame.spare_pred_reg); - unsigned reg1 = cfun->machine->frame.wb_candidate1; - unsigned reg2 = cfun->machine->frame.wb_candidate2; + unsigned reg1 = cfun->machine->frame.wb_push_candidate1; + unsigned reg2 = cfun->machine->frame.wb_push_candidate2; /* If registers have been chosen to be stored/restored with writeback don't interfere with them to avoid having to output explicit stack adjustment instructions. */ @@ -8947,8 +9063,8 @@ aarch64_expand_prologue (void) poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust; poly_int64 below_hard_fp_saved_regs_size = cfun->machine->frame.below_hard_fp_saved_regs_size; - unsigned reg1 = cfun->machine->frame.wb_candidate1; - unsigned reg2 = cfun->machine->frame.wb_candidate2; + unsigned reg1 = cfun->machine->frame.wb_push_candidate1; + unsigned reg2 = cfun->machine->frame.wb_push_candidate2; bool emit_frame_chain = cfun->machine->frame.emit_frame_chain; rtx_insn *insn; @@ -8979,6 +9095,10 @@ aarch64_expand_prologue (void) RTX_FRAME_RELATED_P (insn) = 1; } + /* Push return address to shadow call stack. 
*/ + if (cfun->machine->frame.is_scs_enabled) + emit_insn (gen_scs_push ()); + if (flag_stack_usage_info) current_function_static_stack_size = constant_lower_bound (frame_size); @@ -9125,8 +9245,10 @@ aarch64_expand_epilogue (bool for_sibcall) poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust; poly_int64 below_hard_fp_saved_regs_size = cfun->machine->frame.below_hard_fp_saved_regs_size; - unsigned reg1 = cfun->machine->frame.wb_candidate1; - unsigned reg2 = cfun->machine->frame.wb_candidate2; + unsigned reg1 = cfun->machine->frame.wb_pop_candidate1; + unsigned reg2 = cfun->machine->frame.wb_pop_candidate2; + unsigned int last_gpr = (cfun->machine->frame.is_scs_enabled + ? R29_REGNUM : R30_REGNUM); rtx cfi_ops = NULL; rtx_insn *insn; /* A stack clash protection prologue may not have left EP0_REGNUM or @@ -9196,8 +9318,12 @@ aarch64_expand_epilogue (bool for_sibcall) false, &cfi_ops); if (maybe_ne (sve_callee_adjust, 0)) aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true); + + /* When shadow call stack is enabled, the scs_pop in the epilogue will + restore x30, we don't need to restore x30 again in the traditional + way. */ aarch64_restore_callee_saves (callee_offset - sve_callee_adjust, - R0_REGNUM, R30_REGNUM, + R0_REGNUM, last_gpr, callee_adjust != 0, &cfi_ops); if (need_barrier_p) @@ -9235,6 +9361,17 @@ aarch64_expand_epilogue (bool for_sibcall) RTX_FRAME_RELATED_P (insn) = 1; } + /* Pop return address from shadow call stack. */ + if (cfun->machine->frame.is_scs_enabled) + { + machine_mode mode = aarch64_reg_save_mode (R30_REGNUM); + rtx reg = gen_rtx_REG (mode, R30_REGNUM); + + insn = emit_insn (gen_scs_pop ()); + add_reg_note (insn, REG_CFA_RESTORE, reg); + RTX_FRAME_RELATED_P (insn) = 1; + } + /* We prefer to emit the combined return/authenticate instruction RETAA, however there are three cases in which we must instead emit an explicit authentication instruction. @@ -9939,9 +10076,15 @@ aarch64_classify_address (struct aarch64_address_info *info, /* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode corresponds to the actual size of the memory being loaded/stored and the mode of the corresponding addressing mode is half of that. */ - if (type == ADDR_QUERY_LDP_STP_N - && known_eq (GET_MODE_SIZE (mode), 16)) - mode = DFmode; + if (type == ADDR_QUERY_LDP_STP_N) + { + if (known_eq (GET_MODE_SIZE (mode), 16)) + mode = DFmode; + else if (known_eq (GET_MODE_SIZE (mode), 8)) + mode = SFmode; + else + return false; + } bool allow_reg_index_p = (!load_store_pair_p && ((vec_flags == 0 @@ -11421,7 +11564,9 @@ aarch64_print_operand (FILE *f, rtx x, int code) machine_mode mode = GET_MODE (x); if (!MEM_P (x) - || (code == 'y' && maybe_ne (GET_MODE_SIZE (mode), 16))) + || (code == 'y' + && maybe_ne (GET_MODE_SIZE (mode), 8) + && maybe_ne (GET_MODE_SIZE (mode), 16))) { output_operand_lossage ("invalid operand for '%%%c'", code); return; @@ -14913,7 +15058,7 @@ public: aarch64_vector_costs (vec_info *, bool); unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind, - stmt_vec_info stmt_info, tree vectype, + stmt_vec_info stmt_info, slp_tree, tree vectype, int misalign, vect_cost_model_location where) override; void finish_cost (const vector_costs *) override; @@ -14941,6 +15086,31 @@ private: - If M_VEC_FLAGS & VEC_ANY_SVE is nonzero then we're costing SVE code. */ unsigned int m_vec_flags = 0; + /* At the moment, we do not model LDP and STP in the vector and scalar costs. 
+ This means that code such as: + + a[0] = x; + a[1] = x; + + will be costed as two scalar instructions and two vector instructions + (a scalar_to_vec and an unaligned_store). For SLP, the vector form + wins if the costs are equal, because of the fact that the vector costs + include constant initializations whereas the scalar costs don't. + We would therefore tend to vectorize the code above, even though + the scalar version can use a single STP. + + We should eventually fix this and model LDP and STP in the main costs; + see the comment in aarch64_sve_adjust_stmt_cost for some of the problems. + Until then, we look specifically for code that does nothing more than + STP-like operations. We cost them on that basis in addition to the + normal latency-based costs. + + If the scalar or vector code could be a sequence of STPs + + initialization, this variable counts the cost of the sequence, + with 2 units per instruction. The variable is ~0U for other + kinds of code. */ + unsigned int m_stp_sequence_cost = 0; + /* On some CPUs, SVE and Advanced SIMD provide the same theoretical vector throughput, such as 4x128 Advanced SIMD vs. 2x256 SVE. In those situations, we try to predict whether an Advanced SIMD implementation @@ -15733,10 +15903,108 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind, } } +/* Return true if STMT_INFO contains a memory access and if the constant + component of the memory address is aligned to SIZE bytes. */ +static bool +aarch64_aligned_constant_offset_p (stmt_vec_info stmt_info, + poly_uint64 size) +{ + if (!STMT_VINFO_DATA_REF (stmt_info)) + return false; + + if (auto first_stmt = DR_GROUP_FIRST_ELEMENT (stmt_info)) + stmt_info = first_stmt; + tree constant_offset = DR_INIT (STMT_VINFO_DATA_REF (stmt_info)); + /* Needed for gathers & scatters, for example. */ + if (!constant_offset) + return false; + + return multiple_p (wi::to_poly_offset (constant_offset), size); +} + +/* Check if a scalar or vector stmt could be part of a region of code + that does nothing more than store values to memory, in the scalar + case using STP. Return the cost of the stmt if so, counting 2 for + one instruction. Return ~0U otherwise. + + The arguments are a subset of those passed to add_stmt_cost. */ +unsigned int +aarch64_stp_sequence_cost (unsigned int count, vect_cost_for_stmt kind, + stmt_vec_info stmt_info, tree vectype) +{ + /* Code that stores vector constants uses a vector_load to create + the constant. We don't apply the heuristic to that case for two + main reasons: + + - At the moment, STPs are only formed via peephole2, and the + constant scalar moves would often come between STRs and so + prevent STP formation. + + - The scalar code also has to load the constant somehow, and that + isn't costed. */ + switch (kind) + { + case scalar_to_vec: + /* Count 2 insns for a GPR->SIMD dup and 1 insn for a FPR->SIMD dup. */ + return (FLOAT_TYPE_P (vectype) ? 2 : 4) * count; + + case vec_construct: + if (FLOAT_TYPE_P (vectype)) + /* Count 1 insn for the maximum number of FP->SIMD INS + instructions. */ + return (vect_nunits_for_cost (vectype) - 1) * 2 * count; + + /* Count 2 insns for a GPR->SIMD move and 2 insns for the + maximum number of GPR->SIMD INS instructions. */ + return vect_nunits_for_cost (vectype) * 4 * count; + + case vector_store: + case unaligned_store: + /* Count 1 insn per vector if we can't form STP Q pairs. 
*/ + if (aarch64_sve_mode_p (TYPE_MODE (vectype))) + return count * 2; + if (aarch64_tune_params.extra_tuning_flags + & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) + return count * 2; + + if (stmt_info) + { + /* Assume we won't be able to use STP if the constant offset + component of the address is misaligned. ??? This could be + removed if we formed STP pairs earlier, rather than relying + on peephole2. */ + auto size = GET_MODE_SIZE (TYPE_MODE (vectype)); + if (!aarch64_aligned_constant_offset_p (stmt_info, size)) + return count * 2; + } + return CEIL (count, 2) * 2; + + case scalar_store: + if (stmt_info && STMT_VINFO_DATA_REF (stmt_info)) + { + /* Check for a mode in which STP pairs can be formed. */ + auto size = GET_MODE_SIZE (TYPE_MODE (aarch64_dr_type (stmt_info))); + if (maybe_ne (size, 4) && maybe_ne (size, 8)) + return ~0U; + + /* Assume we won't be able to use STP if the constant offset + component of the address is misaligned. ??? This could be + removed if we formed STP pairs earlier, rather than relying + on peephole2. */ + if (!aarch64_aligned_constant_offset_p (stmt_info, size)) + return ~0U; + } + return count; + + default: + return ~0U; + } +} + unsigned aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, - stmt_vec_info stmt_info, tree vectype, - int misalign, + stmt_vec_info stmt_info, slp_tree, + tree vectype, int misalign, vect_cost_model_location where) { fractional_cost stmt_cost @@ -15756,6 +16024,14 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, m_analyzed_vinfo = true; } + /* Apply the heuristic described above m_stp_sequence_cost. */ + if (m_stp_sequence_cost != ~0U) + { + uint64_t cost = aarch64_stp_sequence_cost (count, kind, + stmt_info, vectype); + m_stp_sequence_cost = MIN (m_stp_sequence_cost + cost, ~0U); + } + /* Try to get a more accurate cost by looking at STMT_INFO instead of just looking at KIND. */ if (stmt_info && aarch64_use_new_vector_costs_p ()) @@ -16026,6 +16302,15 @@ aarch64_vector_costs::finish_cost (const vector_costs *uncast_scalar_costs) m_costs[vect_body] = adjust_body_cost (loop_vinfo, scalar_costs, m_costs[vect_body]); + /* Apply the heuristic described above m_stp_sequence_cost. Prefer + the scalar code in the event of a tie, since there is more chance + of scalar code being optimized with surrounding operations. */ + if (!loop_vinfo + && scalar_costs + && m_stp_sequence_cost != ~0U + && m_stp_sequence_cost >= scalar_costs->m_stp_sequence_cost) + m_costs[vect_body] = 2 * scalar_costs->total_cost (); + vector_costs::finish_cost (scalar_costs); } @@ -16643,6 +16928,10 @@ aarch64_override_options_internal (struct gcc_options *opts) aarch64_stack_protector_guard_offset = offs; } + if ((flag_sanitize & SANITIZE_SHADOW_CALL_STACK) + && !fixed_regs[R18_REGNUM]) + error ("%<-fsanitize=shadow-call-stack%> requires %<-ffixed-x18%>"); + initialize_aarch64_code_model (opts); initialize_aarch64_tls_size (opts); @@ -20941,37 +21230,13 @@ aarch64_expand_vector_init (rtx target, rtx vals) of mode N in VALS and we must put their concatentation into TARGET. 
*/ if (XVECLEN (vals, 0) == 2 && VECTOR_MODE_P (GET_MODE (XVECEXP (vals, 0, 0)))) { - gcc_assert (known_eq (GET_MODE_SIZE (mode), - 2 * GET_MODE_SIZE (GET_MODE (XVECEXP (vals, 0, 0))))); - rtx lo = XVECEXP (vals, 0, 0); - rtx hi = XVECEXP (vals, 0, 1); - machine_mode narrow_mode = GET_MODE (lo); - gcc_assert (GET_MODE_INNER (narrow_mode) == inner_mode); - gcc_assert (narrow_mode == GET_MODE (hi)); - - /* When we want to concatenate a half-width vector with zeroes we can - use the aarch64_combinez[_be] patterns. Just make sure that the - zeroes are in the right half. */ - if (BYTES_BIG_ENDIAN - && aarch64_simd_imm_zero (lo, narrow_mode) - && general_operand (hi, narrow_mode)) - emit_insn (gen_aarch64_combinez_be (narrow_mode, target, hi, lo)); - else if (!BYTES_BIG_ENDIAN - && aarch64_simd_imm_zero (hi, narrow_mode) - && general_operand (lo, narrow_mode)) - emit_insn (gen_aarch64_combinez (narrow_mode, target, lo, hi)); - else - { - /* Else create the two half-width registers and combine them. */ - if (!REG_P (lo)) - lo = force_reg (GET_MODE (lo), lo); - if (!REG_P (hi)) - hi = force_reg (GET_MODE (hi), hi); - - if (BYTES_BIG_ENDIAN) - std::swap (lo, hi); - emit_insn (gen_aarch64_simd_combine (narrow_mode, target, lo, hi)); - } + machine_mode narrow_mode = GET_MODE (XVECEXP (vals, 0, 0)); + gcc_assert (GET_MODE_INNER (narrow_mode) == inner_mode + && known_eq (GET_MODE_SIZE (mode), + 2 * GET_MODE_SIZE (narrow_mode))); + emit_insn (gen_aarch64_vec_concat (narrow_mode, target, + XVECEXP (vals, 0, 0), + XVECEXP (vals, 0, 1))); return; } @@ -21063,11 +21328,7 @@ aarch64_expand_vector_init (rtx target, rtx vals) for store_pair_lanes<mode>. */ if (memory_operand (x0, inner_mode) && memory_operand (x1, inner_mode) - && !STRICT_ALIGNMENT - && rtx_equal_p (XEXP (x1, 0), - plus_constant (Pmode, - XEXP (x0, 0), - GET_MODE_SIZE (inner_mode)))) + && aarch64_mergeable_load_pair_p (mode, x0, x1)) { rtx t; if (inner_mode == DFmode) @@ -21201,7 +21462,7 @@ aarch64_sve_expand_vector_init_handle_trailing_constants { rtx x = builder.elt (i + nelts_reqd - n_trailing_constants); if (!valid_for_const_vector_p (elem_mode, x)) - x = const0_rtx; + x = CONST0_RTX (elem_mode); v.quick_push (x); } rtx const_vec = v.build (); @@ -24687,14 +24948,20 @@ aarch64_sched_adjust_priority (rtx_insn *insn, int priority) return priority; } -/* Check if *MEM1 and *MEM2 are consecutive memory references and, +/* If REVERSED is null, return true if memory reference *MEM2 comes + immediately after memory reference *MEM1. Do not change the references + in this case. + + Otherwise, check if *MEM1 and *MEM2 are consecutive memory references and, if they are, try to make them use constant offsets from the same base register. Return true on success. When returning true, set *REVERSED to true if *MEM1 comes after *MEM2, false if *MEM1 comes before *MEM2. 
*/ static bool aarch64_check_consecutive_mems (rtx *mem1, rtx *mem2, bool *reversed) { - *reversed = false; + if (reversed) + *reversed = false; + if (GET_RTX_CLASS (GET_CODE (XEXP (*mem1, 0))) == RTX_AUTOINC || GET_RTX_CLASS (GET_CODE (XEXP (*mem2, 0))) == RTX_AUTOINC) return false; @@ -24723,7 +24990,7 @@ aarch64_check_consecutive_mems (rtx *mem1, rtx *mem2, bool *reversed) if (known_eq (UINTVAL (offset1) + size1, UINTVAL (offset2))) return true; - if (known_eq (UINTVAL (offset2) + size2, UINTVAL (offset1))) + if (known_eq (UINTVAL (offset2) + size2, UINTVAL (offset1)) && reversed) { *reversed = true; return true; @@ -24756,22 +25023,25 @@ aarch64_check_consecutive_mems (rtx *mem1, rtx *mem2, bool *reversed) if (known_eq (expr_offset1 + size1, expr_offset2)) ; - else if (known_eq (expr_offset2 + size2, expr_offset1)) + else if (known_eq (expr_offset2 + size2, expr_offset1) && reversed) *reversed = true; else return false; - if (base2) + if (reversed) { - rtx addr1 = plus_constant (Pmode, XEXP (*mem2, 0), - expr_offset1 - expr_offset2); - *mem1 = replace_equiv_address_nv (*mem1, addr1); - } - else - { - rtx addr2 = plus_constant (Pmode, XEXP (*mem1, 0), - expr_offset2 - expr_offset1); - *mem2 = replace_equiv_address_nv (*mem2, addr2); + if (base2) + { + rtx addr1 = plus_constant (Pmode, XEXP (*mem2, 0), + expr_offset1 - expr_offset2); + *mem1 = replace_equiv_address_nv (*mem1, addr1); + } + else + { + rtx addr2 = plus_constant (Pmode, XEXP (*mem1, 0), + expr_offset2 - expr_offset1); + *mem2 = replace_equiv_address_nv (*mem2, addr2); + } } return true; } @@ -24779,6 +25049,17 @@ aarch64_check_consecutive_mems (rtx *mem1, rtx *mem2, bool *reversed) return false; } +/* Return true if MEM1 and MEM2 can be combined into a single access + of mode MODE, with the combined access having the same address as MEM1. */ + +bool +aarch64_mergeable_load_pair_p (machine_mode mode, rtx mem1, rtx mem2) +{ + if (STRICT_ALIGNMENT && MEM_ALIGN (mem1) < GET_MODE_ALIGNMENT (mode)) + return false; + return aarch64_check_consecutive_mems (&mem1, &mem2, nullptr); +} + /* Given OPERANDS of consecutive load/store, check if we can merge them into ldp/stp. LOAD is true if they are load instructions. MODE is the mode of memory operands. */ @@ -26857,6 +27138,9 @@ aarch64_libgcc_floating_mode_supported_p #undef TARGET_ASM_FUNCTION_EPILOGUE #define TARGET_ASM_FUNCTION_EPILOGUE aarch64_sls_emit_blr_function_thunks +#undef TARGET_HAVE_SHADOW_CALL_STACK +#define TARGET_HAVE_SHADOW_CALL_STACK true + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-aarch64.h" diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index dddf133..27ba4f4 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -922,9 +922,21 @@ struct GTY (()) aarch64_frame Indicated by CALLEE_ADJUST == 0 && EMIT_FRAME_CHAIN. These fields indicate which registers we've decided to handle using - (1) or (2), or INVALID_REGNUM if none. */ - unsigned wb_candidate1; - unsigned wb_candidate2; + (1) or (2), or INVALID_REGNUM if none. + + In some cases we don't always need to pop all registers in the push + candidates, pop candidates record which registers need to be popped + eventually. The initial value of a pop candidate is copied from its + corresponding push candidate. + + Currently, different pop candidates are only used for shadow call + stack. When "-fsanitize=shadow-call-stack" is specified, we replace + x30 in the pop candidate with INVALID_REGNUM to ensure that x30 is + not popped twice. 
*/ + unsigned wb_push_candidate1; + unsigned wb_push_candidate2; + unsigned wb_pop_candidate1; + unsigned wb_pop_candidate2; /* Big-endian SVE frames need a spare predicate register in order to save vector registers in the correct layout for unwinding. @@ -932,6 +944,9 @@ struct GTY (()) aarch64_frame unsigned spare_pred_reg; bool laid_out; + + /* True if shadow call stack should be enabled for the current function. */ + bool is_scs_enabled; }; typedef struct GTY (()) machine_function diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 3c72bda..c985250 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -4405,6 +4405,33 @@ } ) +;; Implement MAX/MIN (A, B) - C using SUBS/ADDS followed by CSEL/CSINV/CSINC. +;; See aarch64_maxmin_plus_const for details about the supported cases. +(define_insn_and_split "*aarch64_minmax_plus" + [(set (match_operand:GPI 0 "register_operand" "=r") + (plus:GPI + (MAXMIN:GPI + (match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "const_int_operand")) + (match_operand:GPI 3 "aarch64_plus_immediate"))) + (clobber (reg:CC CC_REGNUM))] + "aarch64_maxmin_plus_const (<CODE>, operands, false)" + "#" + "&& 1" + [(parallel + [(set (reg:CC CC_REGNUM) + (compare:CC (match_dup 1) (match_dup 4))) + (set (match_dup 6) + (plus:GPI (match_dup 1) (match_dup 3)))]) + (set (match_dup 0) + (if_then_else:GPI (match_dup 5) (match_dup 6) (match_dup 7)))] + { + if (!aarch64_maxmin_plus_const (<CODE>, operands, true)) + gcc_unreachable (); + } + [(set_attr "length" "8")] +) + ;; ------------------------------------------------------------------- ;; Logical operations ;; ------------------------------------------------------------------- @@ -4531,7 +4558,7 @@ (define_split [(set (match_operand:GPI 0 "register_operand") - (LOGICAL:GPI + (LOGICAL_OR_PLUS:GPI (and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand") (match_operand:QI 2 "aarch64_shift_imm_<mode>")) (match_operand:GPI 3 "const_int_operand")) @@ -4544,16 +4571,23 @@ && REGNO (operands[1]) == REGNO (operands[4]))) && (trunc_int_for_mode (GET_MODE_MASK (GET_MODE (operands[4])) << INTVAL (operands[2]), <MODE>mode) - == INTVAL (operands[3]))" + == INTVAL (operands[3])) + && (<CODE> != PLUS + || (GET_MODE_MASK (GET_MODE (operands[4])) + & INTVAL (operands[3])) == 0)" [(set (match_dup 5) (zero_extend:GPI (match_dup 4))) - (set (match_dup 0) (LOGICAL:GPI (ashift:GPI (match_dup 5) (match_dup 2)) - (match_dup 5)))] - "operands[5] = gen_reg_rtx (<MODE>mode);" + (set (match_dup 0) (match_dup 6))] + { + operands[5] = gen_reg_rtx (<MODE>mode); + rtx shift = gen_rtx_ASHIFT (<MODE>mode, operands[5], operands[2]); + rtx_code new_code = (<CODE> == PLUS ? 
IOR : <CODE>); + operands[6] = gen_rtx_fmt_ee (new_code, <MODE>mode, shift, operands[5]); + } ) (define_split [(set (match_operand:GPI 0 "register_operand") - (LOGICAL:GPI + (LOGICAL_OR_PLUS:GPI (and:GPI (ashift:GPI (match_operand:GPI 1 "register_operand") (match_operand:QI 2 "aarch64_shift_imm_<mode>")) (match_operand:GPI 4 "const_int_operand")) @@ -4562,11 +4596,17 @@ && pow2_or_zerop (UINTVAL (operands[3]) + 1) && (trunc_int_for_mode (UINTVAL (operands[3]) << INTVAL (operands[2]), <MODE>mode) - == INTVAL (operands[4]))" + == INTVAL (operands[4])) + && (<CODE> != PLUS + || (INTVAL (operands[4]) & INTVAL (operands[3])) == 0)" [(set (match_dup 5) (and:GPI (match_dup 1) (match_dup 3))) - (set (match_dup 0) (LOGICAL:GPI (ashift:GPI (match_dup 5) (match_dup 2)) - (match_dup 5)))] - "operands[5] = gen_reg_rtx (<MODE>mode);" + (set (match_dup 0) (match_dup 6))] + { + operands[5] = gen_reg_rtx (<MODE>mode); + rtx shift = gen_rtx_ASHIFT (<MODE>mode, operands[5], operands[2]); + rtx_code new_code = (<CODE> == PLUS ? IOR : <CODE>); + operands[6] = gen_rtx_fmt_ee (new_code, <MODE>mode, shift, operands[5]); + } ) (define_split @@ -7053,6 +7093,16 @@ "hint\t7 // xpaclri" ) +;; Save X30 in the X18-based POST_INC stack (consistent with clang). +(define_expand "scs_push" + [(set (mem:DI (post_inc:DI (reg:DI R18_REGNUM))) + (reg:DI R30_REGNUM))]) + +;; Load X30 form the X18-based PRE_DEC stack (consistent with clang). +(define_expand "scs_pop" + [(set (reg:DI R30_REGNUM) + (mem:DI (pre_dec:DI (reg:DI R18_REGNUM))))]) + ;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and ;; all of memory. This blocks insns from being moved across this point. diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 9160ce3..e72fdf35 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -187,11 +187,6 @@ (define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI V4HF V8HF V4BF V8BF V2SF V4SF V2DF]) -;; All Advanced SIMD modes suitable for moving, loading, and storing, -;; including special Bfloat vector types. -(define_mode_iterator VALL_F16MOV [V8QI V16QI V4HI V8HI V2SI V4SI V2DI - V4HF V8HF V4BF V8BF V2SF V4SF V2DF]) - ;; The VALL_F16 modes except the 128-bit 2-element ones. (define_mode_iterator VALL_F16_NO_V2Q [V8QI V16QI V4HI V8HI V2SI V4SI V4HF V8HF V2SF V4SF]) @@ -241,6 +236,9 @@ ;; Double vector modes for combines. (define_mode_iterator VDC [V8QI V4HI V4BF V4HF V2SI V2SF DI DF]) +;; VDC plus SI and SF. +(define_mode_iterator VDCSIF [V8QI V4HI V4BF V4HF V2SI V2SF SI SF DI DF]) + ;; Polynomial modes for vector combines. (define_mode_iterator VDC_P [V8QI V4HI DI]) @@ -1441,8 +1439,8 @@ (define_mode_attr VDBL [(V8QI "V16QI") (V4HI "V8HI") (V4HF "V8HF") (V4BF "V8BF") (V2SI "V4SI") (V2SF "V4SF") - (SI "V2SI") (DI "V2DI") - (DF "V2DF")]) + (SI "V2SI") (SF "V2SF") + (DI "V2DI") (DF "V2DF")]) ;; Register suffix for double-length mode. (define_mode_attr Vdtype [(V4HF "8h") (V2SF "4s")]) @@ -1562,6 +1560,30 @@ (V4SI "2s") (V8HF "4h") (V4SF "2s")]) +;; Whether a mode fits in W or X registers (i.e. "w" for 32-bit modes +;; and "x" for 64-bit modes). +(define_mode_attr single_wx [(SI "w") (SF "w") + (V8QI "x") (V4HI "x") + (V4HF "x") (V4BF "x") + (V2SI "x") (V2SF "x") + (DI "x") (DF "x")]) + +;; Whether a mode fits in S or D registers (i.e. "s" for 32-bit modes +;; and "d" for 64-bit modes). 
+(define_mode_attr single_type [(SI "s") (SF "s") + (V8QI "d") (V4HI "d") + (V4HF "d") (V4BF "d") + (V2SI "d") (V2SF "d") + (DI "d") (DF "d")]) + +;; Whether a double-width mode fits in D or Q registers (i.e. "d" for +;; 32-bit modes and "q" for 64-bit modes). +(define_mode_attr single_dtype [(SI "d") (SF "d") + (V8QI "q") (V4HI "q") + (V4HF "q") (V4BF "q") + (V2SI "q") (V2SF "q") + (DI "q") (DF "q")]) + ;; Define corresponding core/FP element mode for each vector mode. (define_mode_attr vw [(V8QI "w") (V16QI "w") (V4HI "w") (V8HI "w") @@ -1854,6 +1876,13 @@ (V4x1DF "") (V4x2DF "_q") (V4x4BF "") (V4x8BF "_q")]) +;; Equivalent of the "q" attribute for the <VDBL> mode. +(define_mode_attr dblq [(SI "") (SF "") + (V8QI "_q") (V4HI "_q") + (V4HF "_q") (V4BF "_q") + (V2SI "_q") (V2SF "_q") + (DI "_q") (DF "_q")]) + (define_mode_attr vp [(V8QI "v") (V16QI "v") (V4HI "v") (V8HI "v") (V2SI "p") (V4SI "v") @@ -2093,6 +2122,9 @@ ;; Code iterator for logical operations (define_code_iterator LOGICAL [and ior xor]) +;; LOGICAL with plus, for when | gets converted to +. +(define_code_iterator LOGICAL_OR_PLUS [and ior xor plus]) + ;; LOGICAL without AND. (define_code_iterator LOGICAL_OR [ior xor]) diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 7dc4c15..c308015 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -254,6 +254,10 @@ false, ADDR_QUERY_LDP_STP_N)"))) +(define_predicate "aarch64_reg_or_mem_pair_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "aarch64_mem_pair_lanes_operand"))) + (define_predicate "aarch64_prefetch_operand" (match_test "aarch64_address_valid_for_prefetch_p (op, false)")) diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc index 8cc1735..fbc17e6 100644 --- a/gcc/config/arc/arc.cc +++ b/gcc/config/arc/arc.cc @@ -2256,7 +2256,14 @@ gen_compare_reg (rtx comparison, machine_mode omode) cmode = GET_MODE (x); if (cmode == VOIDmode) cmode = GET_MODE (y); - gcc_assert (cmode == SImode || cmode == SFmode || cmode == DFmode); + + /* If ifcvt passed us a MODE_CC comparison we can + just return it. It should be in the proper form already. 
*/ + if (GET_MODE_CLASS (cmode) == MODE_CC) + return comparison; + + if (cmode != SImode && cmode != SFmode && cmode != DFmode) + return NULL_RTX; if (cmode == SImode) { if (!register_operand (x, SImode)) diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index ace3cb7..39b3580 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -1618,8 +1618,11 @@ core_3, archs4x, archs4xd, archs4xd_slow" (match_operand:SI 2 "nonmemory_operand" "") (match_operand:SI 3 "register_operand" "")))] "" - "operands[1] = gen_compare_reg (operands[1], VOIDmode);") - + " + operands[1] = gen_compare_reg (operands[1], VOIDmode); + if (operands[1] == NULL_RTX) + FAIL; + ") (define_expand "movdicc" [(set (match_operand:DI 0 "dest_reg_operand" "") @@ -1627,7 +1630,11 @@ core_3, archs4x, archs4xd, archs4xd_slow" (match_operand:DI 2 "nonmemory_operand" "") (match_operand:DI 3 "register_operand" "")))] "" - "operands[1] = gen_compare_reg (operands[1], VOIDmode);") + " + operands[1] = gen_compare_reg (operands[1], VOIDmode); + if (operands[1] == NULL_RTX) + FAIL; + ") (define_expand "movsfcc" @@ -1636,7 +1643,11 @@ core_3, archs4x, archs4xd, archs4xd_slow" (match_operand:SF 2 "nonmemory_operand" "") (match_operand:SF 3 "register_operand" "")))] "" - "operands[1] = gen_compare_reg (operands[1], VOIDmode);") + " + operands[1] = gen_compare_reg (operands[1], VOIDmode); + if (operands[1] == NULL_RTX) + FAIL; + ") (define_expand "movdfcc" [(set (match_operand:DF 0 "dest_reg_operand" "") @@ -1644,7 +1655,11 @@ core_3, archs4x, archs4xd, archs4xd_slow" (match_operand:DF 2 "nonmemory_operand" "") (match_operand:DF 3 "register_operand" "")))] "" - "operands[1] = gen_compare_reg (operands[1], VOIDmode);") + " + operands[1] = gen_compare_reg (operands[1], VOIDmode); + if (operands[1] == NULL_RTX) + FAIL; + ") (define_insn "*movsicc_insn" [(set (match_operand:SI 0 "dest_reg_operand" "=w,w") diff --git a/gcc/config/arm/arm-builtins.cc b/gcc/config/arm/arm-builtins.cc index e6bbda2..a7acc1d 100644 --- a/gcc/config/arm/arm-builtins.cc +++ b/gcc/config/arm/arm-builtins.cc @@ -295,7 +295,7 @@ static enum arm_type_qualifiers arm_cx_unary_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_immediate, qualifier_none, qualifier_unsigned_immediate, - qualifier_unsigned }; + qualifier_predicate }; #define CX_UNARY_UNONE_QUALIFIERS (arm_cx_unary_unone_qualifiers) /* T (immediate, T, T, unsigned immediate). */ @@ -304,7 +304,7 @@ arm_cx_binary_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_immediate, qualifier_none, qualifier_none, qualifier_unsigned_immediate, - qualifier_unsigned }; + qualifier_predicate }; #define CX_BINARY_UNONE_QUALIFIERS (arm_cx_binary_unone_qualifiers) /* T (immediate, T, T, T, unsigned immediate). 
*/ @@ -313,7 +313,7 @@ arm_cx_ternary_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_immediate, qualifier_none, qualifier_none, qualifier_none, qualifier_unsigned_immediate, - qualifier_unsigned }; + qualifier_predicate }; #define CX_TERNARY_UNONE_QUALIFIERS (arm_cx_ternary_unone_qualifiers) /* The first argument (return type) of a store should be void type, @@ -421,6 +421,12 @@ arm_binop_unone_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] (arm_binop_unone_unone_unone_qualifiers) static enum arm_type_qualifiers +arm_binop_pred_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_predicate, qualifier_unsigned, qualifier_unsigned }; +#define BINOP_PRED_UNONE_UNONE_QUALIFIERS \ + (arm_binop_pred_unone_unone_qualifiers) + +static enum arm_type_qualifiers arm_binop_unone_none_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_none, qualifier_immediate }; #define BINOP_UNONE_NONE_IMM_QUALIFIERS \ @@ -433,10 +439,10 @@ arm_binop_none_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] (arm_binop_none_none_unone_qualifiers) static enum arm_type_qualifiers -arm_binop_unone_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_unsigned, qualifier_none, qualifier_none }; -#define BINOP_UNONE_NONE_NONE_QUALIFIERS \ - (arm_binop_unone_none_none_qualifiers) +arm_binop_pred_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_predicate, qualifier_none, qualifier_none }; +#define BINOP_PRED_NONE_NONE_QUALIFIERS \ + (arm_binop_pred_none_none_qualifiers) static enum arm_type_qualifiers arm_binop_unone_unone_none_qualifiers[SIMD_MAX_BUILTIN_ARGS] @@ -478,24 +484,24 @@ arm_ternop_unone_unone_none_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] (arm_ternop_unone_unone_none_imm_qualifiers) static enum arm_type_qualifiers -arm_ternop_unone_unone_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] +arm_ternop_unone_unone_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_none, - qualifier_unsigned }; -#define TERNOP_UNONE_UNONE_NONE_UNONE_QUALIFIERS \ - (arm_ternop_unone_unone_none_unone_qualifiers) + qualifier_predicate }; +#define TERNOP_UNONE_UNONE_NONE_PRED_QUALIFIERS \ + (arm_ternop_unone_unone_none_pred_qualifiers) static enum arm_type_qualifiers -arm_ternop_unone_unone_imm_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] +arm_ternop_unone_unone_imm_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate, - qualifier_unsigned }; -#define TERNOP_UNONE_UNONE_IMM_UNONE_QUALIFIERS \ - (arm_ternop_unone_unone_imm_unone_qualifiers) + qualifier_predicate }; +#define TERNOP_UNONE_UNONE_IMM_PRED_QUALIFIERS \ + (arm_ternop_unone_unone_imm_pred_qualifiers) static enum arm_type_qualifiers -arm_ternop_unone_none_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_unsigned, qualifier_none, qualifier_none, qualifier_unsigned }; -#define TERNOP_UNONE_NONE_NONE_UNONE_QUALIFIERS \ - (arm_ternop_unone_none_none_unone_qualifiers) +arm_ternop_pred_none_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_predicate, qualifier_none, qualifier_none, qualifier_predicate }; +#define TERNOP_PRED_NONE_NONE_PRED_QUALIFIERS \ + (arm_ternop_pred_none_none_pred_qualifiers) static enum arm_type_qualifiers arm_ternop_none_none_none_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] @@ -504,22 +510,22 @@ arm_ternop_none_none_none_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS] (arm_ternop_none_none_none_imm_qualifiers) static enum arm_type_qualifiers -arm_ternop_none_none_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { 
qualifier_none, qualifier_none, qualifier_none, qualifier_unsigned }; -#define TERNOP_NONE_NONE_NONE_UNONE_QUALIFIERS \ - (arm_ternop_none_none_none_unone_qualifiers) +arm_ternop_none_none_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_none, qualifier_predicate }; +#define TERNOP_NONE_NONE_NONE_PRED_QUALIFIERS \ + (arm_ternop_none_none_none_pred_qualifiers) static enum arm_type_qualifiers -arm_ternop_none_none_imm_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_none, qualifier_none, qualifier_immediate, qualifier_unsigned }; -#define TERNOP_NONE_NONE_IMM_UNONE_QUALIFIERS \ - (arm_ternop_none_none_imm_unone_qualifiers) +arm_ternop_none_none_imm_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_immediate, qualifier_predicate }; +#define TERNOP_NONE_NONE_IMM_PRED_QUALIFIERS \ + (arm_ternop_none_none_imm_pred_qualifiers) static enum arm_type_qualifiers -arm_ternop_none_none_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_unsigned }; -#define TERNOP_NONE_NONE_UNONE_UNONE_QUALIFIERS \ - (arm_ternop_none_none_unone_unone_qualifiers) +arm_ternop_none_none_unone_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_predicate }; +#define TERNOP_NONE_NONE_UNONE_PRED_QUALIFIERS \ + (arm_ternop_none_none_unone_pred_qualifiers) static enum arm_type_qualifiers arm_ternop_unone_unone_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] @@ -529,66 +535,80 @@ arm_ternop_unone_unone_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] (arm_ternop_unone_unone_unone_unone_qualifiers) static enum arm_type_qualifiers +arm_ternop_unone_unone_unone_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, + qualifier_predicate }; +#define TERNOP_UNONE_UNONE_UNONE_PRED_QUALIFIERS \ + (arm_ternop_unone_unone_unone_pred_qualifiers) + +static enum arm_type_qualifiers +arm_ternop_pred_unone_unone_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_predicate, qualifier_unsigned, qualifier_unsigned, + qualifier_predicate }; +#define TERNOP_PRED_UNONE_UNONE_PRED_QUALIFIERS \ + (arm_ternop_pred_unone_unone_pred_qualifiers) + +static enum arm_type_qualifiers arm_ternop_none_none_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_none, qualifier_none, qualifier_none }; #define TERNOP_NONE_NONE_NONE_NONE_QUALIFIERS \ (arm_ternop_none_none_none_none_qualifiers) static enum arm_type_qualifiers -arm_quadop_unone_unone_none_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] +arm_quadop_unone_unone_none_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_none, qualifier_none, - qualifier_unsigned }; -#define QUADOP_UNONE_UNONE_NONE_NONE_UNONE_QUALIFIERS \ - (arm_quadop_unone_unone_none_none_unone_qualifiers) + qualifier_predicate }; +#define QUADOP_UNONE_UNONE_NONE_NONE_PRED_QUALIFIERS \ + (arm_quadop_unone_unone_none_none_pred_qualifiers) static enum arm_type_qualifiers -arm_quadop_none_none_none_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] +arm_quadop_none_none_none_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_none, qualifier_none, qualifier_none, - qualifier_unsigned }; -#define QUADOP_NONE_NONE_NONE_NONE_UNONE_QUALIFIERS \ - (arm_quadop_none_none_none_none_unone_qualifiers) + qualifier_predicate }; +#define QUADOP_NONE_NONE_NONE_NONE_PRED_QUALIFIERS \ + 
(arm_quadop_none_none_none_none_pred_qualifiers) static enum arm_type_qualifiers -arm_quadop_none_none_none_imm_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] +arm_quadop_none_none_none_imm_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate, - qualifier_unsigned }; -#define QUADOP_NONE_NONE_NONE_IMM_UNONE_QUALIFIERS \ - (arm_quadop_none_none_none_imm_unone_qualifiers) + qualifier_predicate }; +#define QUADOP_NONE_NONE_NONE_IMM_PRED_QUALIFIERS \ + (arm_quadop_none_none_none_imm_pred_qualifiers) static enum arm_type_qualifiers -arm_quadop_unone_unone_unone_unone_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] +arm_quadop_unone_unone_unone_unone_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, - qualifier_unsigned, qualifier_unsigned }; -#define QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE_QUALIFIERS \ - (arm_quadop_unone_unone_unone_unone_unone_qualifiers) + qualifier_unsigned, qualifier_predicate }; +#define QUADOP_UNONE_UNONE_UNONE_UNONE_PRED_QUALIFIERS \ + (arm_quadop_unone_unone_unone_unone_pred_qualifiers) static enum arm_type_qualifiers -arm_quadop_unone_unone_none_imm_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] +arm_quadop_unone_unone_none_imm_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_none, - qualifier_immediate, qualifier_unsigned }; -#define QUADOP_UNONE_UNONE_NONE_IMM_UNONE_QUALIFIERS \ - (arm_quadop_unone_unone_none_imm_unone_qualifiers) + qualifier_immediate, qualifier_predicate }; +#define QUADOP_UNONE_UNONE_NONE_IMM_PRED_QUALIFIERS \ + (arm_quadop_unone_unone_none_imm_pred_qualifiers) static enum arm_type_qualifiers -arm_quadop_none_none_unone_imm_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] +arm_quadop_none_none_unone_imm_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_immediate, - qualifier_unsigned }; -#define QUADOP_NONE_NONE_UNONE_IMM_UNONE_QUALIFIERS \ - (arm_quadop_none_none_unone_imm_unone_qualifiers) + qualifier_predicate }; +#define QUADOP_NONE_NONE_UNONE_IMM_PRED_QUALIFIERS \ + (arm_quadop_none_none_unone_imm_pred_qualifiers) static enum arm_type_qualifiers -arm_quadop_unone_unone_unone_imm_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] +arm_quadop_unone_unone_unone_imm_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, - qualifier_immediate, qualifier_unsigned }; -#define QUADOP_UNONE_UNONE_UNONE_IMM_UNONE_QUALIFIERS \ - (arm_quadop_unone_unone_unone_imm_unone_qualifiers) + qualifier_immediate, qualifier_predicate }; +#define QUADOP_UNONE_UNONE_UNONE_IMM_PRED_QUALIFIERS \ + (arm_quadop_unone_unone_unone_imm_pred_qualifiers) static enum arm_type_qualifiers -arm_quadop_unone_unone_unone_none_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] +arm_quadop_unone_unone_unone_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, - qualifier_none, qualifier_unsigned }; -#define QUADOP_UNONE_UNONE_UNONE_NONE_UNONE_QUALIFIERS \ - (arm_quadop_unone_unone_unone_none_unone_qualifiers) + qualifier_none, qualifier_predicate }; +#define QUADOP_UNONE_UNONE_UNONE_NONE_PRED_QUALIFIERS \ + (arm_quadop_unone_unone_unone_none_pred_qualifiers) static enum arm_type_qualifiers arm_strs_qualifiers[SIMD_MAX_BUILTIN_ARGS] @@ -625,37 +645,37 @@ arm_strsbu_qualifiers[SIMD_MAX_BUILTIN_ARGS] static enum arm_type_qualifiers arm_strs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_void, qualifier_pointer, qualifier_none, 
qualifier_unsigned}; + = { qualifier_void, qualifier_pointer, qualifier_none, qualifier_predicate}; #define STRS_P_QUALIFIERS (arm_strs_p_qualifiers) static enum arm_type_qualifiers arm_stru_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_void, qualifier_pointer, qualifier_unsigned, - qualifier_unsigned}; + qualifier_predicate}; #define STRU_P_QUALIFIERS (arm_stru_p_qualifiers) static enum arm_type_qualifiers arm_strsu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_void, qualifier_pointer, qualifier_unsigned, - qualifier_unsigned, qualifier_unsigned}; + qualifier_unsigned, qualifier_predicate}; #define STRSU_P_QUALIFIERS (arm_strsu_p_qualifiers) static enum arm_type_qualifiers arm_strss_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_void, qualifier_pointer, qualifier_unsigned, - qualifier_none, qualifier_unsigned}; + qualifier_none, qualifier_predicate}; #define STRSS_P_QUALIFIERS (arm_strss_p_qualifiers) static enum arm_type_qualifiers arm_strsbs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_void, qualifier_unsigned, qualifier_immediate, - qualifier_none, qualifier_unsigned}; + qualifier_none, qualifier_predicate}; #define STRSBS_P_QUALIFIERS (arm_strsbs_p_qualifiers) static enum arm_type_qualifiers arm_strsbu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_void, qualifier_unsigned, qualifier_immediate, - qualifier_unsigned, qualifier_unsigned}; + qualifier_unsigned, qualifier_predicate}; #define STRSBU_P_QUALIFIERS (arm_strsbu_p_qualifiers) static enum arm_type_qualifiers @@ -691,43 +711,43 @@ arm_ldrgbu_qualifiers[SIMD_MAX_BUILTIN_ARGS] static enum arm_type_qualifiers arm_ldrgbs_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_unsigned, qualifier_immediate, - qualifier_unsigned}; + qualifier_predicate}; #define LDRGBS_Z_QUALIFIERS (arm_ldrgbs_z_qualifiers) static enum arm_type_qualifiers arm_ldrgbu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate, - qualifier_unsigned}; + qualifier_predicate}; #define LDRGBU_Z_QUALIFIERS (arm_ldrgbu_z_qualifiers) static enum arm_type_qualifiers arm_ldrgs_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_pointer, qualifier_unsigned, - qualifier_unsigned}; + qualifier_predicate}; #define LDRGS_Z_QUALIFIERS (arm_ldrgs_z_qualifiers) static enum arm_type_qualifiers arm_ldrgu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_pointer, qualifier_unsigned, - qualifier_unsigned}; + qualifier_predicate}; #define LDRGU_Z_QUALIFIERS (arm_ldrgu_z_qualifiers) static enum arm_type_qualifiers arm_ldrs_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_none, qualifier_pointer, qualifier_unsigned}; + = { qualifier_none, qualifier_pointer, qualifier_predicate}; #define LDRS_Z_QUALIFIERS (arm_ldrs_z_qualifiers) static enum arm_type_qualifiers arm_ldru_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] - = { qualifier_unsigned, qualifier_pointer, qualifier_unsigned}; + = { qualifier_unsigned, qualifier_pointer, qualifier_predicate}; #define LDRU_Z_QUALIFIERS (arm_ldru_z_qualifiers) static enum arm_type_qualifiers -arm_quinop_unone_unone_unone_unone_imm_unone_qualifiers[SIMD_MAX_BUILTIN_ARGS] +arm_quinop_unone_unone_unone_unone_imm_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned, - qualifier_unsigned, qualifier_immediate, qualifier_unsigned }; -#define QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_UNONE_QUALIFIERS \ - (arm_quinop_unone_unone_unone_unone_imm_unone_qualifiers) + qualifier_unsigned, qualifier_immediate, 
qualifier_predicate }; +#define QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_PRED_QUALIFIERS \ + (arm_quinop_unone_unone_unone_unone_imm_pred_qualifiers) static enum arm_type_qualifiers arm_ldrgbwbxu_qualifiers[SIMD_MAX_BUILTIN_ARGS] @@ -737,7 +757,7 @@ arm_ldrgbwbxu_qualifiers[SIMD_MAX_BUILTIN_ARGS] static enum arm_type_qualifiers arm_ldrgbwbxu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate, - qualifier_unsigned}; + qualifier_predicate}; #define LDRGBWBXU_Z_QUALIFIERS (arm_ldrgbwbxu_z_qualifiers) static enum arm_type_qualifiers @@ -753,13 +773,13 @@ arm_ldrgbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS] static enum arm_type_qualifiers arm_ldrgbwbs_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_none, qualifier_unsigned, qualifier_immediate, - qualifier_unsigned}; + qualifier_predicate}; #define LDRGBWBS_Z_QUALIFIERS (arm_ldrgbwbs_z_qualifiers) static enum arm_type_qualifiers arm_ldrgbwbu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate, - qualifier_unsigned}; + qualifier_predicate}; #define LDRGBWBU_Z_QUALIFIERS (arm_ldrgbwbu_z_qualifiers) static enum arm_type_qualifiers @@ -775,13 +795,13 @@ arm_strsbwbu_qualifiers[SIMD_MAX_BUILTIN_ARGS] static enum arm_type_qualifiers arm_strsbwbs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_const, - qualifier_none, qualifier_unsigned}; + qualifier_none, qualifier_predicate}; #define STRSBWBS_P_QUALIFIERS (arm_strsbwbs_p_qualifiers) static enum arm_type_qualifiers arm_strsbwbu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_const, - qualifier_unsigned, qualifier_unsigned}; + qualifier_unsigned, qualifier_predicate}; #define STRSBWBU_P_QUALIFIERS (arm_strsbwbu_p_qualifiers) static enum arm_type_qualifiers @@ -804,6 +824,18 @@ arm_sqshl_qualifiers[SIMD_MAX_BUILTIN_ARGS] = { qualifier_unsigned, qualifier_unsigned, qualifier_const}; #define SQSHL_QUALIFIERS (arm_sqshl_qualifiers) +static enum arm_type_qualifiers +arm_binop_none_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_none, qualifier_none, qualifier_predicate }; +#define BINOP_NONE_NONE_PRED_QUALIFIERS \ + (arm_binop_none_none_pred_qualifiers) + +static enum arm_type_qualifiers +arm_binop_unone_unone_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS] + = { qualifier_unsigned, qualifier_unsigned, qualifier_predicate }; +#define BINOP_UNONE_UNONE_PRED_QUALIFIERS \ + (arm_binop_unone_unone_pred_qualifiers) + /* End of Qualifier for MVE builtins. */ /* void ([T element type] *, T, immediate). */ @@ -1553,11 +1585,28 @@ arm_init_simd_builtin_types (void) tree eltype = arm_simd_types[i].eltype; machine_mode mode = arm_simd_types[i].mode; - if (eltype == NULL) + if (eltype == NULL + /* VECTOR_BOOL is not supported unless MVE is activated, + this would make build_truth_vector_type_for_mode + crash. */ + && ((GET_MODE_CLASS (mode) != MODE_VECTOR_BOOL) + || !TARGET_HAVE_MVE)) continue; if (arm_simd_types[i].itype == NULL) { - tree type = build_vector_type (eltype, GET_MODE_NUNITS (mode)); + tree type; + if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) + { + /* Handle MVE predicates: they are internally stored as + 16 bits, but are used as vectors of 1, 2 or 4-bit + elements. 
*/ + type = build_truth_vector_type_for_mode (GET_MODE_NUNITS (mode), + mode); + eltype = TREE_TYPE (type); + } + else + type = build_vector_type (eltype, GET_MODE_NUNITS (mode)); + type = build_distinct_type_copy (type); SET_TYPE_STRUCTURAL_EQUALITY (type); @@ -1695,6 +1744,11 @@ arm_init_builtin (unsigned int fcode, arm_builtin_datum *d, if (qualifiers & qualifier_map_mode) op_mode = d->mode; + /* MVE Predicates use HImode as mandated by the ABI: pred16_t is + unsigned short. */ + if (qualifiers & qualifier_predicate) + op_mode = HImode; + /* For pointers, we want a pointer to the basic type of the vector. */ if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode)) @@ -2939,6 +2993,12 @@ arm_expand_builtin_args (rtx target, machine_mode map_mode, int fcode, case ARG_BUILTIN_COPY_TO_REG: if (POINTER_TYPE_P (TREE_TYPE (arg[argc]))) op[argc] = convert_memory_address (Pmode, op[argc]); + + /* MVE uses mve_pred16_t (aka HImode) for vectors of + predicates. */ + if (GET_MODE_CLASS (mode[argc]) == MODE_VECTOR_BOOL) + op[argc] = gen_lowpart (mode[argc], op[argc]); + /*gcc_assert (GET_MODE (op[argc]) == mode[argc]); */ if (!(*insn_data[icode].operand[opno].predicate) (op[argc], mode[argc])) @@ -3144,6 +3204,13 @@ constant_arg: else emit_insn (insn); + if (GET_MODE_CLASS (tmode) == MODE_VECTOR_BOOL) + { + rtx HItarget = gen_reg_rtx (HImode); + emit_move_insn (HItarget, gen_lowpart (HImode, target)); + return HItarget; + } + return target; } diff --git a/gcc/config/arm/arm-builtins.h b/gcc/config/arm/arm-builtins.h index e5130d6..a8ef8ae 100644 --- a/gcc/config/arm/arm-builtins.h +++ b/gcc/config/arm/arm-builtins.h @@ -84,7 +84,9 @@ enum arm_type_qualifiers qualifier_lane_pair_index = 0x1000, /* Lane indices selected in quadtuplets - must be within range of previous argument = a vector. */ - qualifier_lane_quadtup_index = 0x2000 + qualifier_lane_quadtup_index = 0x2000, + /* MVE vector predicates. */ + qualifier_predicate = 0x4000 }; struct arm_simd_type_info diff --git a/gcc/config/arm/arm-modes.def b/gcc/config/arm/arm-modes.def index de689c8..9ed0cd0 100644 --- a/gcc/config/arm/arm-modes.def +++ b/gcc/config/arm/arm-modes.def @@ -84,6 +84,14 @@ VECTOR_MODE (FLOAT, BF, 2); /* V2BF. */ VECTOR_MODE (FLOAT, BF, 4); /* V4BF. */ VECTOR_MODE (FLOAT, BF, 8); /* V8BF. */ +/* Predicates for MVE. */ +BOOL_MODE (B2I, 2, 1); +BOOL_MODE (B4I, 4, 1); + +VECTOR_BOOL_MODE (V16BI, 16, BI, 2); +VECTOR_BOOL_MODE (V8BI, 8, B2I, 2); +VECTOR_BOOL_MODE (V4BI, 4, B4I, 2); + /* Fraction and accumulator vector modes. 
*/ VECTOR_MODES (FRACT, 4); /* V4QQ V2HQ */ VECTOR_MODES (UFRACT, 4); /* V4UQQ V2UHQ */ diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 881c72c..9d14209 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -101,6 +101,7 @@ extern char *neon_output_shift_immediate (const char *, char, rtx *, machine_mode, int, bool); extern void neon_pairwise_reduce (rtx, rtx, machine_mode, rtx (*) (rtx, rtx, rtx)); +extern rtx mve_bool_vec_to_const (rtx const_vec); extern rtx neon_make_constant (rtx, bool generate = true); extern tree arm_builtin_vectorized_function (unsigned int, tree, tree); extern void neon_expand_vector_init (rtx, rtx); @@ -203,6 +204,7 @@ extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree); extern bool arm_pad_reg_upward (machine_mode, tree, int); #endif extern int arm_apply_result_size (void); +extern opt_machine_mode arm_get_mask_mode (machine_mode mode); #endif /* RTX_CODE */ @@ -379,7 +381,7 @@ extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx, extern bool arm_fusion_enabled_p (tune_params::fuse_ops); extern bool arm_valid_symbolic_address_p (rtx); extern bool arm_validize_comparison (rtx *, rtx *, rtx *); -extern bool arm_expand_vector_compare (rtx, rtx_code, rtx, rtx, bool, bool); +extern bool arm_expand_vector_compare (rtx, rtx_code, rtx, rtx, bool); #endif /* RTX_CODE */ extern bool arm_gen_setmem (rtx *); diff --git a/gcc/config/arm/arm-simd-builtin-types.def b/gcc/config/arm/arm-simd-builtin-types.def index 6ba6f21..d1d6416 100644 --- a/gcc/config/arm/arm-simd-builtin-types.def +++ b/gcc/config/arm/arm-simd-builtin-types.def @@ -51,3 +51,7 @@ ENTRY (Bfloat16x2_t, V2BF, none, 32, bfloat16, 20) ENTRY (Bfloat16x4_t, V4BF, none, 64, bfloat16, 20) ENTRY (Bfloat16x8_t, V8BF, none, 128, bfloat16, 20) + + ENTRY (Pred1x16_t, V16BI, predicate, 16, pred1, 16) + ENTRY (Pred2x8_t, V8BI, predicate, 8, pred1, 15) + ENTRY (Pred4x4_t, V4BI, predicate, 4, pred1, 15) diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index 663f459..c1103d9 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -832,6 +832,9 @@ static const struct attribute_spec arm_attribute_table[] = #undef TARGET_STACK_PROTECT_GUARD #define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard + +#undef TARGET_VECTORIZE_GET_MASK_MODE +#define TARGET_VECTORIZE_GET_MASK_MODE arm_get_mask_mode /* Obstack for minipool constant handling. */ static struct obstack minipool_obstack; @@ -12802,7 +12805,10 @@ simd_valid_immediate (rtx op, machine_mode mode, int inverse, innersize = GET_MODE_UNIT_SIZE (mode); /* Only support 128-bit vectors for MVE. */ - if (TARGET_HAVE_MVE && (!vector || n_elts * innersize != 16)) + if (TARGET_HAVE_MVE + && (!vector + || (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL) + || n_elts * innersize != 16)) return -1; /* Vectors of float constants. */ @@ -13167,6 +13173,29 @@ neon_vdup_constant (rtx vals, bool generate) return gen_vec_duplicate (mode, x); } +/* Return a HI representation of CONST_VEC suitable for MVE predicates. 
*/ +rtx +mve_bool_vec_to_const (rtx const_vec) +{ + int n_elts = GET_MODE_NUNITS ( GET_MODE (const_vec)); + int repeat = 16 / n_elts; + int i; + int hi_val = 0; + + for (i = 0; i < n_elts; i++) + { + rtx el = CONST_VECTOR_ELT (const_vec, i); + unsigned HOST_WIDE_INT elpart; + + gcc_assert (CONST_INT_P (el)); + elpart = INTVAL (el); + + for (int j = 0; j < repeat; j++) + hi_val |= elpart << (i * repeat + j); + } + return gen_int_mode (hi_val, HImode); +} + /* Return a non-NULL RTX iff VALS, which is a PARALLEL containing only constants (for vec_init) or CONST_VECTOR, can be effeciently loaded into a register. @@ -13207,6 +13236,8 @@ neon_make_constant (rtx vals, bool generate) && simd_immediate_valid_for_move (const_vec, mode, NULL, NULL)) /* Load using VMOV. On Cortex-A8 this takes one cycle. */ return const_vec; + else if (TARGET_HAVE_MVE && (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)) + return mve_bool_vec_to_const (const_vec); else if ((target = neon_vdup_constant (vals, generate)) != NULL_RTX) /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON pipeline cycle; creating the constant takes one or two ARM @@ -25339,6 +25370,9 @@ thumb2_asm_output_opcode (FILE * stream) static unsigned int arm_hard_regno_nregs (unsigned int regno, machine_mode mode) { + if (IS_VPR_REGNUM (regno)) + return CEIL (GET_MODE_SIZE (mode), 2); + if (TARGET_32BIT && regno > PC_REGNUM && regno != FRAME_POINTER_REGNUM @@ -25362,7 +25396,10 @@ arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode) return false; if (IS_VPR_REGNUM (regno)) - return mode == HImode; + return mode == HImode + || mode == V16BImode + || mode == V8BImode + || mode == V4BImode; if (TARGET_THUMB1) /* For the Thumb we only allow values bigger than SImode in @@ -29252,7 +29289,8 @@ arm_vector_mode_supported_p (machine_mode mode) if (TARGET_HAVE_MVE && (mode == V2DImode || mode == V4SImode || mode == V8HImode - || mode == V16QImode)) + || mode == V16QImode + || mode == V16BImode || mode == V8BImode || mode == V4BImode)) return true; if (TARGET_HAVE_MVE_FLOAT @@ -29366,7 +29404,7 @@ arm_class_likely_spilled_p (reg_class_t rclass) || rclass == CC_REG) return true; - return false; + return default_class_likely_spilled_p (rclass); } /* Implements target hook small_register_classes_for_mode_p. */ @@ -31050,21 +31088,30 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, arm_post_atomic_barrier (model); } +/* Return the mode for the MVE vector of predicates corresponding to MODE. */ +opt_machine_mode +arm_mode_to_pred_mode (machine_mode mode) +{ + switch (GET_MODE_NUNITS (mode)) + { + case 16: return V16BImode; + case 8: return V8BImode; + case 4: return V4BImode; + } + return opt_machine_mode (); +} + /* Expand code to compare vectors OP0 and OP1 using condition CODE. If CAN_INVERT, store either the result or its inverse in TARGET and return true if TARGET contains the inverse. If !CAN_INVERT, always store the result in TARGET, never its inverse. - If VCOND_MVE, do not emit the vpsel instruction here, let arm_expand_vcond do - it with the right destination type to avoid emiting two vpsel, one here and - one in arm_expand_vcond. - Note that the handling of floating-point comparisons is not IEEE compliant. 
*/ bool arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1, - bool can_invert, bool vcond_mve) + bool can_invert) { machine_mode cmp_result_mode = GET_MODE (target); machine_mode cmp_mode = GET_MODE (op0); @@ -31093,7 +31140,7 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1, and then store its inverse in TARGET. This avoids reusing TARGET (which for integer NE could be one of the inputs). */ rtx tmp = gen_reg_rtx (cmp_result_mode); - if (arm_expand_vector_compare (tmp, code, op0, op1, true, vcond_mve)) + if (arm_expand_vector_compare (tmp, code, op0, op1, true)) gcc_unreachable (); emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp))); return false; @@ -31129,36 +31176,22 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1, case NE: if (TARGET_HAVE_MVE) { - rtx vpr_p0; - if (vcond_mve) - vpr_p0 = target; - else - vpr_p0 = gen_reg_rtx (HImode); - switch (GET_MODE_CLASS (cmp_mode)) { case MODE_VECTOR_INT: - emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1))); + emit_insn (gen_mve_vcmpq (code, cmp_mode, target, + op0, force_reg (cmp_mode, op1))); break; case MODE_VECTOR_FLOAT: if (TARGET_HAVE_MVE_FLOAT) - emit_insn (gen_mve_vcmpq_f (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1))); + emit_insn (gen_mve_vcmpq_f (code, cmp_mode, target, + op0, force_reg (cmp_mode, op1))); else gcc_unreachable (); break; default: gcc_unreachable (); } - - /* If we are not expanding a vcond, build the result here. */ - if (!vcond_mve) - { - rtx zero = gen_reg_rtx (cmp_result_mode); - rtx one = gen_reg_rtx (cmp_result_mode); - emit_move_insn (zero, CONST0_RTX (cmp_result_mode)); - emit_move_insn (one, CONST1_RTX (cmp_result_mode)); - emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0)); - } } else emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1)); @@ -31170,23 +31203,8 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1, case GEU: case GTU: if (TARGET_HAVE_MVE) - { - rtx vpr_p0; - if (vcond_mve) - vpr_p0 = target; - else - vpr_p0 = gen_reg_rtx (HImode); - - emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1))); - if (!vcond_mve) - { - rtx zero = gen_reg_rtx (cmp_result_mode); - rtx one = gen_reg_rtx (cmp_result_mode); - emit_move_insn (zero, CONST0_RTX (cmp_result_mode)); - emit_move_insn (one, CONST1_RTX (cmp_result_mode)); - emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0)); - } - } + emit_insn (gen_mve_vcmpq (code, cmp_mode, target, + op0, force_reg (cmp_mode, op1))); else emit_insn (gen_neon_vc (code, cmp_mode, target, op0, force_reg (cmp_mode, op1))); @@ -31197,23 +31215,8 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1, case LEU: case LTU: if (TARGET_HAVE_MVE) - { - rtx vpr_p0; - if (vcond_mve) - vpr_p0 = target; - else - vpr_p0 = gen_reg_rtx (HImode); - - emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, vpr_p0, force_reg (cmp_mode, op1), op0)); - if (!vcond_mve) - { - rtx zero = gen_reg_rtx (cmp_result_mode); - rtx one = gen_reg_rtx (cmp_result_mode); - emit_move_insn (zero, CONST0_RTX (cmp_result_mode)); - emit_move_insn (one, CONST1_RTX (cmp_result_mode)); - emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0)); - } - } + emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, target, + force_reg (cmp_mode, op1), op0)); else emit_insn (gen_neon_vc (swap_condition (code), cmp_mode, target, force_reg 
(cmp_mode, op1), op0)); @@ -31228,8 +31231,8 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1, rtx gt_res = gen_reg_rtx (cmp_result_mode); rtx alt_res = gen_reg_rtx (cmp_result_mode); rtx_code alt_code = (code == LTGT ? LT : LE); - if (arm_expand_vector_compare (gt_res, GT, op0, op1, true, vcond_mve) - || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true, vcond_mve)) + if (arm_expand_vector_compare (gt_res, GT, op0, op1, true) + || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true)) gcc_unreachable (); emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode, gt_res, alt_res))); @@ -31249,19 +31252,15 @@ arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode) { /* When expanding for MVE, we do not want to emit a (useless) vpsel in arm_expand_vector_compare, and another one here. */ - bool vcond_mve=false; rtx mask; if (TARGET_HAVE_MVE) - { - vcond_mve=true; - mask = gen_reg_rtx (HImode); - } + mask = gen_reg_rtx (arm_mode_to_pred_mode (cmp_result_mode).require ()); else mask = gen_reg_rtx (cmp_result_mode); bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]), - operands[4], operands[5], true, vcond_mve); + operands[4], operands[5], true); if (inverted) std::swap (operands[1], operands[2]); if (TARGET_NEON) @@ -31269,20 +31268,20 @@ arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode) mask, operands[1], operands[2])); else { - machine_mode cmp_mode = GET_MODE (operands[4]); - rtx vpr_p0 = mask; - rtx zero = gen_reg_rtx (cmp_mode); - rtx one = gen_reg_rtx (cmp_mode); - emit_move_insn (zero, CONST0_RTX (cmp_mode)); - emit_move_insn (one, CONST1_RTX (cmp_mode)); + machine_mode cmp_mode = GET_MODE (operands[0]); + switch (GET_MODE_CLASS (cmp_mode)) { case MODE_VECTOR_INT: - emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, operands[0], one, zero, vpr_p0)); + emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_mode, operands[0], + operands[1], operands[2], mask)); break; case MODE_VECTOR_FLOAT: if (TARGET_HAVE_MVE_FLOAT) - emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0], one, zero, vpr_p0)); + emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0], + operands[1], operands[2], mask)); + else + gcc_unreachable (); break; default: gcc_unreachable (); @@ -34204,4 +34203,15 @@ arm_mode_base_reg_class (machine_mode mode) struct gcc_target targetm = TARGET_INITIALIZER; +/* Implement TARGET_VECTORIZE_GET_MASK_MODE. */ + +opt_machine_mode +arm_get_mask_mode (machine_mode mode) +{ + if (TARGET_HAVE_MVE) + return arm_mode_to_pred_mode (mode); + + return default_get_mask_mode (mode); +} + #include "gt-arm.h" diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index f52724d..ef7b66f 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -1287,6 +1287,7 @@ enum reg_class SFP_REG, AFP_REG, VPR_REG, + GENERAL_AND_VPR_REGS, ALL_REGS, LIM_REG_CLASSES }; @@ -1316,6 +1317,7 @@ enum reg_class "SFP_REG", \ "AFP_REG", \ "VPR_REG", \ + "GENERAL_AND_VPR_REGS", \ "ALL_REGS" \ } @@ -1344,7 +1346,8 @@ enum reg_class { 0x00000000, 0x00000000, 0x00000000, 0x00000040 }, /* SFP_REG */ \ { 0x00000000, 0x00000000, 0x00000000, 0x00000080 }, /* AFP_REG */ \ { 0x00000000, 0x00000000, 0x00000000, 0x00000400 }, /* VPR_REG. */ \ - { 0xFFFF7FFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000000F } /* ALL_REGS. */ \ + { 0x00005FFF, 0x00000000, 0x00000000, 0x00000400 }, /* GENERAL_AND_VPR_REGS. */ \ + { 0xFFFF7FFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0000040F } /* ALL_REGS. 
*/ \ } #define FP_SYSREGS \ @@ -1453,7 +1456,9 @@ extern const char *fp_sysreg_names[NB_FP_SYSREGS]; ARM regs are UNITS_PER_WORD bits. FIXME: Is this true for iWMMX? */ #define CLASS_MAX_NREGS(CLASS, MODE) \ - (ARM_NUM_REGS (MODE)) + (CLASS == VPR_REG) \ + ? CEIL (GET_MODE_SIZE (MODE), 2) \ + : (ARM_NUM_REGS (MODE)) /* If defined, gives a class of registers that cannot be used as the operand of a SUBREG that changes the mode of the object illegally. */ diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt index cc16534..3209b6c 100644 --- a/gcc/config/arm/arm.opt +++ b/gcc/config/arm/arm.opt @@ -274,13 +274,13 @@ Mitigate issues with VLLDM on some M-profile devices (CVE-2021-35465). mfix-cortex-a57-aes-1742098 Target Var(fix_aes_erratum_1742098) Init(2) Save -Mitigate issues with AES instructions on Cortex-A57 and Cortex-A72. -Arm erratum #1742098 +Mitigate issues with AES instructions on Cortex-A57 and Cortex-A72 +(Arm erratum #1742098). mfix-cortex-a72-aes-1655431 Target Alias(mfix-cortex-a57-aes-1742098) -Mitigate issues with AES instructions on Cortex-A57 and Cortex-A72. -Arm erratum #1655431 +Mitigate issues with AES instructions on Cortex-A57 and Cortex-A72 +(Arm erratum #1655431). munaligned-access Target Var(unaligned_access) Init(2) Save diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def index c3ae407..1c8ee34 100644 --- a/gcc/config/arm/arm_mve_builtins.def +++ b/gcc/config/arm/arm_mve_builtins.def @@ -87,9 +87,9 @@ VAR4 (BINOP_UNONE_UNONE_UNONE, vcreateq_u, v16qi, v8hi, v4si, v2di) VAR4 (BINOP_NONE_UNONE_UNONE, vcreateq_s, v16qi, v8hi, v4si, v2di) VAR3 (BINOP_UNONE_UNONE_IMM, vshrq_n_u, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_IMM, vshrq_n_s, v16qi, v8hi, v4si) -VAR1 (BINOP_NONE_NONE_UNONE, vaddlvq_p_s, v4si) -VAR1 (BINOP_UNONE_UNONE_UNONE, vaddlvq_p_u, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpneq_, v16qi, v8hi, v4si) +VAR1 (BINOP_NONE_NONE_PRED, vaddlvq_p_s, v4si) +VAR1 (BINOP_UNONE_UNONE_PRED, vaddlvq_p_u, v4si) +VAR3 (BINOP_PRED_NONE_NONE, vcmpneq_, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_NONE, vshlq_s, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_NONE, vshlq_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vsubq_u, v16qi, v8hi, v4si) @@ -117,13 +117,13 @@ VAR3 (BINOP_UNONE_UNONE_UNONE, vhsubq_n_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vhaddq_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vhaddq_n_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, veorq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vcmphiq_, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vcmphiq_n_, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vcmpcsq_n_, v16qi, v8hi, v4si) +VAR3 (BINOP_PRED_UNONE_UNONE, vcmphiq_, v16qi, v8hi, v4si) +VAR3 (BINOP_PRED_UNONE_UNONE, vcmphiq_n_, v16qi, v8hi, v4si) +VAR3 (BINOP_PRED_UNONE_UNONE, vcmpcsq_, v16qi, v8hi, v4si) +VAR3 (BINOP_PRED_UNONE_UNONE, vcmpcsq_n_, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vbicq_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vandq_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_UNONE_UNONE, vaddvq_p_u, v16qi, v8hi, v4si) +VAR3 (BINOP_UNONE_UNONE_PRED, vaddvq_p_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vaddvaq_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vaddq_n_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_UNONE, vabdq_u, v16qi, v8hi, v4si) @@ -142,19 +142,19 @@ VAR3 (BINOP_UNONE_UNONE_NONE, vbrsrq_n_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_IMM, vshlq_n_u, v16qi, v8hi, v4si) 
VAR3 (BINOP_UNONE_UNONE_IMM, vrshrq_n_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_IMM, vqshlq_n_u, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpneq_n_, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpltq_, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpltq_n_, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpleq_, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpleq_n_, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpgtq_, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpgtq_n_, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpgeq_, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpgeq_n_, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpeqq_, v16qi, v8hi, v4si) -VAR3 (BINOP_UNONE_NONE_NONE, vcmpeqq_n_, v16qi, v8hi, v4si) +VAR3 (BINOP_PRED_NONE_NONE, vcmpneq_n_, v16qi, v8hi, v4si) +VAR3 (BINOP_PRED_NONE_NONE, vcmpltq_, v16qi, v8hi, v4si) +VAR3 (BINOP_PRED_NONE_NONE, vcmpltq_n_, v16qi, v8hi, v4si) +VAR3 (BINOP_PRED_NONE_NONE, vcmpleq_, v16qi, v8hi, v4si) +VAR3 (BINOP_PRED_NONE_NONE, vcmpleq_n_, v16qi, v8hi, v4si) +VAR3 (BINOP_PRED_NONE_NONE, vcmpgtq_, v16qi, v8hi, v4si) +VAR3 (BINOP_PRED_NONE_NONE, vcmpgtq_n_, v16qi, v8hi, v4si) +VAR3 (BINOP_PRED_NONE_NONE, vcmpgeq_, v16qi, v8hi, v4si) +VAR3 (BINOP_PRED_NONE_NONE, vcmpgeq_n_, v16qi, v8hi, v4si) +VAR3 (BINOP_PRED_NONE_NONE, vcmpeqq_, v16qi, v8hi, v4si) +VAR3 (BINOP_PRED_NONE_NONE, vcmpeqq_n_, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_NONE_IMM, vqshluq_n_s, v16qi, v8hi, v4si) -VAR3 (BINOP_NONE_NONE_UNONE, vaddvq_p_s, v16qi, v8hi, v4si) +VAR3 (BINOP_NONE_NONE_PRED, vaddvq_p_s, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_NONE, vsubq_s, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_NONE, vsubq_n_s, v16qi, v8hi, v4si) VAR3 (BINOP_NONE_NONE_NONE, vshlq_r_s, v16qi, v8hi, v4si) @@ -218,18 +218,18 @@ VAR2 (BINOP_UNONE_UNONE_IMM, vshlltq_n_u, v16qi, v8hi) VAR2 (BINOP_UNONE_UNONE_IMM, vshllbq_n_u, v16qi, v8hi) VAR2 (BINOP_UNONE_UNONE_IMM, vorrq_n_u, v8hi, v4si) VAR2 (BINOP_UNONE_UNONE_IMM, vbicq_n_u, v8hi, v4si) -VAR2 (BINOP_UNONE_NONE_NONE, vcmpneq_n_f, v8hf, v4sf) -VAR2 (BINOP_UNONE_NONE_NONE, vcmpneq_f, v8hf, v4sf) -VAR2 (BINOP_UNONE_NONE_NONE, vcmpltq_n_f, v8hf, v4sf) -VAR2 (BINOP_UNONE_NONE_NONE, vcmpltq_f, v8hf, v4sf) -VAR2 (BINOP_UNONE_NONE_NONE, vcmpleq_n_f, v8hf, v4sf) -VAR2 (BINOP_UNONE_NONE_NONE, vcmpleq_f, v8hf, v4sf) -VAR2 (BINOP_UNONE_NONE_NONE, vcmpgtq_n_f, v8hf, v4sf) -VAR2 (BINOP_UNONE_NONE_NONE, vcmpgtq_f, v8hf, v4sf) -VAR2 (BINOP_UNONE_NONE_NONE, vcmpgeq_n_f, v8hf, v4sf) -VAR2 (BINOP_UNONE_NONE_NONE, vcmpgeq_f, v8hf, v4sf) -VAR2 (BINOP_UNONE_NONE_NONE, vcmpeqq_n_f, v8hf, v4sf) -VAR2 (BINOP_UNONE_NONE_NONE, vcmpeqq_f, v8hf, v4sf) +VAR2 (BINOP_PRED_NONE_NONE, vcmpneq_n_f, v8hf, v4sf) +VAR2 (BINOP_PRED_NONE_NONE, vcmpneq_f, v8hf, v4sf) +VAR2 (BINOP_PRED_NONE_NONE, vcmpltq_n_f, v8hf, v4sf) +VAR2 (BINOP_PRED_NONE_NONE, vcmpltq_f, v8hf, v4sf) +VAR2 (BINOP_PRED_NONE_NONE, vcmpleq_n_f, v8hf, v4sf) +VAR2 (BINOP_PRED_NONE_NONE, vcmpleq_f, v8hf, v4sf) +VAR2 (BINOP_PRED_NONE_NONE, vcmpgtq_n_f, v8hf, v4sf) +VAR2 (BINOP_PRED_NONE_NONE, vcmpgtq_f, v8hf, v4sf) +VAR2 (BINOP_PRED_NONE_NONE, vcmpgeq_n_f, v8hf, v4sf) +VAR2 (BINOP_PRED_NONE_NONE, vcmpgeq_f, v8hf, v4sf) +VAR2 (BINOP_PRED_NONE_NONE, vcmpeqq_n_f, v8hf, v4sf) +VAR2 (BINOP_PRED_NONE_NONE, vcmpeqq_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vsubq_f, v8hf, v4sf) VAR2 (BINOP_NONE_NONE_NONE, vqmovntq_s, v8hi, v4si) VAR2 (BINOP_NONE_NONE_NONE, vqmovnbq_s, v8hi, v4si) @@ -277,87 +277,87 @@ VAR1 (BINOP_NONE_NONE_NONE, vrmlaldavhq_s, v4si) VAR1 
(BINOP_NONE_NONE_NONE, vcvttq_f16_f32, v8hf) VAR1 (BINOP_NONE_NONE_NONE, vcvtbq_f16_f32, v8hf) VAR1 (BINOP_NONE_NONE_NONE, vaddlvaq_s, v4si) -VAR2 (TERNOP_NONE_NONE_IMM_UNONE, vbicq_m_n_s, v8hi, v4si) -VAR2 (TERNOP_UNONE_UNONE_IMM_UNONE, vbicq_m_n_u, v8hi, v4si) +VAR2 (TERNOP_NONE_NONE_IMM_PRED, vbicq_m_n_s, v8hi, v4si) +VAR2 (TERNOP_UNONE_UNONE_IMM_PRED, vbicq_m_n_u, v8hi, v4si) VAR2 (TERNOP_NONE_NONE_NONE_IMM, vqrshrnbq_n_s, v8hi, v4si) VAR2 (TERNOP_UNONE_UNONE_UNONE_IMM, vqrshrnbq_n_u, v8hi, v4si) VAR1 (TERNOP_NONE_NONE_NONE_NONE, vrmlaldavhaq_s, v4si) VAR1 (TERNOP_UNONE_UNONE_UNONE_UNONE, vrmlaldavhaq_u, v4si) -VAR2 (TERNOP_NONE_NONE_UNONE_UNONE, vcvtq_m_to_f_u, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vcvtq_m_to_f_s, v8hf, v4sf) -VAR2 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpeqq_m_f, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_UNONE_PRED, vcvtq_m_to_f_u, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vcvtq_m_to_f_s, v8hf, v4sf) +VAR2 (TERNOP_PRED_NONE_NONE_PRED, vcmpeqq_m_f, v8hf, v4sf) VAR3 (TERNOP_UNONE_NONE_UNONE_IMM, vshlcq_carry_s, v16qi, v8hi, v4si) VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vshlcq_carry_u, v16qi, v8hi, v4si) VAR2 (TERNOP_UNONE_UNONE_NONE_IMM, vqrshrunbq_n_s, v8hi, v4si) VAR3 (TERNOP_UNONE_UNONE_NONE_NONE, vabavq_s, v16qi, v8hi, v4si) VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vabavq_u, v16qi, v8hi, v4si) -VAR2 (TERNOP_UNONE_UNONE_NONE_UNONE, vcvtaq_m_u, v8hi, v4si) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vcvtaq_m_s, v8hi, v4si) +VAR2 (TERNOP_UNONE_UNONE_NONE_PRED, vcvtaq_m_u, v8hi, v4si) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vcvtaq_m_s, v8hi, v4si) VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vshlcq_vec_u, v16qi, v8hi, v4si) VAR3 (TERNOP_NONE_NONE_UNONE_IMM, vshlcq_vec_s, v16qi, v8hi, v4si) -VAR4 (TERNOP_UNONE_UNONE_UNONE_UNONE, vpselq_u, v16qi, v8hi, v4si, v2di) -VAR4 (TERNOP_NONE_NONE_NONE_UNONE, vpselq_s, v16qi, v8hi, v4si, v2di) -VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vrev64q_m_u, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmvnq_m_u, v16qi, v8hi, v4si) +VAR4 (TERNOP_UNONE_UNONE_UNONE_PRED, vpselq_u, v16qi, v8hi, v4si, v2di) +VAR4 (TERNOP_NONE_NONE_NONE_PRED, vpselq_s, v16qi, v8hi, v4si, v2di) +VAR3 (TERNOP_UNONE_UNONE_UNONE_PRED, vrev64q_m_u, v16qi, v8hi, v4si) +VAR3 (TERNOP_UNONE_UNONE_UNONE_PRED, vmvnq_m_u, v16qi, v8hi, v4si) VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmlasq_n_u, v16qi, v8hi, v4si) VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmlaq_n_u, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmladavq_p_u, v16qi, v8hi, v4si) +VAR3 (TERNOP_UNONE_UNONE_UNONE_PRED, vmladavq_p_u, v16qi, v8hi, v4si) VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmladavaq_u, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vminvq_p_u, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmaxvq_p_u, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vdupq_m_n_u, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vcmpneq_m_u, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vcmpneq_m_n_u, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vcmphiq_m_u, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vcmphiq_m_n_u, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vcmpeqq_m_u, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vcmpeqq_m_n_u, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vcmpcsq_m_u, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vcmpcsq_m_n_u, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vclzq_m_u, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vaddvaq_p_u, v16qi, v8hi, 
v4si) +VAR3 (TERNOP_UNONE_UNONE_UNONE_PRED, vminvq_p_u, v16qi, v8hi, v4si) +VAR3 (TERNOP_UNONE_UNONE_UNONE_PRED, vmaxvq_p_u, v16qi, v8hi, v4si) +VAR3 (TERNOP_UNONE_UNONE_UNONE_PRED, vdupq_m_n_u, v16qi, v8hi, v4si) +VAR3 (TERNOP_PRED_UNONE_UNONE_PRED, vcmpneq_m_u, v16qi, v8hi, v4si) +VAR3 (TERNOP_PRED_UNONE_UNONE_PRED, vcmpneq_m_n_u, v16qi, v8hi, v4si) +VAR3 (TERNOP_PRED_UNONE_UNONE_PRED, vcmphiq_m_u, v16qi, v8hi, v4si) +VAR3 (TERNOP_PRED_UNONE_UNONE_PRED, vcmphiq_m_n_u, v16qi, v8hi, v4si) +VAR3 (TERNOP_PRED_UNONE_UNONE_PRED, vcmpeqq_m_u, v16qi, v8hi, v4si) +VAR3 (TERNOP_PRED_UNONE_UNONE_PRED, vcmpeqq_m_n_u, v16qi, v8hi, v4si) +VAR3 (TERNOP_PRED_UNONE_UNONE_PRED, vcmpcsq_m_u, v16qi, v8hi, v4si) +VAR3 (TERNOP_PRED_UNONE_UNONE_PRED, vcmpcsq_m_n_u, v16qi, v8hi, v4si) +VAR3 (TERNOP_UNONE_UNONE_UNONE_PRED, vclzq_m_u, v16qi, v8hi, v4si) +VAR3 (TERNOP_UNONE_UNONE_UNONE_PRED, vaddvaq_p_u, v16qi, v8hi, v4si) VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vsriq_n_u, v16qi, v8hi, v4si) VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vsliq_n_u, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_UNONE_NONE_UNONE, vshlq_m_r_u, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_UNONE_NONE_UNONE, vrshlq_m_n_u, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_UNONE_NONE_UNONE, vqshlq_m_r_u, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_UNONE_NONE_UNONE, vqrshlq_m_n_u, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_UNONE_NONE_UNONE, vminavq_p_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_UNONE_NONE_UNONE, vminaq_m_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_UNONE_NONE_UNONE, vmaxavq_p_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_UNONE_NONE_UNONE, vmaxaq_m_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpneq_m_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpneq_m_n_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpltq_m_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpltq_m_n_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpleq_m_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpleq_m_n_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpgtq_m_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpgtq_m_n_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpgeq_m_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpgeq_m_n_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpeqq_m_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpeqq_m_n_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_NONE_NONE_NONE_UNONE, vshlq_m_r_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_NONE_NONE_NONE_UNONE, vrshlq_m_n_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_NONE_NONE_NONE_UNONE, vrev64q_m_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_NONE_NONE_NONE_UNONE, vqshlq_m_r_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_NONE_NONE_NONE_UNONE, vqrshlq_m_n_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_NONE_NONE_NONE_UNONE, vqnegq_m_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_NONE_NONE_NONE_UNONE, vqabsq_m_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_NONE_NONE_NONE_UNONE, vnegq_m_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_NONE_NONE_NONE_UNONE, vmvnq_m_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_NONE_NONE_NONE_UNONE, vmlsdavxq_p_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_NONE_NONE_NONE_UNONE, vmlsdavq_p_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_NONE_NONE_NONE_UNONE, vmladavxq_p_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_NONE_NONE_NONE_UNONE, vmladavq_p_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_NONE_NONE_NONE_UNONE, vminvq_p_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_NONE_NONE_NONE_UNONE, vmaxvq_p_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_NONE_NONE_NONE_UNONE, vdupq_m_n_s, v16qi, v8hi, v4si) -VAR3 
(TERNOP_NONE_NONE_NONE_UNONE, vclzq_m_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_NONE_NONE_NONE_UNONE, vclsq_m_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_NONE_NONE_NONE_UNONE, vaddvaq_p_s, v16qi, v8hi, v4si) -VAR3 (TERNOP_NONE_NONE_NONE_UNONE, vabsq_m_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_UNONE_UNONE_NONE_PRED, vshlq_m_r_u, v16qi, v8hi, v4si) +VAR3 (TERNOP_UNONE_UNONE_NONE_PRED, vrshlq_m_n_u, v16qi, v8hi, v4si) +VAR3 (TERNOP_UNONE_UNONE_NONE_PRED, vqshlq_m_r_u, v16qi, v8hi, v4si) +VAR3 (TERNOP_UNONE_UNONE_NONE_PRED, vqrshlq_m_n_u, v16qi, v8hi, v4si) +VAR3 (TERNOP_UNONE_UNONE_NONE_PRED, vminavq_p_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_UNONE_UNONE_NONE_PRED, vminaq_m_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_UNONE_UNONE_NONE_PRED, vmaxavq_p_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_UNONE_UNONE_NONE_PRED, vmaxaq_m_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_PRED_NONE_NONE_PRED, vcmpneq_m_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_PRED_NONE_NONE_PRED, vcmpneq_m_n_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_PRED_NONE_NONE_PRED, vcmpltq_m_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_PRED_NONE_NONE_PRED, vcmpltq_m_n_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_PRED_NONE_NONE_PRED, vcmpleq_m_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_PRED_NONE_NONE_PRED, vcmpleq_m_n_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_PRED_NONE_NONE_PRED, vcmpgtq_m_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_PRED_NONE_NONE_PRED, vcmpgtq_m_n_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_PRED_NONE_NONE_PRED, vcmpgeq_m_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_PRED_NONE_NONE_PRED, vcmpgeq_m_n_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_PRED_NONE_NONE_PRED, vcmpeqq_m_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_PRED_NONE_NONE_PRED, vcmpeqq_m_n_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_NONE_NONE_NONE_PRED, vshlq_m_r_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_NONE_NONE_NONE_PRED, vrshlq_m_n_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_NONE_NONE_NONE_PRED, vrev64q_m_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_NONE_NONE_NONE_PRED, vqshlq_m_r_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_NONE_NONE_NONE_PRED, vqrshlq_m_n_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_NONE_NONE_NONE_PRED, vqnegq_m_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_NONE_NONE_NONE_PRED, vqabsq_m_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_NONE_NONE_NONE_PRED, vnegq_m_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_NONE_NONE_NONE_PRED, vmvnq_m_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_NONE_NONE_NONE_PRED, vmlsdavxq_p_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_NONE_NONE_NONE_PRED, vmlsdavq_p_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_NONE_NONE_NONE_PRED, vmladavxq_p_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_NONE_NONE_NONE_PRED, vmladavq_p_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_NONE_NONE_NONE_PRED, vminvq_p_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_NONE_NONE_NONE_PRED, vmaxvq_p_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_NONE_NONE_NONE_PRED, vdupq_m_n_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_NONE_NONE_NONE_PRED, vclzq_m_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_NONE_NONE_NONE_PRED, vclsq_m_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_NONE_NONE_NONE_PRED, vaddvaq_p_s, v16qi, v8hi, v4si) +VAR3 (TERNOP_NONE_NONE_NONE_PRED, vabsq_m_s, v16qi, v8hi, v4si) VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqrdmlsdhxq_s, v16qi, v8hi, v4si) VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqrdmlsdhq_s, v16qi, v8hi, v4si) VAR3 (TERNOP_NONE_NONE_NONE_NONE, vqrdmlashq_n_s, v16qi, v8hi, v4si) @@ -378,14 +378,14 @@ VAR3 (TERNOP_NONE_NONE_NONE_NONE, vmladavaxq_s, v16qi, v8hi, v4si) VAR3 (TERNOP_NONE_NONE_NONE_NONE, vmladavaq_s, v16qi, v8hi, v4si) VAR3 (TERNOP_NONE_NONE_NONE_IMM, vsriq_n_s, v16qi, v8hi, v4si) VAR3 (TERNOP_NONE_NONE_NONE_IMM, vsliq_n_s, v16qi, v8hi, v4si) -VAR2 (TERNOP_UNONE_UNONE_UNONE_UNONE, vrev32q_m_u, v16qi, v8hi) -VAR2 (TERNOP_UNONE_UNONE_UNONE_UNONE, 
vqmovntq_m_u, v8hi, v4si) -VAR2 (TERNOP_UNONE_UNONE_UNONE_UNONE, vqmovnbq_m_u, v8hi, v4si) -VAR2 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmovntq_m_u, v8hi, v4si) -VAR2 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmovnbq_m_u, v8hi, v4si) -VAR2 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmovltq_m_u, v16qi, v8hi) -VAR2 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmovlbq_m_u, v16qi, v8hi) -VAR2 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmlaldavq_p_u, v8hi, v4si) +VAR2 (TERNOP_UNONE_UNONE_UNONE_PRED, vrev32q_m_u, v16qi, v8hi) +VAR2 (TERNOP_UNONE_UNONE_UNONE_PRED, vqmovntq_m_u, v8hi, v4si) +VAR2 (TERNOP_UNONE_UNONE_UNONE_PRED, vqmovnbq_m_u, v8hi, v4si) +VAR2 (TERNOP_UNONE_UNONE_UNONE_PRED, vmovntq_m_u, v8hi, v4si) +VAR2 (TERNOP_UNONE_UNONE_UNONE_PRED, vmovnbq_m_u, v8hi, v4si) +VAR2 (TERNOP_UNONE_UNONE_UNONE_PRED, vmovltq_m_u, v16qi, v8hi) +VAR2 (TERNOP_UNONE_UNONE_UNONE_PRED, vmovlbq_m_u, v16qi, v8hi) +VAR2 (TERNOP_UNONE_UNONE_UNONE_PRED, vmlaldavq_p_u, v8hi, v4si) VAR2 (TERNOP_UNONE_UNONE_UNONE_UNONE, vmlaldavaq_u, v8hi, v4si) VAR2 (TERNOP_UNONE_UNONE_UNONE_IMM, vshrntq_n_u, v8hi, v4si) VAR2 (TERNOP_UNONE_UNONE_UNONE_IMM, vshrnbq_n_u, v8hi, v4si) @@ -394,60 +394,60 @@ VAR2 (TERNOP_UNONE_UNONE_UNONE_IMM, vrshrnbq_n_u, v8hi, v4si) VAR2 (TERNOP_UNONE_UNONE_UNONE_IMM, vqshrntq_n_u, v8hi, v4si) VAR2 (TERNOP_UNONE_UNONE_UNONE_IMM, vqshrnbq_n_u, v8hi, v4si) VAR2 (TERNOP_UNONE_UNONE_UNONE_IMM, vqrshrntq_n_u, v8hi, v4si) -VAR2 (TERNOP_UNONE_UNONE_NONE_UNONE, vqmovuntq_m_s, v8hi, v4si) -VAR2 (TERNOP_UNONE_UNONE_NONE_UNONE, vqmovunbq_m_s, v8hi, v4si) -VAR2 (TERNOP_UNONE_UNONE_NONE_UNONE, vcvtq_m_from_f_u, v8hi, v4si) -VAR2 (TERNOP_UNONE_UNONE_NONE_UNONE, vcvtpq_m_u, v8hi, v4si) -VAR2 (TERNOP_UNONE_UNONE_NONE_UNONE, vcvtnq_m_u, v8hi, v4si) -VAR2 (TERNOP_UNONE_UNONE_NONE_UNONE, vcvtmq_m_u, v8hi, v4si) +VAR2 (TERNOP_UNONE_UNONE_NONE_PRED, vqmovuntq_m_s, v8hi, v4si) +VAR2 (TERNOP_UNONE_UNONE_NONE_PRED, vqmovunbq_m_s, v8hi, v4si) +VAR2 (TERNOP_UNONE_UNONE_NONE_PRED, vcvtq_m_from_f_u, v8hi, v4si) +VAR2 (TERNOP_UNONE_UNONE_NONE_PRED, vcvtpq_m_u, v8hi, v4si) +VAR2 (TERNOP_UNONE_UNONE_NONE_PRED, vcvtnq_m_u, v8hi, v4si) +VAR2 (TERNOP_UNONE_UNONE_NONE_PRED, vcvtmq_m_u, v8hi, v4si) VAR2 (TERNOP_UNONE_UNONE_NONE_IMM, vqshruntq_n_s, v8hi, v4si) VAR2 (TERNOP_UNONE_UNONE_NONE_IMM, vqshrunbq_n_s, v8hi, v4si) VAR2 (TERNOP_UNONE_UNONE_NONE_IMM, vqrshruntq_n_s, v8hi, v4si) -VAR2 (TERNOP_UNONE_UNONE_IMM_UNONE, vorrq_m_n_u, v8hi, v4si) -VAR2 (TERNOP_UNONE_UNONE_IMM_UNONE, vmvnq_m_n_u, v8hi, v4si) -VAR2 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpneq_m_n_f, v8hf, v4sf) -VAR2 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpneq_m_f, v8hf, v4sf) -VAR2 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpltq_m_n_f, v8hf, v4sf) -VAR2 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpltq_m_f, v8hf, v4sf) -VAR2 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpleq_m_n_f, v8hf, v4sf) -VAR2 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpleq_m_f, v8hf, v4sf) -VAR2 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpgtq_m_n_f, v8hf, v4sf) -VAR2 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpgtq_m_f, v8hf, v4sf) -VAR2 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpgeq_m_n_f, v8hf, v4sf) -VAR2 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpgeq_m_f, v8hf, v4sf) -VAR2 (TERNOP_UNONE_NONE_NONE_UNONE, vcmpeqq_m_n_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vrndxq_m_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vrndq_m_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vrndpq_m_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vrndnq_m_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vrndmq_m_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vrndaq_m_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, 
vrev64q_m_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vrev32q_m_s, v16qi, v8hi) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vqmovntq_m_s, v8hi, v4si) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vqmovnbq_m_s, v8hi, v4si) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vpselq_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vnegq_m_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vmovntq_m_s, v8hi, v4si) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vmovnbq_m_s, v8hi, v4si) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vmovltq_m_s, v16qi, v8hi) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vmovlbq_m_s, v16qi, v8hi) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vmlsldavxq_p_s, v8hi, v4si) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vmlsldavq_p_s, v8hi, v4si) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vmlaldavxq_p_s, v8hi, v4si) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vmlaldavq_p_s, v8hi, v4si) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vminnmvq_p_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vminnmavq_p_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vminnmaq_m_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vmaxnmvq_p_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vmaxnmavq_p_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vmaxnmaq_m_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vdupq_m_n_f, v8hf, v4sf) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vcvtq_m_from_f_s, v8hi, v4si) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vcvtpq_m_s, v8hi, v4si) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vcvtnq_m_s, v8hi, v4si) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vcvtmq_m_s, v8hi, v4si) -VAR2 (TERNOP_NONE_NONE_NONE_UNONE, vabsq_m_f, v8hf, v4sf) +VAR2 (TERNOP_UNONE_UNONE_IMM_PRED, vorrq_m_n_u, v8hi, v4si) +VAR2 (TERNOP_UNONE_UNONE_IMM_PRED, vmvnq_m_n_u, v8hi, v4si) +VAR2 (TERNOP_PRED_NONE_NONE_PRED, vcmpneq_m_n_f, v8hf, v4sf) +VAR2 (TERNOP_PRED_NONE_NONE_PRED, vcmpneq_m_f, v8hf, v4sf) +VAR2 (TERNOP_PRED_NONE_NONE_PRED, vcmpltq_m_n_f, v8hf, v4sf) +VAR2 (TERNOP_PRED_NONE_NONE_PRED, vcmpltq_m_f, v8hf, v4sf) +VAR2 (TERNOP_PRED_NONE_NONE_PRED, vcmpleq_m_n_f, v8hf, v4sf) +VAR2 (TERNOP_PRED_NONE_NONE_PRED, vcmpleq_m_f, v8hf, v4sf) +VAR2 (TERNOP_PRED_NONE_NONE_PRED, vcmpgtq_m_n_f, v8hf, v4sf) +VAR2 (TERNOP_PRED_NONE_NONE_PRED, vcmpgtq_m_f, v8hf, v4sf) +VAR2 (TERNOP_PRED_NONE_NONE_PRED, vcmpgeq_m_n_f, v8hf, v4sf) +VAR2 (TERNOP_PRED_NONE_NONE_PRED, vcmpgeq_m_f, v8hf, v4sf) +VAR2 (TERNOP_PRED_NONE_NONE_PRED, vcmpeqq_m_n_f, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vrndxq_m_f, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vrndq_m_f, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vrndpq_m_f, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vrndnq_m_f, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vrndmq_m_f, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vrndaq_m_f, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vrev64q_m_f, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vrev32q_m_s, v16qi, v8hi) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vqmovntq_m_s, v8hi, v4si) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vqmovnbq_m_s, v8hi, v4si) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vpselq_f, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vnegq_m_f, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vmovntq_m_s, v8hi, v4si) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vmovnbq_m_s, v8hi, v4si) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vmovltq_m_s, v16qi, v8hi) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vmovlbq_m_s, v16qi, v8hi) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vmlsldavxq_p_s, v8hi, v4si) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vmlsldavq_p_s, v8hi, v4si) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vmlaldavxq_p_s, v8hi, v4si) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vmlaldavq_p_s, 
v8hi, v4si) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vminnmvq_p_f, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vminnmavq_p_f, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vminnmaq_m_f, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vmaxnmvq_p_f, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vmaxnmavq_p_f, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vmaxnmaq_m_f, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vdupq_m_n_f, v8hf, v4sf) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vcvtq_m_from_f_s, v8hi, v4si) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vcvtpq_m_s, v8hi, v4si) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vcvtnq_m_s, v8hi, v4si) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vcvtmq_m_s, v8hi, v4si) +VAR2 (TERNOP_NONE_NONE_NONE_PRED, vabsq_m_f, v8hf, v4sf) VAR2 (TERNOP_NONE_NONE_NONE_NONE, vmlsldavaxq_s, v8hi, v4si) VAR2 (TERNOP_NONE_NONE_NONE_NONE, vmlsldavaq_s, v8hi, v4si) VAR2 (TERNOP_NONE_NONE_NONE_NONE, vmlaldavaxq_s, v8hi, v4si) @@ -463,208 +463,208 @@ VAR2 (TERNOP_NONE_NONE_NONE_IMM, vrshrnbq_n_s, v8hi, v4si) VAR2 (TERNOP_NONE_NONE_NONE_IMM, vqshrntq_n_s, v8hi, v4si) VAR2 (TERNOP_NONE_NONE_NONE_IMM, vqshrnbq_n_s, v8hi, v4si) VAR2 (TERNOP_NONE_NONE_NONE_IMM, vqrshrntq_n_s, v8hi, v4si) -VAR2 (TERNOP_NONE_NONE_IMM_UNONE, vorrq_m_n_s, v8hi, v4si) -VAR2 (TERNOP_NONE_NONE_IMM_UNONE, vmvnq_m_n_s, v8hi, v4si) -VAR1 (TERNOP_UNONE_UNONE_UNONE_UNONE, vrmlaldavhq_p_u, v4si) -VAR1 (TERNOP_UNONE_UNONE_UNONE_UNONE, vrev16q_m_u, v16qi) -VAR1 (TERNOP_UNONE_UNONE_UNONE_UNONE, vaddlvaq_p_u, v4si) -VAR1 (TERNOP_NONE_NONE_NONE_UNONE, vrmlsldavhxq_p_s, v4si) -VAR1 (TERNOP_NONE_NONE_NONE_UNONE, vrmlsldavhq_p_s, v4si) -VAR1 (TERNOP_NONE_NONE_NONE_UNONE, vrmlaldavhxq_p_s, v4si) -VAR1 (TERNOP_NONE_NONE_NONE_UNONE, vrmlaldavhq_p_s, v4si) -VAR1 (TERNOP_NONE_NONE_NONE_UNONE, vrev32q_m_f, v8hf) -VAR1 (TERNOP_NONE_NONE_NONE_UNONE, vrev16q_m_s, v16qi) -VAR1 (TERNOP_NONE_NONE_NONE_UNONE, vcvttq_m_f32_f16, v4sf) -VAR1 (TERNOP_NONE_NONE_NONE_UNONE, vcvttq_m_f16_f32, v8hf) -VAR1 (TERNOP_NONE_NONE_NONE_UNONE, vcvtbq_m_f32_f16, v4sf) -VAR1 (TERNOP_NONE_NONE_NONE_UNONE, vcvtbq_m_f16_f32, v8hf) -VAR1 (TERNOP_NONE_NONE_NONE_UNONE, vaddlvaq_p_s, v4si) +VAR2 (TERNOP_NONE_NONE_IMM_PRED, vorrq_m_n_s, v8hi, v4si) +VAR2 (TERNOP_NONE_NONE_IMM_PRED, vmvnq_m_n_s, v8hi, v4si) +VAR1 (TERNOP_UNONE_UNONE_UNONE_PRED, vrmlaldavhq_p_u, v4si) +VAR1 (TERNOP_UNONE_UNONE_UNONE_PRED, vrev16q_m_u, v16qi) +VAR1 (TERNOP_UNONE_UNONE_UNONE_PRED, vaddlvaq_p_u, v4si) +VAR1 (TERNOP_NONE_NONE_NONE_PRED, vrmlsldavhxq_p_s, v4si) +VAR1 (TERNOP_NONE_NONE_NONE_PRED, vrmlsldavhq_p_s, v4si) +VAR1 (TERNOP_NONE_NONE_NONE_PRED, vrmlaldavhxq_p_s, v4si) +VAR1 (TERNOP_NONE_NONE_NONE_PRED, vrmlaldavhq_p_s, v4si) +VAR1 (TERNOP_NONE_NONE_NONE_PRED, vrev32q_m_f, v8hf) +VAR1 (TERNOP_NONE_NONE_NONE_PRED, vrev16q_m_s, v16qi) +VAR1 (TERNOP_NONE_NONE_NONE_PRED, vcvttq_m_f32_f16, v4sf) +VAR1 (TERNOP_NONE_NONE_NONE_PRED, vcvttq_m_f16_f32, v8hf) +VAR1 (TERNOP_NONE_NONE_NONE_PRED, vcvtbq_m_f32_f16, v4sf) +VAR1 (TERNOP_NONE_NONE_NONE_PRED, vcvtbq_m_f16_f32, v8hf) +VAR1 (TERNOP_NONE_NONE_NONE_PRED, vaddlvaq_p_s, v4si) VAR1 (TERNOP_NONE_NONE_NONE_NONE, vrmlsldavhaxq_s, v4si) VAR1 (TERNOP_NONE_NONE_NONE_NONE, vrmlsldavhaq_s, v4si) VAR1 (TERNOP_NONE_NONE_NONE_NONE, vrmlaldavhaxq_s, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vsriq_m_n_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vsriq_m_n_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vsubq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vsubq_m_u, v16qi, v8hi, v4si) -VAR2 
(QUADOP_NONE_NONE_UNONE_IMM_UNONE, vcvtq_m_n_to_f_u, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vcvtq_m_n_to_f_s, v8hf, v4sf) -VAR3 (QUADOP_UNONE_UNONE_NONE_IMM_UNONE, vqshluq_m_n_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_NONE_NONE_UNONE, vabavq_p_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vabavq_p_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_NONE_UNONE, vshlq_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vshlq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vsubq_m_n_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vrmulhq_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vrhaddq_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vqsubq_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vqsubq_m_n_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vqaddq_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vqaddq_m_n_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vorrq_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vornq_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmulq_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmulq_m_n_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmulltq_int_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmullbq_int_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmulhq_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmlasq_m_n_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmlaq_m_n_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmladavaq_p_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vminq_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmaxq_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vhsubq_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vhsubq_m_n_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vhaddq_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vhaddq_m_n_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, veorq_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vcaddq_rot90_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vcaddq_rot270_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vbicq_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vandq_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vaddq_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vaddq_m_n_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vabdq_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_NONE_UNONE, vrshlq_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_NONE_UNONE, vqshlq_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_NONE_UNONE, vqrshlq_m_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_NONE_UNONE, vbrsrq_m_n_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vsliq_m_n_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshrq_m_n_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshlq_m_n_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vrshrq_m_n_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vqshlq_m_n_u, 
v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vsubq_m_n_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vrshlq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vrmulhq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vrhaddq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqsubq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqsubq_m_n_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqshlq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqrshlq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqrdmulhq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqrdmulhq_m_n_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqrdmlsdhxq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqrdmlsdhq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqrdmlashq_m_n_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqrdmlahq_m_n_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqrdmladhxq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqrdmladhq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmulhq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmulhq_m_n_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlsdhxq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlsdhq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlahq_m_n_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmlashq_m_n_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmladhxq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmladhq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqaddq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqaddq_m_n_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vorrq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vornq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmulq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmulq_m_n_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmulltq_int_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmullbq_int_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmulhq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmlsdavaxq_p_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmlsdavaq_p_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmlasq_m_n_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmlaq_m_n_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmladavaxq_p_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmladavaq_p_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vminq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmaxq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vhsubq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vhsubq_m_n_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vhcaddq_rot90_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vhcaddq_rot270_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vhaddq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vhaddq_m_n_s, v16qi, v8hi, v4si) -VAR3 
(QUADOP_NONE_NONE_NONE_NONE_UNONE, veorq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcaddq_rot90_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcaddq_rot270_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vbrsrq_m_n_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vbicq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vandq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vaddq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vaddq_m_n_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vabdq_m_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vsliq_m_n_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vshrq_m_n_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vshlq_m_n_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vrshrq_m_n_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vqshlq_m_n_s, v16qi, v8hi, v4si) -VAR2 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmulltq_poly_m_p, v16qi, v8hi) -VAR2 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmullbq_poly_m_p, v16qi, v8hi) -VAR2 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vmlaldavaq_p_u, v8hi, v4si) -VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshrntq_m_n_u, v8hi, v4si) -VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshrnbq_m_n_u, v8hi, v4si) -VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshlltq_m_n_u, v16qi, v8hi) -VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshllbq_m_n_u, v16qi, v8hi) -VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vrshrntq_m_n_u, v8hi, v4si) -VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vrshrnbq_m_n_u, v8hi, v4si) -VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vqshrntq_m_n_u, v8hi, v4si) -VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vqshrnbq_m_n_u, v8hi, v4si) -VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vqrshrntq_m_n_u, v8hi, v4si) -VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vqrshrnbq_m_n_u, v8hi, v4si) -VAR2 (QUADOP_UNONE_UNONE_NONE_IMM_UNONE, vqshruntq_m_n_s, v8hi, v4si) -VAR2 (QUADOP_UNONE_UNONE_NONE_IMM_UNONE, vqshrunbq_m_n_s, v8hi, v4si) -VAR2 (QUADOP_UNONE_UNONE_NONE_IMM_UNONE, vqrshruntq_m_n_s, v8hi, v4si) -VAR2 (QUADOP_UNONE_UNONE_NONE_IMM_UNONE, vqrshrunbq_m_n_s, v8hi, v4si) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmulltq_m_s, v8hi, v4si) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmulltq_m_n_s, v8hi, v4si) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmullbq_m_s, v8hi, v4si) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vqdmullbq_m_n_s, v8hi, v4si) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmlsldavaxq_p_s, v8hi, v4si) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmlsldavaq_p_s, v8hi, v4si) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmlaldavaxq_p_s, v8hi, v4si) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmlaldavaq_p_s, v8hi, v4si) -VAR2 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vshrntq_m_n_s, v8hi, v4si) -VAR2 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vshrnbq_m_n_s, v8hi, v4si) -VAR2 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vshlltq_m_n_s, v16qi, v8hi) -VAR2 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vshllbq_m_n_s, v16qi, v8hi) -VAR2 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vrshrntq_m_n_s, v8hi, v4si) -VAR2 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vrshrnbq_m_n_s, v8hi, v4si) -VAR2 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vqshrntq_m_n_s, v8hi, v4si) -VAR2 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vqshrnbq_m_n_s, v8hi, v4si) -VAR2 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vqrshrntq_m_n_s, v8hi, v4si) -VAR2 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vqrshrnbq_m_n_s, v8hi, v4si) -VAR1 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vrmlaldavhaq_p_u, v4si) -VAR1 
(QUADOP_NONE_NONE_NONE_NONE_UNONE, vrmlsldavhaxq_p_s, v4si) -VAR1 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vrmlsldavhaq_p_s, v4si) -VAR1 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vrmlaldavhaxq_p_s, v4si) -VAR1 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vrmlaldavhaq_p_s, v4si) -VAR2 (QUADOP_UNONE_UNONE_NONE_IMM_UNONE, vcvtq_m_n_from_f_u, v8hi, v4si) -VAR2 (QUADOP_NONE_NONE_NONE_IMM_UNONE, vcvtq_m_n_from_f_s, v8hi, v4si) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vbrsrq_m_n_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vsubq_m_n_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vsubq_m_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vorrq_m_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vornq_m_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmulq_m_n_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmulq_m_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vminnmq_m_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vmaxnmq_m_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vfmsq_m_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vfmasq_m_n_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vfmaq_m_n_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vfmaq_m_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, veorq_m_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmulq_rot90_m_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmulq_rot270_m_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmulq_rot180_m_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmulq_m_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmlaq_rot90_m_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmlaq_rot270_m_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmlaq_rot180_m_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcmlaq_m_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcaddq_rot90_m_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vcaddq_rot270_m_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vbicq_m_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vandq_m_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vaddq_m_n_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vaddq_m_f, v8hf, v4sf) -VAR2 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vabdq_m_f, v8hf, v4sf) +VAR3 (QUADOP_NONE_NONE_NONE_IMM_PRED, vsriq_m_n_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vsriq_m_n_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vsubq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vsubq_m_u, v16qi, v8hi, v4si) +VAR2 (QUADOP_NONE_NONE_UNONE_IMM_PRED, vcvtq_m_n_to_f_u, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_IMM_PRED, vcvtq_m_n_to_f_s, v8hf, v4sf) +VAR3 (QUADOP_UNONE_UNONE_NONE_IMM_PRED, vqshluq_m_n_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_NONE_NONE_PRED, vabavq_p_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vabavq_p_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_NONE_PRED, vshlq_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vshlq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vsubq_m_n_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vrmulhq_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vrhaddq_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vqsubq_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vqsubq_m_n_u, v16qi, v8hi, v4si) +VAR3 
(QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vqaddq_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vqaddq_m_n_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vorrq_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vornq_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vmulq_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vmulq_m_n_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vmulltq_int_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vmullbq_int_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vmulhq_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vmlasq_m_n_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vmlaq_m_n_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vmladavaq_p_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vminq_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vmaxq_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vhsubq_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vhsubq_m_n_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vhaddq_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vhaddq_m_n_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, veorq_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vcaddq_rot90_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vcaddq_rot270_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vbicq_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vandq_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vaddq_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vaddq_m_n_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vabdq_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_NONE_PRED, vrshlq_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_NONE_PRED, vqshlq_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_NONE_PRED, vqrshlq_m_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_NONE_PRED, vbrsrq_m_n_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vsliq_m_n_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vshrq_m_n_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vshlq_m_n_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vrshrq_m_n_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vqshlq_m_n_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vsubq_m_n_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vrshlq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vrmulhq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vrhaddq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqsubq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqsubq_m_n_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqshlq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqrshlq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqrdmulhq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqrdmulhq_m_n_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqrdmlsdhxq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqrdmlsdhq_m_s, v16qi, v8hi, v4si) +VAR3 
(QUADOP_NONE_NONE_NONE_NONE_PRED, vqrdmlashq_m_n_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqrdmlahq_m_n_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqrdmladhxq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqrdmladhq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqdmulhq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqdmulhq_m_n_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqdmlsdhxq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqdmlsdhq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqdmlahq_m_n_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqdmlashq_m_n_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqdmladhxq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqdmladhq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqaddq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqaddq_m_n_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vorrq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vornq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vmulq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vmulq_m_n_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vmulltq_int_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vmullbq_int_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vmulhq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vmlsdavaxq_p_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vmlsdavaq_p_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vmlasq_m_n_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vmlaq_m_n_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vmladavaxq_p_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vmladavaq_p_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vminq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vmaxq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vhsubq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vhsubq_m_n_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vhcaddq_rot90_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vhcaddq_rot270_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vhaddq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vhaddq_m_n_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, veorq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vcaddq_rot90_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vcaddq_rot270_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vbrsrq_m_n_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vbicq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vandq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vaddq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vaddq_m_n_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_NONE_PRED, vabdq_m_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_IMM_PRED, vsliq_m_n_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_IMM_PRED, vshrq_m_n_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_IMM_PRED, vshlq_m_n_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_IMM_PRED, vrshrq_m_n_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_NONE_IMM_PRED, 
vqshlq_m_n_s, v16qi, v8hi, v4si) +VAR2 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vmulltq_poly_m_p, v16qi, v8hi) +VAR2 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vmullbq_poly_m_p, v16qi, v8hi) +VAR2 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vmlaldavaq_p_u, v8hi, v4si) +VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vshrntq_m_n_u, v8hi, v4si) +VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vshrnbq_m_n_u, v8hi, v4si) +VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vshlltq_m_n_u, v16qi, v8hi) +VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vshllbq_m_n_u, v16qi, v8hi) +VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vrshrntq_m_n_u, v8hi, v4si) +VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vrshrnbq_m_n_u, v8hi, v4si) +VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vqshrntq_m_n_u, v8hi, v4si) +VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vqshrnbq_m_n_u, v8hi, v4si) +VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vqrshrntq_m_n_u, v8hi, v4si) +VAR2 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vqrshrnbq_m_n_u, v8hi, v4si) +VAR2 (QUADOP_UNONE_UNONE_NONE_IMM_PRED, vqshruntq_m_n_s, v8hi, v4si) +VAR2 (QUADOP_UNONE_UNONE_NONE_IMM_PRED, vqshrunbq_m_n_s, v8hi, v4si) +VAR2 (QUADOP_UNONE_UNONE_NONE_IMM_PRED, vqrshruntq_m_n_s, v8hi, v4si) +VAR2 (QUADOP_UNONE_UNONE_NONE_IMM_PRED, vqrshrunbq_m_n_s, v8hi, v4si) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqdmulltq_m_s, v8hi, v4si) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqdmulltq_m_n_s, v8hi, v4si) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqdmullbq_m_s, v8hi, v4si) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vqdmullbq_m_n_s, v8hi, v4si) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vmlsldavaxq_p_s, v8hi, v4si) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vmlsldavaq_p_s, v8hi, v4si) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vmlaldavaxq_p_s, v8hi, v4si) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vmlaldavaq_p_s, v8hi, v4si) +VAR2 (QUADOP_NONE_NONE_NONE_IMM_PRED, vshrntq_m_n_s, v8hi, v4si) +VAR2 (QUADOP_NONE_NONE_NONE_IMM_PRED, vshrnbq_m_n_s, v8hi, v4si) +VAR2 (QUADOP_NONE_NONE_NONE_IMM_PRED, vshlltq_m_n_s, v16qi, v8hi) +VAR2 (QUADOP_NONE_NONE_NONE_IMM_PRED, vshllbq_m_n_s, v16qi, v8hi) +VAR2 (QUADOP_NONE_NONE_NONE_IMM_PRED, vrshrntq_m_n_s, v8hi, v4si) +VAR2 (QUADOP_NONE_NONE_NONE_IMM_PRED, vrshrnbq_m_n_s, v8hi, v4si) +VAR2 (QUADOP_NONE_NONE_NONE_IMM_PRED, vqshrntq_m_n_s, v8hi, v4si) +VAR2 (QUADOP_NONE_NONE_NONE_IMM_PRED, vqshrnbq_m_n_s, v8hi, v4si) +VAR2 (QUADOP_NONE_NONE_NONE_IMM_PRED, vqrshrntq_m_n_s, v8hi, v4si) +VAR2 (QUADOP_NONE_NONE_NONE_IMM_PRED, vqrshrnbq_m_n_s, v8hi, v4si) +VAR1 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vrmlaldavhaq_p_u, v4si) +VAR1 (QUADOP_NONE_NONE_NONE_NONE_PRED, vrmlsldavhaxq_p_s, v4si) +VAR1 (QUADOP_NONE_NONE_NONE_NONE_PRED, vrmlsldavhaq_p_s, v4si) +VAR1 (QUADOP_NONE_NONE_NONE_NONE_PRED, vrmlaldavhaxq_p_s, v4si) +VAR1 (QUADOP_NONE_NONE_NONE_NONE_PRED, vrmlaldavhaq_p_s, v4si) +VAR2 (QUADOP_UNONE_UNONE_NONE_IMM_PRED, vcvtq_m_n_from_f_u, v8hi, v4si) +VAR2 (QUADOP_NONE_NONE_NONE_IMM_PRED, vcvtq_m_n_from_f_s, v8hi, v4si) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vbrsrq_m_n_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vsubq_m_n_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vsubq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vorrq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vornq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vmulq_m_n_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vmulq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vminnmq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vmaxnmq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vfmsq_m_f, 
v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vfmasq_m_n_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vfmaq_m_n_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vfmaq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, veorq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vcmulq_rot90_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vcmulq_rot270_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vcmulq_rot180_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vcmulq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vcmlaq_rot90_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vcmlaq_rot270_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vcmlaq_rot180_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vcmlaq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vcaddq_rot90_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vcaddq_rot270_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vbicq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vandq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vaddq_m_n_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vaddq_m_f, v8hf, v4sf) +VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vabdq_m_f, v8hf, v4sf) VAR3 (STRS, vstrbq_s, v16qi, v8hi, v4si) VAR3 (STRU, vstrbq_u, v16qi, v8hi, v4si) VAR3 (STRSS, vstrbq_scatter_offset_s, v16qi, v8hi, v4si) @@ -797,14 +797,14 @@ VAR1 (STRSU_P, vstrwq_scatter_offset_p_u, v4si) VAR1 (STRSU_P, vstrwq_scatter_shifted_offset_p_u, v4si) VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, viwdupq_wb_u, v16qi, v4si, v8hi) VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vdwdupq_wb_u, v16qi, v4si, v8hi) -VAR3 (QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_UNONE, viwdupq_m_wb_u, v16qi, v8hi, v4si) -VAR3 (QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_UNONE, vdwdupq_m_wb_u, v16qi, v8hi, v4si) -VAR3 (QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_UNONE, viwdupq_m_n_u, v16qi, v8hi, v4si) -VAR3 (QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_UNONE, vdwdupq_m_n_u, v16qi, v8hi, v4si) +VAR3 (QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_PRED, viwdupq_m_wb_u, v16qi, v8hi, v4si) +VAR3 (QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_PRED, vdwdupq_m_wb_u, v16qi, v8hi, v4si) +VAR3 (QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_PRED, viwdupq_m_n_u, v16qi, v8hi, v4si) +VAR3 (QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_PRED, vdwdupq_m_n_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_IMM, vddupq_n_u, v16qi, v8hi, v4si) VAR3 (BINOP_UNONE_UNONE_IMM, vidupq_n_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vddupq_m_n_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vidupq_m_n_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vddupq_m_n_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vidupq_m_n_u, v16qi, v8hi, v4si) VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vdwdupq_n_u, v16qi, v4si, v8hi) VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, viwdupq_n_u, v16qi, v4si, v8hi) VAR1 (STRSBWBU, vstrwq_scatter_base_wb_u, v4si) @@ -845,14 +845,14 @@ VAR1 (BINOP_NONE_NONE_NONE, vsbciq_s, v4si) VAR1 (BINOP_UNONE_UNONE_UNONE, vsbciq_u, v4si) VAR1 (BINOP_NONE_NONE_NONE, vsbcq_s, v4si) VAR1 (BINOP_UNONE_UNONE_UNONE, vsbcq_u, v4si) -VAR1 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vadciq_m_s, v4si) -VAR1 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vadciq_m_u, v4si) -VAR1 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vadcq_m_s, v4si) -VAR1 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vadcq_m_u, v4si) -VAR1 (QUADOP_NONE_NONE_NONE_NONE_UNONE, vsbciq_m_s, v4si) -VAR1 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vsbciq_m_u, v4si) -VAR1 
(QUADOP_NONE_NONE_NONE_NONE_UNONE, vsbcq_m_s, v4si) -VAR1 (QUADOP_UNONE_UNONE_UNONE_UNONE_UNONE, vsbcq_m_u, v4si) +VAR1 (QUADOP_NONE_NONE_NONE_NONE_PRED, vadciq_m_s, v4si) +VAR1 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vadciq_m_u, v4si) +VAR1 (QUADOP_NONE_NONE_NONE_NONE_PRED, vadcq_m_s, v4si) +VAR1 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vadcq_m_u, v4si) +VAR1 (QUADOP_NONE_NONE_NONE_NONE_PRED, vsbciq_m_s, v4si) +VAR1 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vsbciq_m_u, v4si) +VAR1 (QUADOP_NONE_NONE_NONE_NONE_PRED, vsbcq_m_s, v4si) +VAR1 (QUADOP_UNONE_UNONE_UNONE_UNONE_PRED, vsbcq_m_u, v4si) VAR5 (STORE1, vst2q, v16qi, v8hi, v4si, v8hf, v4sf) VAR5 (LOAD1, vld4q, v16qi, v8hi, v4si, v8hf, v4sf) VAR5 (LOAD1, vld2q, v16qi, v8hi, v4si, v8hf, v4sf) @@ -870,10 +870,10 @@ VAR1 (UQSHL, urshr_, si) VAR1 (UQSHL, urshrl_, di) VAR1 (UQSHL, uqshl_, si) VAR1 (UQSHL, uqshll_, di) -VAR3 (QUADOP_NONE_NONE_UNONE_IMM_UNONE, vshlcq_m_vec_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_NONE_NONE_UNONE_IMM_UNONE, vshlcq_m_carry_s, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshlcq_m_vec_u, v16qi, v8hi, v4si) -VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_UNONE, vshlcq_m_carry_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_UNONE_IMM_PRED, vshlcq_m_vec_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_NONE_NONE_UNONE_IMM_PRED, vshlcq_m_carry_s, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vshlcq_m_vec_u, v16qi, v8hi, v4si) +VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vshlcq_m_carry_u, v16qi, v8hi, v4si) /* optabs without any suffixes. */ VAR5 (BINOP_NONE_NONE_NONE, vcaddq_rot90, v16qi, v8hi, v4si, v8hf, v4sf) diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h index 9b6d599..b30d04c 100644 --- a/gcc/config/arm/arm_neon.h +++ b/gcc/config/arm/arm_neon.h @@ -18243,6 +18243,35 @@ vdotq_lane_s32 (int32x4_t __r, int8x16_t __a, int8x8_t __b, const int __index) return __builtin_neon_sdot_lanev16qi (__r, __a, __b, __index); } +__extension__ extern __inline uint32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vdot_laneq_u32 (uint32x2_t __r, uint8x8_t __a, uint8x16_t __b, const int __index) +{ + return __builtin_neon_udot_laneqv8qi_uuuus (__r, __a, __b, __index); +} + +__extension__ extern __inline uint32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vdotq_laneq_u32 (uint32x4_t __r, uint8x16_t __a, uint8x16_t __b, + const int __index) +{ + return __builtin_neon_udot_laneqv16qi_uuuus (__r, __a, __b, __index); +} + +__extension__ extern __inline int32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vdot_laneq_s32 (int32x2_t __r, int8x8_t __a, int8x16_t __b, const int __index) +{ + return __builtin_neon_sdot_laneqv8qi (__r, __a, __b, __index); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vdotq_laneq_s32 (int32x4_t __r, int8x16_t __a, int8x16_t __b, const int __index) +{ + return __builtin_neon_sdot_laneqv16qi (__r, __a, __b, __index); +} + #pragma GCC pop_options #endif @@ -18901,6 +18930,13 @@ vusdot_s32 (int32x2_t __r, uint8x8_t __a, int8x8_t __b) return __builtin_neon_usdotv8qi_ssus (__r, __a, __b); } +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vusdotq_s32 (int32x4_t __r, uint8x16_t __a, int8x16_t __b) +{ + return __builtin_neon_usdotv16qi_ssus (__r, __a, __b); +} + __extension__ extern __inline int32x2_t __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) vusdot_lane_s32 (int32x2_t __r, 
uint8x8_t __a, @@ -18933,6 +18969,38 @@ vsudotq_lane_s32 (int32x4_t __r, int8x16_t __a, return __builtin_neon_sudot_lanev16qi_sssus (__r, __a, __b, __index); } +__extension__ extern __inline int32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vusdot_laneq_s32 (int32x2_t __r, uint8x8_t __a, + int8x16_t __b, const int __index) +{ + return __builtin_neon_usdot_laneqv8qi_ssuss (__r, __a, __b, __index); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vusdotq_laneq_s32 (int32x4_t __r, uint8x16_t __a, + int8x16_t __b, const int __index) +{ + return __builtin_neon_usdot_laneqv16qi_ssuss (__r, __a, __b, __index); +} + +__extension__ extern __inline int32x2_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vsudot_laneq_s32 (int32x2_t __r, int8x8_t __a, + uint8x16_t __b, const int __index) +{ + return __builtin_neon_sudot_laneqv8qi_sssus (__r, __a, __b, __index); +} + +__extension__ extern __inline int32x4_t +__attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) +vsudotq_laneq_s32 (int32x4_t __r, int8x16_t __a, + uint8x16_t __b, const int __index) +{ + return __builtin_neon_sudot_laneqv16qi_sssus (__r, __a, __b, __index); +} + #pragma GCC pop_options #pragma GCC pop_options diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 865de65..445b2bf 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -342,10 +342,14 @@ VAR2 (TERNOP, sdot, v8qi, v16qi) VAR2 (UTERNOP, udot, v8qi, v16qi) VAR2 (MAC_LANE, sdot_lane, v8qi, v16qi) VAR2 (UMAC_LANE, udot_lane, v8qi, v16qi) +VAR2 (MAC_LANE, sdot_laneq, v8qi, v16qi) +VAR2 (UMAC_LANE, udot_laneq, v8qi, v16qi) -VAR1 (USTERNOP, usdot, v8qi) +VAR2 (USTERNOP, usdot, v8qi, v16qi) VAR2 (USMAC_LANE_QUADTUP, usdot_lane, v8qi, v16qi) VAR2 (SUMAC_LANE_QUADTUP, sudot_lane, v8qi, v16qi) +VAR2 (USMAC_LANE_QUADTUP, usdot_laneq, v8qi, v16qi) +VAR2 (SUMAC_LANE_QUADTUP, sudot_laneq, v8qi, v16qi) VAR4 (BINOP, vcadd90, v4hf, v2sf, v8hf, v4sf) VAR4 (BINOP, vcadd270, v4hf, v2sf, v8hf, v4sf) diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md index 1920004..2b411b0 100644 --- a/gcc/config/arm/constraints.md +++ b/gcc/config/arm/constraints.md @@ -312,6 +312,12 @@ (and (match_code "const_vector") (match_test "(TARGET_NEON || TARGET_HAVE_MVE) && op == CONST0_RTX (mode)"))) +(define_constraint "DB" + "@internal + In ARM/Thumb-2 state with MVE a constant vector of booleans." 
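[Editorial note — not part of the patch.] The arm_neon.h hunks above add the missing 128-bit-lane ("laneq") dot-product intrinsics (vdot_laneq_u32/s32, vdotq_laneq_u32/s32, vusdot_laneq_s32, vusdotq_laneq_s32, vsudot_laneq_s32, vsudotq_laneq_s32) plus the plain vector form vusdotq_s32, together with the matching arm_neon_builtins.def entries. As a rough illustration of how these new entry points are meant to be called — a hedged sketch only, using the signatures shown in the diff and assuming a toolchain with the DOTPROD and I8MM extensions enabled (for example -march=armv8.2-a+dotprod+i8mm; the exact option spelling may differ per target):

/* Illustrative usage sketch, not part of this patch.  */
#include <arm_neon.h>

int32x4_t
accumulate_mixed_sign (int32x4_t acc, uint8x16_t a, int8x16_t b)
{
  /* Unsigned-by-signed dot product accumulated into all four 32-bit lanes.  */
  acc = vusdotq_s32 (acc, a, b);
  /* Same operation, but re-using 32-bit group 2 of B for every multiply;
     the new "laneq" forms take a 128-bit lane source.  */
  return vusdotq_laneq_s32 (acc, a, b, 2);
}

uint32x4_t
accumulate_unsigned (uint32x4_t acc, uint8x16_t a, uint8x16_t b)
{
  /* Unsigned dot product against 32-bit group 0 of a 128-bit vector.  */
  return vdotq_laneq_u32 (acc, a, b, 0);
}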
+ (and (match_code "const_vector") + (match_test "TARGET_HAVE_MVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL"))) + (define_constraint "Da" "@internal In ARM/Thumb-2 state a const_int, const_double or const_vector that can diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 8202c27..37cf797 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -272,6 +272,8 @@ (define_mode_iterator MVE_2 [V16QI V8HI V4SI]) (define_mode_iterator MVE_5 [V8HI V4SI]) (define_mode_iterator MVE_6 [V8HI V4SI]) +(define_mode_iterator MVE_7 [V16BI V8BI V4BI]) +(define_mode_iterator MVE_7_HI [HI V16BI V8BI V4BI]) ;;---------------------------------------------------------------------------- ;; Code iterators @@ -946,6 +948,10 @@ (V8HF "u16") (V4SF "32")]) (define_mode_attr earlyclobber_32 [(V16QI "=w") (V8HI "=w") (V4SI "=&w") (V8HF "=w") (V4SF "=&w")]) +(define_mode_attr MVE_VPRED [(V16QI "V16BI") (V8HI "V8BI") (V4SI "V4BI") + (V2DI "HI") (V8HF "V8BI") (V4SF "V4BI")]) +(define_mode_attr MVE_vpred [(V16QI "v16bi") (V8HI "v8bi") (V4SI "v4bi") + (V2DI "hi") (V8HF "v8bi") (V4SF "v4bi")]) ;;---------------------------------------------------------------------------- ;; Code attributes diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 171dd38..908bedc 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -130,7 +130,7 @@ (set (match_operand:MVE_0 0 "s_register_operand" "=w") (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VRNDQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -617,7 +617,7 @@ (define_insn "mve_vmvnq_n_<supf><mode>" [ (set (match_operand:MVE_5 0 "s_register_operand" "=w") - (unspec:MVE_5 [(match_operand:HI 1 "immediate_operand" "i")] + (unspec:MVE_5 [(match_operand:<V_elem> 1 "immediate_operand" "i")] VMVNQ_N)) ] "TARGET_HAVE_MVE" @@ -826,7 +826,7 @@ [ (set (match_operand:DI 0 "s_register_operand" "=r") (unspec:DI [(match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:HI 2 "vpr_register_operand" "Up")] + (match_operand:V4BI 2 "vpr_register_operand" "Up")] VADDLVQ_P)) ] "TARGET_HAVE_MVE" @@ -839,8 +839,8 @@ ;; (define_insn "@mve_vcmp<mve_cmp_op>q_<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (MVE_COMPARISONS:HI (match_operand:MVE_2 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (MVE_COMPARISONS:<MVE_VPRED> (match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:MVE_2 2 "s_register_operand" "w"))) ] "TARGET_HAVE_MVE" @@ -853,8 +853,8 @@ ;; (define_insn "mve_vcmp<mve_cmp_op>q_n_<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (MVE_COMPARISONS:HI (match_operand:MVE_2 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (MVE_COMPARISONS:<MVE_VPRED> (match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:<V_elem> 2 "s_register_operand" "r"))) ] "TARGET_HAVE_MVE" @@ -918,7 +918,7 @@ [ (set (match_operand:SI 0 "s_register_operand" "=Te") (unspec:SI [(match_operand:MVE_2 1 "s_register_operand" "w") - (match_operand:HI 2 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 2 "vpr_register_operand" "Up")] VADDVQ_P)) ] "TARGET_HAVE_MVE" @@ -1929,8 +1929,8 @@ ;; (define_insn "@mve_vcmp<mve_cmp_op>q_f<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (MVE_FP_COMPARISONS:HI 
(match_operand:MVE_0 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (MVE_FP_COMPARISONS:<MVE_VPRED> (match_operand:MVE_0 1 "s_register_operand" "w") (match_operand:MVE_0 2 "s_register_operand" "w"))) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -1943,8 +1943,8 @@ ;; (define_insn "@mve_vcmp<mve_cmp_op>q_n_f<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (MVE_FP_COMPARISONS:HI (match_operand:MVE_0 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (MVE_FP_COMPARISONS:<MVE_VPRED> (match_operand:MVE_0 1 "s_register_operand" "w") (match_operand:<V_elem> 2 "s_register_operand" "r"))) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -2581,7 +2581,7 @@ (set (match_operand:MVE_5 0 "s_register_operand" "=w") (unspec:MVE_5 [(match_operand:MVE_5 1 "s_register_operand" "0") (match_operand:SI 2 "immediate_operand" "i") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VBICQ_M_N)) ] "TARGET_HAVE_MVE" @@ -2593,10 +2593,10 @@ ;; (define_insn "mve_vcmpeqq_m_f<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w") (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPEQQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -2611,7 +2611,7 @@ (set (match_operand:MVE_5 0 "s_register_operand" "=w") (unspec:MVE_5 [(match_operand:MVE_5 1 "s_register_operand" "0") (match_operand:<MVE_CNVT> 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCVTAQ_M)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -2626,7 +2626,7 @@ (set (match_operand:MVE_0 0 "s_register_operand" "=w") (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:<MVE_CNVT> 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCVTQ_M_TO_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -2748,7 +2748,7 @@ (set (match_operand:MVE_2 0 "s_register_operand" "=w") (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VABSQ_M_S)) ] "TARGET_HAVE_MVE" @@ -2764,7 +2764,7 @@ (set (match_operand:SI 0 "s_register_operand" "=Te") (unspec:SI [(match_operand:SI 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VADDVAQ_P)) ] "TARGET_HAVE_MVE" @@ -2780,7 +2780,7 @@ (set (match_operand:MVE_2 0 "s_register_operand" "=w") (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCLSQ_M_S)) ] "TARGET_HAVE_MVE" @@ -2796,7 +2796,7 @@ (set (match_operand:MVE_2 0 "s_register_operand" "=w") (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") - 
(match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCLZQ_M)) ] "TARGET_HAVE_MVE" @@ -2809,10 +2809,10 @@ ;; (define_insn "mve_vcmpcsq_m_n_u<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_2 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:<V_elem> 2 "s_register_operand" "r") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPCSQ_M_N_U)) ] "TARGET_HAVE_MVE" @@ -2825,10 +2825,10 @@ ;; (define_insn "mve_vcmpcsq_m_u<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_2 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPCSQ_M_U)) ] "TARGET_HAVE_MVE" @@ -2841,10 +2841,10 @@ ;; (define_insn "mve_vcmpeqq_m_n_<supf><mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_2 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:<V_elem> 2 "s_register_operand" "r") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPEQQ_M_N)) ] "TARGET_HAVE_MVE" @@ -2857,10 +2857,10 @@ ;; (define_insn "mve_vcmpeqq_m_<supf><mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_2 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPEQQ_M)) ] "TARGET_HAVE_MVE" @@ -2873,10 +2873,10 @@ ;; (define_insn "mve_vcmpgeq_m_n_s<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_2 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:<V_elem> 2 "s_register_operand" "r") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPGEQ_M_N_S)) ] "TARGET_HAVE_MVE" @@ -2889,10 +2889,10 @@ ;; (define_insn "mve_vcmpgeq_m_s<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_2 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPGEQ_M_S)) ] "TARGET_HAVE_MVE" @@ -2905,10 +2905,10 @@ ;; (define_insn "mve_vcmpgtq_m_n_s<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_2 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + 
(unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:<V_elem> 2 "s_register_operand" "r") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPGTQ_M_N_S)) ] "TARGET_HAVE_MVE" @@ -2921,10 +2921,10 @@ ;; (define_insn "mve_vcmpgtq_m_s<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_2 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPGTQ_M_S)) ] "TARGET_HAVE_MVE" @@ -2937,10 +2937,10 @@ ;; (define_insn "mve_vcmphiq_m_n_u<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_2 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:<V_elem> 2 "s_register_operand" "r") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPHIQ_M_N_U)) ] "TARGET_HAVE_MVE" @@ -2953,10 +2953,10 @@ ;; (define_insn "mve_vcmphiq_m_u<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_2 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPHIQ_M_U)) ] "TARGET_HAVE_MVE" @@ -2969,10 +2969,10 @@ ;; (define_insn "mve_vcmpleq_m_n_s<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_2 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:<V_elem> 2 "s_register_operand" "r") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPLEQ_M_N_S)) ] "TARGET_HAVE_MVE" @@ -2985,10 +2985,10 @@ ;; (define_insn "mve_vcmpleq_m_s<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_2 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPLEQ_M_S)) ] "TARGET_HAVE_MVE" @@ -3001,10 +3001,10 @@ ;; (define_insn "mve_vcmpltq_m_n_s<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_2 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:<V_elem> 2 "s_register_operand" "r") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPLTQ_M_N_S)) ] "TARGET_HAVE_MVE" @@ -3017,10 +3017,10 @@ ;; (define_insn "mve_vcmpltq_m_s<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI 
[(match_operand:MVE_2 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPLTQ_M_S)) ] "TARGET_HAVE_MVE" @@ -3033,10 +3033,10 @@ ;; (define_insn "mve_vcmpneq_m_n_<supf><mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_2 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:<V_elem> 2 "s_register_operand" "r") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPNEQ_M_N)) ] "TARGET_HAVE_MVE" @@ -3049,10 +3049,10 @@ ;; (define_insn "mve_vcmpneq_m_<supf><mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_2 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPNEQ_M)) ] "TARGET_HAVE_MVE" @@ -3068,7 +3068,7 @@ (set (match_operand:MVE_2 0 "s_register_operand" "=w") (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:<V_elem> 2 "s_register_operand" "r") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VDUPQ_M_N)) ] "TARGET_HAVE_MVE" @@ -3084,7 +3084,7 @@ (set (match_operand:MVE_2 0 "s_register_operand" "=w") (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMAXAQ_M_S)) ] "TARGET_HAVE_MVE" @@ -3100,7 +3100,7 @@ (set (match_operand:<V_elem> 0 "s_register_operand" "=r") (unspec:<V_elem> [(match_operand:<V_elem> 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMAXAVQ_P_S)) ] "TARGET_HAVE_MVE" @@ -3116,7 +3116,7 @@ (set (match_operand:<V_elem> 0 "s_register_operand" "=r") (unspec:<V_elem> [(match_operand:<V_elem> 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMAXVQ_P)) ] "TARGET_HAVE_MVE" @@ -3132,7 +3132,7 @@ (set (match_operand:MVE_2 0 "s_register_operand" "=w") (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMINAQ_M_S)) ] "TARGET_HAVE_MVE" @@ -3148,7 +3148,7 @@ (set (match_operand:<V_elem> 0 "s_register_operand" "=r") (unspec:<V_elem> [(match_operand:<V_elem> 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMINAVQ_P_S)) ] "TARGET_HAVE_MVE" @@ -3164,7 +3164,7 @@ (set (match_operand:<V_elem> 0 
"s_register_operand" "=r") (unspec:<V_elem> [(match_operand:<V_elem> 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMINVQ_P)) ] "TARGET_HAVE_MVE" @@ -3196,7 +3196,7 @@ (set (match_operand:SI 0 "s_register_operand" "=Te") (unspec:SI [(match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMLADAVQ_P)) ] "TARGET_HAVE_MVE" @@ -3212,7 +3212,7 @@ (set (match_operand:SI 0 "s_register_operand" "=Te") (unspec:SI [(match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMLADAVXQ_P_S)) ] "TARGET_HAVE_MVE" @@ -3260,7 +3260,7 @@ (set (match_operand:SI 0 "s_register_operand" "=Te") (unspec:SI [(match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMLSDAVQ_P_S)) ] "TARGET_HAVE_MVE" @@ -3276,7 +3276,7 @@ (set (match_operand:SI 0 "s_register_operand" "=Te") (unspec:SI [(match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMLSDAVXQ_P_S)) ] "TARGET_HAVE_MVE" @@ -3292,7 +3292,7 @@ (set (match_operand:MVE_2 0 "s_register_operand" "=w") (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMVNQ_M)) ] "TARGET_HAVE_MVE" @@ -3308,7 +3308,7 @@ (set (match_operand:MVE_2 0 "s_register_operand" "=w") (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VNEGQ_M_S)) ] "TARGET_HAVE_MVE" @@ -3324,7 +3324,7 @@ (set (match_operand:MVE_1 0 "s_register_operand" "=w") (unspec:MVE_1 [(match_operand:MVE_1 1 "s_register_operand" "w") (match_operand:MVE_1 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VPSELQ)) ] "TARGET_HAVE_MVE" @@ -3340,7 +3340,7 @@ (set (match_operand:MVE_2 0 "s_register_operand" "=w") (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VQABSQ_M_S)) ] "TARGET_HAVE_MVE" @@ -3388,7 +3388,7 @@ (set (match_operand:MVE_2 0 "s_register_operand" "=w") (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VQNEGQ_M_S)) ] "TARGET_HAVE_MVE" @@ -3500,7 +3500,7 @@ (set (match_operand:MVE_2 0 "s_register_operand" "=w") (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:SI 2 "s_register_operand" "r") - (match_operand:HI 3 "vpr_register_operand" "Up")] + 
(match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VQRSHLQ_M_N)) ] "TARGET_HAVE_MVE" @@ -3516,7 +3516,7 @@ (set (match_operand:MVE_2 0 "s_register_operand" "=w") (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:SI 2 "s_register_operand" "r") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VQSHLQ_M_R)) ] "TARGET_HAVE_MVE" @@ -3532,7 +3532,7 @@ (set (match_operand:MVE_2 0 "s_register_operand" "=w") (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VREV64Q_M)) ] "TARGET_HAVE_MVE" @@ -3548,7 +3548,7 @@ (set (match_operand:MVE_2 0 "s_register_operand" "=w") (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:SI 2 "s_register_operand" "r") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VRSHLQ_M_N)) ] "TARGET_HAVE_MVE" @@ -3564,7 +3564,7 @@ (set (match_operand:MVE_2 0 "s_register_operand" "=w") (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:SI 2 "s_register_operand" "r") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VSHLQ_M_R)) ] "TARGET_HAVE_MVE" @@ -3723,7 +3723,7 @@ (set (match_operand:MVE_0 0 "s_register_operand" "=w") (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VABSQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -3739,7 +3739,7 @@ (set (match_operand:DI 0 "s_register_operand" "=r") (unspec:DI [(match_operand:DI 1 "s_register_operand" "0") (match_operand:V4SI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V4BI 3 "vpr_register_operand" "Up")] VADDLVAQ_P)) ] "TARGET_HAVE_MVE" @@ -3770,10 +3770,10 @@ ;; (define_insn "mve_vcmpeqq_m_n_f<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w") (match_operand:<V_elem> 2 "s_register_operand" "r") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPEQQ_M_N_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -3786,10 +3786,10 @@ ;; (define_insn "mve_vcmpgeq_m_f<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w") (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPGEQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -3802,10 +3802,10 @@ ;; (define_insn "mve_vcmpgeq_m_n_f<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w") (match_operand:<V_elem> 2 
"s_register_operand" "r") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPGEQ_M_N_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -3818,10 +3818,10 @@ ;; (define_insn "mve_vcmpgtq_m_f<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w") (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPGTQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -3834,10 +3834,10 @@ ;; (define_insn "mve_vcmpgtq_m_n_f<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w") (match_operand:<V_elem> 2 "s_register_operand" "r") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPGTQ_M_N_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -3850,10 +3850,10 @@ ;; (define_insn "mve_vcmpleq_m_f<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w") (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPLEQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -3866,10 +3866,10 @@ ;; (define_insn "mve_vcmpleq_m_n_f<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w") (match_operand:<V_elem> 2 "s_register_operand" "r") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPLEQ_M_N_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -3882,10 +3882,10 @@ ;; (define_insn "mve_vcmpltq_m_f<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w") (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPLTQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -3898,10 +3898,10 @@ ;; (define_insn "mve_vcmpltq_m_n_f<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w") (match_operand:<V_elem> 2 "s_register_operand" "r") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPLTQ_M_N_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -3914,10 +3914,10 @@ ;; (define_insn "mve_vcmpneq_m_f<mode>" 
[ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w") (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPNEQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -3930,10 +3930,10 @@ ;; (define_insn "mve_vcmpneq_m_n_f<mode>" [ - (set (match_operand:HI 0 "vpr_register_operand" "=Up") - (unspec:HI [(match_operand:MVE_0 1 "s_register_operand" "w") + (set (match_operand:<MVE_VPRED> 0 "vpr_register_operand" "=Up") + (unspec:<MVE_VPRED> [(match_operand:MVE_0 1 "s_register_operand" "w") (match_operand:<V_elem> 2 "s_register_operand" "r") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCMPNEQ_M_N_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -3949,7 +3949,7 @@ (set (match_operand:V8HF 0 "s_register_operand" "=w") (unspec:V8HF [(match_operand:V8HF 1 "s_register_operand" "0") (match_operand:V4SF 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCVTBQ_M_F16_F32)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -3965,7 +3965,7 @@ (set (match_operand:V4SF 0 "s_register_operand" "=w") (unspec:V4SF [(match_operand:V4SF 1 "s_register_operand" "0") (match_operand:V8HF 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCVTBQ_M_F32_F16)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -3981,7 +3981,7 @@ (set (match_operand:V8HF 0 "s_register_operand" "=w") (unspec:V8HF [(match_operand:V8HF 1 "s_register_operand" "0") (match_operand:V4SF 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCVTTQ_M_F16_F32)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -3997,7 +3997,7 @@ (set (match_operand:V4SF 0 "s_register_operand" "=w") (unspec:V4SF [(match_operand:V4SF 1 "s_register_operand" "0") (match_operand:V8HF 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCVTTQ_M_F32_F16)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -4013,7 +4013,7 @@ (set (match_operand:MVE_0 0 "s_register_operand" "=w") (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:<V_elem> 2 "s_register_operand" "r") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VDUPQ_M_N_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -4092,7 +4092,7 @@ (set (match_operand:MVE_0 0 "s_register_operand" "=w") (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMAXNMAQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -4107,7 +4107,7 @@ (set (match_operand:<V_elem> 0 "s_register_operand" "=r") (unspec:<V_elem> [(match_operand:<V_elem> 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMAXNMAVQ_P_F)) ] 
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -4123,7 +4123,7 @@ (set (match_operand:<V_elem> 0 "s_register_operand" "=r") (unspec:<V_elem> [(match_operand:<V_elem> 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMAXNMVQ_P_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -4138,7 +4138,7 @@ (set (match_operand:MVE_0 0 "s_register_operand" "=w") (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMINNMAQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -4154,7 +4154,7 @@ (set (match_operand:<V_elem> 0 "s_register_operand" "=r") (unspec:<V_elem> [(match_operand:<V_elem> 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMINNMAVQ_P_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -4169,7 +4169,7 @@ (set (match_operand:<V_elem> 0 "s_register_operand" "=r") (unspec:<V_elem> [(match_operand:<V_elem> 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMINNMVQ_P_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -4217,7 +4217,7 @@ (set (match_operand:DI 0 "s_register_operand" "=r") (unspec:DI [(match_operand:MVE_5 1 "s_register_operand" "w") (match_operand:MVE_5 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMLALDAVQ_P)) ] "TARGET_HAVE_MVE" @@ -4233,7 +4233,7 @@ (set (match_operand:DI 0 "s_register_operand" "=r") (unspec:DI [(match_operand:MVE_5 1 "s_register_operand" "w") (match_operand:MVE_5 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMLALDAVXQ_P_S)) ] "TARGET_HAVE_MVE" @@ -4280,7 +4280,7 @@ (set (match_operand:DI 0 "s_register_operand" "=r") (unspec:DI [(match_operand:MVE_5 1 "s_register_operand" "w") (match_operand:MVE_5 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMLSLDAVQ_P_S)) ] "TARGET_HAVE_MVE" @@ -4296,7 +4296,7 @@ (set (match_operand:DI 0 "s_register_operand" "=r") (unspec:DI [(match_operand:MVE_5 1 "s_register_operand" "w") (match_operand:MVE_5 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMLSLDAVXQ_P_S)) ] "TARGET_HAVE_MVE" @@ -4311,7 +4311,7 @@ (set (match_operand:<V_double_width> 0 "s_register_operand" "=w") (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0") (match_operand:MVE_3 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMOVLBQ_M)) ] "TARGET_HAVE_MVE" @@ -4326,7 +4326,7 @@ (set (match_operand:<V_double_width> 0 "s_register_operand" "=w") (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0") (match_operand:MVE_3 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" 
"Up")] VMOVLTQ_M)) ] "TARGET_HAVE_MVE" @@ -4341,7 +4341,7 @@ (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w") (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMOVNBQ_M)) ] "TARGET_HAVE_MVE" @@ -4357,7 +4357,7 @@ (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w") (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMOVNTQ_M)) ] "TARGET_HAVE_MVE" @@ -4373,7 +4373,7 @@ (set (match_operand:MVE_5 0 "s_register_operand" "=w") (unspec:MVE_5 [(match_operand:MVE_5 1 "s_register_operand" "0") (match_operand:SI 2 "immediate_operand" "i") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VMVNQ_M_N)) ] "TARGET_HAVE_MVE" @@ -4388,7 +4388,7 @@ (set (match_operand:MVE_0 0 "s_register_operand" "=w") (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VNEGQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -4404,7 +4404,7 @@ (set (match_operand:MVE_5 0 "s_register_operand" "=w") (unspec:MVE_5 [(match_operand:MVE_5 1 "s_register_operand" "0") (match_operand:SI 2 "immediate_operand" "i") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VORRQ_M_N)) ] "TARGET_HAVE_MVE" @@ -4419,7 +4419,7 @@ (set (match_operand:MVE_0 0 "s_register_operand" "=w") (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VPSELQ_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -4435,7 +4435,7 @@ (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w") (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VQMOVNBQ_M)) ] "TARGET_HAVE_MVE" @@ -4451,7 +4451,7 @@ (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w") (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VQMOVNTQ_M)) ] "TARGET_HAVE_MVE" @@ -4467,7 +4467,7 @@ (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w") (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VQMOVUNBQ_M_S)) ] "TARGET_HAVE_MVE" @@ -4483,7 +4483,7 @@ (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w") (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 
"vpr_register_operand" "Up")] VQMOVUNTQ_M_S)) ] "TARGET_HAVE_MVE" @@ -4595,7 +4595,7 @@ (set (match_operand:V8HF 0 "s_register_operand" "=w") (unspec:V8HF [(match_operand:V8HF 1 "s_register_operand" "0") (match_operand:V8HF 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VREV32Q_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -4611,7 +4611,7 @@ (set (match_operand:MVE_3 0 "s_register_operand" "=w") (unspec:MVE_3 [(match_operand:MVE_3 1 "s_register_operand" "0") (match_operand:MVE_3 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VREV32Q_M)) ] "TARGET_HAVE_MVE" @@ -4627,7 +4627,7 @@ (set (match_operand:MVE_0 0 "s_register_operand" "=w") (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VREV64Q_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -4659,7 +4659,7 @@ (set (match_operand:DI 0 "s_register_operand" "=r") (unspec:DI [(match_operand:V4SI 1 "s_register_operand" "w") (match_operand:V4SI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VRMLALDAVHXQ_P_S)) ] "TARGET_HAVE_MVE" @@ -4691,7 +4691,7 @@ (set (match_operand:DI 0 "s_register_operand" "=r") (unspec:DI [(match_operand:V4SI 1 "s_register_operand" "w") (match_operand:V4SI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VRMLSLDAVHQ_P_S)) ] "TARGET_HAVE_MVE" @@ -4707,7 +4707,7 @@ (set (match_operand:DI 0 "s_register_operand" "=r") (unspec:DI [(match_operand:V4SI 1 "s_register_operand" "w") (match_operand:V4SI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VRMLSLDAVHXQ_P_S)) ] "TARGET_HAVE_MVE" @@ -4723,7 +4723,7 @@ (set (match_operand:MVE_0 0 "s_register_operand" "=w") (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VRNDAQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -4739,7 +4739,7 @@ (set (match_operand:MVE_0 0 "s_register_operand" "=w") (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VRNDMQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -4755,7 +4755,7 @@ (set (match_operand:MVE_0 0 "s_register_operand" "=w") (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VRNDNQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -4771,7 +4771,7 @@ (set (match_operand:MVE_0 0 "s_register_operand" "=w") (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VRNDPQ_M_F)) ] "TARGET_HAVE_MVE && 
TARGET_HAVE_MVE_FLOAT" @@ -4787,7 +4787,7 @@ (set (match_operand:MVE_0 0 "s_register_operand" "=w") (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VRNDXQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -4867,7 +4867,7 @@ (set (match_operand:MVE_5 0 "s_register_operand" "=w") (unspec:MVE_5 [(match_operand:MVE_5 1 "s_register_operand" "0") (match_operand:<MVE_CNVT> 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCVTMQ_M)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -4883,7 +4883,7 @@ (set (match_operand:MVE_5 0 "s_register_operand" "=w") (unspec:MVE_5 [(match_operand:MVE_5 1 "s_register_operand" "0") (match_operand:<MVE_CNVT> 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCVTPQ_M)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -4899,7 +4899,7 @@ (set (match_operand:MVE_5 0 "s_register_operand" "=w") (unspec:MVE_5 [(match_operand:MVE_5 1 "s_register_operand" "0") (match_operand:<MVE_CNVT> 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCVTNQ_M)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -4916,7 +4916,7 @@ (unspec:MVE_5 [(match_operand:MVE_5 1 "s_register_operand" "0") (match_operand:<MVE_CNVT> 2 "s_register_operand" "w") (match_operand:SI 3 "<MVE_pred2>" "<MVE_constraint2>") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VCVTQ_M_N_FROM_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -4932,7 +4932,7 @@ (set (match_operand:V16QI 0 "s_register_operand" "=w") (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "0") (match_operand:V16QI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V16BI 3 "vpr_register_operand" "Up")] VREV16Q_M)) ] "TARGET_HAVE_MVE" @@ -4948,7 +4948,7 @@ (set (match_operand:MVE_5 0 "s_register_operand" "=w") (unspec:MVE_5 [(match_operand:MVE_5 1 "s_register_operand" "0") (match_operand:<MVE_CNVT> 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VCVTQ_M_FROM_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -4964,7 +4964,7 @@ (set (match_operand:DI 0 "s_register_operand" "=r") (unspec:DI [(match_operand:V4SI 1 "s_register_operand" "w") (match_operand:V4SI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V4BI 3 "vpr_register_operand" "Up")] VRMLALDAVHQ_P)) ] "TARGET_HAVE_MVE" @@ -4997,7 +4997,7 @@ (unspec:SI [(match_operand:SI 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VABAVQ_P)) ] "TARGET_HAVE_MVE" @@ -5014,7 +5014,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:SI 3 "mve_imm_7" "Ra") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQSHLUQ_M_N_S)) ] "TARGET_HAVE_MVE" @@ -5030,7 +5030,7 @@ (unspec:MVE_2 
[(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VSHLQ_M)) ] "TARGET_HAVE_MVE" @@ -5046,7 +5046,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:SI 3 "mve_imm_selective_upto_8" "Rg") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VSRIQ_M_N)) ] "TARGET_HAVE_MVE" @@ -5062,7 +5062,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VSUBQ_M)) ] "TARGET_HAVE_MVE" @@ -5078,7 +5078,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:<MVE_CNVT> 2 "s_register_operand" "w") (match_operand:SI 3 "<MVE_pred2>" "<MVE_constraint2>") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VCVTQ_M_N_TO_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -5094,7 +5094,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VABDQ_M)) ] "TARGET_HAVE_MVE" @@ -5111,7 +5111,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:<V_elem> 3 "s_register_operand" "r") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VADDQ_M_N)) ] "TARGET_HAVE_MVE" @@ -5128,7 +5128,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VADDQ_M)) ] "TARGET_HAVE_MVE" @@ -5145,7 +5145,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VANDQ_M)) ] "TARGET_HAVE_MVE" @@ -5162,7 +5162,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VBICQ_M)) ] "TARGET_HAVE_MVE" @@ -5179,7 +5179,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:SI 3 "s_register_operand" "r") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VBRSRQ_M_N)) ] "TARGET_HAVE_MVE" @@ -5196,7 +5196,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 
"vpr_register_operand" "Up")] VCADDQ_ROT270_M)) ] "TARGET_HAVE_MVE" @@ -5213,7 +5213,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VCADDQ_ROT90_M)) ] "TARGET_HAVE_MVE" @@ -5230,7 +5230,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VEORQ_M)) ] "TARGET_HAVE_MVE" @@ -5247,7 +5247,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:<V_elem> 3 "s_register_operand" "r") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VHADDQ_M_N)) ] "TARGET_HAVE_MVE" @@ -5264,7 +5264,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VHADDQ_M)) ] "TARGET_HAVE_MVE" @@ -5281,7 +5281,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:<V_elem> 3 "s_register_operand" "r") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VHSUBQ_M_N)) ] "TARGET_HAVE_MVE" @@ -5298,7 +5298,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VHSUBQ_M)) ] "TARGET_HAVE_MVE" @@ -5315,7 +5315,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VMAXQ_M)) ] "TARGET_HAVE_MVE" @@ -5332,7 +5332,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VMINQ_M)) ] "TARGET_HAVE_MVE" @@ -5349,7 +5349,7 @@ (unspec:SI [(match_operand:SI 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VMLADAVAQ_P)) ] "TARGET_HAVE_MVE" @@ -5366,7 +5366,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:<V_elem> 3 "s_register_operand" "r") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VMLAQ_M_N)) ] "TARGET_HAVE_MVE" @@ -5383,7 +5383,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:<V_elem> 3 "s_register_operand" "r") - 
(match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VMLASQ_M_N)) ] "TARGET_HAVE_MVE" @@ -5400,7 +5400,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VMULHQ_M)) ] "TARGET_HAVE_MVE" @@ -5417,7 +5417,7 @@ (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VMULLBQ_INT_M)) ] "TARGET_HAVE_MVE" @@ -5434,7 +5434,7 @@ (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VMULLTQ_INT_M)) ] "TARGET_HAVE_MVE" @@ -5451,7 +5451,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:<V_elem> 3 "s_register_operand" "r") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VMULQ_M_N)) ] "TARGET_HAVE_MVE" @@ -5468,7 +5468,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VMULQ_M)) ] "TARGET_HAVE_MVE" @@ -5485,7 +5485,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VORNQ_M)) ] "TARGET_HAVE_MVE" @@ -5502,7 +5502,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VORRQ_M)) ] "TARGET_HAVE_MVE" @@ -5519,7 +5519,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:<V_elem> 3 "s_register_operand" "r") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQADDQ_M_N)) ] "TARGET_HAVE_MVE" @@ -5536,7 +5536,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQADDQ_M)) ] "TARGET_HAVE_MVE" @@ -5553,7 +5553,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:<V_elem> 3 "s_register_operand" "r") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQDMLAHQ_M_N_S)) ] "TARGET_HAVE_MVE" @@ -5570,7 +5570,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 
"s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:<V_elem> 3 "s_register_operand" "r") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQDMLASHQ_M_N_S)) ] "TARGET_HAVE_MVE" @@ -5587,7 +5587,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:<V_elem> 3 "s_register_operand" "r") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQRDMLAHQ_M_N_S)) ] "TARGET_HAVE_MVE" @@ -5604,7 +5604,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:<V_elem> 3 "s_register_operand" "r") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQRDMLASHQ_M_N_S)) ] "TARGET_HAVE_MVE" @@ -5621,7 +5621,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQRSHLQ_M)) ] "TARGET_HAVE_MVE" @@ -5638,7 +5638,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:SI 3 "immediate_operand" "i") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQSHLQ_M_N)) ] "TARGET_HAVE_MVE" @@ -5655,7 +5655,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQSHLQ_M)) ] "TARGET_HAVE_MVE" @@ -5672,7 +5672,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:<V_elem> 3 "s_register_operand" "r") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQSUBQ_M_N)) ] "TARGET_HAVE_MVE" @@ -5689,7 +5689,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQSUBQ_M)) ] "TARGET_HAVE_MVE" @@ -5706,7 +5706,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VRHADDQ_M)) ] "TARGET_HAVE_MVE" @@ -5723,7 +5723,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VRMULHQ_M)) ] "TARGET_HAVE_MVE" @@ -5740,7 +5740,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] 
VRSHLQ_M)) ] "TARGET_HAVE_MVE" @@ -5757,7 +5757,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:SI 3 "<MVE_pred2>" "<MVE_constraint2>") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VRSHRQ_M_N)) ] "TARGET_HAVE_MVE" @@ -5774,7 +5774,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:SI 3 "immediate_operand" "i") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VSHLQ_M_N)) ] "TARGET_HAVE_MVE" @@ -5791,7 +5791,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:SI 3 "<MVE_pred2>" "<MVE_constraint2>") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VSHRQ_M_N)) ] "TARGET_HAVE_MVE" @@ -5808,7 +5808,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:SI 3 "<MVE_pred>" "<MVE_constraint>") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VSLIQ_M_N)) ] "TARGET_HAVE_MVE" @@ -5825,7 +5825,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:<V_elem> 3 "s_register_operand" "r") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VSUBQ_M_N)) ] "TARGET_HAVE_MVE" @@ -5842,7 +5842,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VHCADDQ_ROT270_M_S)) ] "TARGET_HAVE_MVE" @@ -5859,7 +5859,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VHCADDQ_ROT90_M_S)) ] "TARGET_HAVE_MVE" @@ -5876,7 +5876,7 @@ (unspec:SI [(match_operand:SI 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VMLADAVAXQ_P_S)) ] "TARGET_HAVE_MVE" @@ -5893,7 +5893,7 @@ (unspec:SI [(match_operand:SI 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VMLSDAVAQ_P_S)) ] "TARGET_HAVE_MVE" @@ -5910,7 +5910,7 @@ (unspec:SI [(match_operand:SI 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VMLSDAVAXQ_P_S)) ] "TARGET_HAVE_MVE" @@ -5927,7 +5927,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 
4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQDMLADHQ_M_S)) ] "TARGET_HAVE_MVE" @@ -5944,7 +5944,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQDMLADHXQ_M_S)) ] "TARGET_HAVE_MVE" @@ -5961,7 +5961,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQDMLSDHQ_M_S)) ] "TARGET_HAVE_MVE" @@ -5978,7 +5978,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQDMLSDHXQ_M_S)) ] "TARGET_HAVE_MVE" @@ -5995,7 +5995,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:<V_elem> 3 "s_register_operand" "r") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQDMULHQ_M_N_S)) ] "TARGET_HAVE_MVE" @@ -6012,7 +6012,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQDMULHQ_M_S)) ] "TARGET_HAVE_MVE" @@ -6029,7 +6029,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQRDMLADHQ_M_S)) ] "TARGET_HAVE_MVE" @@ -6046,7 +6046,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQRDMLADHXQ_M_S)) ] "TARGET_HAVE_MVE" @@ -6063,7 +6063,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQRDMLSDHQ_M_S)) ] "TARGET_HAVE_MVE" @@ -6080,7 +6080,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQRDMLSDHXQ_M_S)) ] "TARGET_HAVE_MVE" @@ -6097,7 +6097,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:<V_elem> 3 "s_register_operand" "r") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQRDMULHQ_M_N_S)) ] "TARGET_HAVE_MVE" @@ -6114,7 +6114,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 
"s_register_operand" "0") (match_operand:MVE_2 2 "s_register_operand" "w") (match_operand:MVE_2 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQRDMULHQ_M_S)) ] "TARGET_HAVE_MVE" @@ -6131,7 +6131,7 @@ (unspec:DI [(match_operand:DI 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") (match_operand:MVE_5 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VMLALDAVAQ_P)) ] "TARGET_HAVE_MVE" @@ -6148,7 +6148,7 @@ (unspec:DI [(match_operand:DI 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") (match_operand:MVE_5 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VMLALDAVAXQ_P)) ] "TARGET_HAVE_MVE" @@ -6165,7 +6165,7 @@ (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") (match_operand:SI 3 "mve_imm_8" "Rb") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQRSHRNBQ_M_N)) ] "TARGET_HAVE_MVE" @@ -6182,7 +6182,7 @@ (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") (match_operand:SI 3 "mve_imm_8" "Rb") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQRSHRNTQ_M_N)) ] "TARGET_HAVE_MVE" @@ -6199,7 +6199,7 @@ (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQSHRNBQ_M_N)) ] "TARGET_HAVE_MVE" @@ -6216,7 +6216,7 @@ (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQSHRNTQ_M_N)) ] "TARGET_HAVE_MVE" @@ -6233,7 +6233,7 @@ (unspec:DI [(match_operand:DI 1 "s_register_operand" "0") (match_operand:V4SI 2 "s_register_operand" "w") (match_operand:V4SI 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VRMLALDAVHAQ_P_S)) ] "TARGET_HAVE_MVE" @@ -6250,7 +6250,7 @@ (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") (match_operand:SI 3 "mve_imm_8" "Rb") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VRSHRNBQ_M_N)) ] "TARGET_HAVE_MVE" @@ -6267,7 +6267,7 @@ (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") (match_operand:SI 3 "mve_imm_8" "Rb") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VRSHRNTQ_M_N)) ] "TARGET_HAVE_MVE" @@ -6284,7 +6284,7 @@ (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0") (match_operand:MVE_3 2 "s_register_operand" "w") (match_operand:SI 3 "immediate_operand" "i") - 
(match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VSHLLBQ_M_N)) ] "TARGET_HAVE_MVE" @@ -6301,7 +6301,7 @@ (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0") (match_operand:MVE_3 2 "s_register_operand" "w") (match_operand:SI 3 "immediate_operand" "i") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VSHLLTQ_M_N)) ] "TARGET_HAVE_MVE" @@ -6318,7 +6318,7 @@ (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VSHRNBQ_M_N)) ] "TARGET_HAVE_MVE" @@ -6335,7 +6335,7 @@ (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VSHRNTQ_M_N)) ] "TARGET_HAVE_MVE" @@ -6352,7 +6352,7 @@ (unspec:DI [(match_operand:DI 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") (match_operand:MVE_5 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VMLSLDAVAQ_P_S)) ] "TARGET_HAVE_MVE" @@ -6369,7 +6369,7 @@ (unspec:DI [(match_operand:DI 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") (match_operand:MVE_5 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VMLSLDAVAXQ_P_S)) ] "TARGET_HAVE_MVE" @@ -6386,7 +6386,7 @@ (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0") (match_operand:MVE_3 2 "s_register_operand" "w") (match_operand:MVE_3 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VMULLBQ_POLY_M_P)) ] "TARGET_HAVE_MVE" @@ -6403,7 +6403,7 @@ (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0") (match_operand:MVE_3 2 "s_register_operand" "w") (match_operand:MVE_3 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VMULLTQ_POLY_M_P)) ] "TARGET_HAVE_MVE" @@ -6420,7 +6420,7 @@ (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") (match_operand:<V_elem> 3 "s_register_operand" "r") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQDMULLBQ_M_N_S)) ] "TARGET_HAVE_MVE" @@ -6437,7 +6437,7 @@ (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") (match_operand:MVE_5 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQDMULLBQ_M_S)) ] "TARGET_HAVE_MVE" @@ -6454,7 +6454,7 @@ (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") (match_operand:<V_elem> 3 "s_register_operand" "r") - (match_operand:HI 4 "vpr_register_operand" 
"Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQDMULLTQ_M_N_S)) ] "TARGET_HAVE_MVE" @@ -6471,7 +6471,7 @@ (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") (match_operand:MVE_5 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQDMULLTQ_M_S)) ] "TARGET_HAVE_MVE" @@ -6488,7 +6488,7 @@ (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") (match_operand:SI 3 "mve_imm_8" "Rb") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQRSHRUNBQ_M_N_S)) ] "TARGET_HAVE_MVE" @@ -6505,7 +6505,7 @@ (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQRSHRUNTQ_M_N_S)) ] "TARGET_HAVE_MVE" @@ -6522,7 +6522,7 @@ (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQSHRUNBQ_M_N_S)) ] "TARGET_HAVE_MVE" @@ -6539,7 +6539,7 @@ (unspec:<V_narrow_pack> [(match_operand:<V_narrow_pack> 1 "s_register_operand" "0") (match_operand:MVE_5 2 "s_register_operand" "w") (match_operand:SI 3 "<MVE_pred3>" "<MVE_constraint3>") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VQSHRUNTQ_M_N_S)) ] "TARGET_HAVE_MVE" @@ -6556,7 +6556,7 @@ (unspec:DI [(match_operand:DI 1 "s_register_operand" "0") (match_operand:V4SI 2 "s_register_operand" "w") (match_operand:V4SI 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VRMLALDAVHAQ_P_U)) ] "TARGET_HAVE_MVE" @@ -6573,7 +6573,7 @@ (unspec:DI [(match_operand:DI 1 "s_register_operand" "0") (match_operand:V4SI 2 "s_register_operand" "w") (match_operand:V4SI 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VRMLALDAVHAXQ_P_S)) ] "TARGET_HAVE_MVE" @@ -6590,7 +6590,7 @@ (unspec:DI [(match_operand:DI 1 "s_register_operand" "0") (match_operand:V4SI 2 "s_register_operand" "w") (match_operand:V4SI 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VRMLSLDAVHAQ_P_S)) ] "TARGET_HAVE_MVE" @@ -6607,7 +6607,7 @@ (unspec:DI [(match_operand:DI 1 "s_register_operand" "0") (match_operand:V4SI 2 "s_register_operand" "w") (match_operand:V4SI 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VRMLSLDAVHAXQ_P_S)) ] "TARGET_HAVE_MVE" @@ -6623,7 +6623,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:MVE_0 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VABDQ_M_F)) ] "TARGET_HAVE_MVE && 
TARGET_HAVE_MVE_FLOAT" @@ -6640,7 +6640,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:MVE_0 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VADDQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -6657,7 +6657,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:<V_elem> 3 "s_register_operand" "r") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VADDQ_M_N_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -6674,7 +6674,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:MVE_0 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VANDQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -6691,7 +6691,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:MVE_0 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VBICQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -6708,7 +6708,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:SI 3 "s_register_operand" "r") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VBRSRQ_M_N_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -6725,7 +6725,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:MVE_0 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VCADDQ_ROT270_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -6742,7 +6742,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:MVE_0 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VCADDQ_ROT90_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -6759,7 +6759,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:MVE_0 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VCMLAQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -6776,7 +6776,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:MVE_0 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VCMLAQ_ROT180_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -6793,7 +6793,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:MVE_0 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VCMLAQ_ROT270_M_F)) 
] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -6810,7 +6810,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:MVE_0 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VCMLAQ_ROT90_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -6827,7 +6827,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:MVE_0 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VCMULQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -6844,7 +6844,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:MVE_0 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VCMULQ_ROT180_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -6861,7 +6861,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:MVE_0 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VCMULQ_ROT270_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -6878,7 +6878,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:MVE_0 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VCMULQ_ROT90_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -6895,7 +6895,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:MVE_0 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VEORQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -6912,7 +6912,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:MVE_0 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VFMAQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -6929,7 +6929,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:<V_elem> 3 "s_register_operand" "r") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VFMAQ_M_N_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -6946,7 +6946,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:<V_elem> 3 "s_register_operand" "r") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VFMASQ_M_N_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -6963,7 +6963,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:MVE_0 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 
"vpr_register_operand" "Up")] VFMSQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -6980,7 +6980,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:MVE_0 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VMAXNMQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -6997,7 +6997,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:MVE_0 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VMINNMQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -7014,7 +7014,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:MVE_0 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VMULQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -7031,7 +7031,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:<V_elem> 3 "s_register_operand" "r") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VMULQ_M_N_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -7048,7 +7048,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:MVE_0 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VORNQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -7065,7 +7065,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:MVE_0 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VORRQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -7082,7 +7082,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:MVE_0 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VSUBQ_M_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -7099,7 +7099,7 @@ (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0") (match_operand:MVE_0 2 "s_register_operand" "w") (match_operand:<V_elem> 3 "s_register_operand" "r") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")] VSUBQ_M_N_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -7248,7 +7248,7 @@ [(match_operand:<MVE_B_ELEM> 0 "mve_scatter_memory") (match_operand:MVE_2 1 "s_register_operand") (match_operand:MVE_2 2 "s_register_operand") - (match_operand:HI 3 "vpr_register_operand" "Up") + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up") (unspec:V4SI [(const_int 0)] VSTRBSOQ)] "TARGET_HAVE_MVE" { @@ -7267,7 +7267,7 @@ [(match_operand:SI 0 "register_operand" "r") (match_operand:MVE_2 1 "s_register_operand" "w") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] 
VSTRBSOQ))] "TARGET_HAVE_MVE" "vpst\;vstrbt.<V_sz_elem>\t%q2, [%0, %q1]" @@ -7282,7 +7282,7 @@ [(match_operand:V4SI 0 "s_register_operand" "w") (match_operand:SI 1 "immediate_operand" "i") (match_operand:V4SI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V4BI 3 "vpr_register_operand" "Up")] VSTRWSBQ)) ] "TARGET_HAVE_MVE" @@ -7302,7 +7302,7 @@ (define_insn "mve_vstrbq_p_<supf><mode>" [(set (match_operand:<MVE_B_ELEM> 0 "mve_memory_operand" "=Ux") (unspec:<MVE_B_ELEM> [(match_operand:MVE_2 1 "s_register_operand" "w") - (match_operand:HI 2 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 2 "vpr_register_operand" "Up")] VSTRBQ)) ] "TARGET_HAVE_MVE" @@ -7323,7 +7323,7 @@ [(set (match_operand:MVE_2 0 "s_register_operand" "=&w") (unspec:MVE_2 [(match_operand:<MVE_B_ELEM> 1 "memory_operand" "Us") (match_operand:MVE_2 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VLDRBGOQ)) ] "TARGET_HAVE_MVE" @@ -7347,7 +7347,7 @@ (define_insn "mve_vldrbq_z_<supf><mode>" [(set (match_operand:MVE_2 0 "s_register_operand" "=w") (unspec:MVE_2 [(match_operand:<MVE_B_ELEM> 1 "mve_memory_operand" "Ux") - (match_operand:HI 2 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 2 "vpr_register_operand" "Up")] VLDRBQ)) ] "TARGET_HAVE_MVE" @@ -7371,7 +7371,7 @@ [(set (match_operand:V4SI 0 "s_register_operand" "=&w") (unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V4BI 3 "vpr_register_operand" "Up")] VLDRWGBQ)) ] "TARGET_HAVE_MVE" @@ -7434,7 +7434,7 @@ [(set (match_operand:MVE_6 0 "s_register_operand" "=&w") (unspec:MVE_6 [(match_operand:<MVE_H_ELEM> 1 "memory_operand" "Us") (match_operand:MVE_6 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up") + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up") ]VLDRHGOQ)) ] "TARGET_HAVE_MVE" @@ -7482,7 +7482,7 @@ [(set (match_operand:MVE_6 0 "s_register_operand" "=&w") (unspec:MVE_6 [(match_operand:<MVE_H_ELEM> 1 "memory_operand" "Us") (match_operand:MVE_6 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up") + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up") ]VLDRHGSOQ)) ] "TARGET_HAVE_MVE" @@ -7528,7 +7528,7 @@ (define_insn "mve_vldrhq_z_fv8hf" [(set (match_operand:V8HF 0 "s_register_operand" "=w") (unspec:V8HF [(match_operand:V8HI 1 "mve_memory_operand" "Ux") - (match_operand:HI 2 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 2 "vpr_register_operand" "Up")] VLDRHQ_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -7548,7 +7548,7 @@ (define_insn "mve_vldrhq_z_<supf><mode>" [(set (match_operand:MVE_6 0 "s_register_operand" "=w") (unspec:MVE_6 [(match_operand:<MVE_H_ELEM> 1 "mve_memory_operand" "Ux") - (match_operand:HI 2 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 2 "vpr_register_operand" "Up")] VLDRHQ)) ] "TARGET_HAVE_MVE" @@ -7609,7 +7609,7 @@ (define_insn "mve_vldrwq_z_fv4sf" [(set (match_operand:V4SF 0 "s_register_operand" "=w") (unspec:V4SF [(match_operand:V4SI 1 "mve_memory_operand" "Ux") - (match_operand:HI 2 "vpr_register_operand" "Up")] + (match_operand:V4BI 2 "vpr_register_operand" "Up")] VLDRWQ_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -7629,7 +7629,7 @@ (define_insn "mve_vldrwq_z_<supf>v4si" [(set (match_operand:V4SI 0 "s_register_operand" "=w") (unspec:V4SI [(match_operand:V4SI 1 
"mve_memory_operand" "Ux") - (match_operand:HI 2 "vpr_register_operand" "Up")] + (match_operand:V4BI 2 "vpr_register_operand" "Up")] VLDRWQ)) ] "TARGET_HAVE_MVE" @@ -7813,7 +7813,7 @@ [(set (match_operand:V8HF 0 "s_register_operand" "=&w") (unspec:V8HF [(match_operand:V8HI 1 "memory_operand" "Us") (match_operand:V8HI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V8BI 3 "vpr_register_operand" "Up")] VLDRHQGO_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -7855,7 +7855,7 @@ [(set (match_operand:V8HF 0 "s_register_operand" "=&w") (unspec:V8HF [(match_operand:V8HI 1 "memory_operand" "Us") (match_operand:V8HI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V8BI 3 "vpr_register_operand" "Up")] VLDRHQGSO_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -7897,7 +7897,7 @@ [(set (match_operand:V4SF 0 "s_register_operand" "=&w") (unspec:V4SF [(match_operand:V4SI 1 "s_register_operand" "w") (match_operand:SI 2 "immediate_operand" "i") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V4BI 3 "vpr_register_operand" "Up")] VLDRWQGB_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -7958,7 +7958,7 @@ [(set (match_operand:V4SF 0 "s_register_operand" "=&w") (unspec:V4SF [(match_operand:V4SI 1 "memory_operand" "Us") (match_operand:V4SI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V4BI 3 "vpr_register_operand" "Up")] VLDRWQGO_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -7980,7 +7980,7 @@ [(set (match_operand:V4SI 0 "s_register_operand" "=&w") (unspec:V4SI [(match_operand:V4SI 1 "memory_operand" "Us") (match_operand:V4SI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V4BI 3 "vpr_register_operand" "Up")] VLDRWGOQ)) ] "TARGET_HAVE_MVE" @@ -8042,7 +8042,7 @@ [(set (match_operand:V4SF 0 "s_register_operand" "=&w") (unspec:V4SF [(match_operand:V4SI 1 "memory_operand" "Us") (match_operand:V4SI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V4BI 3 "vpr_register_operand" "Up")] VLDRWQGSO_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -8064,7 +8064,7 @@ [(set (match_operand:V4SI 0 "s_register_operand" "=&w") (unspec:V4SI [(match_operand:V4SI 1 "memory_operand" "Us") (match_operand:V4SI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V4BI 3 "vpr_register_operand" "Up")] VLDRWGSOQ)) ] "TARGET_HAVE_MVE" @@ -8104,7 +8104,7 @@ (define_insn "mve_vstrhq_p_fv8hf" [(set (match_operand:V8HI 0 "mve_memory_operand" "=Ux") (unspec:V8HI [(match_operand:V8HF 1 "s_register_operand" "w") - (match_operand:HI 2 "vpr_register_operand" "Up")] + (match_operand:V8BI 2 "vpr_register_operand" "Up")] VSTRHQ_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -8124,7 +8124,7 @@ (define_insn "mve_vstrhq_p_<supf><mode>" [(set (match_operand:<MVE_H_ELEM> 0 "mve_memory_operand" "=Ux") (unspec:<MVE_H_ELEM> [(match_operand:MVE_6 1 "s_register_operand" "w") - (match_operand:HI 2 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 2 "vpr_register_operand" "Up")] VSTRHQ)) ] "TARGET_HAVE_MVE" @@ -8145,7 +8145,7 @@ [(match_operand:<MVE_H_ELEM> 0 "mve_scatter_memory") (match_operand:MVE_6 1 "s_register_operand") (match_operand:MVE_6 2 "s_register_operand") - (match_operand:HI 3 "vpr_register_operand") + (match_operand:<MVE_VPRED> 3 "vpr_register_operand") (unspec:V4SI [(const_int 0)] VSTRHSOQ)] "TARGET_HAVE_MVE" 
{ @@ -8164,7 +8164,7 @@ [(match_operand:SI 0 "register_operand" "r") (match_operand:MVE_6 1 "s_register_operand" "w") (match_operand:MVE_6 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VSTRHSOQ))] "TARGET_HAVE_MVE" "vpst\;vstrht.<V_sz_elem>\t%q2, [%0, %q1]" @@ -8205,7 +8205,7 @@ [(match_operand:<MVE_H_ELEM> 0 "mve_scatter_memory") (match_operand:MVE_6 1 "s_register_operand") (match_operand:MVE_6 2 "s_register_operand") - (match_operand:HI 3 "vpr_register_operand") + (match_operand:<MVE_VPRED> 3 "vpr_register_operand") (unspec:V4SI [(const_int 0)] VSTRHSSOQ)] "TARGET_HAVE_MVE" { @@ -8224,7 +8224,7 @@ [(match_operand:SI 0 "register_operand" "r") (match_operand:MVE_6 1 "s_register_operand" "w") (match_operand:MVE_6 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")] VSTRHSSOQ))] "TARGET_HAVE_MVE" "vpst\;vstrht.<V_sz_elem>\t%q2, [%0, %q1, uxtw #1]" @@ -8303,7 +8303,7 @@ (define_insn "mve_vstrwq_p_fv4sf" [(set (match_operand:V4SI 0 "mve_memory_operand" "=Ux") (unspec:V4SI [(match_operand:V4SF 1 "s_register_operand" "w") - (match_operand:HI 2 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 2 "vpr_register_operand" "Up")] VSTRWQ_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -8323,7 +8323,7 @@ (define_insn "mve_vstrwq_p_<supf>v4si" [(set (match_operand:V4SI 0 "mve_memory_operand" "=Ux") (unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "w") - (match_operand:HI 2 "vpr_register_operand" "Up")] + (match_operand:V4BI 2 "vpr_register_operand" "Up")] VSTRWQ)) ] "TARGET_HAVE_MVE" @@ -8576,7 +8576,7 @@ [(match_operand:V8HI 0 "mve_scatter_memory") (match_operand:V8HI 1 "s_register_operand") (match_operand:V8HF 2 "s_register_operand") - (match_operand:HI 3 "vpr_register_operand") + (match_operand:V8BI 3 "vpr_register_operand") (unspec:V4SI [(const_int 0)] VSTRHQSO_F)] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" { @@ -8594,7 +8594,7 @@ [(match_operand:SI 0 "register_operand" "r") (match_operand:V8HI 1 "s_register_operand" "w") (match_operand:V8HF 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V8BI 3 "vpr_register_operand" "Up")] VSTRHQSO_F))] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" "vpst\;vstrht.16\t%q2, [%0, %q1]" @@ -8635,7 +8635,7 @@ [(match_operand:V8HI 0 "memory_operand" "=Us") (match_operand:V8HI 1 "s_register_operand" "w") (match_operand:V8HF 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up") + (match_operand:V8BI 3 "vpr_register_operand" "Up") (unspec:V4SI [(const_int 0)] VSTRHQSSO_F)] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" { @@ -8654,7 +8654,7 @@ [(match_operand:SI 0 "register_operand" "r") (match_operand:V8HI 1 "s_register_operand" "w") (match_operand:V8HF 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V8BI 3 "vpr_register_operand" "Up")] VSTRHQSSO_F))] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" "vpst\;vstrht.16\t%q2, [%0, %q1, uxtw #1]" @@ -8691,7 +8691,7 @@ [(match_operand:V4SI 0 "s_register_operand" "w") (match_operand:SI 1 "immediate_operand" "i") (match_operand:V4SF 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V4BI 3 "vpr_register_operand" "Up")] VSTRWQSB_F)) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" @@ -8740,7 +8740,7 @@ [(match_operand:V4SI 0 "mve_scatter_memory") (match_operand:V4SI 1 
"s_register_operand") (match_operand:V4SF 2 "s_register_operand") - (match_operand:HI 3 "vpr_register_operand") + (match_operand:V4BI 3 "vpr_register_operand") (unspec:V4SI [(const_int 0)] VSTRWQSO_F)] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" { @@ -8758,7 +8758,7 @@ [(match_operand:SI 0 "register_operand" "r") (match_operand:V4SI 1 "s_register_operand" "w") (match_operand:V4SF 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V4BI 3 "vpr_register_operand" "Up")] VSTRWQSO_F))] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" "vpst\;vstrwt.32\t%q2, [%0, %q1]" @@ -8771,7 +8771,7 @@ [(match_operand:V4SI 0 "mve_scatter_memory") (match_operand:V4SI 1 "s_register_operand") (match_operand:V4SI 2 "s_register_operand") - (match_operand:HI 3 "vpr_register_operand") + (match_operand:V4BI 3 "vpr_register_operand") (unspec:V4SI [(const_int 0)] VSTRWSOQ)] "TARGET_HAVE_MVE" { @@ -8789,7 +8789,7 @@ [(match_operand:SI 0 "register_operand" "r") (match_operand:V4SI 1 "s_register_operand" "w") (match_operand:V4SI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V4BI 3 "vpr_register_operand" "Up")] VSTRWSOQ))] "TARGET_HAVE_MVE" "vpst\;vstrwt.32\t%q2, [%0, %q1]" @@ -8858,7 +8858,7 @@ [(match_operand:V4SI 0 "mve_scatter_memory") (match_operand:V4SI 1 "s_register_operand") (match_operand:V4SF 2 "s_register_operand") - (match_operand:HI 3 "vpr_register_operand") + (match_operand:V4BI 3 "vpr_register_operand") (unspec:V4SI [(const_int 0)] VSTRWQSSO_F)] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" { @@ -8877,7 +8877,7 @@ [(match_operand:SI 0 "register_operand" "r") (match_operand:V4SI 1 "s_register_operand" "w") (match_operand:V4SF 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V4BI 3 "vpr_register_operand" "Up")] VSTRWQSSO_F))] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" "vpst\;vstrwt.32\t%q2, [%0, %q1, uxtw #2]" @@ -8890,7 +8890,7 @@ [(match_operand:V4SI 0 "mve_scatter_memory") (match_operand:V4SI 1 "s_register_operand") (match_operand:V4SI 2 "s_register_operand") - (match_operand:HI 3 "vpr_register_operand") + (match_operand:V4BI 3 "vpr_register_operand") (unspec:V4SI [(const_int 0)] VSTRWSSOQ)] "TARGET_HAVE_MVE" { @@ -8909,7 +8909,7 @@ [(match_operand:SI 0 "register_operand" "r") (match_operand:V4SI 1 "s_register_operand" "w") (match_operand:V4SI 2 "s_register_operand" "w") - (match_operand:HI 3 "vpr_register_operand" "Up")] + (match_operand:V4BI 3 "vpr_register_operand" "Up")] VSTRWSSOQ))] "TARGET_HAVE_MVE" "vpst\;vstrwt.32\t%q2, [%0, %q1, uxtw #2]" @@ -9011,7 +9011,7 @@ (match_operand:MVE_2 1 "s_register_operand") (match_operand:SI 2 "s_register_operand") (match_operand:SI 3 "mve_imm_selective_upto_8") - (match_operand:HI 4 "vpr_register_operand")] + (match_operand:<MVE_VPRED> 4 "vpr_register_operand")] "TARGET_HAVE_MVE" { rtx temp = gen_reg_rtx (SImode); @@ -9031,7 +9031,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:SI 3 "s_register_operand" "2") (match_operand:SI 4 "mve_imm_selective_upto_8" "Rg") - (match_operand:HI 5 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 5 "vpr_register_operand" "Up")] VIDUPQ_M)) (set (match_operand:SI 2 "s_register_operand" "=Te") (plus:SI (match_dup 3) @@ -9079,7 +9079,7 @@ (match_operand:MVE_2 1 "s_register_operand") (match_operand:SI 2 "s_register_operand") (match_operand:SI 3 "mve_imm_selective_upto_8") - (match_operand:HI 4 "vpr_register_operand")] + (match_operand:<MVE_VPRED> 4 
"vpr_register_operand")] "TARGET_HAVE_MVE" { rtx temp = gen_reg_rtx (SImode); @@ -9099,7 +9099,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0") (match_operand:SI 3 "s_register_operand" "2") (match_operand:SI 4 "mve_imm_selective_upto_8" "Rg") - (match_operand:HI 5 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 5 "vpr_register_operand" "Up")] VDDUPQ_M)) (set (match_operand:SI 2 "s_register_operand" "=Te") (minus:SI (match_dup 3) @@ -9170,7 +9170,7 @@ (match_operand:SI 2 "s_register_operand") (match_operand:DI 3 "s_register_operand") (match_operand:SI 4 "mve_imm_selective_upto_8") - (match_operand:HI 5 "vpr_register_operand")] + (match_operand:<MVE_VPRED> 5 "vpr_register_operand")] "TARGET_HAVE_MVE" { rtx ignore_wb = gen_reg_rtx (SImode); @@ -9190,7 +9190,7 @@ (match_operand:SI 2 "s_register_operand") (match_operand:DI 3 "s_register_operand") (match_operand:SI 4 "mve_imm_selective_upto_8") - (match_operand:HI 5 "vpr_register_operand")] + (match_operand:<MVE_VPRED> 5 "vpr_register_operand")] "TARGET_HAVE_MVE" { rtx ignore_vec = gen_reg_rtx (<MODE>mode); @@ -9210,7 +9210,7 @@ (match_operand:SI 3 "s_register_operand" "1") (subreg:SI (match_operand:DI 4 "s_register_operand" "r") 4) (match_operand:SI 5 "mve_imm_selective_upto_8" "Rg") - (match_operand:HI 6 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 6 "vpr_register_operand" "Up")] VDWDUPQ_M)) (set (match_operand:SI 1 "s_register_operand" "=Te") (unspec:SI [(match_dup 2) @@ -9287,7 +9287,7 @@ (match_operand:SI 2 "s_register_operand") (match_operand:DI 3 "s_register_operand") (match_operand:SI 4 "mve_imm_selective_upto_8") - (match_operand:HI 5 "vpr_register_operand")] + (match_operand:<MVE_VPRED> 5 "vpr_register_operand")] "TARGET_HAVE_MVE" { rtx ignore_wb = gen_reg_rtx (SImode); @@ -9307,7 +9307,7 @@ (match_operand:SI 2 "s_register_operand") (match_operand:DI 3 "s_register_operand") (match_operand:SI 4 "mve_imm_selective_upto_8") - (match_operand:HI 5 "vpr_register_operand")] + (match_operand:<MVE_VPRED> 5 "vpr_register_operand")] "TARGET_HAVE_MVE" { rtx ignore_vec = gen_reg_rtx (<MODE>mode); @@ -9327,7 +9327,7 @@ (match_operand:SI 3 "s_register_operand" "1") (subreg:SI (match_operand:DI 4 "s_register_operand" "r") 4) (match_operand:SI 5 "mve_imm_selective_upto_8" "Rg") - (match_operand:HI 6 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 6 "vpr_register_operand" "Up")] VIWDUPQ_M)) (set (match_operand:SI 1 "s_register_operand" "=Te") (unspec:SI [(match_dup 2) @@ -9376,7 +9376,7 @@ [(match_operand:V4SI 1 "s_register_operand" "0") (match_operand:SI 2 "mve_vldrd_immediate" "Ri") (match_operand:V4SI 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand")] + (match_operand:V4BI 4 "vpr_register_operand")] VSTRWSBWBQ)) (set (match_operand:V4SI 0 "s_register_operand" "=w") (unspec:V4SI [(match_dup 1) (match_dup 2)] @@ -9427,7 +9427,7 @@ [(match_operand:V4SI 1 "s_register_operand" "0") (match_operand:SI 2 "mve_vldrd_immediate" "Ri") (match_operand:V4SF 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand")] + (match_operand:V4BI 4 "vpr_register_operand")] VSTRWQSBWB_F)) (set (match_operand:V4SI 0 "s_register_operand" "=w") (unspec:V4SI [(match_dup 1) (match_dup 2)] @@ -9551,7 +9551,7 @@ [(match_operand:V4SI 0 "s_register_operand") (match_operand:V4SI 1 "s_register_operand") (match_operand:SI 2 "mve_vldrd_immediate") - (match_operand:HI 3 "vpr_register_operand") + (match_operand:V4BI 3 "vpr_register_operand") (unspec:V4SI [(const_int 0)] VLDRWGBWBQ)] 
"TARGET_HAVE_MVE" { @@ -9566,7 +9566,7 @@ [(match_operand:V4SI 0 "s_register_operand") (match_operand:V4SI 1 "s_register_operand") (match_operand:SI 2 "mve_vldrd_immediate") - (match_operand:HI 3 "vpr_register_operand") + (match_operand:V4BI 3 "vpr_register_operand") (unspec:V4SI [(const_int 0)] VLDRWGBWBQ)] "TARGET_HAVE_MVE" { @@ -9585,7 +9585,7 @@ [(set (match_operand:V4SI 0 "s_register_operand" "=&w") (unspec:V4SI [(match_operand:V4SI 2 "s_register_operand" "1") (match_operand:SI 3 "mve_vldrd_immediate" "Ri") - (match_operand:HI 4 "vpr_register_operand" "Up") + (match_operand:V4BI 4 "vpr_register_operand" "Up") (mem:BLK (scratch))] VLDRWGBWBQ)) (set (match_operand:V4SI 1 "s_register_operand" "=&w") @@ -9659,7 +9659,7 @@ [(match_operand:V4SI 0 "s_register_operand") (match_operand:V4SI 1 "s_register_operand") (match_operand:SI 2 "mve_vldrd_immediate") - (match_operand:HI 3 "vpr_register_operand") + (match_operand:V4BI 3 "vpr_register_operand") (unspec:V4SI [(const_int 0)] VLDRWQGBWB_F)] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" { @@ -9675,7 +9675,7 @@ [(match_operand:V4SF 0 "s_register_operand") (match_operand:V4SI 1 "s_register_operand") (match_operand:SI 2 "mve_vldrd_immediate") - (match_operand:HI 3 "vpr_register_operand") + (match_operand:V4BI 3 "vpr_register_operand") (unspec:V4SI [(const_int 0)] VLDRWQGBWB_F)] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" { @@ -9694,7 +9694,7 @@ [(set (match_operand:V4SF 0 "s_register_operand" "=&w") (unspec:V4SF [(match_operand:V4SI 2 "s_register_operand" "1") (match_operand:SI 3 "mve_vldrd_immediate" "Ri") - (match_operand:HI 4 "vpr_register_operand" "Up") + (match_operand:V4BI 4 "vpr_register_operand" "Up") (mem:BLK (scratch))] VLDRWQGBWB_F)) (set (match_operand:V4SI 1 "s_register_operand" "=&w") @@ -9844,7 +9844,7 @@ (unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "0") (match_operand:V4SI 2 "s_register_operand" "w") (match_operand:V4SI 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:V4BI 4 "vpr_register_operand" "Up")] VADCIQ_M)) (set (reg:SI VFPCC_REGNUM) (unspec:SI [(const_int 0)] @@ -9880,7 +9880,7 @@ (unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "0") (match_operand:V4SI 2 "s_register_operand" "w") (match_operand:V4SI 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:V4BI 4 "vpr_register_operand" "Up")] VADCQ_M)) (set (reg:SI VFPCC_REGNUM) (unspec:SI [(reg:SI VFPCC_REGNUM)] @@ -9917,7 +9917,7 @@ (unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "w") (match_operand:V4SI 2 "s_register_operand" "w") (match_operand:V4SI 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:V4BI 4 "vpr_register_operand" "Up")] VSBCIQ_M)) (set (reg:SI VFPCC_REGNUM) (unspec:SI [(const_int 0)] @@ -9953,7 +9953,7 @@ (unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "w") (match_operand:V4SI 2 "s_register_operand" "w") (match_operand:V4SI 3 "s_register_operand" "w") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:V4BI 4 "vpr_register_operand" "Up")] VSBCQ_M)) (set (reg:SI VFPCC_REGNUM) (unspec:SI [(reg:SI VFPCC_REGNUM)] @@ -10335,7 +10335,7 @@ (match_operand:MVE_2 1 "s_register_operand") (match_operand:SI 2 "s_register_operand") (match_operand:SI 3 "mve_imm_32") - (match_operand:HI 4 "vpr_register_operand") + (match_operand:<MVE_VPRED> 4 "vpr_register_operand") (unspec:MVE_2 [(const_int 0)] VSHLCQ_M)] "TARGET_HAVE_MVE" { @@ -10351,7 +10351,7 @@ (match_operand:MVE_2 1 
"s_register_operand") (match_operand:SI 2 "s_register_operand") (match_operand:SI 3 "mve_imm_32") - (match_operand:HI 4 "vpr_register_operand") + (match_operand:<MVE_VPRED> 4 "vpr_register_operand") (unspec:MVE_2 [(const_int 0)] VSHLCQ_M)] "TARGET_HAVE_MVE" { @@ -10367,7 +10367,7 @@ (unspec:MVE_2 [(match_operand:MVE_2 2 "s_register_operand" "0") (match_operand:SI 3 "s_register_operand" "1") (match_operand:SI 4 "mve_imm_32" "Rf") - (match_operand:HI 5 "vpr_register_operand" "Up")] + (match_operand:<MVE_VPRED> 5 "vpr_register_operand" "Up")] VSHLCQ_M)) (set (match_operand:SI 1 "s_register_operand" "=r") (unspec:SI [(match_dup 2) @@ -10457,7 +10457,7 @@ (unspec:V16QI [(match_operand:SI 1 "const_int_coproc_operand" "i") (match_operand:V16QI 2 "register_operand" "0") (match_operand:SI 3 "const_int_mve_cde1_operand" "i") - (match_operand:HI 4 "vpr_register_operand" "Up")] + (match_operand:V16BI 4 "vpr_register_operand" "Up")] CDE_VCX))] "TARGET_CDE && TARGET_HAVE_MVE" "vpst\;vcx1<a>t\\tp%c1, %q0, #%c3" @@ -10471,7 +10471,7 @@ (match_operand:V16QI 2 "register_operand" "0") (match_operand:V16QI 3 "register_operand" "t") (match_operand:SI 4 "const_int_mve_cde2_operand" "i") - (match_operand:HI 5 "vpr_register_operand" "Up")] + (match_operand:V16BI 5 "vpr_register_operand" "Up")] CDE_VCX))] "TARGET_CDE && TARGET_HAVE_MVE" "vpst\;vcx2<a>t\\tp%c1, %q0, %q3, #%c4" @@ -10486,7 +10486,7 @@ (match_operand:V16QI 3 "register_operand" "t") (match_operand:V16QI 4 "register_operand" "t") (match_operand:SI 5 "const_int_mve_cde3_operand" "i") - (match_operand:HI 6 "vpr_register_operand" "Up")] + (match_operand:V16BI 6 "vpr_register_operand" "Up")] CDE_VCX))] "TARGET_CDE && TARGET_HAVE_MVE" "vpst\;vcx3<a>t\\tp%c1, %q0, %q3, %q4, #%c5" @@ -10516,3 +10516,65 @@ "vldr<V_sz_elem1>.<V_sz_elem>\t%q0, %E1" [(set_attr "type" "mve_load")] ) + +;; Expander for VxBI moves +(define_expand "mov<mode>" + [(set (match_operand:MVE_7 0 "nonimmediate_operand") + (match_operand:MVE_7 1 "general_operand"))] + "TARGET_HAVE_MVE" + { + if (!register_operand (operands[0], <MODE>mode)) + operands[1] = force_reg (<MODE>mode, operands[1]); + } +) + +;; Expanders for vec_cmp and vcond + +(define_expand "vec_cmp<mode><MVE_vpred>" + [(set (match_operand:<MVE_VPRED> 0 "s_register_operand") + (match_operator:<MVE_VPRED> 1 "comparison_operator" + [(match_operand:MVE_VLD_ST 2 "s_register_operand") + (match_operand:MVE_VLD_ST 3 "reg_or_zero_operand")]))] + "TARGET_HAVE_MVE + && (!<Is_float_mode> || flag_unsafe_math_optimizations)" +{ + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), + operands[2], operands[3], false); + DONE; +}) + +(define_expand "vec_cmpu<mode><MVE_vpred>" + [(set (match_operand:<MVE_VPRED> 0 "s_register_operand") + (match_operator:<MVE_VPRED> 1 "comparison_operator" + [(match_operand:MVE_2 2 "s_register_operand") + (match_operand:MVE_2 3 "reg_or_zero_operand")]))] + "TARGET_HAVE_MVE" +{ + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), + operands[2], operands[3], false); + DONE; +}) + +(define_expand "vcond_mask_<mode><MVE_vpred>" + [(set (match_operand:MVE_VLD_ST 0 "s_register_operand") + (if_then_else:MVE_VLD_ST + (match_operand:<MVE_VPRED> 3 "s_register_operand") + (match_operand:MVE_VLD_ST 1 "s_register_operand") + (match_operand:MVE_VLD_ST 2 "s_register_operand")))] + "TARGET_HAVE_MVE" +{ + switch (GET_MODE_CLASS (<MODE>mode)) + { + case MODE_VECTOR_INT: + emit_insn (gen_mve_vpselq (VPSELQ_S, <MODE>mode, operands[0], + operands[1], operands[2], operands[3])); + break; + case 
MODE_VECTOR_FLOAT: + emit_insn (gen_mve_vpselq_f (<MODE>mode, operands[0], + operands[1], operands[2], operands[3])); + break; + default: + gcc_unreachable (); + } + DONE; +}) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index e06c824..f270ded 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -1394,6 +1394,45 @@ [(set_attr "type" "neon_qsub<q>")] ) +(define_expand "vec_cmp<mode><v_cmp_result>" + [(set (match_operand:<V_cmp_result> 0 "s_register_operand") + (match_operator:<V_cmp_result> 1 "comparison_operator" + [(match_operand:VDQWH 2 "s_register_operand") + (match_operand:VDQWH 3 "reg_or_zero_operand")]))] + "TARGET_NEON + && (!<Is_float_mode> || flag_unsafe_math_optimizations)" +{ + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), + operands[2], operands[3], false); + DONE; +}) + +(define_expand "vec_cmpu<mode><mode>" + [(set (match_operand:VDQIW 0 "s_register_operand") + (match_operator:VDQIW 1 "comparison_operator" + [(match_operand:VDQIW 2 "s_register_operand") + (match_operand:VDQIW 3 "reg_or_zero_operand")]))] + "TARGET_NEON" +{ + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), + operands[2], operands[3], false); + DONE; +}) + +(define_expand "vcond_mask_<mode><v_cmp_result>" + [(set (match_operand:VDQWH 0 "s_register_operand") + (if_then_else:VDQWH + (match_operand:<V_cmp_result> 3 "s_register_operand") + (match_operand:VDQWH 1 "s_register_operand") + (match_operand:VDQWH 2 "s_register_operand")))] + "TARGET_NEON + && (!<Is_float_mode> || flag_unsafe_math_optimizations)" +{ + emit_insn (gen_neon_vbsl<mode> (operands[0], operands[3], operands[1], + operands[2])); + DONE; +}) + ;; Patterns for builtins. ; good for plain vadd, vaddq. @@ -2866,20 +2905,49 @@ }) -;; These instructions map to the __builtins for the Dot Product operations. -(define_insn "neon_<sup>dot<vsi2qi>" +;; These map to the auto-vectorizer Dot Product optab. +;; The auto-vectorizer expects a dot product builtin that also does an +;; accumulation into the provided register. +;; Given the following pattern +;; +;; for (i=0; i<len; i++) { +;; c = a[i] * b[i]; +;; r += c; +;; } +;; return result; +;; +;; This can be auto-vectorized to +;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3]; +;; +;; given enough iterations. However the vectorizer can keep unrolling the loop +;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7]; +;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11]; +;; ... +;; +;; and so the vectorizer provides r, in which the result has to be accumulated. 
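A minimal worked example of the loop shape described in the comment above (an illustrative sketch, not part of this patch; the function name and build assumptions are invented): for a target where TARGET_DOTPROD holds, the vectorizer can use the <sup>dot_prod<vsi2qi> expander to implement this reduction with v<sup>dot instructions, with r supplied as the accumulator operand.

/* Illustrative only: four byte products can be accumulated into each
   32-bit lane of a vector accumulator, with a lane-wise sum after the
   loop producing r.  */
unsigned int
dot_u8 (const unsigned char *a, const unsigned char *b, int len)
{
  unsigned int r = 0;
  for (int i = 0; i < len; i++)
    r += a[i] * b[i];	/* widening multiply, then accumulate */
  return r;
}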
+(define_insn "<sup>dot_prod<vsi2qi>" [(set (match_operand:VCVTI 0 "register_operand" "=w") - (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0") - (unspec:VCVTI [(match_operand:<VSI2QI> 2 - "register_operand" "w") - (match_operand:<VSI2QI> 3 - "register_operand" "w")] - DOTPROD)))] + (plus:VCVTI + (unspec:VCVTI [(match_operand:<VSI2QI> 1 "register_operand" "w") + (match_operand:<VSI2QI> 2 "register_operand" "w")] + DOTPROD) + (match_operand:VCVTI 3 "register_operand" "0")))] "TARGET_DOTPROD" - "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3" + "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2" [(set_attr "type" "neon_dot<q>")] ) +;; These instructions map to the __builtins for the Dot Product operations +(define_expand "neon_<sup>dot<vsi2qi>" + [(set (match_operand:VCVTI 0 "register_operand" "=w") + (plus:VCVTI + (unspec:VCVTI [(match_operand:<VSI2QI> 2 "register_operand") + (match_operand:<VSI2QI> 3 "register_operand")] + DOTPROD) + (match_operand:VCVTI 1 "register_operand")))] + "TARGET_DOTPROD" +) + ;; These instructions map to the __builtins for the Dot Product operations. (define_insn "neon_usdot<vsi2qi>" [(set (match_operand:VCVTI 0 "register_operand" "=w") @@ -2898,17 +2966,40 @@ ;; indexed operations. (define_insn "neon_<sup>dot_lane<vsi2qi>" [(set (match_operand:VCVTI 0 "register_operand" "=w") - (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0") - (unspec:VCVTI [(match_operand:<VSI2QI> 2 - "register_operand" "w") - (match_operand:V8QI 3 "register_operand" "t") - (match_operand:SI 4 "immediate_operand" "i")] - DOTPROD)))] + (plus:VCVTI + (unspec:VCVTI [(match_operand:<VSI2QI> 2 "register_operand" "w") + (match_operand:V8QI 3 "register_operand" "t") + (match_operand:SI 4 "immediate_operand" "i")] + DOTPROD) + (match_operand:VCVTI 1 "register_operand" "0")))] + "TARGET_DOTPROD" + "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"; + [(set_attr "type" "neon_dot<q>")] +) + +;; These instructions map to the __builtins for the Dot Product +;; indexed operations. +(define_insn "neon_<sup>dot_laneq<vsi2qi>" + [(set (match_operand:VCVTI 0 "register_operand" "=w") + (plus:VCVTI + (unspec:VCVTI [(match_operand:<VSI2QI> 2 "register_operand" "w") + (match_operand:V16QI 3 "register_operand" "t") + (match_operand:SI 4 "immediate_operand" "i")] + DOTPROD) + (match_operand:VCVTI 1 "register_operand" "0")))] "TARGET_DOTPROD" { - operands[4] - = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4]))); - return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"; + int lane = INTVAL (operands[4]); + if (lane > GET_MODE_NUNITS (V2SImode) - 1) + { + operands[4] = GEN_INT (lane - GET_MODE_NUNITS (V2SImode)); + return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %f3[%c4]"; + } + else + { + operands[4] = GEN_INT (lane); + return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %e3[%c4]"; + } } [(set_attr "type" "neon_dot<q>")] ) @@ -2925,50 +3016,37 @@ DOTPROD_I8MM) (match_operand:VCVTI 1 "register_operand" "0")))] "TARGET_I8MM" + "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]" + [(set_attr "type" "neon_dot<q>")] +) + +;; These instructions map to the __builtins for the Dot Product +;; indexed operations in the v8.6 I8MM extension. 
+(define_insn "neon_<sup>dot_laneq<vsi2qi>" + [(set (match_operand:VCVTI 0 "register_operand" "=w") + (plus:VCVTI + (unspec:VCVTI [(match_operand:<VSI2QI> 2 "register_operand" "w") + (match_operand:V16QI 3 "register_operand" "t") + (match_operand:SI 4 "immediate_operand" "i")] + DOTPROD_I8MM) + (match_operand:VCVTI 1 "register_operand" "0")))] + "TARGET_I8MM" { - operands[4] = GEN_INT (INTVAL (operands[4])); - return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]"; + int lane = INTVAL (operands[4]); + if (lane > GET_MODE_NUNITS (V2SImode) - 1) + { + operands[4] = GEN_INT (lane - GET_MODE_NUNITS (V2SImode)); + return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %f3[%c4]"; + } + else + { + operands[4] = GEN_INT (lane); + return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %e3[%c4]"; + } } [(set_attr "type" "neon_dot<q>")] ) -;; These expands map to the Dot Product optab the vectorizer checks for. -;; The auto-vectorizer expects a dot product builtin that also does an -;; accumulation into the provided register. -;; Given the following pattern -;; -;; for (i=0; i<len; i++) { -;; c = a[i] * b[i]; -;; r += c; -;; } -;; return result; -;; -;; This can be auto-vectorized to -;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3]; -;; -;; given enough iterations. However the vectorizer can keep unrolling the loop -;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7]; -;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11]; -;; ... -;; -;; and so the vectorizer provides r, in which the result has to be accumulated. -(define_expand "<sup>dot_prod<vsi2qi>" - [(set (match_operand:VCVTI 0 "register_operand") - (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1 - "register_operand") - (match_operand:<VSI2QI> 2 - "register_operand")] - DOTPROD) - (match_operand:VCVTI 3 "register_operand")))] - "TARGET_DOTPROD" -{ - emit_insn ( - gen_neon_<sup>dot<vsi2qi> (operands[3], operands[3], operands[1], - operands[2])); - emit_insn (gen_rtx_SET (operands[0], operands[3])); - DONE; -}) - ;; Auto-vectorizer pattern for usdot (define_expand "usdot_prod<vsi2qi>" [(set (match_operand:VCVTI 0 "register_operand") diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index cef358e..f130090 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -265,18 +265,18 @@ ;; remainder. Because of this, expand early. 
(define_expand "cml<fcmac1><conj_op><mode>4" [(set (match_operand:VF 0 "register_operand") - (plus:VF (match_operand:VF 1 "register_operand") - (unspec:VF [(match_operand:VF 2 "register_operand") - (match_operand:VF 3 "register_operand")] - VCMLA_OP)))] + (plus:VF (unspec:VF [(match_operand:VF 1 "register_operand") + (match_operand:VF 2 "register_operand")] + VCMLA_OP) + (match_operand:VF 3 "register_operand")))] "(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT && ARM_HAVE_<MODE>_ARITH)) && !BYTES_BIG_ENDIAN" { rtx tmp = gen_reg_rtx (<MODE>mode); - emit_insn (gen_arm_vcmla<rotsplit1><mode> (tmp, operands[1], - operands[3], operands[2])); + emit_insn (gen_arm_vcmla<rotsplit1><mode> (tmp, operands[3], + operands[2], operands[1])); emit_insn (gen_arm_vcmla<rotsplit2><mode> (operands[0], tmp, - operands[3], operands[2])); + operands[2], operands[1])); DONE; }) @@ -363,33 +363,6 @@ } }) -(define_expand "vec_cmp<mode><v_cmp_result>" - [(set (match_operand:<V_cmp_result> 0 "s_register_operand") - (match_operator:<V_cmp_result> 1 "comparison_operator" - [(match_operand:VDQWH 2 "s_register_operand") - (match_operand:VDQWH 3 "reg_or_zero_operand")]))] - "ARM_HAVE_<MODE>_ARITH - && !TARGET_REALLY_IWMMXT - && (!<Is_float_mode> || flag_unsafe_math_optimizations)" -{ - arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), - operands[2], operands[3], false, false); - DONE; -}) - -(define_expand "vec_cmpu<mode><mode>" - [(set (match_operand:VDQIW 0 "s_register_operand") - (match_operator:VDQIW 1 "comparison_operator" - [(match_operand:VDQIW 2 "s_register_operand") - (match_operand:VDQIW 3 "reg_or_zero_operand")]))] - "ARM_HAVE_<MODE>_ARITH - && !TARGET_REALLY_IWMMXT" -{ - arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), - operands[2], operands[3], false, false); - DONE; -}) - ;; Conditional instructions. These are comparisons with conditional moves for ;; vectors. 
They perform the assignment: ;; @@ -461,31 +434,6 @@ DONE; }) -(define_expand "vcond_mask_<mode><v_cmp_result>" - [(set (match_operand:VDQWH 0 "s_register_operand") - (if_then_else:VDQWH - (match_operand:<V_cmp_result> 3 "s_register_operand") - (match_operand:VDQWH 1 "s_register_operand") - (match_operand:VDQWH 2 "s_register_operand")))] - "ARM_HAVE_<MODE>_ARITH - && !TARGET_REALLY_IWMMXT - && (!<Is_float_mode> || flag_unsafe_math_optimizations)" -{ - if (TARGET_NEON) - { - emit_insn (gen_neon_vbsl (<MODE>mode, operands[0], operands[3], - operands[1], operands[2])); - } - else if (TARGET_HAVE_MVE) - { - emit_insn (gen_mve_vpselq (VPSELQ_S, <MODE>mode, operands[0], - operands[1], operands[2], operands[3])); - } - else - gcc_unreachable (); - DONE; -}) - (define_expand "vec_load_lanesoi<mode>" [(set (match_operand:OI 0 "s_register_operand") (unspec:OI [(match_operand:OI 1 "neon_struct_operand") diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index f5ccb92..d0f423c 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -73,21 +73,26 @@ (define_insn "*thumb2_movhi_vfp" [(set - (match_operand:HI 0 "nonimmediate_operand" + (match_operand:MVE_7_HI 0 "nonimmediate_operand" "=rk, r, l, r, m, r, *t, r, *t, Up, r") - (match_operand:HI 1 "general_operand" - "rk, I, Py, n, r, m, r, *t, *t, r, Up"))] + (match_operand:MVE_7_HI 1 "general_operand" + "rk, IDB, Py, n, r, m, r, *t, *t, r, Up"))] "TARGET_THUMB2 && TARGET_VFP_BASE && !TARGET_VFP_FP16INST - && (register_operand (operands[0], HImode) - || register_operand (operands[1], HImode))" + && (register_operand (operands[0], <MODE>mode) + || register_operand (operands[1], <MODE>mode))" { switch (which_alternative) { case 0: - case 1: case 2: return "mov%?\t%0, %1\t%@ movhi"; + case 1: + if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_VECTOR_BOOL) + operands[1] = mve_bool_vec_to_const (operands[1]); + else + operands[1] = gen_lowpart (HImode, operands[1]); + return "mov%?\t%0, %1\t%@ movhi"; case 3: return "movw%?\t%0, %L1\t%@ movhi"; case 4: @@ -173,20 +178,25 @@ (define_insn "*thumb2_movhi_fp16" [(set - (match_operand:HI 0 "nonimmediate_operand" + (match_operand:MVE_7_HI 0 "nonimmediate_operand" "=rk, r, l, r, m, r, *t, r, *t, Up, r") - (match_operand:HI 1 "general_operand" - "rk, I, Py, n, r, m, r, *t, *t, r, Up"))] + (match_operand:MVE_7_HI 1 "general_operand" + "rk, IDB, Py, n, r, m, r, *t, *t, r, Up"))] "TARGET_THUMB2 && (TARGET_VFP_FP16INST || TARGET_HAVE_MVE) - && (register_operand (operands[0], HImode) - || register_operand (operands[1], HImode))" + && (register_operand (operands[0], <MODE>mode) + || register_operand (operands[1], <MODE>mode))" { switch (which_alternative) { case 0: - case 1: case 2: return "mov%?\t%0, %1\t%@ movhi"; + case 1: + if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_VECTOR_BOOL) + operands[1] = mve_bool_vec_to_const (operands[1]); + else + operands[1] = gen_lowpart (HImode, operands[1]); + return "mov%?\t%0, %1\t%@ movhi"; case 3: return "movw%?\t%0, %L1\t%@ movhi"; case 4: diff --git a/gcc/config/cris/constraints.md b/gcc/config/cris/constraints.md index 01ec12c..83fab62 100644 --- a/gcc/config/cris/constraints.md +++ b/gcc/config/cris/constraints.md @@ -18,7 +18,12 @@ ;; <http://www.gnu.org/licenses/>. ;; Register constraints. -(define_register_constraint "b" "GENNONACR_REGS" + +;; Kept for compatibility. It used to exclude the CRIS v32 +;; register "ACR", which was like GENERAL_REGS except it +;; couldn't be used for autoincrement, and intended mainly +;; for use in user asm statements. 
+(define_register_constraint "b" "GENERAL_REGS" "@internal") (define_register_constraint "h" "MOF_REGS" diff --git a/gcc/config/cris/cris.cc b/gcc/config/cris/cris.cc index a7807b3..f0017d6 100644 --- a/gcc/config/cris/cris.cc +++ b/gcc/config/cris/cris.cc @@ -1661,15 +1661,25 @@ cris_reload_address_legitimized (rtx x, a bug. */ static reg_class_t -cris_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass) +cris_preferred_reload_class (rtx x, reg_class_t rclass) { - if (rclass != ACR_REGS - && rclass != MOF_REGS + if (rclass != MOF_REGS && rclass != MOF_SRP_REGS && rclass != SRP_REGS && rclass != CC0_REGS && rclass != SPECIAL_REGS) - return GENNONACR_REGS; + return GENERAL_REGS; + + /* We can't make use of something that's not a general register when + reloading an "eliminated" register (i.e. something that has turned into + e.g. sp + const_int). */ + if (GET_CODE (x) == PLUS && !reg_class_subset_p (rclass, GENERAL_REGS)) + return NO_REGS; + + /* Avoid putting constants into a special register, where the instruction is + shorter if loaded into a general register. */ + if (satisfies_constraint_P (x) && !reg_class_subset_p (rclass, GENERAL_REGS)) + return NO_REGS; return rclass; } @@ -1684,20 +1694,10 @@ cris_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED, their move cost within that class is higher. How about 7? That's 3 for a move to a GENERAL_REGS register, 3 for the move from the GENERAL_REGS register, and 1 for the increased register pressure. - Also, it's higher than the memory move cost, as it should. - We also do this for ALL_REGS, since we don't want that class to be - preferred (even to memory) at all where GENERAL_REGS doesn't fit. - Whenever it's about to be used, it's for SPECIAL_REGS. If we don't - present a higher cost for ALL_REGS than memory, a SPECIAL_REGS may be - used when a GENERAL_REGS should be used, even if there are call-saved - GENERAL_REGS left to allocate. This is because the fall-back when - the most preferred register class isn't available, isn't the next - (or next good) wider register class, but the *most widest* register - class. FIXME: pre-IRA comment, perhaps obsolete now. */ - - if ((reg_classes_intersect_p (from, SPECIAL_REGS) - && reg_classes_intersect_p (to, SPECIAL_REGS)) - || from == ALL_REGS || to == ALL_REGS) + Also, it's higher than the memory move cost, as it should be. 
*/ + + if (reg_classes_intersect_p (from, SPECIAL_REGS) + && reg_classes_intersect_p (to, SPECIAL_REGS)) return 7; /* Make moves to/from SPECIAL_REGS slightly more expensive, as we diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h index b274e11..6edfe13 100644 --- a/gcc/config/cris/cris.h +++ b/gcc/config/cris/cris.h @@ -153,7 +153,9 @@ extern int cris_cpu_version; #ifdef HAVE_AS_NO_MUL_BUG_ABORT_OPTION #define MAYBE_AS_NO_MUL_BUG_ABORT \ - "%{mno-mul-bug-workaround:-no-mul-bug-abort} " + "%{mno-mul-bug-workaround:-no-mul-bug-abort} " \ + "%{mmul-bug-workaround:-mul-bug-abort} " \ + "%{!mmul-bug-workaround:%{!mno-mul-bug-workaround:" MUL_BUG_ASM_DEFAULT "}} " #else #define MAYBE_AS_NO_MUL_BUG_ABORT #endif @@ -255,15 +257,26 @@ extern int cris_cpu_version; (MASK_SIDE_EFFECT_PREFIXES + MASK_STACK_ALIGN \ + MASK_CONST_ALIGN + MASK_DATA_ALIGN \ + MASK_ALIGN_BY_32 \ - + MASK_PROLOGUE_EPILOGUE + MASK_MUL_BUG) + + MASK_PROLOGUE_EPILOGUE) # else /* 0 */ # define TARGET_DEFAULT \ (MASK_SIDE_EFFECT_PREFIXES + MASK_STACK_ALIGN \ + MASK_CONST_ALIGN + MASK_DATA_ALIGN \ - + MASK_PROLOGUE_EPILOGUE + MASK_MUL_BUG) + + MASK_PROLOGUE_EPILOGUE) # endif #endif +/* Don't depend on the assembler default setting for the errata machinery; + always pass the option to turn it on or off explicitly. But, we have to + decide on which is the *GCC* default, and for that we should only need to + consider what's in TARGET_DEFAULT; no other changes should be necessary. */ + +#if (TARGET_DEFAULT & MASK_MUL_BUG) +#define MUL_BUG_ASM_DEFAULT "-mul-bug-abort" +#else +#define MUL_BUG_ASM_DEFAULT "-no-mul-bug-abort" +#endif + /* Local, providing a default for cris_cpu_version. */ #define CRIS_DEFAULT_CPU_VERSION TARGET_CPU_DEFAULT @@ -423,19 +436,15 @@ extern int cris_cpu_version; /* Node: Register Classes */ -/* We need a separate register class to handle register allocation for - ACR, since it can't be used for post-increment. - - It's not obvious, but having subunions of all movable-between +/* It's not obvious, but having subunions of all movable-between register classes does really help register allocation (pre-IRA comment). */ enum reg_class { NO_REGS, - ACR_REGS, MOF_REGS, SRP_REGS, CC0_REGS, + MOF_REGS, SRP_REGS, CC0_REGS, MOF_SRP_REGS, SPECIAL_REGS, - SPEC_ACR_REGS, GENNONACR_REGS, - SPEC_GENNONACR_REGS, GENERAL_REGS, + GENERAL_REGS, ALL_REGS, LIM_REG_CLASSES }; @@ -444,9 +453,8 @@ enum reg_class #define REG_CLASS_NAMES \ {"NO_REGS", \ - "ACR_REGS", "MOF_REGS", "SRP_REGS", "CC0_REGS", \ + "MOF_REGS", "SRP_REGS", "CC0_REGS", \ "MOF_SRP_REGS", "SPECIAL_REGS", \ - "SPEC_ACR_REGS", "GENNONACR_REGS", "SPEC_GENNONACR_REGS", \ "GENERAL_REGS", "ALL_REGS"} #define CRIS_SPECIAL_REGS_CONTENTS \ @@ -459,37 +467,25 @@ enum reg_class #define REG_CLASS_CONTENTS \ { \ {0}, \ - {1 << CRIS_ACR_REGNUM}, \ {1 << CRIS_MOF_REGNUM}, \ {1 << CRIS_SRP_REGNUM}, \ {1 << CRIS_CC0_REGNUM}, \ {(1 << CRIS_MOF_REGNUM) \ | (1 << CRIS_SRP_REGNUM)}, \ {CRIS_SPECIAL_REGS_CONTENTS}, \ - {CRIS_SPECIAL_REGS_CONTENTS \ - | (1 << CRIS_ACR_REGNUM)}, \ - {(0xffff | CRIS_FAKED_REGS_CONTENTS) \ - & ~(1 << CRIS_ACR_REGNUM)}, \ - {(0xffff | CRIS_FAKED_REGS_CONTENTS \ - | CRIS_SPECIAL_REGS_CONTENTS) \ - & ~(1 << CRIS_ACR_REGNUM)}, \ {0xffff | CRIS_FAKED_REGS_CONTENTS}, \ {0xffff | CRIS_FAKED_REGS_CONTENTS \ | CRIS_SPECIAL_REGS_CONTENTS} \ } #define REGNO_REG_CLASS(REGNO) \ - ((REGNO) == CRIS_ACR_REGNUM ? ACR_REGS : \ - (REGNO) == CRIS_MOF_REGNUM ? MOF_REGS : \ + ((REGNO) == CRIS_MOF_REGNUM ? MOF_REGS : \ (REGNO) == CRIS_SRP_REGNUM ? 
SRP_REGS : \ (REGNO) == CRIS_CC0_REGNUM ? CC0_REGS : \ GENERAL_REGS) #define BASE_REG_CLASS GENERAL_REGS -#define MODE_CODE_BASE_REG_CLASS(MODE, AS, OCODE, ICODE) \ - ((OCODE) != POST_INC ? BASE_REG_CLASS : GENNONACR_REGS) - #define INDEX_REG_CLASS GENERAL_REGS /* Since it uses reg_renumber, it is safe only once reg_renumber diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md index bc8d758..dd70941 100644 --- a/gcc/config/cris/cris.md +++ b/gcc/config/cris/cris.md @@ -60,7 +60,6 @@ [(CRIS_STATIC_CHAIN_REGNUM 7) (CRIS_REAL_FP_REGNUM 8) (CRIS_SP_REGNUM 14) - (CRIS_ACR_REGNUM 15) (CRIS_SRP_REGNUM 16) (CRIS_MOF_REGNUM 17) (CRIS_AP_REGNUM 18) @@ -154,9 +153,20 @@ (not (match_test "dead_or_set_regno_p (insn, CRIS_SRP_REGNUM)"))) (nil) (nil)]) +;; Enable choosing particular instructions. The discriminator choice +;; "v0" stands for "pre-v10", for brevity. +(define_attr "cpu_variant" "default,v0,v10" (const_string "default")) + (define_attr "enabled" "no,yes" (if_then_else - (eq_attr "cc_enabled" "normal") + (and + (eq_attr "cc_enabled" "normal") + (ior + (eq_attr "cpu_variant" "default") + (and (eq_attr "cpu_variant" "v10") + (match_test "TARGET_HAS_MUL_INSNS")) + (and (eq_attr "cpu_variant" "v0") + (not (match_test "TARGET_HAS_MUL_INSNS"))))) (const_string "yes") (const_string "no"))) @@ -579,13 +589,14 @@ (define_insn "*movsi_internal<setcc><setnz><setnzvc>" [(set (match_operand:SI 0 "nonimmediate_operand" - "=r,r, r,Q>,r,Q>,g,r,r,g,rQ>,x, m,x") + "=r,r, r,Q>,r,Q>,g,r,r,g,rQ>,x, m,x, Q>,r,g") (match_operand:SI 1 "general_operand" - "r,Q>,M,M, I,r, M,n,g,r,x, rQ>,x,gi")) + "r,Q>,M,M, I,r, M,n,g,r,x, rQ>,x,gi,r, g,r")) (clobber (reg:CC CRIS_CC0_REGNUM))] - ;; Note that we prefer not to use the S alternative (if for some reason - ;; it competes with others) above, but g matches S. - "" + ;; Avoid matching insns we know must be reloaded. Without one + ;; operand being a (pseudo-)register, reload chooses + ;; reload-registers suboptimally. + "REG_S_P (operands[0]) || REG_S_P (operands[1]) || operands[1] == const0_rtx" { /* Better to have c-switch here; it is worth it to optimize the size of move insns. The alternative would be to try to find more constraint @@ -597,6 +608,9 @@ case 5: case 8: case 9: + case 14: + case 15: + case 16: return "move.d %1,%0"; case 10: @@ -634,9 +648,10 @@ gcc_unreachable (); } } - [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,no,no,no,no,yes,yes,no,no") + [(set_attr "cpu_variant" "*,*,*,*,*,v0,*,*,v0,v0,*,*,*,*,v10,v10,v10") + (set_attr "slottable" "yes,yes,yes,yes,yes,yes,no,no,no,no,yes,yes,no,no,yes,no,no") (set_attr "cc<cccc><ccnz><ccnzvc>" - "*,*,none,none,*,none,none,*,*,none,none,none,none,none")]) + "*,*,none,none,*,none,none,*,*,none,none,none,none,none,none,*,none")]) ;; FIXME: See movsi. 
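The define_attr "enabled" added above gates each insn alternative on the new cpu_variant value. As a hedged sketch (not part of the patch; the helper name is invented), the selection corresponds roughly to this C predicate, where "v0" stands for pre-v10 and "v10" requires TARGET_HAS_MUL_INSNS:

#include <string.h>

/* Sketch of the cris.md "enabled" attribute: an alternative is usable
   when its condition-code test passes and its cpu_variant matches the
   configured CPU.  */
static int
cris_alternative_enabled_p (int cc_enabled_normal, const char *cpu_variant,
                            int target_has_mul_insns)
{
  if (!cc_enabled_normal)
    return 0;
  if (strcmp (cpu_variant, "default") == 0)
    return 1;
  if (strcmp (cpu_variant, "v10") == 0)
    return target_has_mul_insns != 0;
  /* "v0", i.e. pre-v10: only without the mul instructions.  */
  return !target_has_mul_insns;
}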
diff --git a/gcc/config/darwin.cc b/gcc/config/darwin.cc index 783fe3c..f065a13 100644 --- a/gcc/config/darwin.cc +++ b/gcc/config/darwin.cc @@ -3621,7 +3621,7 @@ tree darwin_fold_builtin (tree fndecl, int n_args, tree *argp, bool ARG_UNUSED (ignore)) { - unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl); + int fcode = DECL_MD_FUNCTION_CODE (fndecl); if (fcode == darwin_builtin_cfstring) { diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index 74819c6..402f025 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -4460,7 +4460,7 @@ gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec) pair of lanes, then on every pair of results from the previous iteration (thereby effectively reducing every 4 lanes) and so on until all lanes are reduced. */ - rtx in, out = src; + rtx in, out = force_reg (mode, src); for (int i = 0, shift = 1; i < 6; i++, shift <<= 1) { rtx shift_val = gen_rtx_CONST_INT (VOIDmode, shift); diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h index bc10c82..29511fd 100644 --- a/gcc/config/i386/avx512fintrin.h +++ b/gcc/config/i386/avx512fintrin.h @@ -1639,16 +1639,27 @@ _mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B, #endif +/* Constant helper to represent the ternary logic operations among + vector A, B and C. */ +typedef enum +{ + _MM_TERNLOG_A = 0xF0, + _MM_TERNLOG_B = 0xCC, + _MM_TERNLOG_C = 0xAA +} _MM_TERNLOG_ENUM; + #ifdef __OPTIMIZE__ extern __inline __m512i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, const int __imm) { - return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A, - (__v8di) __B, - (__v8di) __C, __imm, - (__mmask8) -1); + return (__m512i) + __builtin_ia32_pternlogq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) __C, + (unsigned char) __imm, + (__mmask8) -1); } extern __inline __m512i @@ -1656,10 +1667,12 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B, __m512i __C, const int __imm) { - return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A, - (__v8di) __B, - (__v8di) __C, __imm, - (__mmask8) __U); + return (__m512i) + __builtin_ia32_pternlogq512_mask ((__v8di) __A, + (__v8di) __B, + (__v8di) __C, + (unsigned char) __imm, + (__mmask8) __U); } extern __inline __m512i @@ -1667,10 +1680,12 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B, __m512i __C, const int __imm) { - return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A, - (__v8di) __B, - (__v8di) __C, - __imm, (__mmask8) __U); + return (__m512i) + __builtin_ia32_pternlogq512_maskz ((__v8di) __A, + (__v8di) __B, + (__v8di) __C, + (unsigned char) __imm, + (__mmask8) __U); } extern __inline __m512i @@ -1678,10 +1693,12 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, const int __imm) { - return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A, - (__v16si) __B, - (__v16si) __C, - __imm, (__mmask16) -1); + return (__m512i) + __builtin_ia32_pternlogd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) __C, + (unsigned char) __imm, + (__mmask16) -1); } extern __inline __m512i @@ -1689,10 +1706,12 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i 
__B, __m512i __C, const int __imm) { - return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A, - (__v16si) __B, - (__v16si) __C, - __imm, (__mmask16) __U); + return (__m512i) + __builtin_ia32_pternlogd512_mask ((__v16si) __A, + (__v16si) __B, + (__v16si) __C, + (unsigned char) __imm, + (__mmask16) __U); } extern __inline __m512i @@ -1700,33 +1719,56 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B, __m512i __C, const int __imm) { - return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A, - (__v16si) __B, - (__v16si) __C, - __imm, (__mmask16) __U); + return (__m512i) + __builtin_ia32_pternlogd512_maskz ((__v16si) __A, + (__v16si) __B, + (__v16si) __C, + (unsigned char) __imm, + (__mmask16) __U); } #else -#define _mm512_ternarylogic_epi64(A, B, C, I) \ - ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1)) -#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \ - ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U))) -#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \ - ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A), \ - (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U))) -#define _mm512_ternarylogic_epi32(A, B, C, I) \ - ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \ - (__mmask16)-1)) -#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \ - ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \ - (__mmask16)(U))) -#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \ - ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A), \ - (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \ - (__mmask16)(U))) +#define _mm512_ternarylogic_epi64(A, B, C, I) \ + ((__m512i) \ + __builtin_ia32_pternlogq512_mask ((__v8di) (__m512i) (A), \ + (__v8di) (__m512i) (B), \ + (__v8di) (__m512i) (C), \ + (unsigned char) (I), \ + (__mmask8) -1)) +#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \ + ((__m512i) \ + __builtin_ia32_pternlogq512_mask ((__v8di) (__m512i) (A), \ + (__v8di) (__m512i) (B), \ + (__v8di) (__m512i) (C), \ + (unsigned char)(I), \ + (__mmask8) (U))) +#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \ + ((__m512i) \ + __builtin_ia32_pternlogq512_maskz ((__v8di) (__m512i) (A), \ + (__v8di) (__m512i) (B), \ + (__v8di) (__m512i) (C), \ + (unsigned char) (I), \ + (__mmask8) (U))) +#define _mm512_ternarylogic_epi32(A, B, C, I) \ + ((__m512i) \ + __builtin_ia32_pternlogd512_mask ((__v16si) (__m512i) (A), \ + (__v16si) (__m512i) (B), \ + (__v16si) (__m512i) (C), \ + (unsigned char) (I), \ + (__mmask16) -1)) +#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \ + ((__m512i) \ + __builtin_ia32_pternlogd512_mask ((__v16si) (__m512i) (A), \ + (__v16si) (__m512i) (B), \ + (__v16si) (__m512i) (C), \ + (unsigned char) (I), \ + (__mmask16) (U))) +#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \ + ((__m512i) \ + __builtin_ia32_pternlogd512_maskz ((__v16si) (__m512i) (A), \ + (__v16si) (__m512i) (B), \ + (__v16si) (__m512i) (C), \ + (unsigned char) (I), \ + (__mmask16) (U))) #endif extern __inline __m512d diff --git a/gcc/config/i386/avx512vlintrin.h b/gcc/config/i386/avx512vlintrin.h 
index bbced24..26b286e 100644 --- a/gcc/config/i386/avx512vlintrin.h +++ b/gcc/config/i386/avx512vlintrin.h @@ -10575,10 +10575,12 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_ternarylogic_epi64 (__m256i __A, __m256i __B, __m256i __C, const int __imm) { - return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A, - (__v4di) __B, - (__v4di) __C, __imm, - (__mmask8) -1); + return (__m256i) + __builtin_ia32_pternlogq256_mask ((__v4di) __A, + (__v4di) __B, + (__v4di) __C, + (unsigned char) __imm, + (__mmask8) -1); } extern __inline __m256i @@ -10587,10 +10589,12 @@ _mm256_mask_ternarylogic_epi64 (__m256i __A, __mmask8 __U, __m256i __B, __m256i __C, const int __imm) { - return (__m256i) __builtin_ia32_pternlogq256_mask ((__v4di) __A, - (__v4di) __B, - (__v4di) __C, __imm, - (__mmask8) __U); + return (__m256i) + __builtin_ia32_pternlogq256_mask ((__v4di) __A, + (__v4di) __B, + (__v4di) __C, + (unsigned char) __imm, + (__mmask8) __U); } extern __inline __m256i @@ -10599,11 +10603,12 @@ _mm256_maskz_ternarylogic_epi64 (__mmask8 __U, __m256i __A, __m256i __B, __m256i __C, const int __imm) { - return (__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di) __A, - (__v4di) __B, - (__v4di) __C, - __imm, - (__mmask8) __U); + return (__m256i) + __builtin_ia32_pternlogq256_maskz ((__v4di) __A, + (__v4di) __B, + (__v4di) __C, + (unsigned char) __imm, + (__mmask8) __U); } extern __inline __m256i @@ -10611,10 +10616,12 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm256_ternarylogic_epi32 (__m256i __A, __m256i __B, __m256i __C, const int __imm) { - return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A, - (__v8si) __B, - (__v8si) __C, __imm, - (__mmask8) -1); + return (__m256i) + __builtin_ia32_pternlogd256_mask ((__v8si) __A, + (__v8si) __B, + (__v8si) __C, + (unsigned char) __imm, + (__mmask8) -1); } extern __inline __m256i @@ -10623,10 +10630,12 @@ _mm256_mask_ternarylogic_epi32 (__m256i __A, __mmask8 __U, __m256i __B, __m256i __C, const int __imm) { - return (__m256i) __builtin_ia32_pternlogd256_mask ((__v8si) __A, - (__v8si) __B, - (__v8si) __C, __imm, - (__mmask8) __U); + return (__m256i) + __builtin_ia32_pternlogd256_mask ((__v8si) __A, + (__v8si) __B, + (__v8si) __C, + (unsigned char) __imm, + (__mmask8) __U); } extern __inline __m256i @@ -10635,11 +10644,12 @@ _mm256_maskz_ternarylogic_epi32 (__mmask8 __U, __m256i __A, __m256i __B, __m256i __C, const int __imm) { - return (__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si) __A, - (__v8si) __B, - (__v8si) __C, - __imm, - (__mmask8) __U); + return (__m256i) + __builtin_ia32_pternlogd256_maskz ((__v8si) __A, + (__v8si) __B, + (__v8si) __C, + (unsigned char) __imm, + (__mmask8) __U); } extern __inline __m128i @@ -10647,33 +10657,40 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_ternarylogic_epi64 (__m128i __A, __m128i __B, __m128i __C, const int __imm) { - return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A, - (__v2di) __B, - (__v2di) __C, __imm, - (__mmask8) -1); + return (__m128i) + __builtin_ia32_pternlogq128_mask ((__v2di) __A, + (__v2di) __B, + (__v2di) __C, + (unsigned char) __imm, + (__mmask8) -1); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_ternarylogic_epi64 (__m128i __A, __mmask8 __U, - __m128i __B, __m128i __C, const int __imm) + __m128i __B, __m128i __C, + const int __imm) { - return (__m128i) __builtin_ia32_pternlogq128_mask ((__v2di) __A, - (__v2di) __B, - (__v2di) __C, 
__imm, - (__mmask8) __U); + return (__m128i) + __builtin_ia32_pternlogq128_mask ((__v2di) __A, + (__v2di) __B, + (__v2di) __C, + (unsigned char) __imm, + (__mmask8) __U); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_ternarylogic_epi64 (__mmask8 __U, __m128i __A, - __m128i __B, __m128i __C, const int __imm) + __m128i __B, __m128i __C, + const int __imm) { - return (__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di) __A, - (__v2di) __B, - (__v2di) __C, - __imm, - (__mmask8) __U); + return (__m128i) + __builtin_ia32_pternlogq128_maskz ((__v2di) __A, + (__v2di) __B, + (__v2di) __C, + (unsigned char) __imm, + (__mmask8) __U); } extern __inline __m128i @@ -10681,33 +10698,40 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_ternarylogic_epi32 (__m128i __A, __m128i __B, __m128i __C, const int __imm) { - return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A, - (__v4si) __B, - (__v4si) __C, __imm, - (__mmask8) -1); + return (__m128i) + __builtin_ia32_pternlogd128_mask ((__v4si) __A, + (__v4si) __B, + (__v4si) __C, + (unsigned char) __imm, + (__mmask8) -1); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_mask_ternarylogic_epi32 (__m128i __A, __mmask8 __U, - __m128i __B, __m128i __C, const int __imm) + __m128i __B, __m128i __C, + const int __imm) { - return (__m128i) __builtin_ia32_pternlogd128_mask ((__v4si) __A, - (__v4si) __B, - (__v4si) __C, __imm, - (__mmask8) __U); + return (__m128i) + __builtin_ia32_pternlogd128_mask ((__v4si) __A, + (__v4si) __B, + (__v4si) __C, + (unsigned char) __imm, + (__mmask8) __U); } extern __inline __m128i __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskz_ternarylogic_epi32 (__mmask8 __U, __m128i __A, - __m128i __B, __m128i __C, const int __imm) + __m128i __B, __m128i __C, + const int __imm) { - return (__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si) __A, - (__v4si) __B, - (__v4si) __C, - __imm, - (__mmask8) __U); + return (__m128i) + __builtin_ia32_pternlogd128_maskz ((__v4si) __A, + (__v4si) __B, + (__v4si) __C, + (unsigned char) __imm, + (__mmask8) __U); } extern __inline __m256 @@ -12910,53 +12934,101 @@ _mm256_permutex_pd (__m256d __X, const int __M) (__v2di)(__m128i)_mm_setzero_si128 (),\ (__mmask8)(U))) -#define _mm256_ternarylogic_epi64(A, B, C, I) \ - ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)-1)) - -#define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) \ - ((__m256i) __builtin_ia32_pternlogq256_mask ((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U))) - -#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) \ - ((__m256i) __builtin_ia32_pternlogq256_maskz ((__v4di)(__m256i)(A), \ - (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), (int)(I), (__mmask8)(U))) - -#define _mm256_ternarylogic_epi32(A, B, C, I) \ - ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)-1)) - -#define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) \ - ((__m256i) __builtin_ia32_pternlogd256_mask ((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U))) - -#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) \ - ((__m256i) __builtin_ia32_pternlogd256_maskz ((__v8si)(__m256i)(A), \ - (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), (int)(I), (__mmask8)(U))) - 
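The _MM_TERNLOG_ENUM constants introduced above give names to the truth-table columns of the three ternarylogic operands: treating _MM_TERNLOG_A, _MM_TERNLOG_B and _MM_TERNLOG_C as bit patterns, any bitwise expression over them evaluates to the 8-bit immediate that applies the same expression to every bit of the vectors. A minimal usage sketch (not part of the patch; the function name and the chosen expression are only for illustration, and it assumes compilation with -mavx512f):

#include <immintrin.h>

/* Illustrative sketch: per-bit select, dst = (a & b) | (~a & c).
   The truth-table immediate is composed from the new _MM_TERNLOG_*
   constants instead of hard-coding 0xCA; the final "& 0xFF" only strips
   the sign bits that the ~ operator spreads into the upper part of the
   int, so the value stays in the 0..255 range expected by the insn.  */
__m512i
select_bits (__m512i a, __m512i b, __m512i c)
{
  return _mm512_ternarylogic_epi64 (a, b, c,
                                    ((_MM_TERNLOG_A & _MM_TERNLOG_B)
                                     | (~_MM_TERNLOG_A & _MM_TERNLOG_C))
                                    & 0xFF);
}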
-#define _mm_ternarylogic_epi64(A, B, C, I) \ - ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)-1)) - -#define _mm_mask_ternarylogic_epi64(A, U, B, C, I) \ - ((__m128i) __builtin_ia32_pternlogq128_mask ((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U))) - -#define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) \ - ((__m128i) __builtin_ia32_pternlogq128_maskz ((__v2di)(__m128i)(A), \ - (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), (int)(I), (__mmask8)(U))) - -#define _mm_ternarylogic_epi32(A, B, C, I) \ - ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)-1)) - -#define _mm_mask_ternarylogic_epi32(A, U, B, C, I) \ - ((__m128i) __builtin_ia32_pternlogd128_mask ((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U))) - -#define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) \ - ((__m128i) __builtin_ia32_pternlogd128_maskz ((__v4si)(__m128i)(A), \ - (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), (int)(I), (__mmask8)(U))) +#define _mm256_ternarylogic_epi64(A, B, C, I) \ + ((__m256i) \ + __builtin_ia32_pternlogq256_mask ((__v4di) (__m256i) (A), \ + (__v4di) (__m256i) (B), \ + (__v4di) (__m256i) (C), \ + (unsigned char) (I), \ + (__mmask8) -1)) + +#define _mm256_mask_ternarylogic_epi64(A, U, B, C, I) \ + ((__m256i) \ + __builtin_ia32_pternlogq256_mask ((__v4di) (__m256i) (A), \ + (__v4di) (__m256i) (B), \ + (__v4di) (__m256i) (C), \ + (unsigned char) (I), \ + (__mmask8) (U))) + +#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, I) \ + ((__m256i) \ + __builtin_ia32_pternlogq256_maskz ((__v4di) (__m256i) (A), \ + (__v4di) (__m256i) (B), \ + (__v4di) (__m256i) (C), \ + (unsigned char) (I), \ + (__mmask8) (U))) + +#define _mm256_ternarylogic_epi32(A, B, C, I) \ + ((__m256i) \ + __builtin_ia32_pternlogd256_mask ((__v8si) (__m256i) (A), \ + (__v8si) (__m256i) (B), \ + (__v8si) (__m256i) (C), \ + (unsigned char) (I), \ + (__mmask8) -1)) + +#define _mm256_mask_ternarylogic_epi32(A, U, B, C, I) \ + ((__m256i) \ + __builtin_ia32_pternlogd256_mask ((__v8si) (__m256i) (A), \ + (__v8si) (__m256i) (B), \ + (__v8si) (__m256i) (C), \ + (unsigned char) (I), \ + (__mmask8) (U))) + +#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, I) \ + ((__m256i) \ + __builtin_ia32_pternlogd256_maskz ((__v8si) (__m256i) (A), \ + (__v8si) (__m256i) (B), \ + (__v8si) (__m256i) (C), \ + (unsigned char) (I), \ + (__mmask8) (U))) + +#define _mm_ternarylogic_epi64(A, B, C, I) \ + ((__m128i) \ + __builtin_ia32_pternlogq128_mask ((__v2di) (__m128i) (A), \ + (__v2di) (__m128i) (B), \ + (__v2di) (__m128i) (C), \ + (unsigned char) (I), \ + (__mmask8) -1)) + +#define _mm_mask_ternarylogic_epi64(A, U, B, C, I) \ + ((__m128i) \ + __builtin_ia32_pternlogq128_mask ((__v2di) (__m128i) (A), \ + (__v2di) (__m128i) (B), \ + (__v2di) (__m128i) (C), \ + (unsigned char) (I), \ + (__mmask8) (U))) + +#define _mm_maskz_ternarylogic_epi64(U, A, B, C, I) \ + ((__m128i) \ + __builtin_ia32_pternlogq128_maskz ((__v2di) (__m128i) (A), \ + (__v2di) (__m128i) (B), \ + (__v2di) (__m128i) (C), \ + (unsigned char) (I), \ + (__mmask8) (U))) + +#define _mm_ternarylogic_epi32(A, B, C, I) \ + ((__m128i) \ + __builtin_ia32_pternlogd128_mask ((__v4si) (__m128i) (A), \ + (__v4si) (__m128i) (B), \ + (__v4si) (__m128i) (C), \ + (unsigned char) (I), \ + (__mmask8) -1)) + +#define _mm_mask_ternarylogic_epi32(A, U, B, C, I) \ + 
 ((__m128i) \ + __builtin_ia32_pternlogd128_mask ((__v4si) (__m128i) (A), \ + (__v4si) (__m128i) (B), \ + (__v4si) (__m128i) (C), \ + (unsigned char) (I), \ + (__mmask8) (U))) + +#define _mm_maskz_ternarylogic_epi32(U, A, B, C, I) \ + ((__m128i) \ + __builtin_ia32_pternlogd128_maskz ((__v4si) (__m128i) (A), \ + (__v4si) (__m128i) (B), \ + (__v4si) (__m128i) (C), \ + (unsigned char) (I), \ + (__mmask8) (U))) #define _mm256_roundscale_ps(A, B) \ ((__m256) __builtin_ia32_rndscaleps_256_mask ((__v8sf)(__m256)(A), \ diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index ed61130..8b3dc2b 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -86,7 +86,6 @@ #define bit_AVX2 (1 << 5) #define bit_BMI2 (1 << 8) #define bit_RTM (1 << 11) -#define bit_MPX (1 << 14) #define bit_AVX512F (1 << 16) #define bit_AVX512DQ (1 << 17) #define bit_RDSEED (1 << 18) @@ -136,10 +135,6 @@ #define bit_AMX_TILE (1 << 24) #define bit_AMX_INT8 (1 << 25) -/* XFEATURE_ENABLED_MASK register bits (%eax == 0xd, %ecx == 0) */ -#define bit_BNDREGS (1 << 3) -#define bit_BNDCSR (1 << 4) - /* Extended State Enumeration Sub-leaf (%eax == 0xd, %ecx == 1) */ #define bit_XSAVEOPT (1 << 0) #define bit_XSAVEC (1 << 1) diff --git a/gcc/config/i386/gnu-property.cc b/gcc/config/i386/gnu-property.cc index f08984f..ea63c1e 100644 --- a/gcc/config/i386/gnu-property.cc +++ b/gcc/config/i386/gnu-property.cc @@ -23,6 +23,7 @@ along with GCC; see the file COPYING3. If not see #include "tm.h" #include "output.h" #include "linux-common.h" +#include "i386-protos.h" static void emit_gnu_property (unsigned int type, unsigned int data) @@ -60,7 +61,9 @@ file_end_indicate_exec_stack_and_gnu_property (void) { file_end_indicate_exec_stack (); - if (flag_cf_protection == CF_NONE && !ix86_needed) + if (flag_cf_protection == CF_NONE + && !ix86_needed + && !ix86_has_no_direct_extern_access) return; unsigned int feature_1 = 0; @@ -121,4 +124,9 @@ /* Generate GNU_PROPERTY_X86_ISA_1_NEEDED. */ if (isa_1) emit_gnu_property (0xc0008002, isa_1); + + if (ix86_has_no_direct_extern_access) + /* Emit a GNU_PROPERTY_1_NEEDED note with + GNU_PROPERTY_1_NEEDED_INDIRECT_EXTERN_ACCESS. */ + emit_gnu_property (0xb0008000, (1U << 0)); } diff --git a/gcc/config/i386/host-cygwin.cc b/gcc/config/i386/host-cygwin.cc index fcf6333..05ad3a8 100644 --- a/gcc/config/i386/host-cygwin.cc +++ b/gcc/config/i386/host-cygwin.cc @@ -51,18 +51,18 @@ static void * cygwin_gt_pch_get_address (size_t sz, int fd) { void *base; - off_t p = lseek(fd, 0, SEEK_CUR); + off_t p = lseek (fd, 0, SEEK_CUR); if (p == (off_t) -1) - fatal_error (input_location, "can%'t get position in PCH file: %m"); + fatal_error (input_location, "cannot get position in PCH file: %m"); /* Cygwin requires that the underlying file be at least as large as the requested mapping.
*/ if ((size_t) p < sz) - { - if ( ftruncate (fd, sz) == -1 ) - fatal_error (input_location, "can%'t extend PCH file: %m"); - } + { + if (ftruncate (fd, sz) == -1) + fatal_error (input_location, "cannot extend PCH file: %m"); + } base = mmap (NULL, sz, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0); @@ -71,8 +71,8 @@ cygwin_gt_pch_get_address (size_t sz, int fd) else munmap (base, sz); - if (lseek (fd, p, SEEK_SET) == (off_t) -1 ) - fatal_error (input_location, "can%'t set position in PCH file: %m"); + if (lseek (fd, p, SEEK_SET) == (off_t) -1) + fatal_error (input_location, "cannot set position in PCH file: %m"); return base; } diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index eb1930b..530f83f 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -1407,6 +1407,9 @@ ix86_split_idivmod (machine_mode mode, rtx operands[], rtx scratch, tmp0, tmp1, tmp2; rtx (*gen_divmod4_1) (rtx, rtx, rtx, rtx); + operands[2] = force_reg (mode, operands[2]); + operands[3] = force_reg (mode, operands[3]); + switch (mode) { case E_SImode: @@ -2150,7 +2153,7 @@ void ix86_expand_copysign (rtx operands[]) { machine_mode mode, vmode; - rtx dest, op0, op1, mask, op2, op3; + rtx dest, vdest, op0, op1, mask, op2, op3; mode = GET_MODE (operands[0]); @@ -2171,8 +2174,13 @@ ix86_expand_copysign (rtx operands[]) return; } - dest = lowpart_subreg (vmode, operands[0], mode); - op1 = lowpart_subreg (vmode, operands[2], mode); + dest = operands[0]; + vdest = lowpart_subreg (vmode, dest, mode); + if (vdest == NULL_RTX) + vdest = gen_reg_rtx (vmode); + else + dest = NULL_RTX; + op1 = lowpart_subreg (vmode, force_reg (mode, operands[2]), mode); mask = ix86_build_signbit_mask (vmode, 0, 0); if (CONST_DOUBLE_P (operands[1])) @@ -2181,7 +2189,9 @@ ix86_expand_copysign (rtx operands[]) /* Optimize for 0, simplify b = copy_signf (0.0f, a) to b = mask & a. */ if (op0 == CONST0_RTX (mode)) { - emit_move_insn (dest, gen_rtx_AND (vmode, mask, op1)); + emit_move_insn (vdest, gen_rtx_AND (vmode, mask, op1)); + if (dest) + emit_move_insn (dest, lowpart_subreg (mode, vdest, vmode)); return; } @@ -2190,7 +2200,7 @@ ix86_expand_copysign (rtx operands[]) op0 = force_reg (vmode, op0); } else - op0 = lowpart_subreg (vmode, operands[1], mode); + op0 = lowpart_subreg (vmode, force_reg (mode, operands[1]), mode); op2 = gen_reg_rtx (vmode); op3 = gen_reg_rtx (vmode); @@ -2198,7 +2208,9 @@ ix86_expand_copysign (rtx operands[]) gen_rtx_NOT (vmode, mask), op0)); emit_move_insn (op3, gen_rtx_AND (vmode, mask, op1)); - emit_move_insn (dest, gen_rtx_IOR (vmode, op2, op3)); + emit_move_insn (vdest, gen_rtx_IOR (vmode, op2, op3)); + if (dest) + emit_move_insn (dest, lowpart_subreg (mode, vdest, vmode)); } /* Expand an xorsign operation. 
*/ @@ -2207,7 +2219,7 @@ void ix86_expand_xorsign (rtx operands[]) { machine_mode mode, vmode; - rtx dest, op0, op1, mask, x, temp; + rtx dest, vdest, op0, op1, mask, x, temp; dest = operands[0]; op0 = operands[1]; @@ -2227,15 +2239,22 @@ ix86_expand_xorsign (rtx operands[]) temp = gen_reg_rtx (vmode); mask = ix86_build_signbit_mask (vmode, 0, 0); - op1 = lowpart_subreg (vmode, op1, mode); + op1 = lowpart_subreg (vmode, force_reg (mode, op1), mode); x = gen_rtx_AND (vmode, op1, mask); emit_insn (gen_rtx_SET (temp, x)); - op0 = lowpart_subreg (vmode, op0, mode); + op0 = lowpart_subreg (vmode, force_reg (mode, op0), mode); x = gen_rtx_XOR (vmode, temp, op0); - dest = lowpart_subreg (vmode, dest, mode); - emit_insn (gen_rtx_SET (dest, x)); + vdest = lowpart_subreg (vmode, dest, mode); + if (vdest == NULL_RTX) + vdest = gen_reg_rtx (vmode); + else + dest = NULL_RTX; + emit_insn (gen_rtx_SET (vdest, x)); + + if (dest) + emit_move_insn (dest, lowpart_subreg (mode, vdest, vmode)); } static rtx ix86_expand_compare (enum rtx_code code, rtx op0, rtx op1); @@ -14880,7 +14899,12 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode, dperm.one_operand_p = true; if (mode == V8HFmode) - tmp1 = lowpart_subreg (V8HFmode, force_reg (HFmode, val), HFmode); + { + tmp1 = force_reg (HFmode, val); + tmp2 = gen_reg_rtx (mode); + emit_insn (gen_vec_setv8hf_0 (tmp2, CONST0_RTX (mode), tmp1)); + tmp1 = gen_lowpart (mode, tmp2); + } else { /* Extend to SImode using a paradoxical SUBREG. */ @@ -23200,16 +23224,14 @@ void ix86_expand_atomic_fetch_op_loop (rtx target, rtx mem, rtx val, enum rtx_code code, bool after, bool doubleword) { - rtx old_reg, new_reg, old_mem, success, oldval, new_mem; - rtx_code_label *loop_label, *pause_label, *done_label; + rtx old_reg, new_reg, old_mem, success; machine_mode mode = GET_MODE (target); + rtx_code_label *loop_label = NULL; old_reg = gen_reg_rtx (mode); new_reg = old_reg; - loop_label = gen_label_rtx (); - pause_label = gen_label_rtx (); - done_label = gen_label_rtx (); old_mem = copy_to_reg (mem); + loop_label = gen_label_rtx (); emit_label (loop_label); emit_move_insn (old_reg, old_mem); @@ -23231,50 +23253,128 @@ void ix86_expand_atomic_fetch_op_loop (rtx target, rtx mem, rtx val, if (after) emit_move_insn (target, new_reg); - /* Load memory again inside loop. */ - new_mem = copy_to_reg (mem); - /* Compare mem value with expected value. */ + success = NULL_RTX; + ix86_expand_cmpxchg_loop (&success, old_mem, mem, old_reg, new_reg, + gen_int_mode (MEMMODEL_SYNC_SEQ_CST, + SImode), + doubleword, loop_label); +} + +/* Relax cmpxchg instruction, param loop_label indicates whether + the instruction should be relaxed with a pause loop. If not, + it will be relaxed to an atomic load + compare, and skip + cmpxchg instruction if mem != exp_input. */ + +void ix86_expand_cmpxchg_loop (rtx *ptarget_bool, rtx target_val, + rtx mem, rtx exp_input, rtx new_input, + rtx mem_model, bool doubleword, + rtx_code_label *loop_label) +{ + rtx_code_label *cmp_label = NULL; + rtx_code_label *done_label = NULL; + rtx target_bool = NULL_RTX, new_mem = NULL_RTX; + rtx (*gen) (rtx, rtx, rtx, rtx, rtx) = NULL; + rtx (*gendw) (rtx, rtx, rtx, rtx, rtx, rtx) = NULL; + machine_mode mode = GET_MODE (target_val), hmode = mode; + + if (*ptarget_bool == NULL) + target_bool = gen_reg_rtx (QImode); + else + target_bool = *ptarget_bool; + + cmp_label = gen_label_rtx (); + done_label = gen_label_rtx (); + + new_mem = gen_reg_rtx (mode); + /* Load memory first. 
*/ + expand_atomic_load (new_mem, mem, MEMMODEL_SEQ_CST); + + switch (mode) + { + case E_TImode: + gendw = gen_atomic_compare_and_swapti_doubleword; + hmode = DImode; + break; + case E_DImode: + if (doubleword) + { + gendw = gen_atomic_compare_and_swapdi_doubleword; + hmode = SImode; + } + else + gen = gen_atomic_compare_and_swapdi_1; + break; + case E_SImode: + gen = gen_atomic_compare_and_swapsi_1; + break; + case E_HImode: + gen = gen_atomic_compare_and_swaphi_1; + break; + case E_QImode: + gen = gen_atomic_compare_and_swapqi_1; + break; + default: + gcc_unreachable (); + } + + /* Compare mem value with expected value. */ if (doubleword) { - machine_mode half_mode = (mode == DImode)? SImode : DImode; - rtx low_new_mem = gen_lowpart (half_mode, new_mem); - rtx low_old_mem = gen_lowpart (half_mode, old_mem); - rtx high_new_mem = gen_highpart (half_mode, new_mem); - rtx high_old_mem = gen_highpart (half_mode, old_mem); - emit_cmp_and_jump_insns (low_new_mem, low_old_mem, NE, NULL_RTX, - half_mode, 1, pause_label, + rtx low_new_mem = gen_lowpart (hmode, new_mem); + rtx low_exp_input = gen_lowpart (hmode, exp_input); + rtx high_new_mem = gen_highpart (hmode, new_mem); + rtx high_exp_input = gen_highpart (hmode, exp_input); + emit_cmp_and_jump_insns (low_new_mem, low_exp_input, NE, NULL_RTX, + hmode, 1, cmp_label, profile_probability::guessed_never ()); - emit_cmp_and_jump_insns (high_new_mem, high_old_mem, NE, NULL_RTX, - half_mode, 1, pause_label, + emit_cmp_and_jump_insns (high_new_mem, high_exp_input, NE, NULL_RTX, + hmode, 1, cmp_label, profile_probability::guessed_never ()); } else - emit_cmp_and_jump_insns (new_mem, old_mem, NE, NULL_RTX, - GET_MODE (old_mem), 1, pause_label, + emit_cmp_and_jump_insns (new_mem, exp_input, NE, NULL_RTX, + GET_MODE (exp_input), 1, cmp_label, profile_probability::guessed_never ()); - success = NULL_RTX; - oldval = old_mem; - expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg, - new_reg, false, MEMMODEL_SYNC_SEQ_CST, - MEMMODEL_RELAXED); - if (oldval != old_mem) - emit_move_insn (old_mem, oldval); - - emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx, - GET_MODE (success), 1, loop_label, - profile_probability::guessed_never ()); - - emit_jump_insn (gen_jump (done_label)); - emit_barrier (); - - /* If mem is not expected, pause and loop back. */ - emit_label (pause_label); - emit_insn (gen_pause ()); - emit_jump_insn (gen_jump (loop_label)); - emit_barrier (); - emit_label (done_label); + /* Directly emits cmpxchg here. */ + if (doubleword) + emit_insn (gendw (target_val, mem, exp_input, + gen_lowpart (hmode, new_input), + gen_highpart (hmode, new_input), + mem_model)); + else + emit_insn (gen (target_val, mem, exp_input, new_input, mem_model)); + + if (!loop_label) + { + emit_jump_insn (gen_jump (done_label)); + emit_barrier (); + emit_label (cmp_label); + emit_move_insn (target_val, new_mem); + emit_label (done_label); + ix86_expand_setcc (target_bool, EQ, gen_rtx_REG (CCZmode, FLAGS_REG), + const0_rtx); + } + else + { + ix86_expand_setcc (target_bool, EQ, gen_rtx_REG (CCZmode, FLAGS_REG), + const0_rtx); + emit_cmp_and_jump_insns (target_bool, const0_rtx, EQ, const0_rtx, + GET_MODE (target_bool), 1, loop_label, + profile_probability::guessed_never ()); + emit_jump_insn (gen_jump (done_label)); + emit_barrier (); + + /* If mem is not expected, pause and loop back. 
*/ + emit_label (cmp_label); + emit_insn (gen_pause ()); + emit_jump_insn (gen_jump (loop_label)); + emit_barrier (); + emit_label (done_label); + } + + *ptarget_bool = target_bool; } #include "gt-i386-expand.h" diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index 715d9a1..8055393 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -1201,7 +1201,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], if (opt == N_OPTS) { error_at (loc, "attribute %qs argument %qs is unknown", - orig_p, attr_name); + attr_name, orig_p); ret = false; } @@ -3775,6 +3775,36 @@ ix86_handle_fentry_name (tree *node, tree name, tree args, return NULL_TREE; } +/* Handle a "nodirect_extern_access" attribute; arguments as in + struct attribute_spec.handler. */ + +static tree +handle_nodirect_extern_access_attribute (tree *pnode, tree name, + tree ARG_UNUSED (args), + int ARG_UNUSED (flags), + bool *no_add_attrs) +{ + tree node = *pnode; + + if (VAR_OR_FUNCTION_DECL_P (node)) + { + if ((!TREE_STATIC (node) && TREE_CODE (node) != FUNCTION_DECL + && !DECL_EXTERNAL (node)) || !TREE_PUBLIC (node)) + { + warning (OPT_Wattributes, + "%qE attribute have effect only on public objects", name); + *no_add_attrs = true; + } + } + else + { + warning (OPT_Wattributes, "%qE attribute ignored", name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + /* Table of valid machine attributes. */ const struct attribute_spec ix86_attribute_table[] = { @@ -3855,6 +3885,8 @@ const struct attribute_spec ix86_attribute_table[] = ix86_handle_fentry_name, NULL }, { "cf_check", 0, 0, true, false, false, false, ix86_handle_fndecl_attribute, NULL }, + { "nodirect_extern_access", 0, 0, true, false, false, false, + handle_nodirect_extern_access_attribute, NULL }, /* End element. */ { NULL, 0, 0, false, false, false, false, NULL, NULL } diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 6b3c951..d5e1125 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -79,7 +79,7 @@ extern bool ix86_expand_cmpstrn_or_cmpmem (rtx, rtx, rtx, rtx, rtx, bool); extern bool constant_address_p (rtx); extern bool legitimate_pic_operand_p (rtx); extern bool legitimate_pic_address_disp_p (rtx); -extern bool ix86_force_load_from_GOT_p (rtx); +extern bool ix86_force_load_from_GOT_p (rtx, bool = false); extern void print_reg (rtx, int, FILE*); extern void ix86_print_operand (FILE *, rtx, int); @@ -221,6 +221,8 @@ extern void ix86_split_mmx_punpck (rtx[], bool); extern void ix86_expand_avx_vzeroupper (void); extern void ix86_expand_atomic_fetch_op_loop (rtx, rtx, rtx, enum rtx_code, bool, bool); +extern void ix86_expand_cmpxchg_loop (rtx *, rtx, rtx, rtx, rtx, rtx, + bool, rtx_code_label *); #ifdef TREE_CODE extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int); @@ -401,3 +403,5 @@ extern rtl_opt_pass *make_pass_insert_endbr_and_patchable_area (gcc::context *); extern rtl_opt_pass *make_pass_remove_partial_avx_dependency (gcc::context *); + +extern bool ix86_has_no_direct_extern_access; diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index ad5a5ca..9521990 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -363,6 +363,9 @@ unsigned int ix86_default_incoming_stack_boundary; /* Alignment for incoming stack boundary in bits. */ unsigned int ix86_incoming_stack_boundary; +/* True if there is no direct access to extern symbols. 
*/ +bool ix86_has_no_direct_extern_access; + /* Calling abi specific va_list type nodes. */ tree sysv_va_list_type_node; tree ms_va_list_type_node; @@ -1873,10 +1876,14 @@ type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum, { machine_mode innermode = TYPE_MODE (TREE_TYPE (type)); - /* There are no XFmode vector modes. */ + /* There are no XFmode vector modes ... */ if (innermode == XFmode) return mode; + /* ... and no decimal float vector modes. */ + if (DECIMAL_FLOAT_MODE_P (innermode)) + return mode; + if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) mode = MIN_MODE_VECTOR_FLOAT; else @@ -7400,7 +7407,8 @@ find_drap_reg (void) register in such case. */ if (DECL_STATIC_CHAIN (decl) || cfun->machine->no_caller_saved_registers - || crtl->tail_call_emit) + || crtl->tail_call_emit + || crtl->calls_eh_return) return DI_REG; /* Reuse static chain register if it isn't used for parameter @@ -10513,13 +10521,17 @@ darwin_local_data_pic (rtx disp) } /* True if the function symbol operand X should be loaded from GOT. + If CALL_P is true, X is a call operand. + + NB: -mno-direct-extern-access doesn't force load from GOT for + call. NB: In 32-bit mode, only non-PIC is allowed in inline assembly statements, since a PIC register could not be available at the call site. */ bool -ix86_force_load_from_GOT_p (rtx x) +ix86_force_load_from_GOT_p (rtx x, bool call_p) { return ((TARGET_64BIT || (!flag_pic && HAVE_AS_IX86_GOT32X)) && !TARGET_PECOFF && !TARGET_MACHO @@ -10527,11 +10539,16 @@ ix86_force_load_from_GOT_p (rtx x) && ix86_cmodel != CM_LARGE && ix86_cmodel != CM_LARGE_PIC && GET_CODE (x) == SYMBOL_REF - && SYMBOL_REF_FUNCTION_P (x) - && (!flag_plt - || (SYMBOL_REF_DECL (x) - && lookup_attribute ("noplt", - DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))) + && ((!call_p + && (!ix86_direct_extern_access + || (SYMBOL_REF_DECL (x) + && lookup_attribute ("nodirect_extern_access", + DECL_ATTRIBUTES (SYMBOL_REF_DECL (x)))))) + || (SYMBOL_REF_FUNCTION_P (x) + && (!flag_plt + || (SYMBOL_REF_DECL (x) + && lookup_attribute ("noplt", + DECL_ATTRIBUTES (SYMBOL_REF_DECL (x))))))) && !SYMBOL_REF_LOCAL_P (x)); } @@ -10798,7 +10815,11 @@ legitimate_pic_address_disp_p (rtx disp) } else if (!SYMBOL_REF_FAR_ADDR_P (op0) && (SYMBOL_REF_LOCAL_P (op0) - || (HAVE_LD_PIE_COPYRELOC + || ((ix86_direct_extern_access + && !(SYMBOL_REF_DECL (op0) + && lookup_attribute ("nodirect_extern_access", + DECL_ATTRIBUTES (SYMBOL_REF_DECL (op0))))) + && HAVE_LD_PIE_COPYRELOC && flag_pie && !SYMBOL_REF_WEAK (op0) && !SYMBOL_REF_FUNCTION_P (op0))) @@ -13754,7 +13775,7 @@ ix86_print_operand (FILE *file, rtx x, int code) if (code == 'P') { - if (ix86_force_load_from_GOT_p (x)) + if (ix86_force_load_from_GOT_p (x, true)) { /* For inline assembly statement, load function address from GOT with 'P' operand modifier to avoid PLT. */ @@ -14356,19 +14377,22 @@ ix86_check_avx_upper_register (const_rtx exp) static void ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data) - { - if (ix86_check_avx_upper_register (dest)) +{ + if (ix86_check_avx_upper_register (dest)) { bool *used = (bool *) data; *used = true; } - } +} /* Return needed mode for entity in optimize_mode_switching pass. 
*/ static int ix86_avx_u128_mode_needed (rtx_insn *insn) { + if (DEBUG_INSN_P (insn)) + return AVX_U128_ANY; + if (CALL_P (insn)) { rtx link; @@ -14408,6 +14432,8 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) return AVX_U128_CLEAN; } + subrtx_iterator::array_type array; + rtx set = single_set (insn); if (set) { @@ -14422,74 +14448,11 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) else return AVX_U128_ANY; } - else if (ix86_check_avx_upper_register (src)) + else { - /* This is an YMM/ZMM store. Check for the source operand - of SRC DEFs in the same basic block before INSN. */ - basic_block bb = BLOCK_FOR_INSN (insn); - rtx_insn *end = BB_END (bb); - - /* Return AVX_U128_DIRTY if there is no DEF in the same basic - block. */ - int status = AVX_U128_DIRTY; - - for (df_ref def = DF_REG_DEF_CHAIN (REGNO (src)); - def; def = DF_REF_NEXT_REG (def)) - if (DF_REF_BB (def) == bb) - { - /* Ignore DEF from different basic blocks. */ - rtx_insn *def_insn = DF_REF_INSN (def); - - /* Check if DEF_INSN is before INSN. */ - rtx_insn *next; - for (next = NEXT_INSN (def_insn); - next != nullptr && next != end && next != insn; - next = NEXT_INSN (next)) - ; - - /* Skip if DEF_INSN isn't before INSN. */ - if (next != insn) - continue; - - /* Return AVX_U128_DIRTY if the source operand of - DEF_INSN isn't constant zero. */ - - if (CALL_P (def_insn)) - { - bool avx_upper_reg_found = false; - note_stores (def_insn, ix86_check_avx_upper_stores, - &avx_upper_reg_found); - - /* Return AVX_U128_DIRTY if call returns AVX. */ - if (avx_upper_reg_found) - return AVX_U128_DIRTY; - - continue; - } - - set = single_set (def_insn); - if (!set) - return AVX_U128_DIRTY; - - dest = SET_DEST (set); - - /* Skip if DEF_INSN is not an AVX load. */ - if (ix86_check_avx_upper_register (dest)) - { - src = SET_SRC (set); - /* Return AVX_U128_DIRTY if the source operand isn't - constant zero. */ - if (standard_sse_constant_p (src, GET_MODE (dest)) - != 1) - return AVX_U128_DIRTY; - } - - /* We get here only if all AVX loads are from constant - zero. */ - status = AVX_U128_ANY; - } - - return status; + FOR_EACH_SUBRTX (iter, array, src, NONCONST) + if (ix86_check_avx_upper_register (*iter)) + return AVX_U128_DIRTY; } /* This isn't YMM/ZMM load/store. */ @@ -14500,7 +14463,6 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) Hardware changes state only when a 256bit register is written to, but we need to prevent the compiler from moving optimal insertion point above eventual read from 256bit or 512 bit register. */ - subrtx_iterator::array_type array; FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) if (ix86_check_avx_upper_register (*iter)) return AVX_U128_DIRTY; @@ -18641,6 +18603,8 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) do_shift: gcc_assert (n_args >= 2); + if (!gimple_call_lhs (stmt)) + break; arg0 = gimple_call_arg (stmt, 0); arg1 = gimple_call_arg (stmt, 1); elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); @@ -22520,10 +22484,10 @@ int asm_preferred_eh_data_format (int code, int global) { /* PE-COFF is effectively always -fPIC because of the .reloc section. 
*/ - if (flag_pic || TARGET_PECOFF) + if (flag_pic || TARGET_PECOFF || !ix86_direct_extern_access) { int type = DW_EH_PE_sdata8; - if (!TARGET_64BIT + if (ptr_mode == SImode || ix86_cmodel == CM_SMALL_PIC || (ix86_cmodel == CM_MEDIUM_PIC && (global || code))) type = DW_EH_PE_sdata4; @@ -23018,8 +22982,8 @@ class ix86_vector_costs : public vector_costs using vector_costs::vector_costs; unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind, - stmt_vec_info stmt_info, tree vectype, - int misalign, + stmt_vec_info stmt_info, slp_tree node, + tree vectype, int misalign, vect_cost_model_location where) override; }; @@ -23033,8 +22997,9 @@ ix86_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar) unsigned ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, - stmt_vec_info stmt_info, tree vectype, - int misalign, vect_cost_model_location where) + stmt_vec_info stmt_info, slp_tree node, + tree vectype, int misalign, + vect_cost_model_location where) { unsigned retval = 0; bool scalar_p @@ -23195,6 +23160,49 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign); stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1); } + else if (kind == vec_construct + && node + && SLP_TREE_DEF_TYPE (node) == vect_external_def + && INTEGRAL_TYPE_P (TREE_TYPE (vectype))) + { + stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign); + unsigned i; + tree op; + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op) + if (TREE_CODE (op) == SSA_NAME) + TREE_VISITED (op) = 0; + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op) + { + if (TREE_CODE (op) != SSA_NAME + || TREE_VISITED (op)) + continue; + TREE_VISITED (op) = 1; + gimple *def = SSA_NAME_DEF_STMT (op); + tree tem; + if (is_gimple_assign (def) + && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)) + && ((tem = gimple_assign_rhs1 (def)), true) + && TREE_CODE (tem) == SSA_NAME + /* A sign-change expands to nothing. */ + && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (def)), + TREE_TYPE (tem))) + def = SSA_NAME_DEF_STMT (tem); + /* When the component is loaded from memory we can directly + move it to a vector register, otherwise we have to go + via a GPR or via vpinsr which involves similar cost. + Likewise with a BIT_FIELD_REF extracting from a vector + register we can hope to avoid using a GPR. 
*/ + if (!is_gimple_assign (def) + || (!gimple_assign_load_p (def) + && (gimple_assign_rhs_code (def) != BIT_FIELD_REF + || !VECTOR_TYPE_P (TREE_TYPE + (TREE_OPERAND (gimple_assign_rhs1 (def), 0)))))) + stmt_cost += ix86_cost->sse_to_integer; + } + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op) + if (TREE_CODE (op) == SSA_NAME) + TREE_VISITED (op) = 0; + } if (stmt_cost == -1) stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign); @@ -23613,10 +23621,28 @@ ix86_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) static bool ix86_binds_local_p (const_tree exp) { - return default_binds_local_p_3 (exp, flag_shlib != 0, true, true, - (!flag_pic - || (TARGET_64BIT - && HAVE_LD_PIE_COPYRELOC != 0))); + bool direct_extern_access + = (ix86_direct_extern_access + && !(VAR_OR_FUNCTION_DECL_P (exp) + && lookup_attribute ("nodirect_extern_access", + DECL_ATTRIBUTES (exp)))); + if (!direct_extern_access) + ix86_has_no_direct_extern_access = true; + return default_binds_local_p_3 (exp, flag_shlib != 0, true, + direct_extern_access, + (direct_extern_access + && (!flag_pic + || (TARGET_64BIT + && HAVE_LD_PIE_COPYRELOC != 0)))); +} + +/* If flag_pic or ix86_direct_extern_access is false, then neither + local nor global relocs should be placed in readonly memory. */ + +static int +ix86_reloc_rw_mask (void) +{ + return (flag_pic || !ix86_direct_extern_access) ? 3 : 0; } #endif @@ -23760,24 +23786,7 @@ ix86_optab_supported_p (int op, machine_mode mode1, machine_mode, rtx ix86_gen_scratch_sse_rtx (machine_mode mode) { - if (TARGET_SSE && !lra_in_progress) - { - unsigned int regno; - if (TARGET_64BIT) - { - /* In 64-bit mode, use XMM31 to avoid vzeroupper and always - use XMM31 for CSE. */ - if (ix86_hard_regno_mode_ok (LAST_EXT_REX_SSE_REG, mode)) - regno = LAST_EXT_REX_SSE_REG; - else - regno = LAST_REX_SSE_REG; - } - else - regno = LAST_SSE_REG; - return gen_rtx_REG (mode, regno); - } - else - return gen_reg_rtx (mode); + return gen_reg_rtx (mode); } /* Address space support. @@ -24681,6 +24690,11 @@ ix86_libgcc_floating_mode_supported_p #undef TARGET_IFUNC_REF_LOCAL_OK #define TARGET_IFUNC_REF_LOCAL_OK hook_bool_void_true +#if !TARGET_MACHO && !TARGET_DLLIMPORT_DECL_ATTRIBUTES +# undef TARGET_ASM_RELOC_RW_MASK +# define TARGET_ASM_RELOC_RW_MASK ix86_reloc_rw_mask +#endif + static bool ix86_libc_has_fast_function (int fcode ATTRIBUTE_UNUSED) { #ifdef OPTION_GLIBC diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index f41e090..b37d4a9 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2414,6 +2414,7 @@ enum ix86_stack_slot SLOT_CW_FLOOR, SLOT_CW_CEIL, SLOT_STV_TEMP, + SLOT_FLOATxFDI_387, MAX_386_STACK_LOCALS }; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 74da0d4..d15170e 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -1079,11 +1079,11 @@ (HI "TARGET_HIMODE_MATH") SI]) -;; Math-dependant integer modes with DImode (enabled for 32bit with STV). -(define_mode_iterator SWIM1248s +;; Math-dependant integer modes with DImode. +(define_mode_iterator SWIM1248x [(QI "TARGET_QIMODE_MATH") (HI "TARGET_HIMODE_MATH") - SI (DI "TARGET_64BIT || (TARGET_STV && TARGET_SSE2)")]) + SI DI]) ;; Math-dependant single word integer modes without QImode. (define_mode_iterator SWIM248 [(HI "TARGET_HIMODE_MATH") @@ -4838,8 +4838,8 @@ movddup is available. 
*/ if (REG_P (operands[1])) { - if (!TARGET_SSE3 - && REGNO (operands[0]) != REGNO (operands[1])) + if ((!TARGET_SSE3 && REGNO (operands[0]) != REGNO (operands[1])) + || (EXT_REX_SSE_REG_P (operands[1]) && !TARGET_AVX512VL)) { rtx tmp = lowpart_subreg (DFmode, operands[0], SFmode); emit_move_insn (tmp, operands[1]); @@ -5412,9 +5412,8 @@ && can_create_pseudo_p ()" [(const_int 0)] { - emit_insn (gen_floatdi<mode>2_i387_with_xmm - (operands[0], operands[1], - assign_386_stack_local (DImode, SLOT_TEMP))); + rtx s = assign_386_stack_local (DImode, SLOT_FLOATxFDI_387); + emit_insn (gen_floatdi<mode>2_i387_with_xmm (operands[0], operands[1], s)); DONE; }) @@ -9694,9 +9693,9 @@ ;; it should be done with splitters. (define_expand "and<mode>3" - [(set (match_operand:SWIM1248s 0 "nonimmediate_operand") - (and:SWIM1248s (match_operand:SWIM1248s 1 "nonimmediate_operand") - (match_operand:SWIM1248s 2 "<general_szext_operand>")))] + [(set (match_operand:SWIM1248x 0 "nonimmediate_operand") + (and:SWIM1248x (match_operand:SWIM1248x 1 "nonimmediate_operand") + (match_operand:SWIM1248x 2 "<general_szext_operand>")))] "" { machine_mode mode = <MODE>mode; @@ -9734,7 +9733,7 @@ (match_operand:DI 1 "nonimmediate_operand") (match_operand:DI 2 "x86_64_szext_general_operand"))) (clobber (reg:CC FLAGS_REG))] - "!TARGET_64BIT && TARGET_STV && TARGET_SSE2 + "!TARGET_64BIT && ix86_binary_operator_ok (AND, DImode, operands) && ix86_pre_reload_split ()" "#" @@ -10338,9 +10337,9 @@ ;; If this is considered useful, it should be done with splitters. (define_expand "<code><mode>3" - [(set (match_operand:SWIM1248s 0 "nonimmediate_operand") - (any_or:SWIM1248s (match_operand:SWIM1248s 1 "nonimmediate_operand") - (match_operand:SWIM1248s 2 "<general_operand>")))] + [(set (match_operand:SWIM1248x 0 "nonimmediate_operand") + (any_or:SWIM1248x (match_operand:SWIM1248x 1 "nonimmediate_operand") + (match_operand:SWIM1248x 2 "<general_operand>")))] "" "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;") @@ -10350,7 +10349,7 @@ (match_operand:DI 1 "nonimmediate_operand") (match_operand:DI 2 "x86_64_szext_general_operand"))) (clobber (reg:CC FLAGS_REG))] - "!TARGET_64BIT && TARGET_STV && TARGET_SSE2 + "!TARGET_64BIT && ix86_binary_operator_ok (<CODE>, DImode, operands) && ix86_pre_reload_split ()" "#" @@ -11012,6 +11011,19 @@ [(set_attr "type" "negnot") (set_attr "mode" "<MODE>")]) +;; Optimize *negsi_1 followed by *cmpsi_ccno_1 (PR target/91384) +(define_peephole2 + [(set (match_operand:SWI 0 "general_reg_operand") + (match_operand:SWI 1 "general_reg_operand")) + (parallel [(set (match_dup 0) (neg:SWI (match_dup 0))) + (clobber (reg:CC FLAGS_REG))]) + (set (reg:CCZ FLAGS_REG) (compare:CCZ (match_dup 1) (const_int 0)))] + "" + [(set (match_dup 0) (match_dup 1)) + (parallel [(set (reg:CCZ FLAGS_REG) + (compare:CCZ (neg:SWI (match_dup 0)) (const_int 0))) + (set (match_dup 0) (neg:SWI (match_dup 0)))])]) + ;; Special expand pattern to handle integer mode abs (define_expand "abs<mode>2" @@ -11415,15 +11427,15 @@ ;; One complement instructions (define_expand "one_cmpl<mode>2" - [(set (match_operand:SWIM1248s 0 "nonimmediate_operand") - (not:SWIM1248s (match_operand:SWIM1248s 1 "nonimmediate_operand")))] + [(set (match_operand:SWIM1248x 0 "nonimmediate_operand") + (not:SWIM1248x (match_operand:SWIM1248x 1 "nonimmediate_operand")))] "" "ix86_expand_unary_operator (NOT, <MODE>mode, operands); DONE;") (define_insn_and_split "*one_cmpldi2_doubleword" [(set (match_operand:DI 0 "nonimmediate_operand") (not:DI (match_operand:DI 1 
"nonimmediate_operand")))] - "!TARGET_64BIT && TARGET_STV && TARGET_SSE2 + "!TARGET_64BIT && ix86_unary_operator_ok (NOT, DImode, operands) && ix86_pre_reload_split ()" "#" diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index eb829d1..d8e8656 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -1206,3 +1206,7 @@ Support MWAIT and MONITOR built-in functions and code generation. mavx512fp16 Target Mask(ISA2_AVX512FP16) Var(ix86_isa_flags2) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F and AVX512FP16 built-in functions and code generation. + +mdirect-extern-access +Target Var(ix86_direct_extern_access) Init(1) +Do not use GOT to access external symbols. diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index d8cb7b6..3066ea3 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -9223,7 +9223,7 @@ (define_expand "vec_unpacks_float_hi_v8si" [(set (match_dup 2) (vec_select:V4SI - (match_operand:V8SI 1 "vector_operand") + (match_operand:V8SI 1 "register_operand") (parallel [(const_int 4) (const_int 5) (const_int 6) (const_int 7)]))) (set (match_operand:V4DF 0 "register_operand") @@ -17025,8 +17025,8 @@ "@ p<logic>\t{%2, %0|%0, %2} vp<logic>\t{%2, %1, %0|%0, %1, %2} - vp<logic>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,avx,avx") + vp<logic>d\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "isa" "noavx,avx,avx512vl") (set_attr "prefix" "orig,vex,evex") (set_attr "prefix_data16" "1,*,*") (set_attr "type" "sselog") @@ -24153,8 +24153,9 @@ negate = true; } par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16)); + tmp = lowpart_subreg (QImode, operands[2], SImode); for (i = 0; i < 16; i++) - XVECEXP (par, 0, i) = operands[2]; + XVECEXP (par, 0, i) = tmp; tmp = gen_reg_rtx (V16QImode); emit_insn (gen_vec_initv16qiqi (tmp, par)); diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md index 36417c5..820e9ca 100644 --- a/gcc/config/i386/sync.md +++ b/gcc/config/i386/sync.md @@ -373,11 +373,20 @@ (match_operand:SI 7 "const_int_operand")] ;; failure model "TARGET_CMPXCHG" { - emit_insn - (gen_atomic_compare_and_swap<mode>_1 - (operands[1], operands[2], operands[3], operands[4], operands[6])); - ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG), - const0_rtx); + if (TARGET_RELAX_CMPXCHG_LOOP) + { + ix86_expand_cmpxchg_loop (&operands[0], operands[1], operands[2], + operands[3], operands[4], operands[6], + false, NULL); + } + else + { + emit_insn + (gen_atomic_compare_and_swap<mode>_1 + (operands[1], operands[2], operands[3], operands[4], operands[6])); + ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG), + const0_rtx); + } DONE; }) @@ -397,25 +406,35 @@ (match_operand:SI 7 "const_int_operand")] ;; failure model "TARGET_CMPXCHG" { - if (<MODE>mode == DImode && TARGET_64BIT) - { - emit_insn - (gen_atomic_compare_and_swapdi_1 - (operands[1], operands[2], operands[3], operands[4], operands[6])); - } + int doubleword = !(<MODE>mode == DImode && TARGET_64BIT); + if (TARGET_RELAX_CMPXCHG_LOOP) + { + ix86_expand_cmpxchg_loop (&operands[0], operands[1], operands[2], + operands[3], operands[4], operands[6], + doubleword, NULL); + } else - { - machine_mode hmode = <CASHMODE>mode; - - emit_insn - (gen_atomic_compare_and_swap<mode>_doubleword - (operands[1], operands[2], operands[3], - gen_lowpart (hmode, operands[4]), gen_highpart (hmode, operands[4]), - operands[6])); - } - - ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG), - const0_rtx); + { + if (!doubleword) + 
{ + emit_insn + (gen_atomic_compare_and_swapdi_1 + (operands[1], operands[2], operands[3], operands[4], operands[6])); + } + else + { + machine_mode hmode = <CASHMODE>mode; + + emit_insn + (gen_atomic_compare_and_swap<mode>_doubleword + (operands[1], operands[2], operands[3], + gen_lowpart (hmode, operands[4]), gen_highpart (hmode, operands[4]), + operands[6])); + } + + ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG), + const0_rtx); + } DONE; }) diff --git a/gcc/config/nvptx/gen-copyright.sh b/gcc/config/nvptx/gen-copyright.sh new file mode 100644 index 0000000..79f4899 --- /dev/null +++ b/gcc/config/nvptx/gen-copyright.sh @@ -0,0 +1,82 @@ +#!/bin/sh + +# Copyright (C) 2022 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +style="$1" +case $style in + opt) + ;; + c) + first=true + ;; + *) + echo "Unknown style: \"$style\"" + exit 1 + ;; +esac + +( cat <<EOF +Copyright (C) 2022 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +<http://www.gnu.org/licenses/>. +EOF +) | while read line; do + case $style in + opt) + if [ "$line" = "" ]; then + echo ";" + else + echo "; $line" + fi + ;; + c) + if $first; then + echo "/* $line" + first=false + else + if [ "$line" = "" ]; then + echo + else + echo " $line" + fi + fi + ;; + esac +done + + +case $style in + c) + echo "*/" + ;; +esac diff --git a/gcc/config/nvptx/gen-h.sh b/gcc/config/nvptx/gen-h.sh new file mode 100644 index 0000000..605f874 --- /dev/null +++ b/gcc/config/nvptx/gen-h.sh @@ -0,0 +1,44 @@ +#!/bin/sh + +# Copyright (C) 2022 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. 
+ +nvptx_sm_def="$1/nvptx-sm.def" +gen_copyright_sh="$1/gen-copyright.sh" + +sms=$(grep ^NVPTX_SM $nvptx_sm_def | sed 's/.*(//;s/,.*//') + +cat <<EOF +/* -*- buffer-read-only: t -*- + Generated automatically by gen-h.sh from nvptx-sm.def. +*/ +EOF + +# Separator. +echo + +. $gen_copyright_sh c + +# Separator. +echo + +for sm in $sms; do + cat <<EOF +#define TARGET_SM$sm (ptx_isa_option >= PTX_ISA_SM$sm) +EOF +done diff --git a/gcc/config/nvptx/gen-omp-device-properties.sh b/gcc/config/nvptx/gen-omp-device-properties.sh new file mode 100644 index 0000000..175092c --- /dev/null +++ b/gcc/config/nvptx/gen-omp-device-properties.sh @@ -0,0 +1,33 @@ +#!/bin/sh + +# Copyright (C) 2022 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +nvptx_sm_def="$1/nvptx-sm.def" + +sms=$(grep ^NVPTX_SM $nvptx_sm_def | sed 's/.*(//;s/,.*//') + +echo kind: gpu +echo arch: nvptx + +isa="" +for sm in $sms; do + isa="$isa sm_$sm" +done + +echo isa: $isa diff --git a/gcc/config/nvptx/gen-opt.sh b/gcc/config/nvptx/gen-opt.sh new file mode 100644 index 0000000..5248ed2 --- /dev/null +++ b/gcc/config/nvptx/gen-opt.sh @@ -0,0 +1,66 @@ +#!/bin/sh + +# Copyright (C) 2022 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +nvptx_sm_def="$1/nvptx-sm.def" +gen_copyright_sh="$1/gen-copyright.sh" + +sms=$(grep ^NVPTX_SM $nvptx_sm_def | sed 's/.*(//;s/,.*//') + +last= +for sm in $sms; do + last="$sm" +done + +cat <<EOF +; -*- buffer-read-only: t -*- +; Generated automatically by gen-opt.sh from nvptx-sm.def. +EOF + +# Separator. +echo + +. $gen_copyright_sh opt + +# Separator. +echo + +cat <<EOF +Enum +Name(ptx_isa) Type(int) +Known PTX ISA versions (for use with the -misa= option): +EOF + +# Separator. +echo + +for sm in $sms; do + cat <<EOF +EnumValue +Enum(ptx_isa) String(sm_$sm) Value(PTX_ISA_SM$sm) +EOF + + if [ "$sm" == "$last" ]; then + # Don't end with trailing empty line. + continue + fi + + # Separator. 
+ echo +done diff --git a/gcc/config/nvptx/nvptx-c.cc b/gcc/config/nvptx/nvptx-c.cc index d68b991..02f7562 100644 --- a/gcc/config/nvptx/nvptx-c.cc +++ b/gcc/config/nvptx/nvptx-c.cc @@ -39,15 +39,15 @@ nvptx_cpu_cpp_builtins (void) cpp_define (parse_in, "__nvptx_softstack__"); if (TARGET_UNIFORM_SIMT) cpp_define (parse_in,"__nvptx_unisimt__"); - if (TARGET_SM80) - cpp_define (parse_in, "__PTX_SM__=800"); - else if (TARGET_SM75) - cpp_define (parse_in, "__PTX_SM__=750"); - else if (TARGET_SM53) - cpp_define (parse_in, "__PTX_SM__=530"); - else if (TARGET_SM35) - cpp_define (parse_in, "__PTX_SM__=350"); - else - cpp_define (parse_in,"__PTX_SM__=300"); + + const char *ptx_sm = NULL; +#define NVPTX_SM(XX, SEP) \ + { \ + if (TARGET_SM ## XX) \ + ptx_sm = "__PTX_SM__=" #XX "0"; \ + } +#include "nvptx-sm.def" +#undef NVPTX_SM + cpp_define (parse_in, ptx_sm); } diff --git a/gcc/config/nvptx/nvptx-gen.h b/gcc/config/nvptx/nvptx-gen.h new file mode 100644 index 0000000..1d6f0db --- /dev/null +++ b/gcc/config/nvptx/nvptx-gen.h @@ -0,0 +1,29 @@ +/* -*- buffer-read-only: t -*- + Generated automatically by gen-h.sh from nvptx-sm.def. +*/ + +/* Copyright (C) 2022 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it under + the terms of the GNU General Public License as published by the Free + Software Foundation; either version 3, or (at your option) any later + version. + + GCC is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. +*/ + +#define TARGET_SM30 (ptx_isa_option >= PTX_ISA_SM30) +#define TARGET_SM35 (ptx_isa_option >= PTX_ISA_SM35) +#define TARGET_SM53 (ptx_isa_option >= PTX_ISA_SM53) +#define TARGET_SM70 (ptx_isa_option >= PTX_ISA_SM70) +#define TARGET_SM75 (ptx_isa_option >= PTX_ISA_SM75) +#define TARGET_SM80 (ptx_isa_option >= PTX_ISA_SM80) diff --git a/gcc/config/nvptx/nvptx-gen.opt b/gcc/config/nvptx/nvptx-gen.opt new file mode 100644 index 0000000..b6d433e --- /dev/null +++ b/gcc/config/nvptx/nvptx-gen.opt @@ -0,0 +1,42 @@ +; -*- buffer-read-only: t -*- +; Generated automatically by gen-opt.sh from nvptx-sm.def. + +; Copyright (C) 2022 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; <http://www.gnu.org/licenses/>. 
+ +Enum +Name(ptx_isa) Type(int) +Known PTX ISA versions (for use with the -misa= option): + +EnumValue +Enum(ptx_isa) String(sm_30) Value(PTX_ISA_SM30) + +EnumValue +Enum(ptx_isa) String(sm_35) Value(PTX_ISA_SM35) + +EnumValue +Enum(ptx_isa) String(sm_53) Value(PTX_ISA_SM53) + +EnumValue +Enum(ptx_isa) String(sm_70) Value(PTX_ISA_SM70) + +EnumValue +Enum(ptx_isa) String(sm_75) Value(PTX_ISA_SM75) + +EnumValue +Enum(ptx_isa) String(sm_80) Value(PTX_ISA_SM80) diff --git a/gcc/config/nvptx/nvptx-opts.h b/gcc/config/nvptx/nvptx-opts.h index daae72f..86b433c 100644 --- a/gcc/config/nvptx/nvptx-opts.h +++ b/gcc/config/nvptx/nvptx-opts.h @@ -22,16 +22,20 @@ enum ptx_isa { - PTX_ISA_SM30, - PTX_ISA_SM35, - PTX_ISA_SM53, - PTX_ISA_SM75, - PTX_ISA_SM80 +#define NVPTX_SM(XX, SEP) PTX_ISA_SM ## XX SEP +#define NVPTX_SM_SEP , +#include "nvptx-sm.def" +#undef NVPTX_SM_SEP +#undef NVPTX_SM }; enum ptx_version { + PTX_VERSION_default, + PTX_VERSION_3_0, PTX_VERSION_3_1, + PTX_VERSION_4_2, + PTX_VERSION_6_0, PTX_VERSION_6_3, PTX_VERSION_7_0 }; diff --git a/gcc/config/nvptx/nvptx-protos.h b/gcc/config/nvptx/nvptx-protos.h index 3d6ad14..0bf9af4 100644 --- a/gcc/config/nvptx/nvptx-protos.h +++ b/gcc/config/nvptx/nvptx-protos.h @@ -59,5 +59,7 @@ extern const char *nvptx_output_simt_enter (rtx, rtx, rtx); extern const char *nvptx_output_simt_exit (rtx); extern const char *nvptx_output_red_partition (rtx, rtx); extern const char *nvptx_output_atomic_insn (const char *, rtx *, int, int); +extern bool nvptx_mem_local_p (rtx); +extern bool nvptx_mem_maybe_shared_p (const_rtx); #endif #endif diff --git a/gcc/config/nvptx/nvptx-sm.def b/gcc/config/nvptx/nvptx-sm.def new file mode 100644 index 0000000..c552eb0 --- /dev/null +++ b/gcc/config/nvptx/nvptx-sm.def @@ -0,0 +1,30 @@ +/* Copyright (C) 2022 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#ifndef NVPTX_SM_SEP +#define NVPTX_SM_SEP +#endif + +NVPTX_SM (30, NVPTX_SM_SEP) +NVPTX_SM (35, NVPTX_SM_SEP) +NVPTX_SM (53, NVPTX_SM_SEP) +NVPTX_SM (70, NVPTX_SM_SEP) +NVPTX_SM (75, NVPTX_SM_SEP) +NVPTX_SM (80,) + +#undef NVPTX_SM_SEP diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index db6a405..6ca99a6 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -76,6 +76,7 @@ #include "intl.h" #include "opts.h" #include "tree-pretty-print.h" +#include "rtl-iter.h" /* This file should be included last. 
*/ #include "target-def.h" @@ -205,6 +206,105 @@ diagnose_openacc_conflict (bool optval, const char *optname) error ("option %s is not supported together with %<-fopenacc%>", optname); } +static enum ptx_version +first_ptx_version_supporting_sm (enum ptx_isa sm) +{ + switch (sm) + { + case PTX_ISA_SM30: + return PTX_VERSION_3_0; + case PTX_ISA_SM35: + return PTX_VERSION_3_1; + case PTX_ISA_SM53: + return PTX_VERSION_4_2; + case PTX_ISA_SM70: + return PTX_VERSION_6_0; + case PTX_ISA_SM75: + return PTX_VERSION_6_3; + case PTX_ISA_SM80: + return PTX_VERSION_7_0; + default: + gcc_unreachable (); + } +} + +static enum ptx_version +default_ptx_version_option (void) +{ + enum ptx_version first + = first_ptx_version_supporting_sm ((enum ptx_isa) ptx_isa_option); + + /* Pick a version that supports the sm. */ + enum ptx_version res = first; + + /* Pick at least 3.1. This has been the smallest version historically. */ + res = MAX (res, PTX_VERSION_3_1); + + /* Pick at least 6.0, to enable using bar.warp.sync to have a way to force + warp convergence. */ + res = MAX (res, PTX_VERSION_6_0); + + /* Verify that we pick a version that supports the sm. */ + gcc_assert (first <= res); + return res; +} + +static const char * +ptx_version_to_string (enum ptx_version v) +{ + switch (v) + { + case PTX_VERSION_3_0: + return "3.0"; + case PTX_VERSION_3_1: + return "3.1"; + case PTX_VERSION_4_2: + return "4.2"; + case PTX_VERSION_6_0: + return "6.0"; + case PTX_VERSION_6_3: + return "6.3"; + case PTX_VERSION_7_0: + return "7.0"; + default: + gcc_unreachable (); + } +} + +static const char * +sm_version_to_string (enum ptx_isa sm) +{ + switch (sm) + { +#define NVPTX_SM(XX, SEP) \ + case PTX_ISA_SM ## XX: \ + return #XX; +#include "nvptx-sm.def" +#undef NVPTX_SM + default: + gcc_unreachable (); + } +} + +static void +handle_ptx_version_option (void) +{ + if (!OPTION_SET_P (ptx_version_option) + || ptx_version_option == PTX_VERSION_default) + { + ptx_version_option = default_ptx_version_option (); + return; + } + + enum ptx_version first + = first_ptx_version_supporting_sm ((enum ptx_isa) ptx_isa_option); + + if (ptx_version_option < first) + error ("PTX version (-mptx) needs to be at least %s to support selected" + " -misa (sm_%s)", ptx_version_to_string (first), + sm_version_to_string ((enum ptx_isa)ptx_isa_option)); +} + /* Implement TARGET_OPTION_OVERRIDE. */ static void @@ -212,6 +312,8 @@ nvptx_option_override (void) { init_machine_status = nvptx_init_machine_status; + handle_ptx_version_option (); + /* Set toplevel_reorder, unless explicitly disabled. We need reordering so that we emit necessary assembler decls of undeclared variables. 
*/ @@ -938,10 +1040,13 @@ write_fn_proto_1 (std::stringstream &s, bool is_defn, if (DECL_STATIC_CHAIN (decl)) argno = write_arg_type (s, -1, argno, ptr_type_node, true); - if (!argno && strcmp (name, "main") == 0) + if (argno < 2 && strcmp (name, "main") == 0) { - argno = write_arg_type (s, -1, argno, integer_type_node, true); - argno = write_arg_type (s, -1, argno, ptr_type_node, true); + if (argno == 0) + argno = write_arg_type (s, -1, argno, integer_type_node, true); + + if (argno == 1) + argno = write_arg_type (s, -1, argno, ptr_type_node, true); } if (argno) @@ -1836,6 +1941,23 @@ nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind) switch (GET_MODE (dst)) { + case E_DCmode: + case E_CDImode: + { + gcc_assert (GET_CODE (dst) == CONCAT); + gcc_assert (GET_CODE (src) == CONCAT); + rtx dst_real = XEXP (dst, 0); + rtx dst_imag = XEXP (dst, 1); + rtx src_real = XEXP (src, 0); + rtx src_imag = XEXP (src, 1); + + start_sequence (); + emit_insn (nvptx_gen_shuffle (dst_real, src_real, idx, kind)); + emit_insn (nvptx_gen_shuffle (dst_imag, src_imag, idx, kind)); + res = get_insns (); + end_sequence (); + } + break; case E_SImode: res = gen_nvptx_shufflesi (dst, src, idx, GEN_INT (kind)); break; @@ -2679,6 +2801,27 @@ nvptx_print_operand_address (FILE *file, machine_mode mode, rtx addr) nvptx_print_address_operand (file, addr, mode); } +static nvptx_data_area +nvptx_mem_data_area (const_rtx x) +{ + gcc_assert (GET_CODE (x) == MEM); + + const_rtx addr = XEXP (x, 0); + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, addr, ALL) + if (SYMBOL_REF_P (*iter)) + return SYMBOL_DATA_AREA (*iter); + + return DATA_AREA_GENERIC; +} + +bool +nvptx_mem_maybe_shared_p (const_rtx x) +{ + nvptx_data_area area = nvptx_mem_data_area (x); + return area == DATA_AREA_SHARED || area == DATA_AREA_GENERIC; +} + /* Print an operand, X, to FILE, with an optional modifier in CODE. Meaning of CODE: @@ -3118,12 +3261,18 @@ nvptx_call_insn_is_syscall_p (rtx_insn *insn) /* If SET subexpression of INSN sets a register, emit a shuffle instruction to propagate its value from lane MASTER to current lane. */ -static void +static bool nvptx_unisimt_handle_set (rtx set, rtx_insn *insn, rtx master) { rtx reg; if (GET_CODE (set) == SET && REG_P (reg = SET_DEST (set))) - emit_insn_after (nvptx_gen_shuffle (reg, reg, master, SHUFFLE_IDX), insn); + { + emit_insn_after (nvptx_gen_shuffle (reg, reg, master, SHUFFLE_IDX), + insn); + return true; + } + + return false; } /* Adjust code for uniform-simt code generation variant by making atomics and @@ -3138,19 +3287,64 @@ nvptx_reorg_uniform_simt () for (insn = get_insns (); insn; insn = next) { next = NEXT_INSN (insn); - if (!(CALL_P (insn) && nvptx_call_insn_is_syscall_p (insn)) - && !(NONJUMP_INSN_P (insn) - && GET_CODE (PATTERN (insn)) == PARALLEL - && get_attr_atomic (insn))) + + /* Skip NOTE, USE, etc. */ + if (!INSN_P (insn) || recog_memoized (insn) == -1) continue; + + if (CALL_P (insn) && nvptx_call_insn_is_syscall_p (insn)) + { + /* Handle syscall. */ + } + else if (get_attr_atomic (insn)) + { + /* Handle atomic insn. 
*/ + } + else + continue; + rtx pat = PATTERN (insn); rtx master = nvptx_get_unisimt_master (); - for (int i = 0; i < XVECLEN (pat, 0); i++) - nvptx_unisimt_handle_set (XVECEXP (pat, 0, i), insn, master); + bool shuffle_p = false; + switch (GET_CODE (pat)) + { + case PARALLEL: + for (int i = 0; i < XVECLEN (pat, 0); i++) + shuffle_p + |= nvptx_unisimt_handle_set (XVECEXP (pat, 0, i), insn, master); + break; + case SET: + shuffle_p |= nvptx_unisimt_handle_set (pat, insn, master); + break; + default: + gcc_unreachable (); + } + + if (shuffle_p && TARGET_PTX_6_0) + { + /* The shuffle is a sync, so uniformity is guaranteed. */ + } + else + { + if (TARGET_PTX_6_0) + { + gcc_assert (!shuffle_p); + /* Emit after the insn, to guarantee uniformity. */ + emit_insn_after (gen_nvptx_warpsync (), insn); + } + else + { + /* Emit after the insn (and before the shuffle, if there are any) + to check uniformity. */ + emit_insn_after (gen_nvptx_uniform_warp_check (), insn); + } + } + rtx pred = nvptx_get_unisimt_predicate (); pred = gen_rtx_NE (BImode, pred, const0_rtx); pat = gen_rtx_COND_EXEC (VOIDmode, pred, pat); - validate_change (insn, &PATTERN (insn), pat, false); + bool changed_p = validate_change (insn, &PATTERN (insn), pat, false); + gcc_assert (changed_p); } } @@ -4598,6 +4792,7 @@ nvptx_single (unsigned mask, basic_block from, basic_block to) rtx_insn *neuter_start = NULL; rtx_insn *worker_label = NULL, *vector_label = NULL; rtx_insn *worker_jump = NULL, *vector_jump = NULL; + rtx_insn *warp_sync = NULL; for (mode = GOMP_DIM_WORKER; mode <= GOMP_DIM_VECTOR; mode++) if (GOMP_DIM_MASK (mode) & skip_mask) { @@ -4630,11 +4825,29 @@ nvptx_single (unsigned mask, basic_block from, basic_block to) if (tail_branch) { label_insn = emit_label_before (label, before); + if (mode == GOMP_DIM_VECTOR) + { + if (TARGET_PTX_6_0) + warp_sync = emit_insn_after (gen_nvptx_warpsync (), + label_insn); + else + warp_sync = emit_insn_after (gen_nvptx_uniform_warp_check (), + label_insn); + } before = label_insn; } else { label_insn = emit_label_after (label, tail); + if (mode == GOMP_DIM_VECTOR) + { + if (TARGET_PTX_6_0) + warp_sync = emit_insn_after (gen_nvptx_warpsync (), + label_insn); + else + warp_sync = emit_insn_after (gen_nvptx_uniform_warp_check (), + label_insn); + } if ((mode == GOMP_DIM_VECTOR || mode == GOMP_DIM_WORKER) && CALL_P (tail) && find_reg_note (tail, REG_NORETURN, NULL)) emit_insn_after (gen_exit (), label_insn); @@ -4702,6 +4915,8 @@ nvptx_single (unsigned mask, basic_block from, basic_block to) setp.ne.u32 %rcond,%rcondu32,0; */ rtx_insn *label = PREV_INSN (tail); + if (label == warp_sync) + label = PREV_INSN (label); gcc_assert (label && LABEL_P (label)); rtx tmp = gen_reg_rtx (BImode); emit_insn_before (gen_movbi (tmp, const0_rtx), @@ -5103,7 +5318,16 @@ prevent_branch_around_nothing (void) case CODE_FOR_nvptx_forked: case CODE_FOR_nvptx_joining: case CODE_FOR_nvptx_join: + case CODE_FOR_nop: continue; + case -1: + /* Handle asm ("") and similar. */ + if (GET_CODE (PATTERN (insn)) == ASM_INPUT + || GET_CODE (PATTERN (insn)) == ASM_OPERANDS + || (GET_CODE (PATTERN (insn)) == PARALLEL + && asm_noperands (PATTERN (insn)) >= 0)) + continue; + /* FALLTHROUGH. 
*/ default: seen_label = NULL; continue; @@ -5161,6 +5385,232 @@ workaround_barsyncs (void) } #endif +static rtx +gen_comment (const char *s) +{ + const char *sep = " "; + size_t len = strlen (ASM_COMMENT_START) + strlen (sep) + strlen (s) + 1; + char *comment = (char *) alloca (len); + snprintf (comment, len, "%s%s%s", ASM_COMMENT_START, sep, s); + return gen_rtx_ASM_INPUT_loc (VOIDmode, ggc_strdup (comment), + DECL_SOURCE_LOCATION (cfun->decl)); +} + +/* Initialize all declared regs at function entry. + Advantage : Fool-proof. + Disadvantage: Potentially creates a lot of long live ranges and adds a lot + of insns. */ + +static void +workaround_uninit_method_1 (void) +{ + rtx_insn *first = get_insns (); + rtx_insn *insert_here = NULL; + + for (int ix = LAST_VIRTUAL_REGISTER + 1; ix < max_reg_num (); ix++) + { + rtx reg = regno_reg_rtx[ix]; + + /* Skip undeclared registers. */ + if (reg == const0_rtx) + continue; + + gcc_assert (CONST0_RTX (GET_MODE (reg))); + + start_sequence (); + if (nvptx_comment && first != NULL) + emit_insn (gen_comment ("Start: Added by -minit-regs=1")); + emit_move_insn (reg, CONST0_RTX (GET_MODE (reg))); + rtx_insn *inits = get_insns (); + end_sequence (); + + if (dump_file && (dump_flags & TDF_DETAILS)) + for (rtx_insn *init = inits; init != NULL; init = NEXT_INSN (init)) + fprintf (dump_file, "Default init of reg %u inserted: insn %u\n", + ix, INSN_UID (init)); + + if (first != NULL) + { + insert_here = emit_insn_before (inits, first); + first = NULL; + } + else + insert_here = emit_insn_after (inits, insert_here); + } + + if (nvptx_comment && insert_here != NULL) + emit_insn_after (gen_comment ("End: Added by -minit-regs=1"), insert_here); +} + +/* Find uses of regs that are not defined on all incoming paths, and insert a + corresponding def at function entry. + Advantage : Simple. + Disadvantage: Potentially creates long live ranges. + May not catch all cases. F.i. a clobber cuts a live range in + the compiler and may prevent entry_lr_in from being set for a + reg, but the clobber does not translate to a ptx insn, so in + ptx there still may be an uninitialized ptx reg. See f.i. + gcc.c-torture/compile/20020926-1.c. 
*/ + +static void +workaround_uninit_method_2 (void) +{ + auto_bitmap entry_pseudo_uninit; + { + auto_bitmap not_pseudo; + bitmap_set_range (not_pseudo, 0, LAST_VIRTUAL_REGISTER); + + bitmap entry_lr_in = DF_LR_IN (ENTRY_BLOCK_PTR_FOR_FN (cfun)); + bitmap_and_compl (entry_pseudo_uninit, entry_lr_in, not_pseudo); + } + + rtx_insn *first = get_insns (); + rtx_insn *insert_here = NULL; + + bitmap_iterator iterator; + unsigned ix; + EXECUTE_IF_SET_IN_BITMAP (entry_pseudo_uninit, 0, ix, iterator) + { + rtx reg = regno_reg_rtx[ix]; + gcc_assert (CONST0_RTX (GET_MODE (reg))); + + start_sequence (); + if (nvptx_comment && first != NULL) + emit_insn (gen_comment ("Start: Added by -minit-regs=2:")); + emit_move_insn (reg, CONST0_RTX (GET_MODE (reg))); + rtx_insn *inits = get_insns (); + end_sequence (); + + if (dump_file && (dump_flags & TDF_DETAILS)) + for (rtx_insn *init = inits; init != NULL; init = NEXT_INSN (init)) + fprintf (dump_file, "Missing init of reg %u inserted: insn %u\n", + ix, INSN_UID (init)); + + if (first != NULL) + { + insert_here = emit_insn_before (inits, first); + first = NULL; + } + else + insert_here = emit_insn_after (inits, insert_here); + } + + if (nvptx_comment && insert_here != NULL) + emit_insn_after (gen_comment ("End: Added by -minit-regs=2"), insert_here); +} + +/* Find uses of regs that are not defined on all incoming paths, and insert a + corresponding def on those. + Advantage : Doesn't create long live ranges. + Disadvantage: More complex, and potentially also more defs. */ + +static void +workaround_uninit_method_3 (void) +{ + auto_bitmap not_pseudo; + bitmap_set_range (not_pseudo, 0, LAST_VIRTUAL_REGISTER); + + basic_block bb; + FOR_EACH_BB_FN (bb, cfun) + { + if (single_pred_p (bb)) + continue; + + auto_bitmap bb_pseudo_uninit; + bitmap_and_compl (bb_pseudo_uninit, DF_LIVE_IN (bb), DF_MIR_IN (bb)); + bitmap_and_compl_into (bb_pseudo_uninit, not_pseudo); + + bitmap_iterator iterator; + unsigned ix; + EXECUTE_IF_SET_IN_BITMAP (bb_pseudo_uninit, 0, ix, iterator) + { + bool have_false = false; + bool have_true = false; + + edge e; + edge_iterator ei; + FOR_EACH_EDGE (e, ei, bb->preds) + { + if (bitmap_bit_p (DF_LIVE_OUT (e->src), ix)) + have_true = true; + else + have_false = true; + } + if (have_false ^ have_true) + continue; + + FOR_EACH_EDGE (e, ei, bb->preds) + { + if (bitmap_bit_p (DF_LIVE_OUT (e->src), ix)) + continue; + + rtx reg = regno_reg_rtx[ix]; + gcc_assert (CONST0_RTX (GET_MODE (reg))); + + start_sequence (); + emit_move_insn (reg, CONST0_RTX (GET_MODE (reg))); + rtx_insn *inits = get_insns (); + end_sequence (); + + if (dump_file && (dump_flags & TDF_DETAILS)) + for (rtx_insn *init = inits; init != NULL; + init = NEXT_INSN (init)) + fprintf (dump_file, + "Missing init of reg %u inserted on edge: %d -> %d:" + " insn %u\n", ix, e->src->index, e->dest->index, + INSN_UID (init)); + + insert_insn_on_edge (inits, e); + } + } + } + + if (nvptx_comment) + FOR_EACH_BB_FN (bb, cfun) + { + if (single_pred_p (bb)) + continue; + + edge e; + edge_iterator ei; + FOR_EACH_EDGE (e, ei, bb->preds) + { + if (e->insns.r == NULL_RTX) + continue; + start_sequence (); + emit_insn (gen_comment ("Start: Added by -minit-regs=3:")); + emit_insn (e->insns.r); + emit_insn (gen_comment ("End: Added by -minit-regs=3:")); + e->insns.r = get_insns (); + end_sequence (); + } + } + + commit_edge_insertions (); +} + +static void +workaround_uninit (void) +{ + switch (nvptx_init_regs) + { + case 0: + /* Skip. 
*/ + break; + case 1: + workaround_uninit_method_1 (); + break; + case 2: + workaround_uninit_method_2 (); + break; + case 3: + workaround_uninit_method_3 (); + break; + default: + gcc_unreachable (); + } +} + /* PTX-specific reorganization - Split blocks at fork and join instructions - Compute live registers @@ -5190,6 +5640,8 @@ nvptx_reorg (void) df_set_flags (DF_NO_INSN_RESCAN | DF_NO_HARD_REGS); df_live_add_problem (); df_live_set_all_dirty (); + if (nvptx_init_regs == 3) + df_mir_add_problem (); df_analyze (); regstat_init_n_sets_and_refs (); @@ -5202,6 +5654,8 @@ nvptx_reorg (void) if (REG_N_SETS (i) == 0 && REG_N_REFS (i) == 0) regno_reg_rtx[i] = const0_rtx; + workaround_uninit (); + /* Determine launch dimensions of the function. If it is not an offloaded function (i.e. this is a regular compiler), the function has no neutering. */ @@ -5404,23 +5858,19 @@ static void nvptx_file_start (void) { fputs ("// BEGIN PREAMBLE\n", asm_out_file); - if (TARGET_PTX_7_0) - fputs ("\t.version\t7.0\n", asm_out_file); - else if (TARGET_PTX_6_3) - fputs ("\t.version\t6.3\n", asm_out_file); - else - fputs ("\t.version\t3.1\n", asm_out_file); - if (TARGET_SM80) - fputs ("\t.target\tsm_80\n", asm_out_file); - else if (TARGET_SM75) - fputs ("\t.target\tsm_75\n", asm_out_file); - else if (TARGET_SM53) - fputs ("\t.target\tsm_53\n", asm_out_file); - else if (TARGET_SM35) - fputs ("\t.target\tsm_35\n", asm_out_file); - else - fputs ("\t.target\tsm_30\n", asm_out_file); + + fputs ("\t.version\t", asm_out_file); + fputs (ptx_version_to_string ((enum ptx_version)ptx_version_option), + asm_out_file); + fputs ("\n", asm_out_file); + + fputs ("\t.target\tsm_", asm_out_file); + fputs (sm_version_to_string ((enum ptx_isa)ptx_isa_option), + asm_out_file); + fputs ("\n", asm_out_file); + fprintf (asm_out_file, "\t.address_size %d\n", GET_MODE_BITSIZE (Pmode)); + fputs ("// END PREAMBLE\n", asm_out_file); } @@ -5622,6 +6072,8 @@ enum nvptx_builtins NVPTX_BUILTIN_VECTOR_ADDR, NVPTX_BUILTIN_CMP_SWAP, NVPTX_BUILTIN_CMP_SWAPLL, + NVPTX_BUILTIN_MEMBAR_GL, + NVPTX_BUILTIN_MEMBAR_CTA, NVPTX_BUILTIN_MAX }; @@ -5652,6 +6104,7 @@ nvptx_init_builtins (void) #define UINT unsigned_type_node #define LLUINT long_long_unsigned_type_node #define PTRVOID ptr_type_node +#define VOID void_type_node DEF (SHUFFLE, "shuffle", (UINT, UINT, UINT, UINT, NULL_TREE)); DEF (SHUFFLELL, "shufflell", (LLUINT, LLUINT, UINT, UINT, NULL_TREE)); @@ -5661,6 +6114,8 @@ nvptx_init_builtins (void) (PTRVOID, ST, UINT, UINT, NULL_TREE)); DEF (CMP_SWAP, "cmp_swap", (UINT, PTRVOID, UINT, UINT, NULL_TREE)); DEF (CMP_SWAPLL, "cmp_swapll", (LLUINT, PTRVOID, LLUINT, LLUINT, NULL_TREE)); + DEF (MEMBAR_GL, "membar_gl", (VOID, VOID, NULL_TREE)); + DEF (MEMBAR_CTA, "membar_cta", (VOID, VOID, NULL_TREE)); #undef DEF #undef ST @@ -5696,6 +6151,14 @@ nvptx_expand_builtin (tree exp, rtx target, rtx ARG_UNUSED (subtarget), case NVPTX_BUILTIN_CMP_SWAPLL: return nvptx_expand_cmp_swap (exp, target, mode, ignore); + case NVPTX_BUILTIN_MEMBAR_GL: + emit_insn (gen_nvptx_membar_gl ()); + return NULL_RTX; + + case NVPTX_BUILTIN_MEMBAR_CTA: + emit_insn (gen_nvptx_membar_cta ()); + return NULL_RTX; + default: gcc_unreachable (); } } @@ -5724,12 +6187,13 @@ nvptx_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait, case omp_device_arch: return strcmp (name, "nvptx") == 0; case omp_device_isa: - if (strcmp (name, "sm_30") == 0) - return !TARGET_SM35; - if (strcmp (name, "sm_35") == 0) - return TARGET_SM35 && !TARGET_SM53; - if (strcmp (name, "sm_53") == 0) - return 
TARGET_SM53; +#define NVPTX_SM(XX, SEP) \ + { \ + if (strcmp (name, "sm_" #XX) == 0) \ + return ptx_isa_option == PTX_ISA_SM ## XX; \ + } +#include "nvptx-sm.def" +#undef NVPTX_SM return 0; default: gcc_unreachable (); @@ -6243,7 +6707,7 @@ nvptx_lockless_update (location_t loc, gimple_stmt_iterator *gsi, static tree nvptx_lockfull_update (location_t loc, gimple_stmt_iterator *gsi, - tree ptr, tree var, tree_code op) + tree ptr, tree var, tree_code op, int level) { tree var_type = TREE_TYPE (var); tree swap_fn = nvptx_builtin_decl (NVPTX_BUILTIN_CMP_SWAP, true); @@ -6295,8 +6759,17 @@ nvptx_lockfull_update (location_t loc, gimple_stmt_iterator *gsi, lock_loop->any_estimate = true; add_loop (lock_loop, entry_bb->loop_father); - /* Build and insert the reduction calculation. */ + /* Build the pre-barrier. */ gimple_seq red_seq = NULL; + enum nvptx_builtins barrier_builtin + = (level == GOMP_DIM_GANG + ? NVPTX_BUILTIN_MEMBAR_GL + : NVPTX_BUILTIN_MEMBAR_CTA); + tree barrier_fn = nvptx_builtin_decl (barrier_builtin, true); + tree barrier_expr = build_call_expr_loc (loc, barrier_fn, 0); + gimplify_stmt (&barrier_expr, &red_seq); + + /* Build the reduction calculation. */ tree acc_in = make_ssa_name (var_type); tree ref_in = build_simple_mem_ref (ptr); TREE_THIS_VOLATILE (ref_in) = 1; @@ -6310,6 +6783,11 @@ nvptx_lockfull_update (location_t loc, gimple_stmt_iterator *gsi, TREE_THIS_VOLATILE (ref_out) = 1; gimplify_assign (ref_out, acc_out, &red_seq); + /* Build the post-barrier. */ + barrier_expr = build_call_expr_loc (loc, barrier_fn, 0); + gimplify_stmt (&barrier_expr, &red_seq); + + /* Insert the reduction calculation. */ gsi_insert_seq_before (gsi, red_seq, GSI_SAME_STMT); /* Build & insert the unlock sequence. */ @@ -6330,7 +6808,7 @@ nvptx_lockfull_update (location_t loc, gimple_stmt_iterator *gsi, static tree nvptx_reduction_update (location_t loc, gimple_stmt_iterator *gsi, - tree ptr, tree var, tree_code op) + tree ptr, tree var, tree_code op, int level) { tree type = TREE_TYPE (var); tree size = TYPE_SIZE (type); @@ -6339,7 +6817,7 @@ nvptx_reduction_update (location_t loc, gimple_stmt_iterator *gsi, || size == TYPE_SIZE (long_long_unsigned_type_node)) return nvptx_lockless_update (loc, gsi, ptr, var, op); else - return nvptx_lockfull_update (loc, gsi, ptr, var, op); + return nvptx_lockfull_update (loc, gsi, ptr, var, op, level); } /* NVPTX implementation of GOACC_REDUCTION_SETUP. */ @@ -6531,7 +7009,7 @@ nvptx_goacc_reduction_fini (gcall *call, offload_attrs *oa) gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT); seq = NULL; r = nvptx_reduction_update (gimple_location (call), &gsi, - accum, var, op); + accum, var, op, level); } } @@ -6845,6 +7323,28 @@ nvptx_libc_has_function (enum function_class fn_class, tree type) return default_libc_has_function (fn_class, type); } +bool +nvptx_mem_local_p (rtx mem) +{ + gcc_assert (GET_CODE (mem) == MEM); + + struct address_info info; + decompose_mem_address (&info, mem); + + if (info.base != NULL && REG_P (*info.base) + && REGNO_PTR_FRAME_P (REGNO (*info.base))) + { + if (TARGET_SOFT_STACK) + { + /* Frame-related doesn't mean local. */ + } + else + return true; + } + + return false; +} + #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE nvptx_option_override diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h index 9fda2f0..3ca22a5 100644 --- a/gcc/config/nvptx/nvptx.h +++ b/gcc/config/nvptx/nvptx.h @@ -32,7 +32,7 @@ /* Default needs to be in sync with default for misa in nvptx.opt. 
We add a default here to work around a hard-coded sm_30 default in nvptx-as. */ -#define ASM_SPEC "%{misa=*:-m %*; :-m sm_35}" +#define ASM_SPEC "%{misa=*:-m %*; :-m sm_35}%{misa=sm_30:--no-verify}" #define TARGET_CPU_CPP_BUILTINS() nvptx_cpu_cpp_builtins () @@ -86,11 +86,9 @@ #define Pmode (TARGET_ABI64 ? DImode : SImode) #define STACK_SIZE_MODE Pmode -#define TARGET_SM35 (ptx_isa_option >= PTX_ISA_SM35) -#define TARGET_SM53 (ptx_isa_option >= PTX_ISA_SM53) -#define TARGET_SM75 (ptx_isa_option >= PTX_ISA_SM75) -#define TARGET_SM80 (ptx_isa_option >= PTX_ISA_SM80) +#include "nvptx-gen.h" +#define TARGET_PTX_6_0 (ptx_version_option >= PTX_VERSION_6_0) #define TARGET_PTX_6_3 (ptx_version_option >= PTX_VERSION_6_3) #define TARGET_PTX_7_0 (ptx_version_option >= PTX_VERSION_7_0) diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 5cf190a..a453c1d 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -27,6 +27,7 @@ UNSPEC_SIN UNSPEC_COS UNSPEC_TANH + UNSPEC_ISINF UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC @@ -54,10 +55,15 @@ (define_c_enum "unspecv" [ UNSPECV_LOCK UNSPECV_CAS + UNSPECV_CAS_LOCAL UNSPECV_XCHG + UNSPECV_ST UNSPECV_BARSYNC + UNSPECV_WARPSYNC + UNSPECV_UNIFORM_WARP_CHECK UNSPECV_MEMBAR UNSPECV_MEMBAR_CTA + UNSPECV_MEMBAR_GL UNSPECV_DIM_POS UNSPECV_FORK @@ -88,6 +94,18 @@ return register_operand (op, mode); }) +(define_predicate "nvptx_register_or_complex_di_df_register_operand" + (ior (match_code "reg") + (match_code "concat")) +{ + if (GET_CODE (op) == CONCAT) + return ((GET_MODE (op) == DCmode || GET_MODE (op) == CDImode) + && nvptx_register_operand (XEXP (op, 0), mode) + && nvptx_register_operand (XEXP (op, 1), mode)); + + return nvptx_register_operand (op, mode); +}) + (define_predicate "nvptx_nonimmediate_operand" (match_code "mem,reg") { @@ -282,7 +300,8 @@ "@ %.\\tmov.b16\\t%0, %1; %.\\tld.b16\\t%0, %1; - %.\\tst.b16\\t%0, %1;") + %.\\tst.b16\\t%0, %1;" + [(set_attr "subregs_ok" "true")]) (define_expand "movhf" [(set (match_operand:HF 0 "nonimmediate_operand" "") @@ -501,7 +520,14 @@ (minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") (match_operand:HSDIM 2 "nvptx_register_operand" "R")))] "" - "%.\\tsub%t0\\t%0, %1, %2;") + { + if (GET_MODE (operands[0]) == HImode) + /* Workaround https://developer.nvidia.com/nvidia_bug/3527713. + See PR97005. 
*/ + return "%.\\tsub.s16\\t%0, %1, %2;"; + + return "%.\\tsub%t0\\t%0, %1, %2;"; + }) (define_insn "mul<mode>3" [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") @@ -592,6 +618,12 @@ "" "%.\\tnot.b%T0\\t%0, %1;") +(define_insn "one_cmplbi2" + [(set (match_operand:BI 0 "nvptx_register_operand" "=R") + (not:BI (match_operand:BI 1 "nvptx_register_operand" "R")))] + "" + "%.\\tnot.pred\\t%0, %1;") + (define_insn "*cnot<mode>2" [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") (eq:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R") @@ -667,7 +699,57 @@ "" "%.\\tmul.wide.u32\\t%0, %1, %2;") -(define_insn "smulhi3_highpart" +(define_expand "mulditi3" + [(set (match_operand:TI 0 "nvptx_register_operand") + (mult:TI (sign_extend:TI + (match_operand:DI 1 "nvptx_register_operand")) + (sign_extend:DI + (match_operand:DI 2 "nvptx_nonmemory_operand"))))] + "" +{ + rtx hi = gen_reg_rtx (DImode); + rtx lo = gen_reg_rtx (DImode); + emit_insn (gen_smuldi3_highpart (hi, operands[1], operands[2])); + emit_insn (gen_muldi3 (lo, operands[1], operands[2])); + emit_move_insn (gen_highpart (DImode, operands[0]), hi); + emit_move_insn (gen_lowpart (DImode, operands[0]), lo); + DONE; +}) + +(define_expand "umulditi3" + [(set (match_operand:TI 0 "nvptx_register_operand") + (mult:TI (zero_extend:TI + (match_operand:DI 1 "nvptx_register_operand")) + (zero_extend:DI + (match_operand:DI 2 "nvptx_nonmemory_operand"))))] + "" +{ + rtx hi = gen_reg_rtx (DImode); + rtx lo = gen_reg_rtx (DImode); + emit_insn (gen_umuldi3_highpart (hi, operands[1], operands[2])); + emit_insn (gen_muldi3 (lo, operands[1], operands[2])); + emit_move_insn (gen_highpart (DImode, operands[0]), hi); + emit_move_insn (gen_lowpart (DImode, operands[0]), lo); + DONE; +}) + +(define_insn "smul<mode>3_highpart" + [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") + (smul_highpart:HSDIM + (match_operand:HSDIM 1 "nvptx_register_operand" "R") + (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] + "" + "%.\\tmul.hi.s%T0\\t%0, %1, %2;") + +(define_insn "umul<mode>3_highpart" + [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") + (umul_highpart:HSDIM + (match_operand:HSDIM 1 "nvptx_register_operand" "R") + (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] + "" + "%.\\tmul.hi.u%T0\\t%0, %1, %2;") + +(define_insn "*smulhi3_highpart_2" [(set (match_operand:HI 0 "nvptx_register_operand" "=R") (truncate:HI (lshiftrt:SI @@ -679,7 +761,7 @@ "" "%.\\tmul.hi.s16\\t%0, %1, %2;") -(define_insn "smulsi3_highpart" +(define_insn "*smulsi3_highpart_2" [(set (match_operand:SI 0 "nvptx_register_operand" "=R") (truncate:SI (lshiftrt:DI @@ -691,7 +773,7 @@ "" "%.\\tmul.hi.s32\\t%0, %1, %2;") -(define_insn "umulhi3_highpart" +(define_insn "*umulhi3_highpart_2" [(set (match_operand:HI 0 "nvptx_register_operand" "=R") (truncate:HI (lshiftrt:SI @@ -703,7 +785,7 @@ "" "%.\\tmul.hi.u16\\t%0, %1, %2;") -(define_insn "umulsi3_highpart" +(define_insn "*umulsi3_highpart_2" [(set (match_operand:SI 0 "nvptx_register_operand" "=R") (truncate:SI (lshiftrt:DI @@ -738,32 +820,60 @@ "" "%.\\tshr.u%T0\\t%0, %1, %2;") +(define_insn "rotlsi3" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (rotate:SI (match_operand:SI 1 "nvptx_register_operand" "R") + (and:SI (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri") + (const_int 31))))] + "TARGET_SM35" + "%.\\tshf.l.wrap.b32\\t%0, %1, %1, %2;") + +(define_insn "rotrsi3" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (rotatert:SI (match_operand:SI 1 
"nvptx_register_operand" "R") + (and:SI (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri") + (const_int 31))))] + "TARGET_SM35" + "%.\\tshf.r.wrap.b32\\t%0, %1, %1, %2;") + ;; Logical operations -(define_insn "and<mode>3" - [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R") - (and:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R") - (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))] - "" - "%.\\tand.b%T0\\t%0, %1, %2;") +(define_code_iterator any_logic [and ior xor]) +(define_code_attr logic [(and "and") (ior "or") (xor "xor")]) +(define_code_attr ilogic [(and "and") (ior "ior") (xor "xor")]) -(define_insn "ior<mode>3" - [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R") - (ior:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R") - (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))] +(define_insn "<ilogic><mode>3" + [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") + (any_logic:HSDIM + (match_operand:HSDIM 1 "nvptx_register_operand" "R") + (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))] "" - "%.\\tor.b%T0\\t%0, %1, %2;") + "%.\\t<logic>.b%T0\\t%0, %1, %2;") -(define_insn "xor<mode>3" - [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R") - (xor:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R") - (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))] +(define_insn "<ilogic>bi3" + [(set (match_operand:BI 0 "nvptx_register_operand" "=R") + (any_logic:BI (match_operand:BI 1 "nvptx_register_operand" "R") + (match_operand:BI 2 "nvptx_register_operand" "R")))] "" - "%.\\txor.b%T0\\t%0, %1, %2;") + "%.\\t<logic>.pred\\t%0, %1, %2;") + +(define_split + [(set (match_operand:HSDIM 0 "nvptx_register_operand") + (any_logic:HSDIM + (ne:HSDIM (match_operand:BI 1 "nvptx_register_operand") + (const_int 0)) + (ne:HSDIM (match_operand:BI 2 "nvptx_register_operand") + (const_int 0))))] + "can_create_pseudo_p ()" + [(set (match_dup 3) (any_logic:BI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (ne:HSDIM (match_dup 3) (const_int 0)))] +{ + operands[3] = gen_reg_rtx (BImode); +}) ;; Comparisons and branches -(define_insn "*cmp<mode>" +(define_insn "cmp<mode>" [(set (match_operand:BI 0 "nvptx_register_operand" "=R") (match_operator:BI 1 "nvptx_comparison_operator" [(match_operand:HSDIM 2 "nvptx_register_operand" "R") @@ -779,6 +889,14 @@ "" "%.\\tsetp%c1\\t%0, %2, %3;") +(define_insn "*cmphf" + [(set (match_operand:BI 0 "nvptx_register_operand" "=R") + (match_operator:BI 1 "nvptx_float_comparison_operator" + [(match_operand:HF 2 "nvptx_register_operand" "R") + (match_operand:HF 3 "nvptx_nonmemory_operand" "RF")]))] + "TARGET_SM53" + "%.\\tsetp%c1\\t%0, %2, %3;") + (define_insn "jump" [(set (pc) (label_ref (match_operand 0 "" "")))] @@ -867,29 +985,36 @@ ;; Conditional stores (define_insn "setcc<mode>_from_bi" - [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") - (ne:HSDIM (match_operand:BI 1 "nvptx_register_operand" "R") + [(set (match_operand:QHSDIM 0 "nvptx_register_operand" "=R") + (ne:QHSDIM (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)))] "" "%.\\tselp%t0\\t%0, 1, 0, %1;") -(define_insn "extendbi<mode>2" +(define_insn "*setcc<mode>_from_not_bi" [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") - (sign_extend:HSDIM + (eq:HSDIM (match_operand:BI 1 "nvptx_register_operand" "R") + (const_int 0)))] + "" + "%.\\tselp%t0\\t%0, 0, 1, %1;") + +(define_insn "extendbi<mode>2" + [(set (match_operand:QHSDIM 0 "nvptx_register_operand" "=R") + (sign_extend:QHSDIM (match_operand:BI 1 
"nvptx_register_operand" "R")))] "" "%.\\tselp%t0\\t%0, -1, 0, %1;") (define_insn "zero_extendbi<mode>2" - [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") - (zero_extend:HSDIM + [(set (match_operand:QHSDIM 0 "nvptx_register_operand" "=R") + (zero_extend:QHSDIM (match_operand:BI 1 "nvptx_register_operand" "R")))] "" "%.\\tselp%t0\\t%0, 1, 0, %1;") (define_insn "sel_true<mode>" [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") - (if_then_else:HSDIM + (if_then_else:HSDIM (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri") (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))] @@ -898,7 +1023,7 @@ (define_insn "sel_true<mode>" [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") - (if_then_else:SDFM + (if_then_else:SDFM (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF") (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))] @@ -907,7 +1032,7 @@ (define_insn "sel_false<mode>" [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") - (if_then_else:HSDIM + (if_then_else:HSDIM (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri") (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))] @@ -916,13 +1041,63 @@ (define_insn "sel_false<mode>" [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") - (if_then_else:SDFM + (if_then_else:SDFM (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0)) (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF") (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))] "" "%.\\tselp%t0\\t%0, %3, %2, %1;") +(define_code_iterator eqne [eq ne]) + +;; Split negation of a predicate into a conditional move. +(define_insn_and_split "*selp<mode>_neg_<code>" + [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") + (neg:HSDIM (eqne:HSDIM + (match_operand:BI 1 "nvptx_register_operand" "R") + (const_int 0))))] + "" + "#" + "&& 1" + [(set (match_dup 0) + (if_then_else:HSDIM + (eqne (match_dup 1) (const_int 0)) + (const_int -1) + (const_int 0)))]) + +;; Split bitwise not of a predicate into a conditional move. 
+(define_insn_and_split "*selp<mode>_not_<code>" + [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R") + (not:HSDIM (eqne:HSDIM + (match_operand:BI 1 "nvptx_register_operand" "R") + (const_int 0))))] + "" + "#" + "&& 1" + [(set (match_dup 0) + (if_then_else:HSDIM + (eqne (match_dup 1) (const_int 0)) + (const_int -2) + (const_int -1)))]) + +(define_insn "*setcc_int<mode>" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (neg:SI + (match_operator:SI 1 "nvptx_comparison_operator" + [(match_operand:HSDIM 2 "nvptx_register_operand" "R") + (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")])))] + "" + "%.\\tset%t0%c1\\t%0, %2, %3;") + +(define_insn "*setcc_int<mode>" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (neg:SI + (match_operator:SI 1 "nvptx_float_comparison_operator" + [(match_operand:SDFM 2 "nvptx_register_operand" "R") + (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")])))] + "" + "%.\\tset%t0%c1\\t%0, %2, %3;") + (define_insn "setcc_float<mode>" [(set (match_operand:SF 0 "nvptx_register_operand" "=R") (match_operator:SF 1 "nvptx_comparison_operator" @@ -969,6 +1144,21 @@ DONE; }) +(define_expand "cstorehf4" + [(set (match_operand:SI 0 "nvptx_register_operand") + (match_operator:SI 1 "nvptx_float_comparison_operator" + [(match_operand:HF 2 "nvptx_register_operand") + (match_operand:HF 3 "nvptx_nonmemory_operand")]))] + "TARGET_SM53" +{ + rtx reg = gen_reg_rtx (BImode); + rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode, + operands[2], operands[3]); + emit_move_insn (reg, cmp); + emit_insn (gen_setccsi_from_bi (operands[0], reg)); + DONE; +}) + ;; Calls (define_insn "call_insn_<mode>" @@ -1056,8 +1246,8 @@ (define_insn "copysign<mode>3" [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") - (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R") - (match_operand:SDFM 2 "nvptx_register_operand" "R")] + (unspec:SDFM [(match_operand:SDFM 1 "nvptx_nonmemory_operand" "RF") + (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")] UNSPEC_COPYSIGN))] "" "%.\\tcopysign%t0\\t%0, %2, %1;") @@ -1133,6 +1323,25 @@ "flag_unsafe_math_optimizations" "%.\\tex2.approx%t0\\t%0, %1;") +(define_insn "setcc_isinf<mode>" + [(set (match_operand:BI 0 "nvptx_register_operand" "=R") + (unspec:BI [(match_operand:SDFM 1 "nvptx_register_operand" "R")] + UNSPEC_ISINF))] + "" + "%.\\ttestp.infinite%t1\\t%0, %1;") + +(define_expand "isinf<mode>2" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (unspec:SI [(match_operand:SDFM 1 "nvptx_register_operand" "R")] + UNSPEC_ISINF))] + "" +{ + rtx pred = gen_reg_rtx (BImode); + emit_insn (gen_setcc_isinf<mode> (pred, operands[1])); + emit_insn (gen_setccsi_from_bi (operands[0], pred)); + DONE; +}) + ;; HFmode floating point arithmetic. 
(define_insn "addhf3" @@ -1156,6 +1365,26 @@ "TARGET_SM53" "%.\\tmul.f16\\t%0, %1, %2;") +(define_insn "fmahf4" + [(set (match_operand:HF 0 "nvptx_register_operand" "=R") + (fma:HF (match_operand:HF 1 "nvptx_register_operand" "R") + (match_operand:HF 2 "nvptx_nonmemory_operand" "RF") + (match_operand:HF 3 "nvptx_nonmemory_operand" "RF")))] + "TARGET_SM53" + "%.\\tfma%#.f16\\t%0, %1, %2, %3;") + +(define_insn "neghf2" + [(set (match_operand:HF 0 "nvptx_register_operand" "=R") + (neg:HF (match_operand:HF 1 "nvptx_register_operand" "R")))] + "" + "%.\\txor.b16\\t%0, %1, -32768;") + +(define_insn "abshf2" + [(set (match_operand:HF 0 "nvptx_register_operand" "=R") + (abs:HF (match_operand:HF 1 "nvptx_register_operand" "R")))] + "" + "%.\\tand.b16\\t%0, %1, 32767;") + (define_insn "exp2hf2" [(set (match_operand:HF 0 "nvptx_register_operand" "=R") (unspec:HF [(match_operand:HF 1 "nvptx_register_operand" "R")] @@ -1599,7 +1828,7 @@ UNSPEC_SHUFFLE))] "" { - if (TARGET_PTX_6_3) + if (TARGET_PTX_6_0) return "%.\\tshfl.sync%S3.b32\\t%0, %1, %2, 31, 0xffffffff;"; else return "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;"; @@ -1611,7 +1840,7 @@ UNSPEC_VOTE_BALLOT))] "" { - if (TARGET_PTX_6_3) + if (TARGET_PTX_6_0) return "%.\\tvote.sync.ballot.b32\\t%0, %1, 0xffffffff;"; else return "%.\\tvote.ballot.b32\\t%0, %1;"; @@ -1685,8 +1914,8 @@ ;; Implement IFN_GOMP_SIMT_XCHG_BFLY: perform a "butterfly" exchange ;; across lanes (define_expand "omp_simt_xchg_bfly" - [(match_operand 0 "nvptx_register_operand" "=R") - (match_operand 1 "nvptx_register_operand" "R") + [(match_operand 0 "nvptx_register_or_complex_di_df_register_operand" "=R") + (match_operand 1 "nvptx_register_or_complex_di_df_register_operand" "R") (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")] "" { @@ -1698,8 +1927,8 @@ ;; Implement IFN_GOMP_SIMT_XCHG_IDX: broadcast value in operand 1 ;; from lane given by index in operand 2 to operand 0 in all lanes (define_expand "omp_simt_xchg_idx" - [(match_operand 0 "nvptx_register_operand" "=R") - (match_operand 1 "nvptx_register_operand" "R") + [(match_operand 0 "nvptx_register_or_complex_di_df_register_operand" "=R") + (match_operand 1 "nvptx_register_or_complex_di_df_register_operand" "R") (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")] "" { @@ -1768,8 +1997,14 @@ (match_operand:SI 7 "const_int_operand")] ;; failure model "" { - emit_insn (gen_atomic_compare_and_swap<mode>_1 - (operands[1], operands[2], operands[3], operands[4], operands[6])); + if (nvptx_mem_local_p (operands[2])) + emit_insn (gen_atomic_compare_and_swap<mode>_1_local + (operands[1], operands[2], operands[3], operands[4], + operands[6])); + else + emit_insn (gen_atomic_compare_and_swap<mode>_1 + (operands[1], operands[2], operands[3], operands[4], + operands[6])); rtx cond = gen_reg_rtx (BImode); emit_move_insn (cond, gen_rtx_EQ (BImode, operands[1], operands[3])); @@ -1777,6 +2012,31 @@ DONE; }) +(define_insn "atomic_compare_and_swap<mode>_1_local" + [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") + (unspec_volatile:SDIM + [(match_operand:SDIM 1 "memory_operand" "+m") + (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri") + (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri") + (match_operand:SI 4 "const_int_operand")] + UNSPECV_CAS_LOCAL)) + (set (match_dup 1) + (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS_LOCAL))] + "" + { + output_asm_insn ("{", NULL); + output_asm_insn ("\\t" ".reg.pred" "\\t" "%%eq_p;", NULL); + output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); + output_asm_insn ("\\t" "ld%A1%t0" "\\t" 
"%%val,%1;", operands); + output_asm_insn ("\\t" "setp.eq%t0" "\\t" "%%eq_p, %%val, %2;", + operands); + output_asm_insn ("@%%eq_p\\t" "st%A1%t0" "\\t" "%1,%3;", operands); + output_asm_insn ("\\t" "mov%t0" "\\t" "%0,%%val;", operands); + output_asm_insn ("}", NULL); + return ""; + } + [(set_attr "predicable" "false")]) + (define_insn "atomic_compare_and_swap<mode>_1" [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") (unspec_volatile:SDIM @@ -1805,12 +2065,82 @@ (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input "" { + if (nvptx_mem_local_p (operands[1])) + { + output_asm_insn ("{", NULL); + output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); + output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands); + output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%2;", operands); + output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands); + output_asm_insn ("}", NULL); + return ""; + } const char *t = "%.\tatom%A1.exch.b%T0\t%0, %1, %2;"; return nvptx_output_atomic_insn (t, operands, 1, 3); } [(set_attr "atomic" "true")]) +(define_expand "atomic_store<mode>" + [(match_operand:SDIM 0 "memory_operand" "=m") ;; memory + (match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input + (match_operand:SI 2 "const_int_operand")] ;; model + "" +{ + struct address_info info; + decompose_mem_address (&info, operands[0]); + if (info.base != NULL && REG_P (*info.base) + && REGNO_PTR_FRAME_P (REGNO (*info.base))) + { + emit_insn (gen_mov<mode> (operands[0], operands[1])); + DONE; + } + + if (TARGET_SM70) + { + emit_insn (gen_nvptx_atomic_store_sm70<mode> (operands[0], operands[1], + operands[2])); + DONE; + } + + bool maybe_shared_p = nvptx_mem_maybe_shared_p (operands[0]); + if (!maybe_shared_p) + /* Fall back to expand_atomic_store. */ + FAIL; + + emit_insn (gen_nvptx_atomic_store<mode> (operands[0], operands[1], + operands[2])); + DONE; +}) + +(define_insn "nvptx_atomic_store_sm70<mode>" + [(set (match_operand:SDIM 0 "memory_operand" "+m") ;; memory + (unspec_volatile:SDIM + [(match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input + (match_operand:SI 2 "const_int_operand")] ;; model + UNSPECV_ST))] + "TARGET_SM70" + { + const char *t + = "%.\tst%A0.b%T0\t%0, %1;"; + return nvptx_output_atomic_insn (t, operands, 0, 2); + } + [(set_attr "atomic" "false")]) ;; Note: st is not an atomic insn. 
+ +(define_insn "nvptx_atomic_store<mode>" + [(set (match_operand:SDIM 0 "memory_operand" "+m") ;; memory + (unspec_volatile:SDIM + [(match_operand:SDIM 1 "nvptx_nonmemory_operand" "Ri") ;; input + (match_operand:SI 2 "const_int_operand")] ;; model + UNSPECV_ST))] + "!TARGET_SM70" + { + const char *t + = "%.\tatom%A0.exch.b%T0\t_, %0, %1;"; + return nvptx_output_atomic_insn (t, operands, 0, 2); + } + [(set_attr "atomic" "true")]) + (define_insn "atomic_fetch_add<mode>" [(set (match_operand:SDIM 1 "memory_operand" "+m") (unspec_volatile:SDIM @@ -1822,6 +2152,19 @@ (match_dup 1))] "" { + if (nvptx_mem_local_p (operands[1])) + { + output_asm_insn ("{", NULL); + output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); + output_asm_insn ("\\t" ".reg%t0" "\\t" "%%update;", operands); + output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands); + output_asm_insn ("%.\\t" "add%t0" "\\t" "%%update,%%val,%2;", + operands); + output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands); + output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands); + output_asm_insn ("}", NULL); + return ""; + } const char *t = "%.\\tatom%A1.add%t0\\t%0, %1, %2;"; return nvptx_output_atomic_insn (t, operands, 1, 3); @@ -1839,15 +2182,25 @@ (match_dup 1))] "" { + if (nvptx_mem_local_p (operands[1])) + { + output_asm_insn ("{", NULL); + output_asm_insn ("\\t" ".reg%t0" "\\t" "%%val;", operands); + output_asm_insn ("\\t" ".reg%t0" "\\t" "%%update;", operands); + output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands); + output_asm_insn ("%.\\t" "add%t0" "\\t" "%%update,%%val,%2;", + operands); + output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands); + output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands); + output_asm_insn ("}", NULL); + return ""; + } const char *t = "%.\\tatom%A1.add%t0\\t%0, %1, %2;"; return nvptx_output_atomic_insn (t, operands, 1, 3); } [(set_attr "atomic" "true")]) -(define_code_iterator any_logic [and ior xor]) -(define_code_attr logic [(and "and") (ior "or") (xor "xor")]) - (define_insn "atomic_fetch_<logic><mode>" [(set (match_operand:SDIM 1 "memory_operand" "+m") (unspec_volatile:SDIM @@ -1859,6 +2212,19 @@ (match_dup 1))] "<MODE>mode == SImode || TARGET_SM35" { + if (nvptx_mem_local_p (operands[1])) + { + output_asm_insn ("{", NULL); + output_asm_insn ("\\t" ".reg.b%T0" "\\t" "%%val;", operands); + output_asm_insn ("\\t" ".reg.b%T0" "\\t" "%%update;", operands); + output_asm_insn ("%.\\t" "ld%A1%t0" "\\t" "%%val,%1;", operands); + output_asm_insn ("%.\\t" "<logic>.b%T0" "\\t" "%%update,%%val,%2;", + operands); + output_asm_insn ("%.\\t" "st%A1%t0" "\\t" "%1,%%update;", operands); + output_asm_insn ("%.\\t" "mov%t0" "\\t" "%0,%%val;", operands); + output_asm_insn ("}", NULL); + return ""; + } const char *t = "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;"; return nvptx_output_atomic_insn (t, operands, 1, 3); @@ -1889,9 +2255,36 @@ "" { if (INTVAL (operands[1]) == 0) - return "\\tbar.sync\\t%0;"; + return (TARGET_PTX_6_0 + ? "\\tbarrier.sync.aligned\\t%0;" + : "\\tbar.sync\\t%0;"); else - return "\\tbar.sync\\t%0, %1;"; + return (TARGET_PTX_6_0 + ? 
"\\tbarrier.sync\\t%0, %1;" + : "\\tbar.sync\\t%0, %1;"); + } + [(set_attr "predicable" "false")]) + +(define_insn "nvptx_warpsync" + [(unspec_volatile [(const_int 0)] UNSPECV_WARPSYNC)] + "TARGET_PTX_6_0" + "\\tbar.warp.sync\\t0xffffffff;" + [(set_attr "predicable" "false")]) + +(define_insn "nvptx_uniform_warp_check" + [(unspec_volatile [(const_int 0)] UNSPECV_UNIFORM_WARP_CHECK)] + "" + { + output_asm_insn ("{", NULL); + output_asm_insn ("\\t" ".reg.b32" "\\t" "act;", NULL); + output_asm_insn ("\\t" "vote.ballot.b32" "\\t" "act,1;", NULL); + output_asm_insn ("\\t" ".reg.pred" "\\t" "uni;", NULL); + output_asm_insn ("\\t" "setp.eq.b32" "\\t" "uni,act,0xffffffff;", + NULL); + output_asm_insn ("@ !uni\\t" "trap;", NULL); + output_asm_insn ("@ !uni\\t" "exit;", NULL); + output_asm_insn ("}", NULL); + return ""; } [(set_attr "predicable" "false")]) @@ -1932,6 +2325,22 @@ "\\tmembar.cta;" [(set_attr "predicable" "false")]) +(define_expand "nvptx_membar_gl" + [(set (match_dup 0) + (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_GL))] + "" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*nvptx_membar_gl" + [(set (match_operand:BLK 0 "" "") + (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_GL))] + "" + "\\tmembar.gl;" + [(set_attr "predicable" "false")]) + (define_insn "nvptx_nounroll" [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)] "" @@ -1947,3 +2356,103 @@ return nvptx_output_red_partition (operands[0], operands[1]); } [(set_attr "predicable" "false")]) + +;; Expand QI mode operations using SI mode instructions. +(define_code_iterator any_sbinary [plus minus smin smax]) +(define_code_attr sbinary [(plus "add") (minus "sub") (smin "smin") (smax "smax")]) + +(define_code_iterator any_ubinary [and ior xor umin umax]) +(define_code_attr ubinary [(and "and") (ior "ior") (xor "xor") (umin "umin") + (umax "umax")]) + +(define_code_iterator any_sunary [neg abs]) +(define_code_attr sunary [(neg "neg") (abs "abs")]) + +(define_code_iterator any_uunary [not]) +(define_code_attr uunary [(not "one_cmpl")]) + +(define_expand "<sbinary>qi3" + [(set (match_operand:QI 0 "nvptx_register_operand") + (any_sbinary:QI (match_operand:QI 1 "nvptx_nonmemory_operand") + (match_operand:QI 2 "nvptx_nonmemory_operand")))] + "" +{ + rtx reg = gen_reg_rtx (SImode); + rtx op0 = convert_modes (SImode, QImode, operands[1], 0); + rtx op1 = convert_modes (SImode, QImode, operands[2], 0); + if (<CODE> == MINUS) + op0 = force_reg (SImode, op0); + emit_insn (gen_<sbinary>si3 (reg, op0, op1)); + emit_insn (gen_truncsiqi2 (operands[0], reg)); + DONE; +}) + +(define_expand "<ubinary>qi3" + [(set (match_operand:QI 0 "nvptx_register_operand") + (any_ubinary:QI (match_operand:QI 1 "nvptx_nonmemory_operand") + (match_operand:QI 2 "nvptx_nonmemory_operand")))] + "" +{ + rtx reg = gen_reg_rtx (SImode); + rtx op0 = convert_modes (SImode, QImode, operands[1], 1); + rtx op1 = convert_modes (SImode, QImode, operands[2], 1); + emit_insn (gen_<ubinary>si3 (reg, op0, op1)); + emit_insn (gen_truncsiqi2 (operands[0], reg)); + DONE; +}) + +(define_expand "<sunary>qi2" + [(set (match_operand:QI 0 "nvptx_register_operand") + (any_sunary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")))] + "" +{ + rtx reg = gen_reg_rtx (SImode); + rtx op0 = convert_modes (SImode, QImode, operands[1], 0); + emit_insn (gen_<sunary>si2 (reg, op0)); + emit_insn (gen_truncsiqi2 (operands[0], reg)); + DONE; +}) + +(define_expand "<uunary>qi2" + [(set (match_operand:QI 0 
"nvptx_register_operand") + (any_uunary:QI (match_operand:QI 1 "nvptx_nonmemory_operand")))] + "" +{ + rtx reg = gen_reg_rtx (SImode); + rtx op0 = convert_modes (SImode, QImode, operands[1], 1); + emit_insn (gen_<uunary>si2 (reg, op0)); + emit_insn (gen_truncsiqi2 (operands[0], reg)); + DONE; +}) + +(define_expand "cstoreqi4" + [(set (match_operand:SI 0 "nvptx_register_operand") + (match_operator:SI 1 "nvptx_comparison_operator" + [(match_operand:QI 2 "nvptx_nonmemory_operand") + (match_operand:QI 3 "nvptx_nonmemory_operand")]))] + "" +{ + rtx reg = gen_reg_rtx (BImode); + enum rtx_code code = GET_CODE (operands[1]); + int unsignedp = unsigned_condition_p (code); + rtx op2 = convert_modes (SImode, QImode, operands[2], unsignedp); + rtx op3 = convert_modes (SImode, QImode, operands[3], unsignedp); + rtx cmp = gen_rtx_fmt_ee (code, SImode, op2, op3); + emit_insn (gen_cmpsi (reg, cmp, op2, op3)); + emit_insn (gen_setccsi_from_bi (operands[0], reg)); + DONE; +}) + +(define_insn "*ext_truncsi2_qi" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (sign_extend:SI + (truncate:QI (match_operand:SI 1 "nvptx_register_operand" "R"))))] + "" + "%.\\tcvt.s32.s8\\t%0, %1;") + +(define_insn "*zext_truncsi2_qi" + [(set (match_operand:SI 0 "nvptx_register_operand" "=R") + (zero_extend:SI + (truncate:QI (match_operand:SI 1 "nvptx_register_operand" "R"))))] + "" + "%.\\tcvt.u32.u8\\t%0, %1;") diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt index 6514dd3..c83ceb3 100644 --- a/gcc/config/nvptx/nvptx.opt +++ b/gcc/config/nvptx/nvptx.opt @@ -51,25 +51,6 @@ mgomp Target Mask(GOMP) Generate code for OpenMP offloading: enables -msoft-stack and -muniform-simt. -Enum -Name(ptx_isa) Type(int) -Known PTX ISA versions (for use with the -misa= option): - -EnumValue -Enum(ptx_isa) String(sm_30) Value(PTX_ISA_SM30) - -EnumValue -Enum(ptx_isa) String(sm_35) Value(PTX_ISA_SM35) - -EnumValue -Enum(ptx_isa) String(sm_53) Value(PTX_ISA_SM53) - -EnumValue -Enum(ptx_isa) String(sm_75) Value(PTX_ISA_SM75) - -EnumValue -Enum(ptx_isa) String(sm_80) Value(PTX_ISA_SM80) - ; Default needs to be in sync with default in ASM_SPEC in nvptx.h. misa= Target RejectNegative ToLower Joined Enum(ptx_isa) Var(ptx_isa_option) Init(PTX_ISA_SM35) @@ -83,11 +64,24 @@ EnumValue Enum(ptx_version) String(3.1) Value(PTX_VERSION_3_1) EnumValue +Enum(ptx_version) String(6.0) Value(PTX_VERSION_6_0) + +EnumValue Enum(ptx_version) String(6.3) Value(PTX_VERSION_6_3) EnumValue Enum(ptx_version) String(7.0) Value(PTX_VERSION_7_0) +EnumValue +Enum(ptx_version) String(_) Value(PTX_VERSION_default) + mptx= -Target RejectNegative ToLower Joined Enum(ptx_version) Var(ptx_version_option) Init(PTX_VERSION_3_1) +Target RejectNegative ToLower Joined Enum(ptx_version) Var(ptx_version_option) Specify the version of the ptx version to use. + +minit-regs= +Target Var(nvptx_init_regs) IntegerRange(0, 3) Joined UInteger Init(3) +Initialize ptx registers. 
+ +mptx-comment +Target Var(nvptx_comment) Init(1) Undocumented diff --git a/gcc/config/nvptx/t-nvptx b/gcc/config/nvptx/t-nvptx index b170766..8f67264 100644 --- a/gcc/config/nvptx/t-nvptx +++ b/gcc/config/nvptx/t-nvptx @@ -13,4 +13,23 @@ mkoffload$(exeext): mkoffload.o collect-utils.o libcommon-target.a $(LIBIBERTY) +$(LINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) -o $@ \ mkoffload.o collect-utils.o libcommon-target.a $(LIBIBERTY) $(LIBS) +$(srcdir)/config/nvptx/nvptx.h: $(srcdir)/config/nvptx/nvptx-gen.h +$(srcdir)/config/nvptx/nvptx-gen.h: s-nvptx-gen-h; @true +s-nvptx-gen-h: $(srcdir)/config/nvptx/nvptx-sm.def + $(SHELL) $(srcdir)/config/nvptx/gen-h.sh "$(srcdir)/config/nvptx" \ + > tmp-nvptx-gen.h + $(SHELL) $(srcdir)/../move-if-change \ + tmp-nvptx-gen.h $(srcdir)/config/nvptx/nvptx-gen.h + $(STAMP) s-nvptx-gen-h + +$(srcdir)/config/nvptx/nvptx-gen.opt: s-nvptx-gen-opt; @true +s-nvptx-gen-opt: $(srcdir)/config/nvptx/nvptx-sm.def + $(SHELL) $(srcdir)/config/nvptx/gen-opt.sh "$(srcdir)/config/nvptx" \ + > tmp-nvptx-gen.opt + $(SHELL) $(srcdir)/../move-if-change \ + tmp-nvptx-gen.opt $(srcdir)/config/nvptx/nvptx-gen.opt + $(STAMP) s-nvptx-gen-opt + MULTILIB_OPTIONS = mgomp + +MULTILIB_EXTRA_OPTS = misa=sm_30 mptx=3.1 diff --git a/gcc/config/nvptx/t-omp-device b/gcc/config/nvptx/t-omp-device index 8765d9f..c2b28a4 100644 --- a/gcc/config/nvptx/t-omp-device +++ b/gcc/config/nvptx/t-omp-device @@ -1,4 +1,3 @@ -omp-device-properties-nvptx: $(srcdir)/config/nvptx/nvptx.cc - echo kind: gpu > $@ - echo arch: nvptx >> $@ - echo isa: sm_30 sm_35 >> $@ +omp-device-properties-nvptx: $(srcdir)/config/nvptx/nvptx-sm.def + $(SHELL) $(srcdir)/config/nvptx/gen-omp-device-properties.sh \ + "$(srcdir)/config/nvptx" > $@ diff --git a/gcc/config/or1k/linux.h b/gcc/config/or1k/linux.h index 52909af..80f77c7 100644 --- a/gcc/config/or1k/linux.h +++ b/gcc/config/or1k/linux.h @@ -32,6 +32,8 @@ #undef MUSL_DYNAMIC_LINKER #define MUSL_DYNAMIC_LINKER "/lib/ld-musl-or1k.so.1" +#define CPP_SPEC "%{pthread:-D_REENTRANT}" + #undef LINK_SPEC #define LINK_SPEC "%{h*} \ %{static:-Bstatic} \ diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h index 6956684..8a4d2cf 100644 --- a/gcc/config/riscv/riscv.h +++ b/gcc/config/riscv/riscv.h @@ -60,6 +60,7 @@ extern const char *riscv_default_mtune (int argc, const char **argv); --with-arch is ignored if -march or -mcpu is specified. --with-abi is ignored if -mabi is specified. --with-tune is ignored if -mtune or -mcpu is specified. + --with-isa-spec is ignored if -misa-spec is specified. But using default -march/-mtune value if -mcpu don't have valid option. 
*/ #define OPTION_DEFAULT_SPECS \ @@ -70,6 +71,7 @@ extern const char *riscv_default_mtune (int argc, const char **argv); " %{!mcpu=*:-march=%(VALUE)}" \ " %{mcpu=*:%:riscv_expand_arch_from_cpu(%* %(VALUE))}}" }, \ {"abi", "%{!mabi=*:-mabi=%(VALUE)}" }, \ + {"isa_spec", "%{!misa-spec=*:-misa-spec=%(VALUE)}" }, \ #ifdef IN_LIBGCC2 #undef TARGET_64BIT diff --git a/gcc/config/riscv/t-riscv b/gcc/config/riscv/t-riscv index 096d70e..19736b3 100644 --- a/gcc/config/riscv/t-riscv +++ b/gcc/config/riscv/t-riscv @@ -6,7 +6,7 @@ riscv-builtins.o: $(srcdir)/config/riscv/riscv-builtins.cc $(CONFIG_H) \ $(srcdir)/config/riscv/riscv-builtins.cc riscv-sr.o: $(srcdir)/config/riscv/riscv-sr.cc $(CONFIG_H) \ - $(SYSTEM_H) + $(SYSTEM_H) $(TM_H) $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/riscv/riscv-sr.cc diff --git a/gcc/config/rs6000/aix.h b/gcc/config/rs6000/aix.h index eb7a0c0..ad3238b 100644 --- a/gcc/config/rs6000/aix.h +++ b/gcc/config/rs6000/aix.h @@ -23,7 +23,6 @@ #define DEFAULT_ABI ABI_AIX #undef TARGET_AIX #define TARGET_AIX 1 -#define OPTION_GLIBC 0 /* Linux64.h wants to redefine TARGET_AIX based on -m64, but it can't be used in the #if conditional in options-default.h, so provide another macro. */ diff --git a/gcc/config/rs6000/bmi2intrin.h b/gcc/config/rs6000/bmi2intrin.h index f2d7eb5..b7a7ded 100644 --- a/gcc/config/rs6000/bmi2intrin.h +++ b/gcc/config/rs6000/bmi2intrin.h @@ -77,39 +77,39 @@ extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _pdep_u64 (unsigned long long __X, unsigned long long __M) { - unsigned long result = 0x0UL; - const unsigned long mask = 0x8000000000000000UL; - unsigned long m = __M; - unsigned long c, t; - unsigned long p; + unsigned long __result = 0x0UL; + const unsigned long __mask = 0x8000000000000000UL; + unsigned long __m = __M; + unsigned long __c, __t; + unsigned long __p; /* The pop-count of the mask gives the number of the bits from source to process. This is also needed to shift bits from the source into the correct position for the result. */ - p = 64 - __builtin_popcountl (__M); + __p = 64 - __builtin_popcountl (__M); /* The loop is for the number of '1' bits in the mask and clearing each mask bit as it is processed. */ - while (m != 0) + while (__m != 0) { - c = __builtin_clzl (m); - t = __X << (p - c); - m ^= (mask >> c); - result |= (t & (mask >> c)); - p++; + __c = __builtin_clzl (__m); + __t = __X << (__p - __c); + __m ^= (__mask >> __c); + __result |= (__t & (__mask >> __c)); + __p++; } - return (result); + return __result; } extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _pext_u64 (unsigned long long __X, unsigned long long __M) { - unsigned long p = 0x4040404040404040UL; // initial bit permute control - const unsigned long mask = 0x8000000000000000UL; - unsigned long m = __M; - unsigned long c; - unsigned long result; + unsigned long __p = 0x4040404040404040UL; // initial bit permute control + const unsigned long __mask = 0x8000000000000000UL; + unsigned long __m = __M; + unsigned long __c; + unsigned long __result; /* if the mask is constant and selects 8 bits or less we can use the Power8 Bit permute instruction. */ @@ -118,35 +118,35 @@ _pext_u64 (unsigned long long __X, unsigned long long __M) /* Also if the pext mask is constant, then the popcount is constant, we can evaluate the following loop at compile time and use a constant bit permute vector. 
*/ - long i; - for (i = 0; i < __builtin_popcountl (__M); i++) + long __i; + for (__i = 0; __i < __builtin_popcountl (__M); __i++) { - c = __builtin_clzl (m); - p = (p << 8) | c; - m ^= (mask >> c); + __c = __builtin_clzl (__m); + __p = (__p << 8) | __c; + __m ^= (__mask >> __c); } - result = __builtin_bpermd (p, __X); + __result = __builtin_bpermd (__p, __X); } else { - p = 64 - __builtin_popcountl (__M); - result = 0; + __p = 64 - __builtin_popcountl (__M); + __result = 0; /* We could a use a for loop here, but that combined with -funroll-loops can expand to a lot of code. The while loop avoids unrolling and the compiler commons the xor from clearing the mask bit with the (m != 0) test. The result is a more compact loop setup and body. */ - while (m != 0) + while (__m != 0) { - unsigned long t; - c = __builtin_clzl (m); - t = (__X & (mask >> c)) >> (p - c); - m ^= (mask >> c); - result |= (t); - p++; + unsigned long __t; + __c = __builtin_clzl (__m); + __t = (__X & (__mask >> __c)) >> (__p - __c); + __m ^= (__mask >> __c); + __result |= (__t); + __p++; } } - return (result); + return __result; } /* these 32-bit implementations depend on 64-bit pdep/pext diff --git a/gcc/config/rs6000/darwin.md b/gcc/config/rs6000/darwin.md index 8443585..e73d59e 100644 --- a/gcc/config/rs6000/darwin.md +++ b/gcc/config/rs6000/darwin.md @@ -121,21 +121,32 @@ You should have received a copy of the GNU General Public License stw %0,lo16(%2)(%1)" [(set_attr "type" "store")]) -;; 64-bit MachO load/store support - ;; Mach-O PIC. (define_insn "@macho_high_<mode>" [(set (match_operand:P 0 "gpc_reg_operand" "=b*r") (high:P (match_operand 1 "" "")))] - "TARGET_MACHO && (DEFAULT_ABI == ABI_DARWIN)" + "TARGET_MACHO && (DEFAULT_ABI == ABI_DARWIN) && !flag_pic" "lis %0,ha16(%1)") (define_insn "@macho_low_<mode>" [(set (match_operand:P 0 "gpc_reg_operand" "=r") (lo_sum:P (match_operand:P 1 "gpc_reg_operand" "b") (match_operand 2 "" "")))] - "TARGET_MACHO && (DEFAULT_ABI == ABI_DARWIN)" + "TARGET_MACHO && (DEFAULT_ABI == ABI_DARWIN) && !flag_pic" + "la %0,lo16(%2)(%1)") + +(define_insn "@machopic_high_<mode>" + [(set (match_operand:P 0 "gpc_reg_operand" "=b*r") + (high:P (match_operand 1 "macho_pic_address" "")))] + "TARGET_MACHO && flag_pic" + "lis %0,ha16(%1)") + +(define_insn "@machopic_low_<mode>" + [(set (match_operand:P 0 "gpc_reg_operand" "=r") + (lo_sum:P (match_operand:P 1 "gpc_reg_operand" "b") + (match_operand 2 "macho_pic_address" "")))] + "TARGET_MACHO && flag_pic" "la %0,lo16(%2)(%1)") (define_split diff --git a/gcc/config/rs6000/emmintrin.h b/gcc/config/rs6000/emmintrin.h index 71abcca..8329679 100644 --- a/gcc/config/rs6000/emmintrin.h +++ b/gcc/config/rs6000/emmintrin.h @@ -141,9 +141,9 @@ _mm_setzero_pd (void) extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_move_sd (__m128d __A, __m128d __B) { - __v2df result = (__v2df) __A; - result [0] = ((__v2df) __B)[0]; - return (__m128d) result; + __v2df __result = (__v2df) __A; + __result [0] = ((__v2df) __B)[0]; + return (__m128d) __result; } /* Load two DPFP values from P. The address must be 16-byte aligned. 
*/ @@ -329,9 +329,9 @@ _mm_sqrt_pd (__m128d __A) extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sqrt_sd (__m128d __A, __m128d __B) { - __v2df c; - c = vec_sqrt ((__v2df) _mm_set1_pd (__B[0])); - return (__m128d) _mm_setr_pd (c[0], __A[1]); + __v2df __c; + __c = vec_sqrt ((__v2df) _mm_set1_pd (__B[0])); + return (__m128d) _mm_setr_pd (__c[0], __A[1]); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -343,11 +343,11 @@ _mm_min_pd (__m128d __A, __m128d __B) extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_sd (__m128d __A, __m128d __B) { - __v2df a, b, c; - a = vec_splats (__A[0]); - b = vec_splats (__B[0]); - c = vec_min (a, b); - return (__m128d) _mm_setr_pd (c[0], __A[1]); + __v2df __a, __b, __c; + __a = vec_splats (__A[0]); + __b = vec_splats (__B[0]); + __c = vec_min (__a, __b); + return (__m128d) _mm_setr_pd (__c[0], __A[1]); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -359,11 +359,11 @@ _mm_max_pd (__m128d __A, __m128d __B) extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_sd (__m128d __A, __m128d __B) { - __v2df a, b, c; - a = vec_splats (__A[0]); - b = vec_splats (__B[0]); - c = vec_max (a, b); - return (__m128d) _mm_setr_pd (c[0], __A[1]); + __v2df __a, __b, __c; + __a = vec_splats (__A[0]); + __b = vec_splats (__B[0]); + __c = vec_max (__a, __b); + return (__m128d) _mm_setr_pd (__c[0], __A[1]); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -399,8 +399,8 @@ _mm_cmpge_pd (__m128d __A, __m128d __B) extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpneq_pd (__m128d __A, __m128d __B) { - __v2df temp = (__v2df) vec_cmpeq ((__v2df) __A, (__v2df)__B); - return ((__m128d)vec_nor (temp, temp)); + __v2df __temp = (__v2df) vec_cmpeq ((__v2df) __A, (__v2df)__B); + return ((__m128d)vec_nor (__temp, __temp)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -430,163 +430,163 @@ _mm_cmpnge_pd (__m128d __A, __m128d __B) extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpord_pd (__m128d __A, __m128d __B) { - __v2du c, d; + __v2du __c, __d; /* Compare against self will return false (0's) if NAN. */ - c = (__v2du)vec_cmpeq (__A, __A); - d = (__v2du)vec_cmpeq (__B, __B); + __c = (__v2du)vec_cmpeq (__A, __A); + __d = (__v2du)vec_cmpeq (__B, __B); /* A != NAN and B != NAN. */ - return ((__m128d)vec_and(c, d)); + return ((__m128d)vec_and(__c, __d)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpunord_pd (__m128d __A, __m128d __B) { #if _ARCH_PWR8 - __v2du c, d; + __v2du __c, __d; /* Compare against self will return false (0's) if NAN. */ - c = (__v2du)vec_cmpeq ((__v2df)__A, (__v2df)__A); - d = (__v2du)vec_cmpeq ((__v2df)__B, (__v2df)__B); + __c = (__v2du)vec_cmpeq ((__v2df)__A, (__v2df)__A); + __d = (__v2du)vec_cmpeq ((__v2df)__B, (__v2df)__B); /* A == NAN OR B == NAN converts too: NOT(A != NAN) OR NOT(B != NAN). */ - c = vec_nor (c, c); - return ((__m128d)vec_orc(c, d)); + __c = vec_nor (__c, __c); + return ((__m128d)vec_orc(__c, __d)); #else - __v2du c, d; + __v2du __c, __d; /* Compare against self will return false (0's) if NAN. 
*/ - c = (__v2du)vec_cmpeq ((__v2df)__A, (__v2df)__A); - d = (__v2du)vec_cmpeq ((__v2df)__B, (__v2df)__B); + __c = (__v2du)vec_cmpeq ((__v2df)__A, (__v2df)__A); + __d = (__v2du)vec_cmpeq ((__v2df)__B, (__v2df)__B); /* Convert the true ('1's) is NAN. */ - c = vec_nor (c, c); - d = vec_nor (d, d); - return ((__m128d)vec_or(c, d)); + __c = vec_nor (__c, __c); + __d = vec_nor (__d, __d); + return ((__m128d)vec_or(__c, __d)); #endif } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_sd(__m128d __A, __m128d __B) { - __v2df a, b, c; + __v2df __a, __b, __c; /* PowerISA VSX does not allow partial (for just lower double) results. So to insure we don't generate spurious exceptions (from the upper double values) we splat the lower double before we do the operation. */ - a = vec_splats (__A[0]); - b = vec_splats (__B[0]); - c = (__v2df) vec_cmpeq(a, b); + __a = vec_splats (__A[0]); + __b = vec_splats (__B[0]); + __c = (__v2df) vec_cmpeq(__a, __b); /* Then we merge the lower double result with the original upper double from __A. */ - return (__m128d) _mm_setr_pd (c[0], __A[1]); + return (__m128d) _mm_setr_pd (__c[0], __A[1]); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmplt_sd (__m128d __A, __m128d __B) { - __v2df a, b, c; - a = vec_splats (__A[0]); - b = vec_splats (__B[0]); - c = (__v2df) vec_cmplt(a, b); - return (__m128d) _mm_setr_pd (c[0], __A[1]); + __v2df __a, __b, __c; + __a = vec_splats (__A[0]); + __b = vec_splats (__B[0]); + __c = (__v2df) vec_cmplt(__a, __b); + return (__m128d) _mm_setr_pd (__c[0], __A[1]); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmple_sd (__m128d __A, __m128d __B) { - __v2df a, b, c; - a = vec_splats (__A[0]); - b = vec_splats (__B[0]); - c = (__v2df) vec_cmple(a, b); - return (__m128d) _mm_setr_pd (c[0], __A[1]); + __v2df __a, __b, __c; + __a = vec_splats (__A[0]); + __b = vec_splats (__B[0]); + __c = (__v2df) vec_cmple(__a, __b); + return (__m128d) _mm_setr_pd (__c[0], __A[1]); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_sd (__m128d __A, __m128d __B) { - __v2df a, b, c; - a = vec_splats (__A[0]); - b = vec_splats (__B[0]); - c = (__v2df) vec_cmpgt(a, b); - return (__m128d) _mm_setr_pd (c[0], __A[1]); + __v2df __a, __b, __c; + __a = vec_splats (__A[0]); + __b = vec_splats (__B[0]); + __c = (__v2df) vec_cmpgt(__a, __b); + return (__m128d) _mm_setr_pd (__c[0], __A[1]); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpge_sd (__m128d __A, __m128d __B) { - __v2df a, b, c; - a = vec_splats (__A[0]); - b = vec_splats (__B[0]); - c = (__v2df) vec_cmpge(a, b); - return (__m128d) _mm_setr_pd (c[0], __A[1]); + __v2df __a, __b, __c; + __a = vec_splats (__A[0]); + __b = vec_splats (__B[0]); + __c = (__v2df) vec_cmpge(__a, __b); + return (__m128d) _mm_setr_pd (__c[0], __A[1]); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpneq_sd (__m128d __A, __m128d __B) { - __v2df a, b, c; - a = vec_splats (__A[0]); - b = vec_splats (__B[0]); - c = (__v2df) vec_cmpeq(a, b); - c = vec_nor (c, c); - return (__m128d) _mm_setr_pd (c[0], __A[1]); + __v2df __a, __b, __c; + __a = vec_splats (__A[0]); + __b = vec_splats (__B[0]); + __c = (__v2df) vec_cmpeq(__a, __b); + __c = vec_nor (__c, __c); + return (__m128d) _mm_setr_pd (__c[0], __A[1]); } extern __inline __m128d 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnlt_sd (__m128d __A, __m128d __B) { - __v2df a, b, c; - a = vec_splats (__A[0]); - b = vec_splats (__B[0]); + __v2df __a, __b, __c; + __a = vec_splats (__A[0]); + __b = vec_splats (__B[0]); /* Not less than is just greater than or equal. */ - c = (__v2df) vec_cmpge(a, b); - return (__m128d) _mm_setr_pd (c[0], __A[1]); + __c = (__v2df) vec_cmpge(__a, __b); + return (__m128d) _mm_setr_pd (__c[0], __A[1]); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnle_sd (__m128d __A, __m128d __B) { - __v2df a, b, c; - a = vec_splats (__A[0]); - b = vec_splats (__B[0]); + __v2df __a, __b, __c; + __a = vec_splats (__A[0]); + __b = vec_splats (__B[0]); /* Not less than or equal is just greater than. */ - c = (__v2df) vec_cmpge(a, b); - return (__m128d) _mm_setr_pd (c[0], __A[1]); + __c = (__v2df) vec_cmpge(__a, __b); + return (__m128d) _mm_setr_pd (__c[0], __A[1]); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpngt_sd (__m128d __A, __m128d __B) { - __v2df a, b, c; - a = vec_splats (__A[0]); - b = vec_splats (__B[0]); + __v2df __a, __b, __c; + __a = vec_splats (__A[0]); + __b = vec_splats (__B[0]); /* Not greater than is just less than or equal. */ - c = (__v2df) vec_cmple(a, b); - return (__m128d) _mm_setr_pd (c[0], __A[1]); + __c = (__v2df) vec_cmple(__a, __b); + return (__m128d) _mm_setr_pd (__c[0], __A[1]); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnge_sd (__m128d __A, __m128d __B) { - __v2df a, b, c; - a = vec_splats (__A[0]); - b = vec_splats (__B[0]); + __v2df __a, __b, __c; + __a = vec_splats (__A[0]); + __b = vec_splats (__B[0]); /* Not greater than or equal is just less than. */ - c = (__v2df) vec_cmplt(a, b); - return (__m128d) _mm_setr_pd (c[0], __A[1]); + __c = (__v2df) vec_cmplt(__a, __b); + return (__m128d) _mm_setr_pd (__c[0], __A[1]); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpord_sd (__m128d __A, __m128d __B) { - __v2df r; - r = (__v2df)_mm_cmpord_pd (vec_splats (__A[0]), vec_splats (__B[0])); - return (__m128d) _mm_setr_pd (r[0], ((__v2df)__A)[1]); + __v2df __r; + __r = (__v2df)_mm_cmpord_pd (vec_splats (__A[0]), vec_splats (__B[0])); + return (__m128d) _mm_setr_pd (__r[0], ((__v2df)__A)[1]); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpunord_sd (__m128d __A, __m128d __B) { - __v2df r; - r = _mm_cmpunord_pd (vec_splats (__A[0]), vec_splats (__B[0])); - return (__m128d) _mm_setr_pd (r[0], __A[1]); + __v2df __r; + __r = _mm_cmpunord_pd (vec_splats (__A[0]), vec_splats (__B[0])); + return (__m128d) _mm_setr_pd (__r[0], __A[1]); } /* FIXME @@ -845,12 +845,12 @@ _mm_setzero_si128 (void) extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtepi32_pd (__m128i __A) { - __v2di val; + __v2di __val; /* For LE need to generate Vector Unpack Low Signed Word. Which is generated from unpackh. 
*/ - val = (__v2di)vec_unpackh ((__v4si)__A); + __val = (__v2di)vec_unpackh ((__v4si)__A); - return (__m128d)vec_ctf (val, 0); + return (__m128d)vec_ctf (__val, 0); } #endif @@ -863,116 +863,116 @@ _mm_cvtepi32_ps (__m128i __A) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtpd_epi32 (__m128d __A) { - __v2df rounded = vec_rint (__A); - __v4si result, temp; - const __v4si vzero = + __v2df __rounded = vec_rint (__A); + __v4si __result, __temp; + const __v4si __vzero = { 0, 0, 0, 0 }; /* VSX Vector truncate Double-Precision to integer and Convert to Signed Integer Word format with Saturate. */ __asm__( "xvcvdpsxws %x0,%x1" - : "=wa" (temp) - : "wa" (rounded) + : "=wa" (__temp) + : "wa" (__rounded) : ); #ifdef _ARCH_PWR8 #ifdef __LITTLE_ENDIAN__ - temp = vec_mergeo (temp, temp); + __temp = vec_mergeo (__temp, __temp); #else - temp = vec_mergee (temp, temp); + __temp = vec_mergee (__temp, __temp); #endif - result = (__v4si) vec_vpkudum ((__vector long long) temp, - (__vector long long) vzero); + __result = (__v4si) vec_vpkudum ((__vector long long) __temp, + (__vector long long) __vzero); #else { - const __v16qu pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, + const __v16qu __pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f }; - result = (__v4si) vec_perm ((__v16qu) temp, (__v16qu) vzero, pkperm); + __result = (__v4si) vec_perm ((__v16qu) __temp, (__v16qu) __vzero, __pkperm); } #endif - return (__m128i) result; + return (__m128i) __result; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtpd_pi32 (__m128d __A) { - __m128i result = _mm_cvtpd_epi32(__A); + __m128i __result = _mm_cvtpd_epi32(__A); - return (__m64) result[0]; + return (__m64) __result[0]; } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtpd_ps (__m128d __A) { - __v4sf result; - __v4si temp; - const __v4si vzero = { 0, 0, 0, 0 }; + __v4sf __result; + __v4si __temp; + const __v4si __vzero = { 0, 0, 0, 0 }; __asm__( "xvcvdpsp %x0,%x1" - : "=wa" (temp) + : "=wa" (__temp) : "wa" (__A) : ); #ifdef _ARCH_PWR8 #ifdef __LITTLE_ENDIAN__ - temp = vec_mergeo (temp, temp); + __temp = vec_mergeo (__temp, __temp); #else - temp = vec_mergee (temp, temp); + __temp = vec_mergee (__temp, __temp); #endif - result = (__v4sf) vec_vpkudum ((__vector long long) temp, - (__vector long long) vzero); + __result = (__v4sf) vec_vpkudum ((__vector long long) __temp, + (__vector long long) __vzero); #else { - const __v16qu pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, + const __v16qu __pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f }; - result = (__v4sf) vec_perm ((__v16qu) temp, (__v16qu) vzero, pkperm); + __result = (__v4sf) vec_perm ((__v16qu) __temp, (__v16qu) __vzero, __pkperm); } #endif - return ((__m128)result); + return ((__m128)__result); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvttpd_epi32 (__m128d __A) { - __v4si result; - __v4si temp; - const __v4si vzero = { 0, 0, 0, 0 }; + __v4si __result; + __v4si __temp; + const __v4si __vzero = { 0, 0, 0, 0 }; /* VSX Vector truncate Double-Precision to integer and Convert to Signed Integer Word format with Saturate. 
*/ __asm__( "xvcvdpsxws %x0,%x1" - : "=wa" (temp) + : "=wa" (__temp) : "wa" (__A) : ); #ifdef _ARCH_PWR8 #ifdef __LITTLE_ENDIAN__ - temp = vec_mergeo (temp, temp); + __temp = vec_mergeo (__temp, __temp); #else - temp = vec_mergee (temp, temp); + __temp = vec_mergee (__temp, __temp); #endif - result = (__v4si) vec_vpkudum ((__vector long long) temp, - (__vector long long) vzero); + __result = (__v4si) vec_vpkudum ((__vector long long) __temp, + (__vector long long) __vzero); #else { - const __v16qu pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, + const __v16qu __pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f }; - result = (__v4si) vec_perm ((__v16qu) temp, (__v16qu) vzero, pkperm); + __result = (__v4si) vec_perm ((__v16qu) __temp, (__v16qu) __vzero, __pkperm); } #endif - return ((__m128i) result); + return ((__m128i) __result); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvttpd_pi32 (__m128d __A) { - __m128i result = _mm_cvttpd_epi32 (__A); + __m128i __result = _mm_cvttpd_epi32 (__A); - return (__m64) result[0]; + return (__m64) __result[0]; } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -985,35 +985,35 @@ _mm_cvtsi128_si32 (__m128i __A) extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtpi32_pd (__m64 __A) { - __v4si temp; - __v2di tmp2; - __v2df result; + __v4si __temp; + __v2di __tmp2; + __v2df __result; - temp = (__v4si)vec_splats (__A); - tmp2 = (__v2di)vec_unpackl (temp); - result = vec_ctf ((__vector signed long long) tmp2, 0); - return (__m128d)result; + __temp = (__v4si)vec_splats (__A); + __tmp2 = (__v2di)vec_unpackl (__temp); + __result = vec_ctf ((__vector signed long long) __tmp2, 0); + return (__m128d)__result; } #endif extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtps_epi32 (__m128 __A) { - __v4sf rounded; - __v4si result; + __v4sf __rounded; + __v4si __result; - rounded = vec_rint((__v4sf) __A); - result = vec_cts (rounded, 0); - return (__m128i) result; + __rounded = vec_rint((__v4sf) __A); + __result = vec_cts (__rounded, 0); + return (__m128i) __result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvttps_epi32 (__m128 __A) { - __v4si result; + __v4si __result; - result = vec_cts ((__v4sf) __A, 0); - return (__m128i) result; + __result = vec_cts ((__v4sf) __A, 0); + return (__m128i) __result; } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1025,48 +1025,48 @@ _mm_cvtps_pd (__m128 __A) #else /* Otherwise the compiler is not current and so need to generate the equivalent code. */ - __v4sf a = (__v4sf)__A; - __v4sf temp; - __v2df result; + __v4sf __a = (__v4sf)__A; + __v4sf __temp; + __v2df __result; #ifdef __LITTLE_ENDIAN__ /* The input float values are in elements {[0], [1]} but the convert instruction needs them in elements {[1], [3]}, So we use two shift left double vector word immediates to get the elements lined up. */ - temp = __builtin_vsx_xxsldwi (a, a, 3); - temp = __builtin_vsx_xxsldwi (a, temp, 2); + __temp = __builtin_vsx_xxsldwi (__a, __a, 3); + __temp = __builtin_vsx_xxsldwi (__a, __temp, 2); #else /* The input float values are in elements {[0], [1]} but the convert instruction needs them in elements {[0], [2]}, So we use two shift left double vector word immediates to get the elements lined up. 
*/ - temp = vec_vmrghw (a, a); + __temp = vec_vmrghw (__a, __a); #endif __asm__( " xvcvspdp %x0,%x1" - : "=wa" (result) - : "wa" (temp) + : "=wa" (__result) + : "wa" (__temp) : ); - return (__m128d) result; + return (__m128d) __result; #endif } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsd_si32 (__m128d __A) { - __v2df rounded = vec_rint((__v2df) __A); - int result = ((__v2df)rounded)[0]; + __v2df __rounded = vec_rint((__v2df) __A); + int __result = ((__v2df)__rounded)[0]; - return result; + return __result; } /* Intel intrinsic. */ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsd_si64 (__m128d __A) { - __v2df rounded = vec_rint ((__v2df) __A ); - long long result = ((__v2df) rounded)[0]; + __v2df __rounded = vec_rint ((__v2df) __A ); + long long __result = ((__v2df) __rounded)[0]; - return result; + return __result; } /* Microsoft intrinsic. */ @@ -1079,18 +1079,18 @@ _mm_cvtsd_si64x (__m128d __A) extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvttsd_si32 (__m128d __A) { - int result = ((__v2df)__A)[0]; + int __result = ((__v2df)__A)[0]; - return result; + return __result; } /* Intel intrinsic. */ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvttsd_si64 (__m128d __A) { - long long result = ((__v2df)__A)[0]; + long long __result = ((__v2df)__A)[0]; - return result; + return __result; } /* Microsoft intrinsic. */ @@ -1103,46 +1103,46 @@ _mm_cvttsd_si64x (__m128d __A) extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsd_ss (__m128 __A, __m128d __B) { - __v4sf result = (__v4sf)__A; + __v4sf __result = (__v4sf)__A; #ifdef __LITTLE_ENDIAN__ - __v4sf temp_s; + __v4sf __temp_s; /* Copy double element[0] to element [1] for conversion. */ - __v2df temp_b = vec_splat((__v2df)__B, 0); + __v2df __temp_b = vec_splat((__v2df)__B, 0); /* Pre-rotate __A left 3 (logically right 1) elements. */ - result = __builtin_vsx_xxsldwi (result, result, 3); + __result = __builtin_vsx_xxsldwi (__result, __result, 3); /* Convert double to single float scalar in a vector. */ __asm__( "xscvdpsp %x0,%x1" - : "=wa" (temp_s) - : "wa" (temp_b) + : "=wa" (__temp_s) + : "wa" (__temp_b) : ); /* Shift the resulting scalar into vector element [0]. */ - result = __builtin_vsx_xxsldwi (result, temp_s, 1); + __result = __builtin_vsx_xxsldwi (__result, __temp_s, 1); #else - result [0] = ((__v2df)__B)[0]; + __result [0] = ((__v2df)__B)[0]; #endif - return (__m128) result; + return (__m128) __result; } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi32_sd (__m128d __A, int __B) { - __v2df result = (__v2df)__A; - double db = __B; - result [0] = db; - return (__m128d)result; + __v2df __result = (__v2df)__A; + double __db = __B; + __result [0] = __db; + return (__m128d)__result; } /* Intel intrinsic. */ extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi64_sd (__m128d __A, long long __B) { - __v2df result = (__v2df)__A; - double db = __B; - result [0] = db; - return (__m128d)result; + __v2df __result = (__v2df)__A; + double __db = __B; + __result [0] = __db; + return (__m128d)__result; } /* Microsoft intrinsic. */ @@ -1157,45 +1157,45 @@ _mm_cvtss_sd (__m128d __A, __m128 __B) { #ifdef __LITTLE_ENDIAN__ /* Use splat to move element [0] into position for the convert. 
*/ - __v4sf temp = vec_splat ((__v4sf)__B, 0); - __v2df res; + __v4sf __temp = vec_splat ((__v4sf)__B, 0); + __v2df __res; /* Convert single float scalar to double in a vector. */ __asm__( "xscvspdp %x0,%x1" - : "=wa" (res) - : "wa" (temp) + : "=wa" (__res) + : "wa" (__temp) : ); - return (__m128d) vec_mergel (res, (__v2df)__A); + return (__m128d) vec_mergel (__res, (__v2df)__A); #else - __v2df res = (__v2df)__A; - res [0] = ((__v4sf)__B) [0]; - return (__m128d) res; + __v2df __res = (__v2df)__A; + __res [0] = ((__v4sf)__B) [0]; + return (__m128d) __res; #endif } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask) { - __vector double result; - const int litmsk = __mask & 0x3; + __vector double __result; + const int __litmsk = __mask & 0x3; - if (litmsk == 0) - result = vec_mergeh (__A, __B); + if (__litmsk == 0) + __result = vec_mergeh (__A, __B); #if __GNUC__ < 6 - else if (litmsk == 1) - result = vec_xxpermdi (__B, __A, 2); - else if (litmsk == 2) - result = vec_xxpermdi (__B, __A, 1); + else if (__litmsk == 1) + __result = vec_xxpermdi (__B, __A, 2); + else if (__litmsk == 2) + __result = vec_xxpermdi (__B, __A, 1); #else - else if (litmsk == 1) - result = vec_xxpermdi (__A, __B, 2); - else if (litmsk == 2) - result = vec_xxpermdi (__A, __B, 1); + else if (__litmsk == 1) + __result = vec_xxpermdi (__A, __B, 2); + else if (__litmsk == 2) + __result = vec_xxpermdi (__A, __B, 1); #endif else - result = vec_mergel (__A, __B); + __result = vec_mergel (__A, __B); - return result; + return __result; } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1213,17 +1213,17 @@ _mm_unpacklo_pd (__m128d __A, __m128d __B) extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadh_pd (__m128d __A, double const *__B) { - __v2df result = (__v2df)__A; - result [1] = *__B; - return (__m128d)result; + __v2df __result = (__v2df)__A; + __result [1] = *__B; + return (__m128d)__result; } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadl_pd (__m128d __A, double const *__B) { - __v2df result = (__v2df)__A; - result [0] = *__B; - return (__m128d)result; + __v2df __result = (__v2df)__A; + __result [0] = *__B; + return (__m128d)__result; } #ifdef _ARCH_PWR8 @@ -1236,8 +1236,8 @@ _mm_movemask_pd (__m128d __A) #ifdef _ARCH_PWR10 return vec_extractm ((__v2du) __A); #else - __vector unsigned long long result; - static const __vector unsigned int perm_mask = + __vector unsigned long long __result; + static const __vector unsigned int __perm_mask = { #ifdef __LITTLE_ENDIAN__ 0x80800040, 0x80808080, 0x80808080, 0x80808080 @@ -1246,14 +1246,14 @@ _mm_movemask_pd (__m128d __A) #endif }; - result = ((__vector unsigned long long) + __result = ((__vector unsigned long long) vec_vbpermq ((__vector unsigned char) __A, - (__vector unsigned char) perm_mask)); + (__vector unsigned char) __perm_mask)); #ifdef __LITTLE_ENDIAN__ - return result[1]; + return __result[1]; #else - return result[0]; + return __result[0]; #endif #endif /* !_ARCH_PWR10 */ } @@ -1426,17 +1426,17 @@ _mm_subs_epu16 (__m128i __A, __m128i __B) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_madd_epi16 (__m128i __A, __m128i __B) { - __vector signed int zero = {0, 0, 0, 0}; + __vector signed int __zero = {0, 0, 0, 0}; - return (__m128i) vec_vmsumshm ((__v8hi)__A, (__v8hi)__B, zero); + 
return (__m128i) vec_vmsumshm ((__v8hi)__A, (__v8hi)__B, __zero); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mulhi_epi16 (__m128i __A, __m128i __B) { - __vector signed int w0, w1; + __vector signed int __w0, __w1; - __vector unsigned char xform1 = { + __vector unsigned char __xform1 = { #ifdef __LITTLE_ENDIAN__ 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, 0x0A, 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F @@ -1446,9 +1446,9 @@ _mm_mulhi_epi16 (__m128i __A, __m128i __B) #endif }; - w0 = vec_vmulesh ((__v8hi)__A, (__v8hi)__B); - w1 = vec_vmulosh ((__v8hi)__A, (__v8hi)__B); - return (__m128i) vec_perm (w0, w1, xform1); + __w0 = vec_vmulesh ((__v8hi)__A, (__v8hi)__B); + __w1 = vec_vmulosh ((__v8hi)__A, (__v8hi)__B); + return (__m128i) vec_perm (__w0, __w1, __xform1); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1460,10 +1460,10 @@ _mm_mullo_epi16 (__m128i __A, __m128i __B) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mul_su32 (__m64 __A, __m64 __B) { - unsigned int a = __A; - unsigned int b = __B; + unsigned int __a = __A; + unsigned int __b = __B; - return ((__m64)a * (__m64)b); + return ((__m64)__a * (__m64)__b); } #ifdef _ARCH_PWR8 @@ -1471,24 +1471,24 @@ extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __arti _mm_mul_epu32 (__m128i __A, __m128i __B) { #if __GNUC__ < 8 - __v2du result; + __v2du __result; #ifdef __LITTLE_ENDIAN__ /* VMX Vector Multiply Odd Unsigned Word. */ __asm__( "vmulouw %0,%1,%2" - : "=v" (result) + : "=v" (__result) : "v" (__A), "v" (__B) : ); #else /* VMX Vector Multiply Even Unsigned Word. */ __asm__( "vmuleuw %0,%1,%2" - : "=v" (result) + : "=v" (__result) : "v" (__A), "v" (__B) : ); #endif - return (__m128i) result; + return (__m128i) __result; #else return (__m128i) vec_mule ((__v4su)__A, (__v4su)__B); #endif @@ -1498,122 +1498,122 @@ _mm_mul_epu32 (__m128i __A, __m128i __B) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_slli_epi16 (__m128i __A, int __B) { - __v8hu lshift; - __v8hi result = { 0, 0, 0, 0, 0, 0, 0, 0 }; + __v8hu __lshift; + __v8hi __result = { 0, 0, 0, 0, 0, 0, 0, 0 }; if (__B >= 0 && __B < 16) { if (__builtin_constant_p(__B)) - lshift = (__v8hu) vec_splat_s16(__B); + __lshift = (__v8hu) vec_splat_s16(__B); else - lshift = vec_splats ((unsigned short) __B); + __lshift = vec_splats ((unsigned short) __B); - result = vec_sl ((__v8hi) __A, lshift); + __result = vec_sl ((__v8hi) __A, __lshift); } - return (__m128i) result; + return (__m128i) __result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_slli_epi32 (__m128i __A, int __B) { - __v4su lshift; - __v4si result = { 0, 0, 0, 0 }; + __v4su __lshift; + __v4si __result = { 0, 0, 0, 0 }; if (__B >= 0 && __B < 32) { if (__builtin_constant_p(__B) && __B < 16) - lshift = (__v4su) vec_splat_s32(__B); + __lshift = (__v4su) vec_splat_s32(__B); else - lshift = vec_splats ((unsigned int) __B); + __lshift = vec_splats ((unsigned int) __B); - result = vec_sl ((__v4si) __A, lshift); + __result = vec_sl ((__v4si) __A, __lshift); } - return (__m128i) result; + return (__m128i) __result; } #ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_slli_epi64 (__m128i __A, int __B) { - __v2du lshift; - __v2di result = { 0, 0 }; + __v2du __lshift; + __v2di __result = { 0, 0 }; if (__B >= 0 && __B 
< 64) { if (__builtin_constant_p(__B) && __B < 16) - lshift = (__v2du) vec_splat_s32(__B); + __lshift = (__v2du) vec_splat_s32(__B); else - lshift = (__v2du) vec_splats ((unsigned int) __B); + __lshift = (__v2du) vec_splats ((unsigned int) __B); - result = vec_sl ((__v2di) __A, lshift); + __result = vec_sl ((__v2di) __A, __lshift); } - return (__m128i) result; + return (__m128i) __result; } #endif extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srai_epi16 (__m128i __A, int __B) { - __v8hu rshift = { 15, 15, 15, 15, 15, 15, 15, 15 }; - __v8hi result; + __v8hu __rshift = { 15, 15, 15, 15, 15, 15, 15, 15 }; + __v8hi __result; if (__B < 16) { if (__builtin_constant_p(__B)) - rshift = (__v8hu) vec_splat_s16(__B); + __rshift = (__v8hu) vec_splat_s16(__B); else - rshift = vec_splats ((unsigned short) __B); + __rshift = vec_splats ((unsigned short) __B); } - result = vec_sra ((__v8hi) __A, rshift); + __result = vec_sra ((__v8hi) __A, __rshift); - return (__m128i) result; + return (__m128i) __result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srai_epi32 (__m128i __A, int __B) { - __v4su rshift = { 31, 31, 31, 31 }; - __v4si result; + __v4su __rshift = { 31, 31, 31, 31 }; + __v4si __result; if (__B < 32) { if (__builtin_constant_p(__B)) { if (__B < 16) - rshift = (__v4su) vec_splat_s32(__B); + __rshift = (__v4su) vec_splat_s32(__B); else - rshift = (__v4su) vec_splats((unsigned int)__B); + __rshift = (__v4su) vec_splats((unsigned int)__B); } else - rshift = vec_splats ((unsigned int) __B); + __rshift = vec_splats ((unsigned int) __B); } - result = vec_sra ((__v4si) __A, rshift); + __result = vec_sra ((__v4si) __A, __rshift); - return (__m128i) result; + return (__m128i) __result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_bslli_si128 (__m128i __A, const int __N) { - __v16qu result; - const __v16qu zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + __v16qu __result; + const __v16qu __zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; if (__N < 16) - result = vec_sld ((__v16qu) __A, zeros, __N); + __result = vec_sld ((__v16qu) __A, __zeros, __N); else - result = zeros; + __result = __zeros; - return (__m128i) result; + return (__m128i) __result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_bsrli_si128 (__m128i __A, const int __N) { - __v16qu result; - const __v16qu zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + __v16qu __result; + const __v16qu __zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; if (__N < 16) #ifdef __LITTLE_ENDIAN__ @@ -1621,21 +1621,21 @@ _mm_bsrli_si128 (__m128i __A, const int __N) /* Would like to use Vector Shift Left Double by Octet Immediate here to use the immediate form and avoid load of __N * 8 value into a separate VR. 
*/ - result = vec_sld (zeros, (__v16qu) __A, (16 - __N)); + __result = vec_sld (__zeros, (__v16qu) __A, (16 - __N)); else #endif { - __v16qu shift = vec_splats((unsigned char)(__N*8)); + __v16qu __shift = vec_splats((unsigned char)(__N*8)); #ifdef __LITTLE_ENDIAN__ - result = vec_sro ((__v16qu)__A, shift); + __result = vec_sro ((__v16qu)__A, __shift); #else - result = vec_slo ((__v16qu)__A, shift); + __result = vec_slo ((__v16qu)__A, __shift); #endif } else - result = zeros; + __result = __zeros; - return (__m128i) result; + return (__m128i) __result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1647,239 +1647,239 @@ _mm_srli_si128 (__m128i __A, const int __N) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_slli_si128 (__m128i __A, const int _imm5) { - __v16qu result; - const __v16qu zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + __v16qu __result; + const __v16qu __zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; if (_imm5 < 16) #ifdef __LITTLE_ENDIAN__ - result = vec_sld ((__v16qu) __A, zeros, _imm5); + __result = vec_sld ((__v16qu) __A, __zeros, _imm5); #else - result = vec_sld (zeros, (__v16qu) __A, (16 - _imm5)); + __result = vec_sld (__zeros, (__v16qu) __A, (16 - _imm5)); #endif else - result = zeros; + __result = __zeros; - return (__m128i) result; + return (__m128i) __result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srli_epi16 (__m128i __A, int __B) { - __v8hu rshift; - __v8hi result = { 0, 0, 0, 0, 0, 0, 0, 0 }; + __v8hu __rshift; + __v8hi __result = { 0, 0, 0, 0, 0, 0, 0, 0 }; if (__B < 16) { if (__builtin_constant_p(__B)) - rshift = (__v8hu) vec_splat_s16(__B); + __rshift = (__v8hu) vec_splat_s16(__B); else - rshift = vec_splats ((unsigned short) __B); + __rshift = vec_splats ((unsigned short) __B); - result = vec_sr ((__v8hi) __A, rshift); + __result = vec_sr ((__v8hi) __A, __rshift); } - return (__m128i) result; + return (__m128i) __result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srli_epi32 (__m128i __A, int __B) { - __v4su rshift; - __v4si result = { 0, 0, 0, 0 }; + __v4su __rshift; + __v4si __result = { 0, 0, 0, 0 }; if (__B < 32) { if (__builtin_constant_p(__B)) { if (__B < 16) - rshift = (__v4su) vec_splat_s32(__B); + __rshift = (__v4su) vec_splat_s32(__B); else - rshift = (__v4su) vec_splats((unsigned int)__B); + __rshift = (__v4su) vec_splats((unsigned int)__B); } else - rshift = vec_splats ((unsigned int) __B); + __rshift = vec_splats ((unsigned int) __B); - result = vec_sr ((__v4si) __A, rshift); + __result = vec_sr ((__v4si) __A, __rshift); } - return (__m128i) result; + return (__m128i) __result; } #ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srli_epi64 (__m128i __A, int __B) { - __v2du rshift; - __v2di result = { 0, 0 }; + __v2du __rshift; + __v2di __result = { 0, 0 }; if (__B < 64) { if (__builtin_constant_p(__B)) { if (__B < 16) - rshift = (__v2du) vec_splat_s32(__B); + __rshift = (__v2du) vec_splat_s32(__B); else - rshift = (__v2du) vec_splats((unsigned long long)__B); + __rshift = (__v2du) vec_splats((unsigned long long)__B); } else - rshift = (__v2du) vec_splats ((unsigned int) __B); + __rshift = (__v2du) vec_splats ((unsigned int) __B); - result = vec_sr ((__v2di) __A, rshift); + __result = vec_sr ((__v2di) __A, __rshift); } - return (__m128i) result; + return 
(__m128i) __result; } #endif extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sll_epi16 (__m128i __A, __m128i __B) { - __v8hu lshift; - __vector __bool short shmask; - const __v8hu shmax = { 15, 15, 15, 15, 15, 15, 15, 15 }; - __v8hu result; + __v8hu __lshift; + __vector __bool short __shmask; + const __v8hu __shmax = { 15, 15, 15, 15, 15, 15, 15, 15 }; + __v8hu __result; #ifdef __LITTLE_ENDIAN__ - lshift = vec_splat ((__v8hu) __B, 0); + __lshift = vec_splat ((__v8hu) __B, 0); #else - lshift = vec_splat ((__v8hu) __B, 3); + __lshift = vec_splat ((__v8hu) __B, 3); #endif - shmask = vec_cmple (lshift, shmax); - result = vec_sl ((__v8hu) __A, lshift); - result = vec_sel ((__v8hu) shmask, result, shmask); + __shmask = vec_cmple (__lshift, __shmax); + __result = vec_sl ((__v8hu) __A, __lshift); + __result = vec_sel ((__v8hu) __shmask, __result, __shmask); - return (__m128i) result; + return (__m128i) __result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sll_epi32 (__m128i __A, __m128i __B) { - __v4su lshift; - __vector __bool int shmask; - const __v4su shmax = { 32, 32, 32, 32 }; - __v4su result; + __v4su __lshift; + __vector __bool int __shmask; + const __v4su __shmax = { 32, 32, 32, 32 }; + __v4su __result; #ifdef __LITTLE_ENDIAN__ - lshift = vec_splat ((__v4su) __B, 0); + __lshift = vec_splat ((__v4su) __B, 0); #else - lshift = vec_splat ((__v4su) __B, 1); + __lshift = vec_splat ((__v4su) __B, 1); #endif - shmask = vec_cmplt (lshift, shmax); - result = vec_sl ((__v4su) __A, lshift); - result = vec_sel ((__v4su) shmask, result, shmask); + __shmask = vec_cmplt (__lshift, __shmax); + __result = vec_sl ((__v4su) __A, __lshift); + __result = vec_sel ((__v4su) __shmask, __result, __shmask); - return (__m128i) result; + return (__m128i) __result; } #ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sll_epi64 (__m128i __A, __m128i __B) { - __v2du lshift; - __vector __bool long long shmask; - const __v2du shmax = { 64, 64 }; - __v2du result; + __v2du __lshift; + __vector __bool long long __shmask; + const __v2du __shmax = { 64, 64 }; + __v2du __result; - lshift = vec_splat ((__v2du) __B, 0); - shmask = vec_cmplt (lshift, shmax); - result = vec_sl ((__v2du) __A, lshift); - result = vec_sel ((__v2du) shmask, result, shmask); + __lshift = vec_splat ((__v2du) __B, 0); + __shmask = vec_cmplt (__lshift, __shmax); + __result = vec_sl ((__v2du) __A, __lshift); + __result = vec_sel ((__v2du) __shmask, __result, __shmask); - return (__m128i) result; + return (__m128i) __result; } #endif extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sra_epi16 (__m128i __A, __m128i __B) { - const __v8hu rshmax = { 15, 15, 15, 15, 15, 15, 15, 15 }; - __v8hu rshift; - __v8hi result; + const __v8hu __rshmax = { 15, 15, 15, 15, 15, 15, 15, 15 }; + __v8hu __rshift; + __v8hi __result; #ifdef __LITTLE_ENDIAN__ - rshift = vec_splat ((__v8hu)__B, 0); + __rshift = vec_splat ((__v8hu)__B, 0); #else - rshift = vec_splat ((__v8hu)__B, 3); + __rshift = vec_splat ((__v8hu)__B, 3); #endif - rshift = vec_min (rshift, rshmax); - result = vec_sra ((__v8hi) __A, rshift); + __rshift = vec_min (__rshift, __rshmax); + __result = vec_sra ((__v8hi) __A, __rshift); - return (__m128i) result; + return (__m128i) __result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sra_epi32 (__m128i __A, __m128i 
__B) { - const __v4su rshmax = { 31, 31, 31, 31 }; - __v4su rshift; - __v4si result; + const __v4su __rshmax = { 31, 31, 31, 31 }; + __v4su __rshift; + __v4si __result; #ifdef __LITTLE_ENDIAN__ - rshift = vec_splat ((__v4su)__B, 0); + __rshift = vec_splat ((__v4su)__B, 0); #else - rshift = vec_splat ((__v4su)__B, 1); + __rshift = vec_splat ((__v4su)__B, 1); #endif - rshift = vec_min (rshift, rshmax); - result = vec_sra ((__v4si) __A, rshift); + __rshift = vec_min (__rshift, __rshmax); + __result = vec_sra ((__v4si) __A, __rshift); - return (__m128i) result; + return (__m128i) __result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srl_epi16 (__m128i __A, __m128i __B) { - __v8hu rshift; - __vector __bool short shmask; - const __v8hu shmax = { 15, 15, 15, 15, 15, 15, 15, 15 }; - __v8hu result; + __v8hu __rshift; + __vector __bool short __shmask; + const __v8hu __shmax = { 15, 15, 15, 15, 15, 15, 15, 15 }; + __v8hu __result; #ifdef __LITTLE_ENDIAN__ - rshift = vec_splat ((__v8hu) __B, 0); + __rshift = vec_splat ((__v8hu) __B, 0); #else - rshift = vec_splat ((__v8hu) __B, 3); + __rshift = vec_splat ((__v8hu) __B, 3); #endif - shmask = vec_cmple (rshift, shmax); - result = vec_sr ((__v8hu) __A, rshift); - result = vec_sel ((__v8hu) shmask, result, shmask); + __shmask = vec_cmple (__rshift, __shmax); + __result = vec_sr ((__v8hu) __A, __rshift); + __result = vec_sel ((__v8hu) __shmask, __result, __shmask); - return (__m128i) result; + return (__m128i) __result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srl_epi32 (__m128i __A, __m128i __B) { - __v4su rshift; - __vector __bool int shmask; - const __v4su shmax = { 32, 32, 32, 32 }; - __v4su result; + __v4su __rshift; + __vector __bool int __shmask; + const __v4su __shmax = { 32, 32, 32, 32 }; + __v4su __result; #ifdef __LITTLE_ENDIAN__ - rshift = vec_splat ((__v4su) __B, 0); + __rshift = vec_splat ((__v4su) __B, 0); #else - rshift = vec_splat ((__v4su) __B, 1); + __rshift = vec_splat ((__v4su) __B, 1); #endif - shmask = vec_cmplt (rshift, shmax); - result = vec_sr ((__v4su) __A, rshift); - result = vec_sel ((__v4su) shmask, result, shmask); + __shmask = vec_cmplt (__rshift, __shmax); + __result = vec_sr ((__v4su) __A, __rshift); + __result = vec_sel ((__v4su) __shmask, __result, __shmask); - return (__m128i) result; + return (__m128i) __result; } #ifdef _ARCH_PWR8 extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srl_epi64 (__m128i __A, __m128i __B) { - __v2du rshift; - __vector __bool long long shmask; - const __v2du shmax = { 64, 64 }; - __v2du result; + __v2du __rshift; + __vector __bool long long __shmask; + const __v2du __shmax = { 64, 64 }; + __v2du __result; - rshift = vec_splat ((__v2du) __B, 0); - shmask = vec_cmplt (rshift, shmax); - result = vec_sr ((__v2du) __A, rshift); - result = vec_sel ((__v2du) shmask, result, shmask); + __rshift = vec_splat ((__v2du) __B, 0); + __shmask = vec_cmplt (__rshift, __shmax); + __result = vec_sr ((__v2du) __A, __rshift); + __result = vec_sel ((__v2du) __shmask, __result, __shmask); - return (__m128i) result; + return (__m128i) __result; } #endif @@ -1994,11 +1994,11 @@ _mm_extract_epi16 (__m128i const __A, int const __N) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_insert_epi16 (__m128i const __A, int const __D, int const __N) { - __v8hi result = (__v8hi)__A; + __v8hi __result = (__v8hi)__A; - result [(__N 
& 7)] = __D; + __result [(__N & 7)] = __D; - return (__m128i) result; + return (__m128i) __result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -2037,21 +2037,21 @@ _mm_movemask_epi8 (__m128i __A) #ifdef _ARCH_PWR10 return vec_extractm ((__v16qu) __A); #else - __vector unsigned long long result; - static const __vector unsigned char perm_mask = + __vector unsigned long long __result; + static const __vector unsigned char __perm_mask = { 0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00 }; - result = ((__vector unsigned long long) + __result = ((__vector unsigned long long) vec_vbpermq ((__vector unsigned char) __A, - (__vector unsigned char) perm_mask)); + (__vector unsigned char) __perm_mask)); #ifdef __LITTLE_ENDIAN__ - return result[1]; + return __result[1]; #else - return result[0]; + return __result[0]; #endif #endif /* !_ARCH_PWR10 */ } @@ -2060,8 +2060,8 @@ _mm_movemask_epi8 (__m128i __A) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mulhi_epu16 (__m128i __A, __m128i __B) { - __v4su w0, w1; - __v16qu xform1 = { + __v4su __w0, __w1; + __v16qu __xform1 = { #ifdef __LITTLE_ENDIAN__ 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, 0x0A, 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F @@ -2071,19 +2071,19 @@ _mm_mulhi_epu16 (__m128i __A, __m128i __B) #endif }; - w0 = vec_vmuleuh ((__v8hu)__A, (__v8hu)__B); - w1 = vec_vmulouh ((__v8hu)__A, (__v8hu)__B); - return (__m128i) vec_perm (w0, w1, xform1); + __w0 = vec_vmuleuh ((__v8hu)__A, (__v8hu)__B); + __w1 = vec_vmulouh ((__v8hu)__A, (__v8hu)__B); + return (__m128i) vec_perm (__w0, __w1, __xform1); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shufflehi_epi16 (__m128i __A, const int __mask) { - unsigned long element_selector_98 = __mask & 0x03; - unsigned long element_selector_BA = (__mask >> 2) & 0x03; - unsigned long element_selector_DC = (__mask >> 4) & 0x03; - unsigned long element_selector_FE = (__mask >> 6) & 0x03; - static const unsigned short permute_selectors[4] = + unsigned long __element_selector_98 = __mask & 0x03; + unsigned long __element_selector_BA = (__mask >> 2) & 0x03; + unsigned long __element_selector_DC = (__mask >> 4) & 0x03; + unsigned long __element_selector_FE = (__mask >> 6) & 0x03; + static const unsigned short __permute_selectors[4] = { #ifdef __LITTLE_ENDIAN__ 0x0908, 0x0B0A, 0x0D0C, 0x0F0E @@ -2091,33 +2091,33 @@ _mm_shufflehi_epi16 (__m128i __A, const int __mask) 0x0809, 0x0A0B, 0x0C0D, 0x0E0F #endif }; - __v2du pmask = + __v2du __pmask = #ifdef __LITTLE_ENDIAN__ { 0x1716151413121110UL, 0UL}; #else { 0x1011121314151617UL, 0UL}; #endif - __m64_union t; - __v2du a, r; + __m64_union __t; + __v2du __a, __r; - t.as_short[0] = permute_selectors[element_selector_98]; - t.as_short[1] = permute_selectors[element_selector_BA]; - t.as_short[2] = permute_selectors[element_selector_DC]; - t.as_short[3] = permute_selectors[element_selector_FE]; - pmask[1] = t.as_m64; - a = (__v2du)__A; - r = vec_perm (a, a, (__vector unsigned char)pmask); - return (__m128i) r; + __t.as_short[0] = __permute_selectors[__element_selector_98]; + __t.as_short[1] = __permute_selectors[__element_selector_BA]; + __t.as_short[2] = __permute_selectors[__element_selector_DC]; + __t.as_short[3] = __permute_selectors[__element_selector_FE]; + __pmask[1] = __t.as_m64; + __a = (__v2du)__A; + __r = vec_perm (__a, __a, (__vector unsigned char)__pmask); + return (__m128i) 
__r; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shufflelo_epi16 (__m128i __A, const int __mask) { - unsigned long element_selector_10 = __mask & 0x03; - unsigned long element_selector_32 = (__mask >> 2) & 0x03; - unsigned long element_selector_54 = (__mask >> 4) & 0x03; - unsigned long element_selector_76 = (__mask >> 6) & 0x03; - static const unsigned short permute_selectors[4] = + unsigned long __element_selector_10 = __mask & 0x03; + unsigned long __element_selector_32 = (__mask >> 2) & 0x03; + unsigned long __element_selector_54 = (__mask >> 4) & 0x03; + unsigned long __element_selector_76 = (__mask >> 6) & 0x03; + static const unsigned short __permute_selectors[4] = { #ifdef __LITTLE_ENDIAN__ 0x0100, 0x0302, 0x0504, 0x0706 @@ -2125,32 +2125,32 @@ _mm_shufflelo_epi16 (__m128i __A, const int __mask) 0x0001, 0x0203, 0x0405, 0x0607 #endif }; - __v2du pmask = + __v2du __pmask = #ifdef __LITTLE_ENDIAN__ { 0UL, 0x1f1e1d1c1b1a1918UL}; #else { 0UL, 0x18191a1b1c1d1e1fUL}; #endif - __m64_union t; - __v2du a, r; - t.as_short[0] = permute_selectors[element_selector_10]; - t.as_short[1] = permute_selectors[element_selector_32]; - t.as_short[2] = permute_selectors[element_selector_54]; - t.as_short[3] = permute_selectors[element_selector_76]; - pmask[0] = t.as_m64; - a = (__v2du)__A; - r = vec_perm (a, a, (__vector unsigned char)pmask); - return (__m128i) r; + __m64_union __t; + __v2du __a, __r; + __t.as_short[0] = __permute_selectors[__element_selector_10]; + __t.as_short[1] = __permute_selectors[__element_selector_32]; + __t.as_short[2] = __permute_selectors[__element_selector_54]; + __t.as_short[3] = __permute_selectors[__element_selector_76]; + __pmask[0] = __t.as_m64; + __a = (__v2du)__A; + __r = vec_perm (__a, __a, (__vector unsigned char)__pmask); + return (__m128i) __r; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shuffle_epi32 (__m128i __A, const int __mask) { - unsigned long element_selector_10 = __mask & 0x03; - unsigned long element_selector_32 = (__mask >> 2) & 0x03; - unsigned long element_selector_54 = (__mask >> 4) & 0x03; - unsigned long element_selector_76 = (__mask >> 6) & 0x03; - static const unsigned int permute_selectors[4] = + unsigned long __element_selector_10 = __mask & 0x03; + unsigned long __element_selector_32 = (__mask >> 2) & 0x03; + unsigned long __element_selector_54 = (__mask >> 4) & 0x03; + unsigned long __element_selector_76 = (__mask >> 6) & 0x03; + static const unsigned int __permute_selectors[4] = { #ifdef __LITTLE_ENDIAN__ 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C @@ -2158,26 +2158,26 @@ _mm_shuffle_epi32 (__m128i __A, const int __mask) 0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F #endif }; - __v4su t; + __v4su __t; - t[0] = permute_selectors[element_selector_10]; - t[1] = permute_selectors[element_selector_32]; - t[2] = permute_selectors[element_selector_54] + 0x10101010; - t[3] = permute_selectors[element_selector_76] + 0x10101010; - return (__m128i)vec_perm ((__v4si) __A, (__v4si)__A, (__vector unsigned char)t); + __t[0] = __permute_selectors[__element_selector_10]; + __t[1] = __permute_selectors[__element_selector_32]; + __t[2] = __permute_selectors[__element_selector_54] + 0x10101010; + __t[3] = __permute_selectors[__element_selector_76] + 0x10101010; + return (__m128i)vec_perm ((__v4si) __A, (__v4si)__A, (__vector unsigned char)__t); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskmoveu_si128 
(__m128i __A, __m128i __B, char *__C) { - __v2du hibit = { 0x7f7f7f7f7f7f7f7fUL, 0x7f7f7f7f7f7f7f7fUL}; - __v16qu mask, tmp; - __m128i_u *p = (__m128i_u*)__C; + __v2du __hibit = { 0x7f7f7f7f7f7f7f7fUL, 0x7f7f7f7f7f7f7f7fUL}; + __v16qu __mask, __tmp; + __m128i_u *__p = (__m128i_u*)__C; - tmp = (__v16qu)_mm_loadu_si128(p); - mask = (__v16qu)vec_cmpgt ((__v16qu)__B, (__v16qu)hibit); - tmp = vec_sel (tmp, (__v16qu)__A, mask); - _mm_storeu_si128 (p, (__m128i)tmp); + __tmp = (__v16qu)_mm_loadu_si128(__p); + __mask = (__v16qu)vec_cmpgt ((__v16qu)__B, (__v16qu)__hibit); + __tmp = vec_sel (__tmp, (__v16qu)__A, __mask); + _mm_storeu_si128 (__p, (__m128i)__tmp); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -2196,26 +2196,26 @@ _mm_avg_epu16 (__m128i __A, __m128i __B) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sad_epu8 (__m128i __A, __m128i __B) { - __v16qu a, b; - __v16qu vabsdiff; - __v4si vsum; - const __v4su zero = { 0, 0, 0, 0 }; - __v4si result; + __v16qu __a, __b; + __v16qu __vabsdiff; + __v4si __vsum; + const __v4su __zero = { 0, 0, 0, 0 }; + __v4si __result; - a = (__v16qu) __A; - b = (__v16qu) __B; + __a = (__v16qu) __A; + __b = (__v16qu) __B; #ifndef _ARCH_PWR9 - __v16qu vmin = vec_min (a, b); - __v16qu vmax = vec_max (a, b); - vabsdiff = vec_sub (vmax, vmin); + __v16qu __vmin = vec_min (__a, __b); + __v16qu __vmax = vec_max (__a, __b); + __vabsdiff = vec_sub (__vmax, __vmin); #else - vabsdiff = vec_absd (a, b); + __vabsdiff = vec_absd (__a, __b); #endif /* Sum four groups of bytes into integers. */ - vsum = (__vector signed int) vec_sum4s (vabsdiff, zero); + __vsum = (__vector signed int) vec_sum4s (__vabsdiff, __zero); #ifdef __LITTLE_ENDIAN__ /* Sum across four integers with two integer results. */ - __asm__ ("vsum2sws %0,%1,%2" : "=v" (result) : "v" (vsum), "v" (zero)); + __asm__ ("vsum2sws %0,%1,%2" : "=v" (__result) : "v" (__vsum), "v" (__zero)); /* Note: vec_sum2s could be used here, but on little-endian, vector shifts are added that are not needed for this use-case. A vector shift to correctly position the 32-bit integer results @@ -2224,11 +2224,11 @@ _mm_sad_epu8 (__m128i __A, __m128i __B) integers ([1]|[0] and [3]|[2]). Thus, no shift is performed. */ #else /* Sum across four integers with two integer results. */ - result = vec_sum2s (vsum, (__vector signed int) zero); + __result = vec_sum2s (__vsum, (__vector signed int) __zero); /* Rotate the sums into the correct position. 
*/ - result = vec_sld (result, result, 6); + __result = vec_sld (__result, __result, 6); #endif - return (__m128i) result; + return (__m128i) __result; } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/rs6000/fusion.md b/gcc/config/rs6000/fusion.md index 6f9f534..15f0c16 100644 --- a/gcc/config/rs6000/fusion.md +++ b/gcc/config/rs6000/fusion.md @@ -25,7 +25,7 @@ (compare:CC (match_operand:DI 1 "ds_form_mem_operand" "m") (match_operand:DI 3 "const_m1_to_1_operand" "n"))) (clobber (match_scratch:DI 0 "=r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" + "(TARGET_P10_FUSION)" "ld%X1 %0,%1\;cmpdi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) @@ -46,7 +46,7 @@ (compare:CCUNS (match_operand:DI 1 "ds_form_mem_operand" "m") (match_operand:DI 3 "const_0_to_1_operand" "n"))) (clobber (match_scratch:DI 0 "=r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" + "(TARGET_P10_FUSION)" "ld%X1 %0,%1\;cmpldi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) @@ -67,7 +67,7 @@ (compare:CC (match_operand:DI 1 "ds_form_mem_operand" "m") (match_operand:DI 3 "const_m1_to_1_operand" "n"))) (set (match_operand:DI 0 "gpc_reg_operand" "=r") (match_dup 1))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" + "(TARGET_P10_FUSION)" "ld%X1 %0,%1\;cmpdi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) @@ -88,7 +88,7 @@ (compare:CCUNS (match_operand:DI 1 "ds_form_mem_operand" "m") (match_operand:DI 3 "const_0_to_1_operand" "n"))) (set (match_operand:DI 0 "gpc_reg_operand" "=r") (match_dup 1))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" + "(TARGET_P10_FUSION)" "ld%X1 %0,%1\;cmpldi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) @@ -109,7 +109,7 @@ (compare:CC (match_operand:SI 1 "ds_form_mem_operand" "m") (match_operand:SI 3 "const_m1_to_1_operand" "n"))) (clobber (match_scratch:SI 0 "=r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" + "(TARGET_P10_FUSION)" "lwa%X1 %0,%1\;cmpdi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) @@ -130,7 +130,7 @@ (compare:CCUNS (match_operand:SI 1 "non_update_memory_operand" "m") (match_operand:SI 3 "const_0_to_1_operand" "n"))) (clobber (match_scratch:SI 0 "=r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" + "(TARGET_P10_FUSION)" "lwz%X1 %0,%1\;cmpldi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) @@ -151,7 +151,7 @@ (compare:CC (match_operand:SI 1 "ds_form_mem_operand" "m") (match_operand:SI 3 "const_m1_to_1_operand" "n"))) (set (match_operand:SI 0 "gpc_reg_operand" "=r") (match_dup 1))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" + "(TARGET_P10_FUSION)" "lwa%X1 %0,%1\;cmpdi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) @@ -172,7 +172,7 @@ (compare:CCUNS (match_operand:SI 1 "non_update_memory_operand" "m") (match_operand:SI 3 "const_0_to_1_operand" "n"))) (set (match_operand:SI 0 "gpc_reg_operand" "=r") (match_dup 1))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" + "(TARGET_P10_FUSION)" "lwz%X1 %0,%1\;cmpldi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) @@ -193,7 +193,7 @@ (compare:CC (match_operand:SI 1 "ds_form_mem_operand" "m") (match_operand:SI 3 "const_m1_to_1_operand" "n"))) (set (match_operand:EXTSI 0 "gpc_reg_operand" "=r") (sign_extend:EXTSI (match_dup 1)))] - "(TARGET_P10_FUSION && 
TARGET_P10_FUSION_LD_CMPI)" + "(TARGET_P10_FUSION)" "lwa%X1 %0,%1\;cmpdi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) @@ -214,7 +214,7 @@ (compare:CCUNS (match_operand:SI 1 "non_update_memory_operand" "m") (match_operand:SI 3 "const_0_to_1_operand" "n"))) (set (match_operand:EXTSI 0 "gpc_reg_operand" "=r") (zero_extend:EXTSI (match_dup 1)))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" + "(TARGET_P10_FUSION)" "lwz%X1 %0,%1\;cmpldi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) @@ -235,7 +235,7 @@ (compare:CC (match_operand:HI 1 "non_update_memory_operand" "m") (match_operand:HI 3 "const_m1_to_1_operand" "n"))) (clobber (match_scratch:GPR 0 "=r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" + "(TARGET_P10_FUSION)" "lha%X1 %0,%1\;cmpdi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) @@ -256,7 +256,7 @@ (compare:CCUNS (match_operand:HI 1 "non_update_memory_operand" "m") (match_operand:HI 3 "const_0_to_1_operand" "n"))) (clobber (match_scratch:GPR 0 "=r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" + "(TARGET_P10_FUSION)" "lhz%X1 %0,%1\;cmpldi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) @@ -277,7 +277,7 @@ (compare:CC (match_operand:HI 1 "non_update_memory_operand" "m") (match_operand:HI 3 "const_m1_to_1_operand" "n"))) (set (match_operand:EXTHI 0 "gpc_reg_operand" "=r") (sign_extend:EXTHI (match_dup 1)))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" + "(TARGET_P10_FUSION)" "lha%X1 %0,%1\;cmpdi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) @@ -298,7 +298,7 @@ (compare:CCUNS (match_operand:HI 1 "non_update_memory_operand" "m") (match_operand:HI 3 "const_0_to_1_operand" "n"))) (set (match_operand:EXTHI 0 "gpc_reg_operand" "=r") (zero_extend:EXTHI (match_dup 1)))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" + "(TARGET_P10_FUSION)" "lhz%X1 %0,%1\;cmpldi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) @@ -319,7 +319,7 @@ (compare:CCUNS (match_operand:QI 1 "non_update_memory_operand" "m") (match_operand:QI 3 "const_0_to_1_operand" "n"))) (clobber (match_scratch:GPR 0 "=r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" + "(TARGET_P10_FUSION)" "lbz%X1 %0,%1\;cmpldi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) @@ -340,7 +340,7 @@ (compare:CCUNS (match_operand:QI 1 "non_update_memory_operand" "m") (match_operand:QI 3 "const_0_to_1_operand" "n"))) (set (match_operand:GPR 0 "gpc_reg_operand" "=r") (zero_extend:GPR (match_dup 1)))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)" + "(TARGET_P10_FUSION)" "lbz%X1 %0,%1\;cmpldi %2,%0,%3" "&& reload_completed && (cc_reg_not_cr0_operand (operands[2], CCmode) @@ -363,7 +363,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "%r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ and %3,%1,%0\;and %3,%3,%2 and %3,%1,%0\;and %3,%3,%2 @@ -381,7 +381,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ andc %3,%1,%0\;and %3,%3,%2 andc %3,%1,%0\;and %3,%3,%2 @@ -399,7 +399,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (match_operand:GPR 2 "gpc_reg_operand" 
"r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ eqv %3,%1,%0\;and %3,%3,%2 eqv %3,%1,%0\;and %3,%3,%2 @@ -417,7 +417,7 @@ (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ nand %3,%1,%0\;and %3,%3,%2 nand %3,%1,%0\;and %3,%3,%2 @@ -435,7 +435,7 @@ (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ nor %3,%1,%0\;and %3,%3,%2 nor %3,%1,%0\;and %3,%3,%2 @@ -453,7 +453,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ or %3,%1,%0\;and %3,%3,%2 or %3,%1,%0\;and %3,%3,%2 @@ -471,7 +471,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ orc %3,%1,%0\;and %3,%3,%2 orc %3,%1,%0\;and %3,%3,%2 @@ -489,7 +489,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ xor %3,%1,%0\;and %3,%3,%2 xor %3,%1,%0\;and %3,%3,%2 @@ -507,7 +507,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_ADDLOG)" + "(TARGET_P10_FUSION)" "@ add %3,%1,%0\;and %3,%3,%2 add %3,%1,%0\;and %3,%3,%2 @@ -525,7 +525,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_ADDLOG)" + "(TARGET_P10_FUSION)" "@ subf %3,%1,%0\;and %3,%3,%2 subf %3,%1,%0\;and %3,%3,%2 @@ -543,7 +543,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ and %3,%1,%0\;andc %3,%3,%2 and %3,%1,%0\;andc %3,%3,%2 @@ -561,7 +561,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ andc %3,%1,%0\;andc %3,%3,%2 andc %3,%1,%0\;andc %3,%3,%2 @@ -579,7 +579,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ eqv %3,%1,%0\;andc %3,%3,%2 eqv %3,%1,%0\;andc %3,%3,%2 @@ -597,7 +597,7 @@ (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ nand 
%3,%1,%0\;andc %3,%3,%2 nand %3,%1,%0\;andc %3,%3,%2 @@ -615,7 +615,7 @@ (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ nor %3,%1,%0\;andc %3,%3,%2 nor %3,%1,%0\;andc %3,%3,%2 @@ -633,7 +633,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ or %3,%1,%0\;andc %3,%3,%2 or %3,%1,%0\;andc %3,%3,%2 @@ -651,7 +651,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ orc %3,%1,%0\;andc %3,%3,%2 orc %3,%1,%0\;andc %3,%3,%2 @@ -669,7 +669,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ xor %3,%1,%0\;andc %3,%3,%2 xor %3,%1,%0\;andc %3,%3,%2 @@ -687,7 +687,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ and %3,%1,%0\;eqv %3,%3,%2 and %3,%1,%0\;eqv %3,%3,%2 @@ -705,7 +705,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ andc %3,%1,%0\;eqv %3,%3,%2 andc %3,%1,%0\;eqv %3,%3,%2 @@ -723,7 +723,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "%r,r,r,r"))) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ eqv %3,%1,%0\;eqv %3,%3,%2 eqv %3,%1,%0\;eqv %3,%3,%2 @@ -741,7 +741,7 @@ (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ nand %3,%1,%0\;eqv %3,%3,%2 nand %3,%1,%0\;eqv %3,%3,%2 @@ -759,7 +759,7 @@ (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ nor %3,%1,%0\;eqv %3,%3,%2 nor %3,%1,%0\;eqv %3,%3,%2 @@ -777,7 +777,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ or %3,%1,%0\;eqv %3,%3,%2 or %3,%1,%0\;eqv %3,%3,%2 @@ -795,7 +795,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ orc %3,%1,%0\;eqv %3,%3,%2 orc %3,%1,%0\;eqv %3,%3,%2 @@ -813,7 +813,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) 
(match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ xor %3,%1,%0\;eqv %3,%3,%2 xor %3,%1,%0\;eqv %3,%3,%2 @@ -831,7 +831,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ and %3,%1,%0\;nand %3,%3,%2 and %3,%1,%0\;nand %3,%3,%2 @@ -849,7 +849,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ andc %3,%1,%0\;nand %3,%3,%2 andc %3,%1,%0\;nand %3,%3,%2 @@ -867,7 +867,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ eqv %3,%1,%0\;nand %3,%3,%2 eqv %3,%1,%0\;nand %3,%3,%2 @@ -885,7 +885,7 @@ (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ nand %3,%1,%0\;nand %3,%3,%2 nand %3,%1,%0\;nand %3,%3,%2 @@ -903,7 +903,7 @@ (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ nor %3,%1,%0\;nand %3,%3,%2 nor %3,%1,%0\;nand %3,%3,%2 @@ -921,7 +921,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ or %3,%1,%0\;nand %3,%3,%2 or %3,%1,%0\;nand %3,%3,%2 @@ -939,7 +939,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ orc %3,%1,%0\;nand %3,%3,%2 orc %3,%1,%0\;nand %3,%3,%2 @@ -957,7 +957,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ xor %3,%1,%0\;nand %3,%3,%2 xor %3,%1,%0\;nand %3,%3,%2 @@ -975,7 +975,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_ADDLOG)" + "(TARGET_P10_FUSION)" "@ add %3,%1,%0\;nand %3,%3,%2 add %3,%1,%0\;nand %3,%3,%2 @@ -993,7 +993,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_ADDLOG)" + "(TARGET_P10_FUSION)" "@ subf %3,%1,%0\;nand %3,%3,%2 subf %3,%1,%0\;nand %3,%3,%2 @@ -1011,7 +1011,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) 
(clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ and %3,%1,%0\;nor %3,%3,%2 and %3,%1,%0\;nor %3,%3,%2 @@ -1029,7 +1029,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ andc %3,%1,%0\;nor %3,%3,%2 andc %3,%1,%0\;nor %3,%3,%2 @@ -1047,7 +1047,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ eqv %3,%1,%0\;nor %3,%3,%2 eqv %3,%1,%0\;nor %3,%3,%2 @@ -1065,7 +1065,7 @@ (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ nand %3,%1,%0\;nor %3,%3,%2 nand %3,%1,%0\;nor %3,%3,%2 @@ -1083,7 +1083,7 @@ (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ nor %3,%1,%0\;nor %3,%3,%2 nor %3,%1,%0\;nor %3,%3,%2 @@ -1101,7 +1101,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ or %3,%1,%0\;nor %3,%3,%2 or %3,%1,%0\;nor %3,%3,%2 @@ -1119,7 +1119,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ orc %3,%1,%0\;nor %3,%3,%2 orc %3,%1,%0\;nor %3,%3,%2 @@ -1137,7 +1137,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ xor %3,%1,%0\;nor %3,%3,%2 xor %3,%1,%0\;nor %3,%3,%2 @@ -1155,7 +1155,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_ADDLOG)" + "(TARGET_P10_FUSION)" "@ add %3,%1,%0\;nor %3,%3,%2 add %3,%1,%0\;nor %3,%3,%2 @@ -1173,7 +1173,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_ADDLOG)" + "(TARGET_P10_FUSION)" "@ subf %3,%1,%0\;nor %3,%3,%2 subf %3,%1,%0\;nor %3,%3,%2 @@ -1191,7 +1191,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ and %3,%1,%0\;or %3,%3,%2 and %3,%1,%0\;or %3,%3,%2 @@ -1209,7 +1209,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && 
TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ andc %3,%1,%0\;or %3,%3,%2 andc %3,%1,%0\;or %3,%3,%2 @@ -1227,7 +1227,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ eqv %3,%1,%0\;or %3,%3,%2 eqv %3,%1,%0\;or %3,%3,%2 @@ -1245,7 +1245,7 @@ (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ nand %3,%1,%0\;or %3,%3,%2 nand %3,%1,%0\;or %3,%3,%2 @@ -1263,7 +1263,7 @@ (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ nor %3,%1,%0\;or %3,%3,%2 nor %3,%1,%0\;or %3,%3,%2 @@ -1281,7 +1281,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "%r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ or %3,%1,%0\;or %3,%3,%2 or %3,%1,%0\;or %3,%3,%2 @@ -1299,7 +1299,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ orc %3,%1,%0\;or %3,%3,%2 orc %3,%1,%0\;or %3,%3,%2 @@ -1317,7 +1317,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ xor %3,%1,%0\;or %3,%3,%2 xor %3,%1,%0\;or %3,%3,%2 @@ -1335,7 +1335,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_ADDLOG)" + "(TARGET_P10_FUSION)" "@ add %3,%1,%0\;or %3,%3,%2 add %3,%1,%0\;or %3,%3,%2 @@ -1353,7 +1353,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_ADDLOG)" + "(TARGET_P10_FUSION)" "@ subf %3,%1,%0\;or %3,%3,%2 subf %3,%1,%0\;or %3,%3,%2 @@ -1371,7 +1371,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ and %3,%1,%0\;orc %3,%3,%2 and %3,%1,%0\;orc %3,%3,%2 @@ -1389,7 +1389,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ andc %3,%1,%0\;orc %3,%3,%2 andc %3,%1,%0\;orc %3,%3,%2 @@ -1407,7 +1407,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ eqv %3,%1,%0\;orc %3,%3,%2 eqv %3,%1,%0\;orc %3,%3,%2 @@ -1425,7 +1425,7 @@ (not:GPR (match_operand:GPR 1 
"gpc_reg_operand" "r,r,r,r"))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ nand %3,%1,%0\;orc %3,%3,%2 nand %3,%1,%0\;orc %3,%3,%2 @@ -1443,7 +1443,7 @@ (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ nor %3,%1,%0\;orc %3,%3,%2 nor %3,%1,%0\;orc %3,%3,%2 @@ -1461,7 +1461,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ or %3,%1,%0\;orc %3,%3,%2 or %3,%1,%0\;orc %3,%3,%2 @@ -1479,7 +1479,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ orc %3,%1,%0\;orc %3,%3,%2 orc %3,%1,%0\;orc %3,%3,%2 @@ -1497,7 +1497,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (not:GPR (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ xor %3,%1,%0\;orc %3,%3,%2 xor %3,%1,%0\;orc %3,%3,%2 @@ -1515,7 +1515,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ and %3,%1,%0\;xor %3,%3,%2 and %3,%1,%0\;xor %3,%3,%2 @@ -1533,7 +1533,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ andc %3,%1,%0\;xor %3,%3,%2 andc %3,%1,%0\;xor %3,%3,%2 @@ -1551,7 +1551,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ eqv %3,%1,%0\;xor %3,%3,%2 eqv %3,%1,%0\;xor %3,%3,%2 @@ -1569,7 +1569,7 @@ (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ nand %3,%1,%0\;xor %3,%3,%2 nand %3,%1,%0\;xor %3,%3,%2 @@ -1587,7 +1587,7 @@ (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ nor %3,%1,%0\;xor %3,%3,%2 nor %3,%1,%0\;xor %3,%3,%2 @@ -1605,7 +1605,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ or %3,%1,%0\;xor %3,%3,%2 or %3,%1,%0\;xor %3,%3,%2 @@ -1623,7 +1623,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 
"=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ orc %3,%1,%0\;xor %3,%3,%2 orc %3,%1,%0\;xor %3,%3,%2 @@ -1641,7 +1641,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "%r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ xor %3,%1,%0\;xor %3,%3,%2 xor %3,%1,%0\;xor %3,%3,%2 @@ -1659,7 +1659,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LOGADD)" + "(TARGET_P10_FUSION)" "@ and %3,%1,%0\;add %3,%3,%2 and %3,%1,%0\;add %3,%3,%2 @@ -1677,7 +1677,7 @@ (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LOGADD)" + "(TARGET_P10_FUSION)" "@ nand %3,%1,%0\;add %3,%3,%2 nand %3,%1,%0\;add %3,%3,%2 @@ -1695,7 +1695,7 @@ (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LOGADD)" + "(TARGET_P10_FUSION)" "@ nor %3,%1,%0\;add %3,%3,%2 nor %3,%1,%0\;add %3,%3,%2 @@ -1713,7 +1713,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LOGADD)" + "(TARGET_P10_FUSION)" "@ or %3,%1,%0\;add %3,%3,%2 or %3,%1,%0\;add %3,%3,%2 @@ -1731,7 +1731,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LOGADD)" + "(TARGET_P10_FUSION)" "@ and %3,%1,%0\;subf %3,%2,%3 and %3,%1,%0\;subf %3,%2,%3 @@ -1749,7 +1749,7 @@ (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LOGADD)" + "(TARGET_P10_FUSION)" "@ nand %3,%1,%0\;subf %3,%2,%3 nand %3,%1,%0\;subf %3,%2,%3 @@ -1767,7 +1767,7 @@ (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LOGADD)" + "(TARGET_P10_FUSION)" "@ nor %3,%1,%0\;subf %3,%2,%3 nor %3,%1,%0\;subf %3,%2,%3 @@ -1785,7 +1785,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LOGADD)" + "(TARGET_P10_FUSION)" "@ or %3,%1,%0\;subf %3,%2,%3 or %3,%1,%0\;subf %3,%2,%3 @@ -1803,7 +1803,7 @@ (and:GPR (match_operand:GPR 0 "gpc_reg_operand" "r,r,r,r") (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LOGADD)" + "(TARGET_P10_FUSION)" "@ and %3,%1,%0\;subf %3,%3,%2 and %3,%1,%0\;subf %3,%3,%2 @@ -1821,7 +1821,7 @@ (ior:GPR (not:GPR (match_operand:GPR 0 "gpc_reg_operand" "r,r,r,r")) (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LOGADD)" + "(TARGET_P10_FUSION)" "@ nand %3,%1,%0\;subf %3,%3,%2 nand 
%3,%1,%0\;subf %3,%3,%2 @@ -1839,7 +1839,7 @@ (and:GPR (not:GPR (match_operand:GPR 0 "gpc_reg_operand" "r,r,r,r")) (not:GPR (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r"))))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LOGADD)" + "(TARGET_P10_FUSION)" "@ nor %3,%1,%0\;subf %3,%3,%2 nor %3,%1,%0\;subf %3,%3,%2 @@ -1857,7 +1857,7 @@ (ior:GPR (match_operand:GPR 0 "gpc_reg_operand" "r,r,r,r") (match_operand:GPR 1 "gpc_reg_operand" "r,r,r,r")))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_LOGADD)" + "(TARGET_P10_FUSION)" "@ or %3,%1,%0\;subf %3,%3,%2 or %3,%1,%0\;subf %3,%3,%2 @@ -1875,7 +1875,7 @@ (match_operand:VM 1 "altivec_register_operand" "%v,v,v,v")) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vand %3,%1,%0\;vand %3,%3,%2 vand %3,%1,%0\;vand %3,%3,%2 @@ -1893,7 +1893,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vandc %3,%1,%0\;vand %3,%3,%2 vandc %3,%1,%0\;vand %3,%3,%2 @@ -1911,7 +1911,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ veqv %3,%1,%0\;vand %3,%3,%2 veqv %3,%1,%0\;vand %3,%3,%2 @@ -1929,7 +1929,7 @@ (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vnand %3,%1,%0\;vand %3,%3,%2 vnand %3,%1,%0\;vand %3,%3,%2 @@ -1947,7 +1947,7 @@ (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vnor %3,%1,%0\;vand %3,%3,%2 vnor %3,%1,%0\;vand %3,%3,%2 @@ -1965,7 +1965,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vor %3,%1,%0\;vand %3,%3,%2 vor %3,%1,%0\;vand %3,%3,%2 @@ -1983,7 +1983,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vorc %3,%1,%0\;vand %3,%3,%2 vorc %3,%1,%0\;vand %3,%3,%2 @@ -2001,7 +2001,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vxor %3,%1,%0\;vand %3,%3,%2 vxor %3,%1,%0\;vand %3,%3,%2 @@ -2019,7 +2019,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + 
"(TARGET_P10_FUSION)" "@ vand %3,%1,%0\;vandc %3,%3,%2 vand %3,%1,%0\;vandc %3,%3,%2 @@ -2037,7 +2037,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vandc %3,%1,%0\;vandc %3,%3,%2 vandc %3,%1,%0\;vandc %3,%3,%2 @@ -2055,7 +2055,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ veqv %3,%1,%0\;vandc %3,%3,%2 veqv %3,%1,%0\;vandc %3,%3,%2 @@ -2073,7 +2073,7 @@ (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vnand %3,%1,%0\;vandc %3,%3,%2 vnand %3,%1,%0\;vandc %3,%3,%2 @@ -2091,7 +2091,7 @@ (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vnor %3,%1,%0\;vandc %3,%3,%2 vnor %3,%1,%0\;vandc %3,%3,%2 @@ -2109,7 +2109,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vor %3,%1,%0\;vandc %3,%3,%2 vor %3,%1,%0\;vandc %3,%3,%2 @@ -2127,7 +2127,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vorc %3,%1,%0\;vandc %3,%3,%2 vorc %3,%1,%0\;vandc %3,%3,%2 @@ -2145,7 +2145,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vxor %3,%1,%0\;vandc %3,%3,%2 vxor %3,%1,%0\;vandc %3,%3,%2 @@ -2163,7 +2163,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vand %3,%1,%0\;veqv %3,%3,%2 vand %3,%1,%0\;veqv %3,%3,%2 @@ -2181,7 +2181,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vandc %3,%1,%0\;veqv %3,%3,%2 vandc %3,%1,%0\;veqv %3,%3,%2 @@ -2199,7 +2199,7 @@ (match_operand:VM 1 "altivec_register_operand" "%v,v,v,v"))) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ veqv %3,%1,%0\;veqv %3,%3,%2 veqv %3,%1,%0\;veqv %3,%3,%2 @@ -2217,7 +2217,7 @@ (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (match_operand:VM 2 
"altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vnand %3,%1,%0\;veqv %3,%3,%2 vnand %3,%1,%0\;veqv %3,%3,%2 @@ -2235,7 +2235,7 @@ (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vnor %3,%1,%0\;veqv %3,%3,%2 vnor %3,%1,%0\;veqv %3,%3,%2 @@ -2253,7 +2253,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vor %3,%1,%0\;veqv %3,%3,%2 vor %3,%1,%0\;veqv %3,%3,%2 @@ -2271,7 +2271,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vorc %3,%1,%0\;veqv %3,%3,%2 vorc %3,%1,%0\;veqv %3,%3,%2 @@ -2289,7 +2289,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vxor %3,%1,%0\;veqv %3,%3,%2 vxor %3,%1,%0\;veqv %3,%3,%2 @@ -2307,7 +2307,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vand %3,%1,%0\;vnand %3,%3,%2 vand %3,%1,%0\;vnand %3,%3,%2 @@ -2325,7 +2325,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vandc %3,%1,%0\;vnand %3,%3,%2 vandc %3,%1,%0\;vnand %3,%3,%2 @@ -2343,7 +2343,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")))) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ veqv %3,%1,%0\;vnand %3,%3,%2 veqv %3,%1,%0\;vnand %3,%3,%2 @@ -2361,7 +2361,7 @@ (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")))) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vnand %3,%1,%0\;vnand %3,%3,%2 vnand %3,%1,%0\;vnand %3,%3,%2 @@ -2379,7 +2379,7 @@ (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")))) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vnor %3,%1,%0\;vnand %3,%3,%2 vnor %3,%1,%0\;vnand %3,%3,%2 @@ -2397,7 +2397,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vor %3,%1,%0\;vnand %3,%3,%2 vor 
%3,%1,%0\;vnand %3,%3,%2 @@ -2415,7 +2415,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vorc %3,%1,%0\;vnand %3,%3,%2 vorc %3,%1,%0\;vnand %3,%3,%2 @@ -2433,7 +2433,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vxor %3,%1,%0\;vnand %3,%3,%2 vxor %3,%1,%0\;vnand %3,%3,%2 @@ -2451,7 +2451,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vand %3,%1,%0\;vnor %3,%3,%2 vand %3,%1,%0\;vnor %3,%3,%2 @@ -2469,7 +2469,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vandc %3,%1,%0\;vnor %3,%3,%2 vandc %3,%1,%0\;vnor %3,%3,%2 @@ -2487,7 +2487,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")))) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ veqv %3,%1,%0\;vnor %3,%3,%2 veqv %3,%1,%0\;vnor %3,%3,%2 @@ -2505,7 +2505,7 @@ (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")))) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vnand %3,%1,%0\;vnor %3,%3,%2 vnand %3,%1,%0\;vnor %3,%3,%2 @@ -2523,7 +2523,7 @@ (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")))) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vnor %3,%1,%0\;vnor %3,%3,%2 vnor %3,%1,%0\;vnor %3,%3,%2 @@ -2541,7 +2541,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vor %3,%1,%0\;vnor %3,%3,%2 vor %3,%1,%0\;vnor %3,%3,%2 @@ -2559,7 +2559,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vorc %3,%1,%0\;vnor %3,%3,%2 vorc %3,%1,%0\;vnor %3,%3,%2 @@ -2577,7 +2577,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vxor %3,%1,%0\;vnor %3,%3,%2 vxor %3,%1,%0\;vnor %3,%3,%2 @@ -2595,7 +2595,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber 
(match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vand %3,%1,%0\;vor %3,%3,%2 vand %3,%1,%0\;vor %3,%3,%2 @@ -2613,7 +2613,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vandc %3,%1,%0\;vor %3,%3,%2 vandc %3,%1,%0\;vor %3,%3,%2 @@ -2631,7 +2631,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ veqv %3,%1,%0\;vor %3,%3,%2 veqv %3,%1,%0\;vor %3,%3,%2 @@ -2649,7 +2649,7 @@ (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vnand %3,%1,%0\;vor %3,%3,%2 vnand %3,%1,%0\;vor %3,%3,%2 @@ -2667,7 +2667,7 @@ (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vnor %3,%1,%0\;vor %3,%3,%2 vnor %3,%1,%0\;vor %3,%3,%2 @@ -2685,7 +2685,7 @@ (match_operand:VM 1 "altivec_register_operand" "%v,v,v,v")) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vor %3,%1,%0\;vor %3,%3,%2 vor %3,%1,%0\;vor %3,%3,%2 @@ -2703,7 +2703,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vorc %3,%1,%0\;vor %3,%3,%2 vorc %3,%1,%0\;vor %3,%3,%2 @@ -2721,7 +2721,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vxor %3,%1,%0\;vor %3,%3,%2 vxor %3,%1,%0\;vor %3,%3,%2 @@ -2739,7 +2739,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vand %3,%1,%0\;vorc %3,%3,%2 vand %3,%1,%0\;vorc %3,%3,%2 @@ -2757,7 +2757,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vandc %3,%1,%0\;vorc %3,%3,%2 vandc %3,%1,%0\;vorc %3,%3,%2 @@ -2775,7 +2775,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ veqv %3,%1,%0\;vorc %3,%3,%2 veqv %3,%1,%0\;vorc %3,%3,%2 @@ -2793,7 +2793,7 @@ (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (not:VM 
(match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vnand %3,%1,%0\;vorc %3,%3,%2 vnand %3,%1,%0\;vorc %3,%3,%2 @@ -2811,7 +2811,7 @@ (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vnor %3,%1,%0\;vorc %3,%3,%2 vnor %3,%1,%0\;vorc %3,%3,%2 @@ -2829,7 +2829,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vor %3,%1,%0\;vorc %3,%3,%2 vor %3,%1,%0\;vorc %3,%3,%2 @@ -2847,7 +2847,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vorc %3,%1,%0\;vorc %3,%3,%2 vorc %3,%1,%0\;vorc %3,%3,%2 @@ -2865,7 +2865,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (not:VM (match_operand:VM 2 "altivec_register_operand" "v,v,v,v")))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vxor %3,%1,%0\;vorc %3,%3,%2 vxor %3,%1,%0\;vorc %3,%3,%2 @@ -2883,7 +2883,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vand %3,%1,%0\;vxor %3,%3,%2 vand %3,%1,%0\;vxor %3,%3,%2 @@ -2901,7 +2901,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vandc %3,%1,%0\;vxor %3,%3,%2 vandc %3,%1,%0\;vxor %3,%3,%2 @@ -2919,7 +2919,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ veqv %3,%1,%0\;vxor %3,%3,%2 veqv %3,%1,%0\;vxor %3,%3,%2 @@ -2937,7 +2937,7 @@ (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vnand %3,%1,%0\;vxor %3,%3,%2 vnand %3,%1,%0\;vxor %3,%3,%2 @@ -2955,7 +2955,7 @@ (not:VM (match_operand:VM 1 "altivec_register_operand" "v,v,v,v"))) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vnor %3,%1,%0\;vxor %3,%3,%2 vnor %3,%1,%0\;vxor %3,%3,%2 @@ -2973,7 +2973,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vor %3,%1,%0\;vxor %3,%3,%2 vor %3,%1,%0\;vxor %3,%3,%2 @@ 
-2991,7 +2991,7 @@ (match_operand:VM 1 "altivec_register_operand" "v,v,v,v")) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vorc %3,%1,%0\;vxor %3,%3,%2 vorc %3,%1,%0\;vxor %3,%3,%2 @@ -3009,7 +3009,7 @@ (match_operand:VM 1 "altivec_register_operand" "%v,v,v,v")) (match_operand:VM 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:VM 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2LOGICAL)" + "(TARGET_P10_FUSION)" "@ vxor %3,%1,%0\;vxor %3,%3,%2 vxor %3,%1,%0\;vxor %3,%3,%2 @@ -3027,7 +3027,7 @@ (match_operand:GPR 1 "gpc_reg_operand" "%r,r,r,r")) (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r"))) (clobber (match_scratch:GPR 4 "=X,X,X,&r"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2ADD)" + "(TARGET_P10_FUSION)" "@ add %3,%1,%0\;add %3,%3,%2 add %3,%1,%0\;add %3,%3,%2 @@ -3045,7 +3045,7 @@ (match_operand:V2DI 1 "altivec_register_operand" "%v,v,v,v")) (match_operand:V2DI 2 "altivec_register_operand" "v,v,v,v"))) (clobber (match_scratch:V2DI 4 "=X,X,X,&v"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2ADD)" + "(TARGET_P10_FUSION)" "@ vaddudm %3,%1,%0\;vaddudm %3,%3,%2 vaddudm %3,%1,%0\;vaddudm %3,%3,%2 diff --git a/gcc/config/rs6000/genfusion.pl b/gcc/config/rs6000/genfusion.pl index 7e201f7..81cc225 100755 --- a/gcc/config/rs6000/genfusion.pl +++ b/gcc/config/rs6000/genfusion.pl @@ -118,7 +118,7 @@ sub gen_ld_cmpi_p10 } else { print " (set (match_operand:${result} 0 \"gpc_reg_operand\" \"=r\") (${extend}_extend:${result} (match_dup 1)))]\n"; } - print " \"(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)\"\n"; + print " \"(TARGET_P10_FUSION)\"\n"; print " \"l${ldst}${echr}%X1 %0,%1\\;cmp${cmpl}di %2,%0,%3\"\n"; print " \"&& reload_completed\n"; print " && (cc_reg_not_cr0_operand (operands[2], CCmode)\n"; @@ -166,8 +166,8 @@ sub gen_logical_addsubf $outer_op, $outer_comp, $outer_inv, $outer_rtl, $inner, @inner_ops, $inner_comp, $inner_inv, $inner_rtl, $inner_op, $both_commute, $c4, $bc, $inner_arg0, $inner_arg1, $inner_exp, $outer_arg2, $outer_exp, - $target_flag, $ftype, $insn, $is_subf, $is_rsubf, $outer_32, $outer_42, - $outer_name, $fuse_type); + $ftype, $insn, $is_subf, $is_rsubf, $outer_32, $outer_42,$outer_name, + $fuse_type); KIND: foreach $kind ('scalar','vector') { @outer_ops = @logicals; if ( $kind eq 'vector' ) { @@ -199,18 +199,15 @@ sub gen_logical_addsubf $outer_rtl = $rtlop{$outer}; @inner_ops = @logicals; $ftype = "logical-logical"; - $target_flag = "TARGET_P10_FUSION_2LOGICAL"; if ( exists $isaddsub{$outer} ) { @inner_ops = sort keys %logicals_addsub; $ftype = "logical-add"; - $target_flag = "TARGET_P10_FUSION_LOGADD"; } elsif ( $kind ne 'vector' && exists $logicals_addsub{$outer} ) { push (@inner_ops, @addsub); } INNER: foreach $inner ( @inner_ops ) { if ( exists $isaddsub{$inner} ) { $ftype = "add-logical"; - $target_flag = "TARGET_P10_FUSION_ADDLOG"; } $inner_comp = $complement{$inner}; $inner_inv = $invert{$inner}; @@ -266,7 +263,7 @@ sub gen_logical_addsubf [(set (match_operand:${mode} 3 "${pred}" "=&0,&1,&${constraint},${constraint}") ${outer_exp}) (clobber (match_scratch:${mode} 4 "=X,X,X,&${constraint}"))] - "(TARGET_P10_FUSION && $target_flag)" + "(TARGET_P10_FUSION)" "@ ${inner_op} %3,%1,%0\\;${outer_op} %3,${outer_32} ${inner_op} %3,%1,%0\\;${outer_op} %3,${outer_32} @@ -313,7 +310,7 @@ sub gen_addadd (match_operand:${mode} 1 "${pred}" "%${c4}")) (match_operand:${mode} 2 "${pred}" "${c4}"))) 
(clobber (match_scratch:${mode} 4 "=X,X,X,&${constraint}"))] - "(TARGET_P10_FUSION && TARGET_P10_FUSION_2ADD)" + "(TARGET_P10_FUSION)" "@ ${op} %3,%1,%0\\;${op} %3,%3,%2 ${op} %3,%1,%0\\;${op} %3,%3,%2 diff --git a/gcc/config/rs6000/host-darwin.cc b/gcc/config/rs6000/host-darwin.cc index 541f7e1..6072a6c 100644 --- a/gcc/config/rs6000/host-darwin.cc +++ b/gcc/config/rs6000/host-darwin.cc @@ -58,7 +58,7 @@ extern int sigaltstack(const struct sigaltstack *, struct sigaltstack *); static void segv_crash_handler (int sig ATTRIBUTE_UNUSED) { - internal_error ("Segmentation Fault (code)"); + internal_error ("segmentation fault (code)"); } static void @@ -128,7 +128,7 @@ segv_handler (int sig ATTRIBUTE_UNUSED, fprintf (stderr, "[address=%08lx pc=%08x]\n", uc->uc_mcontext->MC_FLD(es).MC_FLD(dar), uc->uc_mcontext->MC_FLD(ss).MC_FLD(srr0)); - internal_error ("Segmentation Fault"); + internal_error ("segmentation fault"); exit (FATAL_EXIT_CODE); } diff --git a/gcc/config/rs6000/mm_malloc.h b/gcc/config/rs6000/mm_malloc.h index 3d2e09e..ae47cac 100644 --- a/gcc/config/rs6000/mm_malloc.h +++ b/gcc/config/rs6000/mm_malloc.h @@ -35,28 +35,28 @@ extern "C" int posix_memalign (void **, size_t, size_t) throw (); #endif static __inline void * -_mm_malloc (size_t size, size_t alignment) +_mm_malloc (size_t __size, size_t __alignment) { /* PowerPC64 ELF V2 ABI requires quadword alignment. */ - size_t vec_align = sizeof (__vector float); + size_t __vec_align = sizeof (__vector float); /* Linux GLIBC malloc alignment is at least 2 X ptr size. */ - size_t malloc_align = (sizeof (void *) + sizeof (void *)); - void *ptr; - - if (alignment == malloc_align && alignment == vec_align) - return malloc (size); - if (alignment < vec_align) - alignment = vec_align; - if (posix_memalign (&ptr, alignment, size) == 0) - return ptr; + size_t __malloc_align = (sizeof (void *) + sizeof (void *)); + void *__ptr; + + if (__alignment == __malloc_align && __alignment == __vec_align) + return malloc (__size); + if (__alignment < __vec_align) + __alignment = __vec_align; + if (posix_memalign (&__ptr, __alignment, __size) == 0) + return __ptr; else return NULL; } static __inline void -_mm_free (void * ptr) +_mm_free (void * __ptr) { - free (ptr); + free (__ptr); } #endif /* _MM_MALLOC_H_INCLUDED */ diff --git a/gcc/config/rs6000/mmintrin.h b/gcc/config/rs6000/mmintrin.h index da4f7d5..bf7f3b1 100644 --- a/gcc/config/rs6000/mmintrin.h +++ b/gcc/config/rs6000/mmintrin.h @@ -170,17 +170,17 @@ _mm_cvtsi64_si64x (__m64 __i) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_packs_pi16 (__m64 __m1, __m64 __m2) { - __vector signed short vm1; - __vector signed char vresult; + __vector signed short __vm1; + __vector signed char __vresult; - vm1 = (__vector signed short) (__vector unsigned long long) + __vm1 = (__vector signed short) (__vector unsigned long long) #ifdef __LITTLE_ENDIAN__ { __m1, __m2 }; #else { __m2, __m1 }; #endif - vresult = vec_packs (vm1, vm1); - return (__m64) ((__vector long long) vresult)[0]; + __vresult = vec_packs (__vm1, __vm1); + return (__m64) ((__vector long long) __vresult)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -195,17 +195,17 @@ _m_packsswb (__m64 __m1, __m64 __m2) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_packs_pi32 (__m64 __m1, __m64 __m2) { - __vector signed int vm1; - __vector signed short vresult; + __vector signed int __vm1; + __vector signed short 
__vresult; - vm1 = (__vector signed int) (__vector unsigned long long) + __vm1 = (__vector signed int) (__vector unsigned long long) #ifdef __LITTLE_ENDIAN__ { __m1, __m2 }; #else { __m2, __m1 }; #endif - vresult = vec_packs (vm1, vm1); - return (__m64) ((__vector long long) vresult)[0]; + __vresult = vec_packs (__vm1, __vm1); + return (__m64) ((__vector long long) __vresult)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -220,19 +220,19 @@ _m_packssdw (__m64 __m1, __m64 __m2) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_packs_pu16 (__m64 __m1, __m64 __m2) { - __vector unsigned char r; - __vector signed short vm1 = (__vector signed short) (__vector long long) + __vector unsigned char __r; + __vector signed short __vm1 = (__vector signed short) (__vector long long) #ifdef __LITTLE_ENDIAN__ { __m1, __m2 }; #else { __m2, __m1 }; #endif const __vector signed short __zero = { 0 }; - __vector __bool short __select = vec_cmplt (vm1, __zero); - r = vec_packs ((__vector unsigned short) vm1, (__vector unsigned short) vm1); - __vector __bool char packsel = vec_pack (__select, __select); - r = vec_sel (r, (const __vector unsigned char) __zero, packsel); - return (__m64) ((__vector long long) r)[0]; + __vector __bool short __select = vec_cmplt (__vm1, __zero); + __r = vec_packs ((__vector unsigned short) __vm1, (__vector unsigned short) __vm1); + __vector __bool char __packsel = vec_pack (__select, __select); + __r = vec_sel (__r, (const __vector unsigned char) __zero, __packsel); + return (__m64) ((__vector long long) __r)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -248,28 +248,28 @@ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi _mm_unpackhi_pi8 (__m64 __m1, __m64 __m2) { #if _ARCH_PWR8 - __vector unsigned char a, b, c; + __vector unsigned char __a, __b, __c; - a = (__vector unsigned char)vec_splats (__m1); - b = (__vector unsigned char)vec_splats (__m2); - c = vec_mergel (a, b); - return (__m64) ((__vector long long) c)[1]; + __a = (__vector unsigned char)vec_splats (__m1); + __b = (__vector unsigned char)vec_splats (__m2); + __c = vec_mergel (__a, __b); + return (__m64) ((__vector long long) __c)[1]; #else - __m64_union m1, m2, res; + __m64_union __mu1, __mu2, __res; - m1.as_m64 = __m1; - m2.as_m64 = __m2; + __mu1.as_m64 = __m1; + __mu2.as_m64 = __m2; - res.as_char[0] = m1.as_char[4]; - res.as_char[1] = m2.as_char[4]; - res.as_char[2] = m1.as_char[5]; - res.as_char[3] = m2.as_char[5]; - res.as_char[4] = m1.as_char[6]; - res.as_char[5] = m2.as_char[6]; - res.as_char[6] = m1.as_char[7]; - res.as_char[7] = m2.as_char[7]; + __res.as_char[0] = __mu1.as_char[4]; + __res.as_char[1] = __mu2.as_char[4]; + __res.as_char[2] = __mu1.as_char[5]; + __res.as_char[3] = __mu2.as_char[5]; + __res.as_char[4] = __mu1.as_char[6]; + __res.as_char[5] = __mu2.as_char[6]; + __res.as_char[6] = __mu1.as_char[7]; + __res.as_char[7] = __mu2.as_char[7]; - return (__m64) res.as_m64; + return (__m64) __res.as_m64; #endif } @@ -284,17 +284,17 @@ _m_punpckhbw (__m64 __m1, __m64 __m2) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpackhi_pi16 (__m64 __m1, __m64 __m2) { - __m64_union m1, m2, res; + __m64_union __mu1, __mu2, __res; - m1.as_m64 = __m1; - m2.as_m64 = __m2; + __mu1.as_m64 = __m1; + __mu2.as_m64 = __m2; - res.as_short[0] = m1.as_short[2]; - res.as_short[1] = m2.as_short[2]; - 
res.as_short[2] = m1.as_short[3]; - res.as_short[3] = m2.as_short[3]; + __res.as_short[0] = __mu1.as_short[2]; + __res.as_short[1] = __mu2.as_short[2]; + __res.as_short[2] = __mu1.as_short[3]; + __res.as_short[3] = __mu2.as_short[3]; - return (__m64) res.as_m64; + return (__m64) __res.as_m64; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -307,15 +307,15 @@ _m_punpckhwd (__m64 __m1, __m64 __m2) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpackhi_pi32 (__m64 __m1, __m64 __m2) { - __m64_union m1, m2, res; + __m64_union __mu1, __mu2, __res; - m1.as_m64 = __m1; - m2.as_m64 = __m2; + __mu1.as_m64 = __m1; + __mu2.as_m64 = __m2; - res.as_int[0] = m1.as_int[1]; - res.as_int[1] = m2.as_int[1]; + __res.as_int[0] = __mu1.as_int[1]; + __res.as_int[1] = __mu2.as_int[1]; - return (__m64) res.as_m64; + return (__m64) __res.as_m64; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -329,28 +329,28 @@ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi _mm_unpacklo_pi8 (__m64 __m1, __m64 __m2) { #if _ARCH_PWR8 - __vector unsigned char a, b, c; + __vector unsigned char __a, __b, __c; - a = (__vector unsigned char)vec_splats (__m1); - b = (__vector unsigned char)vec_splats (__m2); - c = vec_mergel (a, b); - return (__m64) ((__vector long long) c)[0]; + __a = (__vector unsigned char)vec_splats (__m1); + __b = (__vector unsigned char)vec_splats (__m2); + __c = vec_mergel (__a, __b); + return (__m64) ((__vector long long) __c)[0]; #else - __m64_union m1, m2, res; + __m64_union __mu1, __mu2, __res; - m1.as_m64 = __m1; - m2.as_m64 = __m2; + __mu1.as_m64 = __m1; + __mu2.as_m64 = __m2; - res.as_char[0] = m1.as_char[0]; - res.as_char[1] = m2.as_char[0]; - res.as_char[2] = m1.as_char[1]; - res.as_char[3] = m2.as_char[1]; - res.as_char[4] = m1.as_char[2]; - res.as_char[5] = m2.as_char[2]; - res.as_char[6] = m1.as_char[3]; - res.as_char[7] = m2.as_char[3]; + __res.as_char[0] = __mu1.as_char[0]; + __res.as_char[1] = __mu2.as_char[0]; + __res.as_char[2] = __mu1.as_char[1]; + __res.as_char[3] = __mu2.as_char[1]; + __res.as_char[4] = __mu1.as_char[2]; + __res.as_char[5] = __mu2.as_char[2]; + __res.as_char[6] = __mu1.as_char[3]; + __res.as_char[7] = __mu2.as_char[3]; - return (__m64) res.as_m64; + return (__m64) __res.as_m64; #endif } @@ -364,17 +364,17 @@ _m_punpcklbw (__m64 __m1, __m64 __m2) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpacklo_pi16 (__m64 __m1, __m64 __m2) { - __m64_union m1, m2, res; + __m64_union __mu1, __mu2, __res; - m1.as_m64 = __m1; - m2.as_m64 = __m2; + __mu1.as_m64 = __m1; + __mu2.as_m64 = __m2; - res.as_short[0] = m1.as_short[0]; - res.as_short[1] = m2.as_short[0]; - res.as_short[2] = m1.as_short[1]; - res.as_short[3] = m2.as_short[1]; + __res.as_short[0] = __mu1.as_short[0]; + __res.as_short[1] = __mu2.as_short[0]; + __res.as_short[2] = __mu1.as_short[1]; + __res.as_short[3] = __mu2.as_short[1]; - return (__m64) res.as_m64; + return (__m64) __res.as_m64; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -388,15 +388,15 @@ _m_punpcklwd (__m64 __m1, __m64 __m2) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_unpacklo_pi32 (__m64 __m1, __m64 __m2) { - __m64_union m1, m2, res; + __m64_union __mu1, __mu2, __res; - m1.as_m64 = __m1; - m2.as_m64 = __m2; + __mu1.as_m64 = __m1; + __mu2.as_m64 = __m2; - 
res.as_int[0] = m1.as_int[0]; - res.as_int[1] = m2.as_int[0]; + __res.as_int[0] = __mu1.as_int[0]; + __res.as_int[1] = __mu2.as_int[0]; - return (__m64) res.as_m64; + return (__m64) __res.as_m64; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -410,28 +410,28 @@ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi _mm_add_pi8 (__m64 __m1, __m64 __m2) { #if _ARCH_PWR8 - __vector signed char a, b, c; + __vector signed char __a, __b, __c; - a = (__vector signed char)vec_splats (__m1); - b = (__vector signed char)vec_splats (__m2); - c = vec_add (a, b); - return (__m64) ((__vector long long) c)[0]; + __a = (__vector signed char)vec_splats (__m1); + __b = (__vector signed char)vec_splats (__m2); + __c = vec_add (__a, __b); + return (__m64) ((__vector long long) __c)[0]; #else - __m64_union m1, m2, res; + __m64_union __mu1, __mu2, __res; - m1.as_m64 = __m1; - m2.as_m64 = __m2; + __mu1.as_m64 = __m1; + __mu2.as_m64 = __m2; - res.as_char[0] = m1.as_char[0] + m2.as_char[0]; - res.as_char[1] = m1.as_char[1] + m2.as_char[1]; - res.as_char[2] = m1.as_char[2] + m2.as_char[2]; - res.as_char[3] = m1.as_char[3] + m2.as_char[3]; - res.as_char[4] = m1.as_char[4] + m2.as_char[4]; - res.as_char[5] = m1.as_char[5] + m2.as_char[5]; - res.as_char[6] = m1.as_char[6] + m2.as_char[6]; - res.as_char[7] = m1.as_char[7] + m2.as_char[7]; + __res.as_char[0] = __mu1.as_char[0] + __mu2.as_char[0]; + __res.as_char[1] = __mu1.as_char[1] + __mu2.as_char[1]; + __res.as_char[2] = __mu1.as_char[2] + __mu2.as_char[2]; + __res.as_char[3] = __mu1.as_char[3] + __mu2.as_char[3]; + __res.as_char[4] = __mu1.as_char[4] + __mu2.as_char[4]; + __res.as_char[5] = __mu1.as_char[5] + __mu2.as_char[5]; + __res.as_char[6] = __mu1.as_char[6] + __mu2.as_char[6]; + __res.as_char[7] = __mu1.as_char[7] + __mu2.as_char[7]; - return (__m64) res.as_m64; + return (__m64) __res.as_m64; #endif } @@ -446,24 +446,24 @@ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi _mm_add_pi16 (__m64 __m1, __m64 __m2) { #if _ARCH_PWR8 - __vector signed short a, b, c; + __vector signed short __a, __b, __c; - a = (__vector signed short)vec_splats (__m1); - b = (__vector signed short)vec_splats (__m2); - c = vec_add (a, b); - return (__m64) ((__vector long long) c)[0]; + __a = (__vector signed short)vec_splats (__m1); + __b = (__vector signed short)vec_splats (__m2); + __c = vec_add (__a, __b); + return (__m64) ((__vector long long) __c)[0]; #else - __m64_union m1, m2, res; + __m64_union __mu1, __mu2, __res; - m1.as_m64 = __m1; - m2.as_m64 = __m2; + __mu1.as_m64 = __m1; + __mu2.as_m64 = __m2; - res.as_short[0] = m1.as_short[0] + m2.as_short[0]; - res.as_short[1] = m1.as_short[1] + m2.as_short[1]; - res.as_short[2] = m1.as_short[2] + m2.as_short[2]; - res.as_short[3] = m1.as_short[3] + m2.as_short[3]; + __res.as_short[0] = __mu1.as_short[0] + __mu2.as_short[0]; + __res.as_short[1] = __mu1.as_short[1] + __mu2.as_short[1]; + __res.as_short[2] = __mu1.as_short[2] + __mu2.as_short[2]; + __res.as_short[3] = __mu1.as_short[3] + __mu2.as_short[3]; - return (__m64) res.as_m64; + return (__m64) __res.as_m64; #endif } @@ -478,22 +478,22 @@ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi _mm_add_pi32 (__m64 __m1, __m64 __m2) { #if _ARCH_PWR9 - __vector signed int a, b, c; + __vector signed int __a, __b, __c; - a = (__vector signed int)vec_splats (__m1); - b = (__vector signed int)vec_splats (__m2); - c = vec_add (a, b); - return (__m64) ((__vector 
long long) c)[0]; + __a = (__vector signed int)vec_splats (__m1); + __b = (__vector signed int)vec_splats (__m2); + __c = vec_add (__a, __b); + return (__m64) ((__vector long long) __c)[0]; #else - __m64_union m1, m2, res; + __m64_union __mu1, __mu2, __res; - m1.as_m64 = __m1; - m2.as_m64 = __m2; + __mu1.as_m64 = __m1; + __mu2.as_m64 = __m2; - res.as_int[0] = m1.as_int[0] + m2.as_int[0]; - res.as_int[1] = m1.as_int[1] + m2.as_int[1]; + __res.as_int[0] = __mu1.as_int[0] + __mu2.as_int[0]; + __res.as_int[1] = __mu1.as_int[1] + __mu2.as_int[1]; - return (__m64) res.as_m64; + return (__m64) __res.as_m64; #endif } @@ -508,28 +508,28 @@ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi _mm_sub_pi8 (__m64 __m1, __m64 __m2) { #if _ARCH_PWR8 - __vector signed char a, b, c; + __vector signed char __a, __b, __c; - a = (__vector signed char)vec_splats (__m1); - b = (__vector signed char)vec_splats (__m2); - c = vec_sub (a, b); - return (__m64) ((__vector long long) c)[0]; + __a = (__vector signed char)vec_splats (__m1); + __b = (__vector signed char)vec_splats (__m2); + __c = vec_sub (__a, __b); + return (__m64) ((__vector long long) __c)[0]; #else - __m64_union m1, m2, res; + __m64_union __mu1, __mu2, __res; - m1.as_m64 = __m1; - m2.as_m64 = __m2; + __mu1.as_m64 = __m1; + __mu2.as_m64 = __m2; - res.as_char[0] = m1.as_char[0] - m2.as_char[0]; - res.as_char[1] = m1.as_char[1] - m2.as_char[1]; - res.as_char[2] = m1.as_char[2] - m2.as_char[2]; - res.as_char[3] = m1.as_char[3] - m2.as_char[3]; - res.as_char[4] = m1.as_char[4] - m2.as_char[4]; - res.as_char[5] = m1.as_char[5] - m2.as_char[5]; - res.as_char[6] = m1.as_char[6] - m2.as_char[6]; - res.as_char[7] = m1.as_char[7] - m2.as_char[7]; + __res.as_char[0] = __mu1.as_char[0] - __mu2.as_char[0]; + __res.as_char[1] = __mu1.as_char[1] - __mu2.as_char[1]; + __res.as_char[2] = __mu1.as_char[2] - __mu2.as_char[2]; + __res.as_char[3] = __mu1.as_char[3] - __mu2.as_char[3]; + __res.as_char[4] = __mu1.as_char[4] - __mu2.as_char[4]; + __res.as_char[5] = __mu1.as_char[5] - __mu2.as_char[5]; + __res.as_char[6] = __mu1.as_char[6] - __mu2.as_char[6]; + __res.as_char[7] = __mu1.as_char[7] - __mu2.as_char[7]; - return (__m64) res.as_m64; + return (__m64) __res.as_m64; #endif } @@ -544,24 +544,24 @@ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi _mm_sub_pi16 (__m64 __m1, __m64 __m2) { #if _ARCH_PWR8 - __vector signed short a, b, c; + __vector signed short __a, __b, __c; - a = (__vector signed short)vec_splats (__m1); - b = (__vector signed short)vec_splats (__m2); - c = vec_sub (a, b); - return (__m64) ((__vector long long) c)[0]; + __a = (__vector signed short)vec_splats (__m1); + __b = (__vector signed short)vec_splats (__m2); + __c = vec_sub (__a, __b); + return (__m64) ((__vector long long) __c)[0]; #else - __m64_union m1, m2, res; + __m64_union __mu1, __mu2, __res; - m1.as_m64 = __m1; - m2.as_m64 = __m2; + __mu1.as_m64 = __m1; + __mu2.as_m64 = __m2; - res.as_short[0] = m1.as_short[0] - m2.as_short[0]; - res.as_short[1] = m1.as_short[1] - m2.as_short[1]; - res.as_short[2] = m1.as_short[2] - m2.as_short[2]; - res.as_short[3] = m1.as_short[3] - m2.as_short[3]; + __res.as_short[0] = __mu1.as_short[0] - __mu2.as_short[0]; + __res.as_short[1] = __mu1.as_short[1] - __mu2.as_short[1]; + __res.as_short[2] = __mu1.as_short[2] - __mu2.as_short[2]; + __res.as_short[3] = __mu1.as_short[3] - __mu2.as_short[3]; - return (__m64) res.as_m64; + return (__m64) __res.as_m64; #endif } @@ -576,22 +576,22 @@ extern __inline 
__m64 __attribute__((__gnu_inline__, __always_inline__, __artifi _mm_sub_pi32 (__m64 __m1, __m64 __m2) { #if _ARCH_PWR9 - __vector signed int a, b, c; + __vector signed int __a, __b, __c; - a = (__vector signed int)vec_splats (__m1); - b = (__vector signed int)vec_splats (__m2); - c = vec_sub (a, b); - return (__m64) ((__vector long long) c)[0]; + __a = (__vector signed int)vec_splats (__m1); + __b = (__vector signed int)vec_splats (__m2); + __c = vec_sub (__a, __b); + return (__m64) ((__vector long long) __c)[0]; #else - __m64_union m1, m2, res; + __m64_union __mu1, __mu2, __res; - m1.as_m64 = __m1; - m2.as_m64 = __m2; + __mu1.as_m64 = __m1; + __mu2.as_m64 = __m2; - res.as_int[0] = m1.as_int[0] - m2.as_int[0]; - res.as_int[1] = m1.as_int[1] - m2.as_int[1]; + __res.as_int[0] = __mu1.as_int[0] - __mu2.as_int[0]; + __res.as_int[1] = __mu1.as_int[1] - __mu2.as_int[1]; - return (__m64) res.as_m64; + return (__m64) __res.as_m64; #endif } @@ -729,30 +729,30 @@ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi _mm_cmpeq_pi8 (__m64 __m1, __m64 __m2) { #if defined(_ARCH_PWR6) && defined(__powerpc64__) - __m64 res; + __m64 __res; __asm__( "cmpb %0,%1,%2;\n" - : "=r" (res) + : "=r" (__res) : "r" (__m1), "r" (__m2) : ); - return (res); + return (__res); #else - __m64_union m1, m2, res; + __m64_union __mu1, __mu2, __res; - m1.as_m64 = __m1; - m2.as_m64 = __m2; + __mu1.as_m64 = __m1; + __mu2.as_m64 = __m2; - res.as_char[0] = (m1.as_char[0] == m2.as_char[0])? -1: 0; - res.as_char[1] = (m1.as_char[1] == m2.as_char[1])? -1: 0; - res.as_char[2] = (m1.as_char[2] == m2.as_char[2])? -1: 0; - res.as_char[3] = (m1.as_char[3] == m2.as_char[3])? -1: 0; - res.as_char[4] = (m1.as_char[4] == m2.as_char[4])? -1: 0; - res.as_char[5] = (m1.as_char[5] == m2.as_char[5])? -1: 0; - res.as_char[6] = (m1.as_char[6] == m2.as_char[6])? -1: 0; - res.as_char[7] = (m1.as_char[7] == m2.as_char[7])? -1: 0; + __res.as_char[0] = (__mu1.as_char[0] == __mu2.as_char[0])? -1: 0; + __res.as_char[1] = (__mu1.as_char[1] == __mu2.as_char[1])? -1: 0; + __res.as_char[2] = (__mu1.as_char[2] == __mu2.as_char[2])? -1: 0; + __res.as_char[3] = (__mu1.as_char[3] == __mu2.as_char[3])? -1: 0; + __res.as_char[4] = (__mu1.as_char[4] == __mu2.as_char[4])? -1: 0; + __res.as_char[5] = (__mu1.as_char[5] == __mu2.as_char[5])? -1: 0; + __res.as_char[6] = (__mu1.as_char[6] == __mu2.as_char[6])? -1: 0; + __res.as_char[7] = (__mu1.as_char[7] == __mu2.as_char[7])? -1: 0; - return (__m64) res.as_m64; + return (__m64) __res.as_m64; #endif } @@ -766,28 +766,28 @@ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi _mm_cmpgt_pi8 (__m64 __m1, __m64 __m2) { #if _ARCH_PWR8 - __vector signed char a, b, c; + __vector signed char __a, __b, __c; - a = (__vector signed char)vec_splats (__m1); - b = (__vector signed char)vec_splats (__m2); - c = (__vector signed char)vec_cmpgt (a, b); - return (__m64) ((__vector long long) c)[0]; + __a = (__vector signed char)vec_splats (__m1); + __b = (__vector signed char)vec_splats (__m2); + __c = (__vector signed char)vec_cmpgt (__a, __b); + return (__m64) ((__vector long long) __c)[0]; #else - __m64_union m1, m2, res; + __m64_union __mu1, __mu2, __res; - m1.as_m64 = __m1; - m2.as_m64 = __m2; + __mu1.as_m64 = __m1; + __mu2.as_m64 = __m2; - res.as_char[0] = (m1.as_char[0] > m2.as_char[0])? -1: 0; - res.as_char[1] = (m1.as_char[1] > m2.as_char[1])? -1: 0; - res.as_char[2] = (m1.as_char[2] > m2.as_char[2])? -1: 0; - res.as_char[3] = (m1.as_char[3] > m2.as_char[3])? 
-1: 0; - res.as_char[4] = (m1.as_char[4] > m2.as_char[4])? -1: 0; - res.as_char[5] = (m1.as_char[5] > m2.as_char[5])? -1: 0; - res.as_char[6] = (m1.as_char[6] > m2.as_char[6])? -1: 0; - res.as_char[7] = (m1.as_char[7] > m2.as_char[7])? -1: 0; + __res.as_char[0] = (__mu1.as_char[0] > __mu2.as_char[0])? -1: 0; + __res.as_char[1] = (__mu1.as_char[1] > __mu2.as_char[1])? -1: 0; + __res.as_char[2] = (__mu1.as_char[2] > __mu2.as_char[2])? -1: 0; + __res.as_char[3] = (__mu1.as_char[3] > __mu2.as_char[3])? -1: 0; + __res.as_char[4] = (__mu1.as_char[4] > __mu2.as_char[4])? -1: 0; + __res.as_char[5] = (__mu1.as_char[5] > __mu2.as_char[5])? -1: 0; + __res.as_char[6] = (__mu1.as_char[6] > __mu2.as_char[6])? -1: 0; + __res.as_char[7] = (__mu1.as_char[7] > __mu2.as_char[7])? -1: 0; - return (__m64) res.as_m64; + return (__m64) __res.as_m64; #endif } @@ -803,24 +803,24 @@ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi _mm_cmpeq_pi16 (__m64 __m1, __m64 __m2) { #if _ARCH_PWR8 - __vector signed short a, b, c; + __vector signed short __a, __b, __c; - a = (__vector signed short)vec_splats (__m1); - b = (__vector signed short)vec_splats (__m2); - c = (__vector signed short)vec_cmpeq (a, b); - return (__m64) ((__vector long long) c)[0]; + __a = (__vector signed short)vec_splats (__m1); + __b = (__vector signed short)vec_splats (__m2); + __c = (__vector signed short)vec_cmpeq (__a, __b); + return (__m64) ((__vector long long) __c)[0]; #else - __m64_union m1, m2, res; + __m64_union __mu1, __mu2, __res; - m1.as_m64 = __m1; - m2.as_m64 = __m2; + __mu1.as_m64 = __m1; + __mu2.as_m64 = __m2; - res.as_short[0] = (m1.as_short[0] == m2.as_short[0])? -1: 0; - res.as_short[1] = (m1.as_short[1] == m2.as_short[1])? -1: 0; - res.as_short[2] = (m1.as_short[2] == m2.as_short[2])? -1: 0; - res.as_short[3] = (m1.as_short[3] == m2.as_short[3])? -1: 0; + __res.as_short[0] = (__mu1.as_short[0] == __mu2.as_short[0])? -1: 0; + __res.as_short[1] = (__mu1.as_short[1] == __mu2.as_short[1])? -1: 0; + __res.as_short[2] = (__mu1.as_short[2] == __mu2.as_short[2])? -1: 0; + __res.as_short[3] = (__mu1.as_short[3] == __mu2.as_short[3])? -1: 0; - return (__m64) res.as_m64; + return (__m64) __res.as_m64; #endif } @@ -834,24 +834,24 @@ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi _mm_cmpgt_pi16 (__m64 __m1, __m64 __m2) { #if _ARCH_PWR8 - __vector signed short a, b, c; + __vector signed short __a, __b, __c; - a = (__vector signed short)vec_splats (__m1); - b = (__vector signed short)vec_splats (__m2); - c = (__vector signed short)vec_cmpgt (a, b); - return (__m64) ((__vector long long) c)[0]; + __a = (__vector signed short)vec_splats (__m1); + __b = (__vector signed short)vec_splats (__m2); + __c = (__vector signed short)vec_cmpgt (__a, __b); + return (__m64) ((__vector long long) __c)[0]; #else - __m64_union m1, m2, res; + __m64_union __mu1, __mu2, __res; - m1.as_m64 = __m1; - m2.as_m64 = __m2; + __mu1.as_m64 = __m1; + __mu2.as_m64 = __m2; - res.as_short[0] = (m1.as_short[0] > m2.as_short[0])? -1: 0; - res.as_short[1] = (m1.as_short[1] > m2.as_short[1])? -1: 0; - res.as_short[2] = (m1.as_short[2] > m2.as_short[2])? -1: 0; - res.as_short[3] = (m1.as_short[3] > m2.as_short[3])? -1: 0; + __res.as_short[0] = (__mu1.as_short[0] > __mu2.as_short[0])? -1: 0; + __res.as_short[1] = (__mu1.as_short[1] > __mu2.as_short[1])? -1: 0; + __res.as_short[2] = (__mu1.as_short[2] > __mu2.as_short[2])? -1: 0; + __res.as_short[3] = (__mu1.as_short[3] > __mu2.as_short[3])? 
-1: 0; - return (__m64) res.as_m64; + return (__m64) __res.as_m64; #endif } @@ -867,22 +867,22 @@ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi _mm_cmpeq_pi32 (__m64 __m1, __m64 __m2) { #if _ARCH_PWR9 - __vector signed int a, b, c; + __vector signed int __a, __b, __c; - a = (__vector signed int)vec_splats (__m1); - b = (__vector signed int)vec_splats (__m2); - c = (__vector signed int)vec_cmpeq (a, b); - return (__m64) ((__vector long long) c)[0]; + __a = (__vector signed int)vec_splats (__m1); + __b = (__vector signed int)vec_splats (__m2); + __c = (__vector signed int)vec_cmpeq (__a, __b); + return (__m64) ((__vector long long) __c)[0]; #else - __m64_union m1, m2, res; + __m64_union __mu1, __mu2, __res; - m1.as_m64 = __m1; - m2.as_m64 = __m2; + __mu1.as_m64 = __m1; + __mu2.as_m64 = __m2; - res.as_int[0] = (m1.as_int[0] == m2.as_int[0])? -1: 0; - res.as_int[1] = (m1.as_int[1] == m2.as_int[1])? -1: 0; + __res.as_int[0] = (__mu1.as_int[0] == __mu2.as_int[0])? -1: 0; + __res.as_int[1] = (__mu1.as_int[1] == __mu2.as_int[1])? -1: 0; - return (__m64) res.as_m64; + return (__m64) __res.as_m64; #endif } @@ -896,22 +896,22 @@ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi _mm_cmpgt_pi32 (__m64 __m1, __m64 __m2) { #if _ARCH_PWR9 - __vector signed int a, b, c; + __vector signed int __a, __b, __c; - a = (__vector signed int)vec_splats (__m1); - b = (__vector signed int)vec_splats (__m2); - c = (__vector signed int)vec_cmpgt (a, b); - return (__m64) ((__vector long long) c)[0]; + __a = (__vector signed int)vec_splats (__m1); + __b = (__vector signed int)vec_splats (__m2); + __c = (__vector signed int)vec_cmpgt (__a, __b); + return (__m64) ((__vector long long) __c)[0]; #else - __m64_union m1, m2, res; + __m64_union __mu1, __mu2, __res; - m1.as_m64 = __m1; - m2.as_m64 = __m2; + __mu1.as_m64 = __m1; + __mu2.as_m64 = __m2; - res.as_int[0] = (m1.as_int[0] > m2.as_int[0])? -1: 0; - res.as_int[1] = (m1.as_int[1] > m2.as_int[1])? -1: 0; + __res.as_int[0] = (__mu1.as_int[0] > __mu2.as_int[0])? -1: 0; + __res.as_int[1] = (__mu1.as_int[1] > __mu2.as_int[1])? 
-1: 0; - return (__m64) res.as_m64; + return (__m64) __res.as_m64; #endif } @@ -927,12 +927,12 @@ _m_pcmpgtd (__m64 __m1, __m64 __m2) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_adds_pi8 (__m64 __m1, __m64 __m2) { - __vector signed char a, b, c; + __vector signed char __a, __b, __c; - a = (__vector signed char)vec_splats (__m1); - b = (__vector signed char)vec_splats (__m2); - c = vec_adds (a, b); - return (__m64) ((__vector long long) c)[0]; + __a = (__vector signed char)vec_splats (__m1); + __b = (__vector signed char)vec_splats (__m2); + __c = vec_adds (__a, __b); + return (__m64) ((__vector long long) __c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -945,12 +945,12 @@ _m_paddsb (__m64 __m1, __m64 __m2) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_adds_pi16 (__m64 __m1, __m64 __m2) { - __vector signed short a, b, c; + __vector signed short __a, __b, __c; - a = (__vector signed short)vec_splats (__m1); - b = (__vector signed short)vec_splats (__m2); - c = vec_adds (a, b); - return (__m64) ((__vector long long) c)[0]; + __a = (__vector signed short)vec_splats (__m1); + __b = (__vector signed short)vec_splats (__m2); + __c = vec_adds (__a, __b); + return (__m64) ((__vector long long) __c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -963,12 +963,12 @@ _m_paddsw (__m64 __m1, __m64 __m2) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_adds_pu8 (__m64 __m1, __m64 __m2) { - __vector unsigned char a, b, c; + __vector unsigned char __a, __b, __c; - a = (__vector unsigned char)vec_splats (__m1); - b = (__vector unsigned char)vec_splats (__m2); - c = vec_adds (a, b); - return (__m64) ((__vector long long) c)[0]; + __a = (__vector unsigned char)vec_splats (__m1); + __b = (__vector unsigned char)vec_splats (__m2); + __c = vec_adds (__a, __b); + return (__m64) ((__vector long long) __c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -982,12 +982,12 @@ _m_paddusb (__m64 __m1, __m64 __m2) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_adds_pu16 (__m64 __m1, __m64 __m2) { - __vector unsigned short a, b, c; + __vector unsigned short __a, __b, __c; - a = (__vector unsigned short)vec_splats (__m1); - b = (__vector unsigned short)vec_splats (__m2); - c = vec_adds (a, b); - return (__m64) ((__vector long long) c)[0]; + __a = (__vector unsigned short)vec_splats (__m1); + __b = (__vector unsigned short)vec_splats (__m2); + __c = vec_adds (__a, __b); + return (__m64) ((__vector long long) __c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1001,12 +1001,12 @@ _m_paddusw (__m64 __m1, __m64 __m2) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_subs_pi8 (__m64 __m1, __m64 __m2) { - __vector signed char a, b, c; + __vector signed char __a, __b, __c; - a = (__vector signed char)vec_splats (__m1); - b = (__vector signed char)vec_splats (__m2); - c = vec_subs (a, b); - return (__m64) ((__vector long long) c)[0]; + __a = (__vector signed char)vec_splats (__m1); + __b = (__vector signed char)vec_splats (__m2); + __c = vec_subs (__a, __b); + return (__m64) ((__vector long long) __c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ 
-1020,12 +1020,12 @@ _m_psubsb (__m64 __m1, __m64 __m2) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_subs_pi16 (__m64 __m1, __m64 __m2) { - __vector signed short a, b, c; + __vector signed short __a, __b, __c; - a = (__vector signed short)vec_splats (__m1); - b = (__vector signed short)vec_splats (__m2); - c = vec_subs (a, b); - return (__m64) ((__vector long long) c)[0]; + __a = (__vector signed short)vec_splats (__m1); + __b = (__vector signed short)vec_splats (__m2); + __c = vec_subs (__a, __b); + return (__m64) ((__vector long long) __c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1039,12 +1039,12 @@ _m_psubsw (__m64 __m1, __m64 __m2) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_subs_pu8 (__m64 __m1, __m64 __m2) { - __vector unsigned char a, b, c; + __vector unsigned char __a, __b, __c; - a = (__vector unsigned char)vec_splats (__m1); - b = (__vector unsigned char)vec_splats (__m2); - c = vec_subs (a, b); - return (__m64) ((__vector long long) c)[0]; + __a = (__vector unsigned char)vec_splats (__m1); + __b = (__vector unsigned char)vec_splats (__m2); + __c = vec_subs (__a, __b); + return (__m64) ((__vector long long) __c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1058,12 +1058,12 @@ _m_psubusb (__m64 __m1, __m64 __m2) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_subs_pu16 (__m64 __m1, __m64 __m2) { - __vector unsigned short a, b, c; + __vector unsigned short __a, __b, __c; - a = (__vector unsigned short)vec_splats (__m1); - b = (__vector unsigned short)vec_splats (__m2); - c = vec_subs (a, b); - return (__m64) ((__vector long long) c)[0]; + __a = (__vector unsigned short)vec_splats (__m1); + __b = (__vector unsigned short)vec_splats (__m2); + __c = vec_subs (__a, __b); + return (__m64) ((__vector long long) __c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1078,14 +1078,14 @@ _m_psubusw (__m64 __m1, __m64 __m2) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_madd_pi16 (__m64 __m1, __m64 __m2) { - __vector signed short a, b; - __vector signed int c; - __vector signed int zero = {0, 0, 0, 0}; + __vector signed short __a, __b; + __vector signed int __c; + __vector signed int __zero = {0, 0, 0, 0}; - a = (__vector signed short)vec_splats (__m1); - b = (__vector signed short)vec_splats (__m2); - c = vec_vmsumshm (a, b, zero); - return (__m64) ((__vector long long) c)[0]; + __a = (__vector signed short)vec_splats (__m1); + __b = (__vector signed short)vec_splats (__m2); + __c = vec_vmsumshm (__a, __b, __zero); + return (__m64) ((__vector long long) __c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1098,10 +1098,10 @@ _m_pmaddwd (__m64 __m1, __m64 __m2) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mulhi_pi16 (__m64 __m1, __m64 __m2) { - __vector signed short a, b; - __vector signed short c; - __vector signed int w0, w1; - __vector unsigned char xform1 = { + __vector signed short __a, __b; + __vector signed short __c; + __vector signed int __w0, __w1; + __vector unsigned char __xform1 = { #ifdef __LITTLE_ENDIAN__ 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, 0x0A, 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F @@ -1111,14 +1111,14 @@ _mm_mulhi_pi16 
(__m64 __m1, __m64 __m2) #endif }; - a = (__vector signed short)vec_splats (__m1); - b = (__vector signed short)vec_splats (__m2); + __a = (__vector signed short)vec_splats (__m1); + __b = (__vector signed short)vec_splats (__m2); - w0 = vec_vmulesh (a, b); - w1 = vec_vmulosh (a, b); - c = (__vector signed short)vec_perm (w0, w1, xform1); + __w0 = vec_vmulesh (__a, __b); + __w1 = vec_vmulosh (__a, __b); + __c = (__vector signed short)vec_perm (__w0, __w1, __xform1); - return (__m64) ((__vector long long) c)[0]; + return (__m64) ((__vector long long) __c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1132,12 +1132,12 @@ _m_pmulhw (__m64 __m1, __m64 __m2) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mullo_pi16 (__m64 __m1, __m64 __m2) { - __vector signed short a, b, c; + __vector signed short __a, __b, __c; - a = (__vector signed short)vec_splats (__m1); - b = (__vector signed short)vec_splats (__m2); - c = a * b; - return (__m64) ((__vector long long) c)[0]; + __a = (__vector signed short)vec_splats (__m1); + __b = (__vector signed short)vec_splats (__m2); + __c = __a * __b; + return (__m64) ((__vector long long) __c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1150,15 +1150,15 @@ _m_pmullw (__m64 __m1, __m64 __m2) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sll_pi16 (__m64 __m, __m64 __count) { - __vector signed short m, r; - __vector unsigned short c; + __vector signed short __r; + __vector unsigned short __c; if (__count <= 15) { - m = (__vector signed short)vec_splats (__m); - c = (__vector unsigned short)vec_splats ((unsigned short)__count); - r = vec_sl (m, (__vector unsigned short)c); - return (__m64) ((__vector long long) r)[0]; + __r = (__vector signed short)vec_splats (__m); + __c = (__vector unsigned short)vec_splats ((unsigned short)__count); + __r = vec_sl (__r, (__vector unsigned short)__c); + return (__m64) ((__vector long long) __r)[0]; } else return (0); @@ -1187,13 +1187,13 @@ _m_psllwi (__m64 __m, int __count) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sll_pi32 (__m64 __m, __m64 __count) { - __m64_union m, res; + __m64_union __res; - m.as_m64 = __m; + __res.as_m64 = __m; - res.as_int[0] = m.as_int[0] << __count; - res.as_int[1] = m.as_int[1] << __count; - return (res.as_m64); + __res.as_int[0] = __res.as_int[0] << __count; + __res.as_int[1] = __res.as_int[1] << __count; + return (__res.as_m64); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1219,15 +1219,15 @@ _m_pslldi (__m64 __m, int __count) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sra_pi16 (__m64 __m, __m64 __count) { - __vector signed short m, r; - __vector unsigned short c; + __vector signed short __r; + __vector unsigned short __c; if (__count <= 15) { - m = (__vector signed short)vec_splats (__m); - c = (__vector unsigned short)vec_splats ((unsigned short)__count); - r = vec_sra (m, (__vector unsigned short)c); - return (__m64) ((__vector long long) r)[0]; + __r = (__vector signed short)vec_splats (__m); + __c = (__vector unsigned short)vec_splats ((unsigned short)__count); + __r = vec_sra (__r, (__vector unsigned short)__c); + return (__m64) ((__vector long long) __r)[0]; } else return (0); @@ -1256,13 +1256,13 @@ _m_psrawi (__m64 __m, int __count) extern __inline 
__m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sra_pi32 (__m64 __m, __m64 __count) { - __m64_union m, res; + __m64_union __res; - m.as_m64 = __m; + __res.as_m64 = __m; - res.as_int[0] = m.as_int[0] >> __count; - res.as_int[1] = m.as_int[1] >> __count; - return (res.as_m64); + __res.as_int[0] = __res.as_int[0] >> __count; + __res.as_int[1] = __res.as_int[1] >> __count; + return (__res.as_m64); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1288,15 +1288,15 @@ _m_psradi (__m64 __m, int __count) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srl_pi16 (__m64 __m, __m64 __count) { - __vector unsigned short m, r; - __vector unsigned short c; + __vector unsigned short __r; + __vector unsigned short __c; if (__count <= 15) { - m = (__vector unsigned short)vec_splats (__m); - c = (__vector unsigned short)vec_splats ((unsigned short)__count); - r = vec_sr (m, (__vector unsigned short)c); - return (__m64) ((__vector long long) r)[0]; + __r = (__vector unsigned short)vec_splats (__m); + __c = (__vector unsigned short)vec_splats ((unsigned short)__count); + __r = vec_sr (__r, (__vector unsigned short)__c); + return (__m64) ((__vector long long) __r)[0]; } else return (0); @@ -1325,13 +1325,13 @@ _m_psrlwi (__m64 __m, int __count) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_srl_pi32 (__m64 __m, __m64 __count) { - __m64_union m, res; + __m64_union __res; - m.as_m64 = __m; + __res.as_m64 = __m; - res.as_int[0] = (unsigned int)m.as_int[0] >> __count; - res.as_int[1] = (unsigned int)m.as_int[1] >> __count; - return (res.as_m64); + __res.as_int[0] = (unsigned int)__res.as_int[0] >> __count; + __res.as_int[1] = (unsigned int)__res.as_int[1] >> __count; + return (__res.as_m64); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1358,24 +1358,24 @@ _m_psrldi (__m64 __m, int __count) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_pi32 (int __i1, int __i0) { - __m64_union res; + __m64_union __res; - res.as_int[0] = __i0; - res.as_int[1] = __i1; - return (res.as_m64); + __res.as_int[0] = __i0; + __res.as_int[1] = __i1; + return (__res.as_m64); } /* Creates a vector of four 16-bit values; W0 is least significant. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set_pi16 (short __w3, short __w2, short __w1, short __w0) { - __m64_union res; + __m64_union __res; - res.as_short[0] = __w0; - res.as_short[1] = __w1; - res.as_short[2] = __w2; - res.as_short[3] = __w3; - return (res.as_m64); + __res.as_short[0] = __w0; + __res.as_short[1] = __w1; + __res.as_short[2] = __w2; + __res.as_short[3] = __w3; + return (__res.as_m64); } /* Creates a vector of eight 8-bit values; B0 is least significant. 
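   Illustrative only, not part of the patch: with the element assignments
   below, _mm_set_pi8 (7, 6, 5, 4, 3, 2, 1, 0) stores 0 in element 0 and 7 in
   element 7, i.e. bytes 0..7 ordered from least to most significant.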
*/ @@ -1383,28 +1383,28 @@ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi _mm_set_pi8 (char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, char __b1, char __b0) { - __m64_union res; + __m64_union __res; - res.as_char[0] = __b0; - res.as_char[1] = __b1; - res.as_char[2] = __b2; - res.as_char[3] = __b3; - res.as_char[4] = __b4; - res.as_char[5] = __b5; - res.as_char[6] = __b6; - res.as_char[7] = __b7; - return (res.as_m64); + __res.as_char[0] = __b0; + __res.as_char[1] = __b1; + __res.as_char[2] = __b2; + __res.as_char[3] = __b3; + __res.as_char[4] = __b4; + __res.as_char[5] = __b5; + __res.as_char[6] = __b6; + __res.as_char[7] = __b7; + return (__res.as_m64); } /* Similar, but with the arguments in reverse order. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_setr_pi32 (int __i0, int __i1) { - __m64_union res; + __m64_union __res; - res.as_int[0] = __i0; - res.as_int[1] = __i1; - return (res.as_m64); + __res.as_int[0] = __i0; + __res.as_int[1] = __i1; + return (__res.as_m64); } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1424,11 +1424,11 @@ _mm_setr_pi8 (char __b0, char __b1, char __b2, char __b3, extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_set1_pi32 (int __i) { - __m64_union res; + __m64_union __res; - res.as_int[0] = __i; - res.as_int[1] = __i; - return (res.as_m64); + __res.as_int[0] = __i; + __res.as_int[1] = __i; + return (__res.as_m64); } /* Creates a vector of four 16-bit values, all elements containing W. */ @@ -1441,13 +1441,13 @@ _mm_set1_pi16 (short __w) w = (__vector signed short)vec_splats (__w); return (__m64) ((__vector long long) w)[0]; #else - __m64_union res; + __m64_union __res; - res.as_short[0] = __w; - res.as_short[1] = __w; - res.as_short[2] = __w; - res.as_short[3] = __w; - return (res.as_m64); + __res.as_short[0] = __w; + __res.as_short[1] = __w; + __res.as_short[2] = __w; + __res.as_short[3] = __w; + return (__res.as_m64); #endif } @@ -1456,22 +1456,22 @@ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi _mm_set1_pi8 (signed char __b) { #if _ARCH_PWR8 - __vector signed char b; + __vector signed char __res; - b = (__vector signed char)vec_splats (__b); - return (__m64) ((__vector long long) b)[0]; + __res = (__vector signed char)vec_splats (__b); + return (__m64) ((__vector long long) __res)[0]; #else - __m64_union res; - - res.as_char[0] = __b; - res.as_char[1] = __b; - res.as_char[2] = __b; - res.as_char[3] = __b; - res.as_char[4] = __b; - res.as_char[5] = __b; - res.as_char[6] = __b; - res.as_char[7] = __b; - return (res.as_m64); + __m64_union __res; + + __res.as_char[0] = __b; + __res.as_char[1] = __b; + __res.as_char[2] = __b; + __res.as_char[3] = __b; + __res.as_char[4] = __b; + __res.as_char[5] = __b; + __res.as_char[6] = __b; + __res.as_char[7] = __b; + return (__res.as_m64); #endif } #endif /* _MMINTRIN_H_INCLUDED */ diff --git a/gcc/config/rs6000/option-defaults.h b/gcc/config/rs6000/option-defaults.h index f03694e..2123bfd 100644 --- a/gcc/config/rs6000/option-defaults.h +++ b/gcc/config/rs6000/option-defaults.h @@ -62,3 +62,9 @@ {"cpu_32", "%{" OPT_ARCH32 ":%{!mcpu=*:-mcpu=%(VALUE)}}" }, \ {"cpu_64", "%{" OPT_ARCH64 ":%{!mcpu=*:-mcpu=%(VALUE)}}" }, \ {"float", "%{!msoft-float:%{!mhard-float:-m%(VALUE)-float}}" } + +/* rs6000.md uses OPTION_GLIBC unconditionally, while it is defined only in + linux{,64}.h. 
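   Other targets would otherwise presumably fail to build the generated insn
   conditions, since the macro is undefined there.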
Define fallback for other targets here. */ +#ifndef OPTION_GLIBC +#define OPTION_GLIBC 0 +#endif diff --git a/gcc/config/rs6000/pmmintrin.h b/gcc/config/rs6000/pmmintrin.h index bcbca15..e1b5426 100644 --- a/gcc/config/rs6000/pmmintrin.h +++ b/gcc/config/rs6000/pmmintrin.h @@ -58,55 +58,55 @@ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_addsub_ps (__m128 __X, __m128 __Y) { - const __v4sf even_n0 = {-0.0, 0.0, -0.0, 0.0}; - __v4sf even_neg_Y = vec_xor(__Y, even_n0); - return (__m128) vec_add (__X, even_neg_Y); + const __v4sf __even_n0 = {-0.0, 0.0, -0.0, 0.0}; + __v4sf __even_neg_Y = vec_xor(__Y, __even_n0); + return (__m128) vec_add (__X, __even_neg_Y); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_addsub_pd (__m128d __X, __m128d __Y) { - const __v2df even_n0 = {-0.0, 0.0}; - __v2df even_neg_Y = vec_xor(__Y, even_n0); - return (__m128d) vec_add (__X, even_neg_Y); + const __v2df __even_n0 = {-0.0, 0.0}; + __v2df __even_neg_Y = vec_xor(__Y, __even_n0); + return (__m128d) vec_add (__X, __even_neg_Y); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hadd_ps (__m128 __X, __m128 __Y) { - __vector unsigned char xform2 = { + __vector unsigned char __xform2 = { 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B, 0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B }; - __vector unsigned char xform1 = { + __vector unsigned char __xform1 = { 0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F, 0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F }; - return (__m128) vec_add (vec_perm ((__v4sf) __X, (__v4sf) __Y, xform2), - vec_perm ((__v4sf) __X, (__v4sf) __Y, xform1)); + return (__m128) vec_add (vec_perm ((__v4sf) __X, (__v4sf) __Y, __xform2), + vec_perm ((__v4sf) __X, (__v4sf) __Y, __xform1)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_hsub_ps (__m128 __X, __m128 __Y) { - __vector unsigned char xform2 = { + __vector unsigned char __xform2 = { 0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B, 0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B }; - __vector unsigned char xform1 = { + __vector unsigned char __xform1 = { 0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F, 0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F }; - return (__m128) vec_sub (vec_perm ((__v4sf) __X, (__v4sf) __Y, xform2), - vec_perm ((__v4sf) __X, (__v4sf) __Y, xform1)); + return (__m128) vec_sub (vec_perm ((__v4sf) __X, (__v4sf) __Y, __xform2), + vec_perm ((__v4sf) __X, (__v4sf) __Y, __xform1)); } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index c65dfb9..28f6e98 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -2045,3 +2045,17 @@ (if_then_else (match_test "TARGET_VSX") (match_operand 0 "reg_or_cint_operand") (match_operand 0 "const_int_operand"))) + +;; Return true if the operand is a valid Mach-O pic address. 
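;; Illustrative note, not part of the patch: the accepted form is a CONST
;; wrapping an UNSPEC_MACHOPIC_OFFSET whose single element is itself a
;; constant, e.g. (const (unspec [(symbol_ref "sym")] UNSPEC_MACHOPIC_OFFSET)).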
+;; +(define_predicate "macho_pic_address" + (match_code "const,unspec") +{ + if (GET_CODE (op) == CONST) + op = XEXP (op, 0); + + if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_MACHOPIC_OFFSET) + return CONSTANT_P (XVECEXP (op, 0, 0)); + else + return false; +}) diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc new file mode 100644 index 0000000..5d34c1b --- /dev/null +++ b/gcc/config/rs6000/rs6000-builtin.cc @@ -0,0 +1,3723 @@ +/* Target-specific built-in function support for the Power architecture. + See also rs6000-c.c, rs6000-gen-builtins.c, rs6000-builtins.def, and + rs6000-overloads.def. + Note that "normal" builtins (generic math functions, etc.) are handled + in rs6000.c. + + Copyright (C) 2002-2022 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version. + + GCC is distributed in the hope that it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public + License for more details. + + You should have received a copy of the GNU General Public License + along with GCC; see the file COPYING3. If not see + <http://www.gnu.org/licenses/>. */ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "target.h" +#include "backend.h" +#include "rtl.h" +#include "tree.h" +#include "memmodel.h" +#include "gimple.h" +#include "tm_p.h" +#include "optabs.h" +#include "recog.h" +#include "diagnostic-core.h" +#include "fold-const.h" +#include "stor-layout.h" +#include "calls.h" +#include "varasm.h" +#include "explow.h" +#include "expr.h" +#include "langhooks.h" +#include "gimplify.h" +#include "gimple-fold.h" +#include "gimple-iterator.h" +#include "ssa.h" +#include "tree-ssa-propagate.h" +#include "builtins.h" +#include "tree-vector-builder.h" +#if TARGET_XCOFF +#include "xcoffout.h" /* get declarations of xcoff_*_section_name */ +#endif +#include "ppc-auxv.h" +#include "rs6000-internal.h" + +/* Built in types. */ +tree rs6000_builtin_types[RS6000_BTI_MAX]; + +/* Support targetm.vectorize.builtin_mask_for_load. */ +tree altivec_builtin_mask_for_load; + +/* **** General support functions **** */ + +/* Raise an error message for a builtin function that is called without the + appropriate target options being set. 
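   For example, per the cases below, using an ENB_MMA builtin without -mmma
   produces an error of the form "<builtin> requires the '-mmma' option".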
*/ + +void +rs6000_invalid_builtin (enum rs6000_gen_builtins fncode) +{ + size_t j = (size_t) fncode; + const char *name = rs6000_builtin_info[j].bifname; + + switch (rs6000_builtin_info[j].enable) + { + case ENB_P5: + error ("%qs requires the %qs option", name, "-mcpu=power5"); + break; + case ENB_P6: + error ("%qs requires the %qs option", name, "-mcpu=power6"); + break; + case ENB_P6_64: + error ("%qs requires the %qs option and either the %qs or %qs option", + name, "-mcpu=power6", "-m64", "-mpowerpc64"); + break; + case ENB_ALTIVEC: + error ("%qs requires the %qs option", name, "-maltivec"); + break; + case ENB_CELL: + error ("%qs requires the %qs option", name, "-mcpu=cell"); + break; + case ENB_VSX: + error ("%qs requires the %qs option", name, "-mvsx"); + break; + case ENB_P7: + error ("%qs requires the %qs option", name, "-mcpu=power7"); + break; + case ENB_P7_64: + error ("%qs requires the %qs option and either the %qs or %qs option", + name, "-mcpu=power7", "-m64", "-mpowerpc64"); + break; + case ENB_P8: + error ("%qs requires the %qs option", name, "-mcpu=power8"); + break; + case ENB_P8V: + error ("%qs requires the %qs and %qs options", name, "-mcpu=power8", + "-mvsx"); + break; + case ENB_P9: + error ("%qs requires the %qs option", name, "-mcpu=power9"); + break; + case ENB_P9_64: + error ("%qs requires the %qs option and either the %qs or %qs option", + name, "-mcpu=power9", "-m64", "-mpowerpc64"); + break; + case ENB_P9V: + error ("%qs requires the %qs and %qs options", name, "-mcpu=power9", + "-mvsx"); + break; + case ENB_IEEE128_HW: + error ("%qs requires quad-precision floating-point arithmetic", name); + break; + case ENB_DFP: + error ("%qs requires the %qs option", name, "-mhard-dfp"); + break; + case ENB_CRYPTO: + error ("%qs requires the %qs option", name, "-mcrypto"); + break; + case ENB_HTM: + error ("%qs requires the %qs option", name, "-mhtm"); + break; + case ENB_P10: + error ("%qs requires the %qs option", name, "-mcpu=power10"); + break; + case ENB_P10_64: + error ("%qs requires the %qs option and either the %qs or %qs option", + name, "-mcpu=power10", "-m64", "-mpowerpc64"); + break; + case ENB_MMA: + error ("%qs requires the %qs option", name, "-mmma"); + break; + default: + case ENB_ALWAYS: + gcc_unreachable (); + } +} + +/* Check whether a builtin function is supported in this target + configuration. 
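   Each ENB_* enablement class is keyed to one target condition; for example,
   ENB_P9V requires TARGET_P9_VECTOR and ENB_ALWAYS is unconditionally true,
   as the switch below spells out.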
*/ +bool +rs6000_builtin_is_supported (enum rs6000_gen_builtins fncode) +{ + switch (rs6000_builtin_info[(size_t) fncode].enable) + { + case ENB_ALWAYS: + return true; + case ENB_P5: + return TARGET_POPCNTB; + case ENB_P6: + return TARGET_CMPB; + case ENB_P6_64: + return TARGET_CMPB && TARGET_POWERPC64; + case ENB_P7: + return TARGET_POPCNTD; + case ENB_P7_64: + return TARGET_POPCNTD && TARGET_POWERPC64; + case ENB_P8: + return TARGET_DIRECT_MOVE; + case ENB_P8V: + return TARGET_P8_VECTOR; + case ENB_P9: + return TARGET_MODULO; + case ENB_P9_64: + return TARGET_MODULO && TARGET_POWERPC64; + case ENB_P9V: + return TARGET_P9_VECTOR; + case ENB_P10: + return TARGET_POWER10; + case ENB_P10_64: + return TARGET_POWER10 && TARGET_POWERPC64; + case ENB_ALTIVEC: + return TARGET_ALTIVEC; + case ENB_VSX: + return TARGET_VSX; + case ENB_CELL: + return TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL; + case ENB_IEEE128_HW: + return TARGET_FLOAT128_HW; + case ENB_DFP: + return TARGET_DFP; + case ENB_CRYPTO: + return TARGET_CRYPTO; + case ENB_HTM: + return TARGET_HTM; + case ENB_MMA: + return TARGET_MMA; + default: + gcc_unreachable (); + } + gcc_unreachable (); +} + +/* Target hook for early folding of built-ins, shamelessly stolen + from ia64.cc. */ + +tree +rs6000_fold_builtin (tree fndecl ATTRIBUTE_UNUSED, + int n_args ATTRIBUTE_UNUSED, + tree *args ATTRIBUTE_UNUSED, + bool ignore ATTRIBUTE_UNUSED) +{ +#ifdef SUBTARGET_FOLD_BUILTIN + return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore); +#else + return NULL_TREE; +#endif +} + +tree +rs6000_builtin_decl (unsigned code, bool /* initialize_p */) +{ + rs6000_gen_builtins fcode = (rs6000_gen_builtins) code; + + if (fcode >= RS6000_OVLD_MAX) + return error_mark_node; + + return rs6000_builtin_decls[code]; +} + +/* Implement targetm.vectorize.builtin_mask_for_load. */ +tree +rs6000_builtin_mask_for_load (void) +{ + /* Don't use lvsl/vperm for P8 and similarly efficient machines. */ + if ((TARGET_ALTIVEC && !TARGET_VSX) + || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX)) + return altivec_builtin_mask_for_load; + else + return 0; +} + +/* Implement targetm.vectorize.builtin_md_vectorized_function. */ + +tree +rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out, + tree type_in) +{ + machine_mode in_mode, out_mode; + int in_n, out_n; + + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, + "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n", + IDENTIFIER_POINTER (DECL_NAME (fndecl)), + GET_MODE_NAME (TYPE_MODE (type_out)), + GET_MODE_NAME (TYPE_MODE (type_in))); + + /* TODO: Should this be gcc_assert? 
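   As written, a non-vector type simply makes the hook return NULL_TREE, so no
   MD-vectorized variant is offered for that call.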
*/ + if (TREE_CODE (type_out) != VECTOR_TYPE + || TREE_CODE (type_in) != VECTOR_TYPE) + return NULL_TREE; + + out_mode = TYPE_MODE (TREE_TYPE (type_out)); + out_n = TYPE_VECTOR_SUBPARTS (type_out); + in_mode = TYPE_MODE (TREE_TYPE (type_in)); + in_n = TYPE_VECTOR_SUBPARTS (type_in); + + enum rs6000_gen_builtins fn + = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl); + switch (fn) + { + case RS6000_BIF_RSQRTF: + if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode) + && out_mode == SFmode && out_n == 4 + && in_mode == SFmode && in_n == 4) + return rs6000_builtin_decls[RS6000_BIF_VRSQRTFP]; + break; + case RS6000_BIF_RSQRT: + if (VECTOR_UNIT_VSX_P (V2DFmode) + && out_mode == DFmode && out_n == 2 + && in_mode == DFmode && in_n == 2) + return rs6000_builtin_decls[RS6000_BIF_RSQRT_2DF]; + break; + case RS6000_BIF_RECIPF: + if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode) + && out_mode == SFmode && out_n == 4 + && in_mode == SFmode && in_n == 4) + return rs6000_builtin_decls[RS6000_BIF_VRECIPFP]; + break; + case RS6000_BIF_RECIP: + if (VECTOR_UNIT_VSX_P (V2DFmode) + && out_mode == DFmode && out_n == 2 + && in_mode == DFmode && in_n == 2) + return rs6000_builtin_decls[RS6000_BIF_RECIP_V2DF]; + break; + default: + break; + } + + machine_mode in_vmode = TYPE_MODE (type_in); + machine_mode out_vmode = TYPE_MODE (type_out); + + /* Power10 supported vectorized built-in functions. */ + if (TARGET_POWER10 + && in_vmode == out_vmode + && VECTOR_UNIT_ALTIVEC_OR_VSX_P (in_vmode)) + { + machine_mode exp_mode = DImode; + machine_mode exp_vmode = V2DImode; + enum rs6000_gen_builtins bif; + switch (fn) + { + case RS6000_BIF_DIVWE: + case RS6000_BIF_DIVWEU: + exp_mode = SImode; + exp_vmode = V4SImode; + if (fn == RS6000_BIF_DIVWE) + bif = RS6000_BIF_VDIVESW; + else + bif = RS6000_BIF_VDIVEUW; + break; + case RS6000_BIF_DIVDE: + case RS6000_BIF_DIVDEU: + if (fn == RS6000_BIF_DIVDE) + bif = RS6000_BIF_VDIVESD; + else + bif = RS6000_BIF_VDIVEUD; + break; + case RS6000_BIF_CFUGED: + bif = RS6000_BIF_VCFUGED; + break; + case RS6000_BIF_CNTLZDM: + bif = RS6000_BIF_VCLZDM; + break; + case RS6000_BIF_CNTTZDM: + bif = RS6000_BIF_VCTZDM; + break; + case RS6000_BIF_PDEPD: + bif = RS6000_BIF_VPDEPD; + break; + case RS6000_BIF_PEXTD: + bif = RS6000_BIF_VPEXTD; + break; + default: + return NULL_TREE; + } + + if (in_mode == exp_mode && in_vmode == exp_vmode) + return rs6000_builtin_decls[bif]; + } + + return NULL_TREE; +} + +/* Returns a code for a target-specific builtin that implements + reciprocal of the function, or NULL_TREE if not available. */ + +tree +rs6000_builtin_reciprocal (tree fndecl) +{ + switch (DECL_MD_FUNCTION_CODE (fndecl)) + { + case RS6000_BIF_XVSQRTDP: + if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode)) + return NULL_TREE; + + return rs6000_builtin_decls[RS6000_BIF_RSQRT_2DF]; + + case RS6000_BIF_XVSQRTSP: + if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode)) + return NULL_TREE; + + return rs6000_builtin_decls[RS6000_BIF_RSQRT_4SF]; + + default: + return NULL_TREE; + } +} + +/* **** Initialization support **** */ + +/* Create a builtin vector type with a name. Taking care not to give + the canonical type a name. */ + +static tree +rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts) +{ + tree result = build_vector_type (elt_type, num_elts); + + /* Copy so we don't give the canonical type a name. */ + result = build_variant_type_copy (result); + + add_builtin_type (name, result); + + return result; +} + +/* Debug utility to translate a type node to a single textual token. 
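   For example, V4SF_type_node is rendered as "vf" and unsigned_V16QI_type_node
   as "vuc"; anything unrecognized falls through to "unknown".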
*/ +static +const char *rs6000_type_string (tree type_node) +{ + if (type_node == void_type_node) + return "void"; + else if (type_node == long_integer_type_node) + return "long"; + else if (type_node == long_unsigned_type_node) + return "ulong"; + else if (type_node == long_long_integer_type_node) + return "longlong"; + else if (type_node == long_long_unsigned_type_node) + return "ulonglong"; + else if (type_node == bool_V2DI_type_node) + return "vbll"; + else if (type_node == bool_V4SI_type_node) + return "vbi"; + else if (type_node == bool_V8HI_type_node) + return "vbs"; + else if (type_node == bool_V16QI_type_node) + return "vbc"; + else if (type_node == bool_int_type_node) + return "bool"; + else if (type_node == dfloat64_type_node) + return "_Decimal64"; + else if (type_node == double_type_node) + return "double"; + else if (type_node == intDI_type_node) + return "sll"; + else if (type_node == intHI_type_node) + return "ss"; + else if (type_node == ibm128_float_type_node) + return "__ibm128"; + else if (type_node == opaque_V4SI_type_node) + return "opaque"; + else if (POINTER_TYPE_P (type_node)) + return "void*"; + else if (type_node == intQI_type_node || type_node == char_type_node) + return "sc"; + else if (type_node == dfloat32_type_node) + return "_Decimal32"; + else if (type_node == float_type_node) + return "float"; + else if (type_node == intSI_type_node || type_node == integer_type_node) + return "si"; + else if (type_node == dfloat128_type_node) + return "_Decimal128"; + else if (type_node == long_double_type_node) + return "longdouble"; + else if (type_node == intTI_type_node) + return "sq"; + else if (type_node == unsigned_intDI_type_node) + return "ull"; + else if (type_node == unsigned_intHI_type_node) + return "us"; + else if (type_node == unsigned_intQI_type_node) + return "uc"; + else if (type_node == unsigned_intSI_type_node) + return "ui"; + else if (type_node == unsigned_intTI_type_node) + return "uq"; + else if (type_node == unsigned_V1TI_type_node) + return "vuq"; + else if (type_node == unsigned_V2DI_type_node) + return "vull"; + else if (type_node == unsigned_V4SI_type_node) + return "vui"; + else if (type_node == unsigned_V8HI_type_node) + return "vus"; + else if (type_node == unsigned_V16QI_type_node) + return "vuc"; + else if (type_node == V16QI_type_node) + return "vsc"; + else if (type_node == V1TI_type_node) + return "vsq"; + else if (type_node == V2DF_type_node) + return "vd"; + else if (type_node == V2DI_type_node) + return "vsll"; + else if (type_node == V4SF_type_node) + return "vf"; + else if (type_node == V4SI_type_node) + return "vsi"; + else if (type_node == V8HI_type_node) + return "vss"; + else if (type_node == pixel_V8HI_type_node) + return "vp"; + else if (type_node == pcvoid_type_node) + return "voidc*"; + else if (type_node == float128_type_node) + return "_Float128"; + else if (type_node == vector_pair_type_node) + return "__vector_pair"; + else if (type_node == vector_quad_type_node) + return "__vector_quad"; + + return "unknown"; +} + +void +rs6000_init_builtins (void) +{ + tree tdecl; + tree t; + + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_init_builtins%s%s\n", + (TARGET_ALTIVEC) ? ", altivec" : "", + (TARGET_VSX) ? 
", vsx" : ""); + + V2DI_type_node = rs6000_vector_type ("__vector long long", + long_long_integer_type_node, 2); + ptr_V2DI_type_node + = build_pointer_type (build_qualified_type (V2DI_type_node, + TYPE_QUAL_CONST)); + + V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2); + ptr_V2DF_type_node + = build_pointer_type (build_qualified_type (V2DF_type_node, + TYPE_QUAL_CONST)); + + V4SI_type_node = rs6000_vector_type ("__vector signed int", + intSI_type_node, 4); + ptr_V4SI_type_node + = build_pointer_type (build_qualified_type (V4SI_type_node, + TYPE_QUAL_CONST)); + + V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4); + ptr_V4SF_type_node + = build_pointer_type (build_qualified_type (V4SF_type_node, + TYPE_QUAL_CONST)); + + V8HI_type_node = rs6000_vector_type ("__vector signed short", + intHI_type_node, 8); + ptr_V8HI_type_node + = build_pointer_type (build_qualified_type (V8HI_type_node, + TYPE_QUAL_CONST)); + + V16QI_type_node = rs6000_vector_type ("__vector signed char", + intQI_type_node, 16); + ptr_V16QI_type_node + = build_pointer_type (build_qualified_type (V16QI_type_node, + TYPE_QUAL_CONST)); + + unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char", + unsigned_intQI_type_node, 16); + ptr_unsigned_V16QI_type_node + = build_pointer_type (build_qualified_type (unsigned_V16QI_type_node, + TYPE_QUAL_CONST)); + + unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short", + unsigned_intHI_type_node, 8); + ptr_unsigned_V8HI_type_node + = build_pointer_type (build_qualified_type (unsigned_V8HI_type_node, + TYPE_QUAL_CONST)); + + unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int", + unsigned_intSI_type_node, 4); + ptr_unsigned_V4SI_type_node + = build_pointer_type (build_qualified_type (unsigned_V4SI_type_node, + TYPE_QUAL_CONST)); + + unsigned_V2DI_type_node + = rs6000_vector_type ("__vector unsigned long long", + long_long_unsigned_type_node, 2); + + ptr_unsigned_V2DI_type_node + = build_pointer_type (build_qualified_type (unsigned_V2DI_type_node, + TYPE_QUAL_CONST)); + + opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4); + + const_str_type_node + = build_pointer_type (build_qualified_type (char_type_node, + TYPE_QUAL_CONST)); + + /* We use V1TI mode as a special container to hold __int128_t items that + must live in VSX registers. */ + if (intTI_type_node) + { + V1TI_type_node = rs6000_vector_type ("__vector __int128", + intTI_type_node, 1); + ptr_V1TI_type_node + = build_pointer_type (build_qualified_type (V1TI_type_node, + TYPE_QUAL_CONST)); + unsigned_V1TI_type_node + = rs6000_vector_type ("__vector unsigned __int128", + unsigned_intTI_type_node, 1); + ptr_unsigned_V1TI_type_node + = build_pointer_type (build_qualified_type (unsigned_V1TI_type_node, + TYPE_QUAL_CONST)); + } + + /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...' + types, especially in C++ land. Similarly, 'vector pixel' is distinct from + 'vector unsigned short'. 
*/ + + bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node); + bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node); + bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node); + bool_long_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node); + pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node); + + long_integer_type_internal_node = long_integer_type_node; + long_unsigned_type_internal_node = long_unsigned_type_node; + long_long_integer_type_internal_node = long_long_integer_type_node; + long_long_unsigned_type_internal_node = long_long_unsigned_type_node; + intQI_type_internal_node = intQI_type_node; + uintQI_type_internal_node = unsigned_intQI_type_node; + intHI_type_internal_node = intHI_type_node; + uintHI_type_internal_node = unsigned_intHI_type_node; + intSI_type_internal_node = intSI_type_node; + uintSI_type_internal_node = unsigned_intSI_type_node; + intDI_type_internal_node = intDI_type_node; + uintDI_type_internal_node = unsigned_intDI_type_node; + intTI_type_internal_node = intTI_type_node; + uintTI_type_internal_node = unsigned_intTI_type_node; + float_type_internal_node = float_type_node; + double_type_internal_node = double_type_node; + long_double_type_internal_node = long_double_type_node; + dfloat64_type_internal_node = dfloat64_type_node; + dfloat128_type_internal_node = dfloat128_type_node; + void_type_internal_node = void_type_node; + + ptr_intQI_type_node + = build_pointer_type (build_qualified_type (intQI_type_internal_node, + TYPE_QUAL_CONST)); + ptr_uintQI_type_node + = build_pointer_type (build_qualified_type (uintQI_type_internal_node, + TYPE_QUAL_CONST)); + ptr_intHI_type_node + = build_pointer_type (build_qualified_type (intHI_type_internal_node, + TYPE_QUAL_CONST)); + ptr_uintHI_type_node + = build_pointer_type (build_qualified_type (uintHI_type_internal_node, + TYPE_QUAL_CONST)); + ptr_intSI_type_node + = build_pointer_type (build_qualified_type (intSI_type_internal_node, + TYPE_QUAL_CONST)); + ptr_uintSI_type_node + = build_pointer_type (build_qualified_type (uintSI_type_internal_node, + TYPE_QUAL_CONST)); + ptr_intDI_type_node + = build_pointer_type (build_qualified_type (intDI_type_internal_node, + TYPE_QUAL_CONST)); + ptr_uintDI_type_node + = build_pointer_type (build_qualified_type (uintDI_type_internal_node, + TYPE_QUAL_CONST)); + ptr_intTI_type_node + = build_pointer_type (build_qualified_type (intTI_type_internal_node, + TYPE_QUAL_CONST)); + ptr_uintTI_type_node + = build_pointer_type (build_qualified_type (uintTI_type_internal_node, + TYPE_QUAL_CONST)); + + t = build_qualified_type (long_integer_type_internal_node, TYPE_QUAL_CONST); + ptr_long_integer_type_node = build_pointer_type (t); + + t = build_qualified_type (long_unsigned_type_internal_node, TYPE_QUAL_CONST); + ptr_long_unsigned_type_node = build_pointer_type (t); + + ptr_float_type_node + = build_pointer_type (build_qualified_type (float_type_internal_node, + TYPE_QUAL_CONST)); + ptr_double_type_node + = build_pointer_type (build_qualified_type (double_type_internal_node, + TYPE_QUAL_CONST)); + ptr_long_double_type_node + = build_pointer_type (build_qualified_type (long_double_type_internal_node, + TYPE_QUAL_CONST)); + if (dfloat64_type_node) + { + t = build_qualified_type (dfloat64_type_internal_node, TYPE_QUAL_CONST); + ptr_dfloat64_type_node = build_pointer_type (t); + } + else + ptr_dfloat64_type_node = NULL; + + if (dfloat128_type_node) + { + t = build_qualified_type 
(dfloat128_type_internal_node, TYPE_QUAL_CONST);
+      ptr_dfloat128_type_node = build_pointer_type (t);
+    }
+  else
+    ptr_dfloat128_type_node = NULL;
+
+  t = build_qualified_type (long_long_integer_type_internal_node,
+			    TYPE_QUAL_CONST);
+  ptr_long_long_integer_type_node = build_pointer_type (t);
+
+  t = build_qualified_type (long_long_unsigned_type_internal_node,
+			    TYPE_QUAL_CONST);
+  ptr_long_long_unsigned_type_node = build_pointer_type (t);
+
+  /* 128-bit floating point support.  KFmode is IEEE 128-bit floating point.
+     IFmode is the IBM extended 128-bit format that is a pair of doubles.
+     TFmode will be either IEEE 128-bit floating point or the IBM
+     double-double format that uses a pair of doubles, depending on the
+     switches and defaults.
+
+     If we don't support either 128-bit IBM double-double or IEEE 128-bit
+     floating point, we need to make sure the type is non-zero or else
+     self-test fails during bootstrap.
+
+     Always create __ibm128 as a separate type, even if the current long
+     double format is IBM extended double.
+
+     For IEEE 128-bit floating point, always create the type __ieee128.  If
+     the user used -mfloat128, rs6000-c.cc will create a define from
+     __float128 to __ieee128.  */
+  if (TARGET_FLOAT128_TYPE)
+    {
+      if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128)
+	ibm128_float_type_node = long_double_type_node;
+      else
+	{
+	  ibm128_float_type_node = make_node (REAL_TYPE);
+	  TYPE_PRECISION (ibm128_float_type_node) = 128;
+	  SET_TYPE_MODE (ibm128_float_type_node, IFmode);
+	  layout_type (ibm128_float_type_node);
+	}
+      t = build_qualified_type (ibm128_float_type_node, TYPE_QUAL_CONST);
+      ptr_ibm128_float_type_node = build_pointer_type (t);
+      lang_hooks.types.register_builtin_type (ibm128_float_type_node,
+					      "__ibm128");
+
+      if (TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128)
+	ieee128_float_type_node = long_double_type_node;
+      else
+	ieee128_float_type_node = float128_type_node;
+      t = build_qualified_type (ieee128_float_type_node, TYPE_QUAL_CONST);
+      ptr_ieee128_float_type_node = build_pointer_type (t);
+      lang_hooks.types.register_builtin_type (ieee128_float_type_node,
+					      "__ieee128");
+    }
+
+  else
+    ieee128_float_type_node = ibm128_float_type_node = long_double_type_node;
+
+  /* Vector pair and vector quad support.
*/ + vector_pair_type_node = make_node (OPAQUE_TYPE); + SET_TYPE_MODE (vector_pair_type_node, OOmode); + TYPE_SIZE (vector_pair_type_node) = bitsize_int (GET_MODE_BITSIZE (OOmode)); + TYPE_PRECISION (vector_pair_type_node) = GET_MODE_BITSIZE (OOmode); + TYPE_SIZE_UNIT (vector_pair_type_node) = size_int (GET_MODE_SIZE (OOmode)); + SET_TYPE_ALIGN (vector_pair_type_node, 256); + TYPE_USER_ALIGN (vector_pair_type_node) = 0; + lang_hooks.types.register_builtin_type (vector_pair_type_node, + "__vector_pair"); + t = build_qualified_type (vector_pair_type_node, TYPE_QUAL_CONST); + ptr_vector_pair_type_node = build_pointer_type (t); + + vector_quad_type_node = make_node (OPAQUE_TYPE); + SET_TYPE_MODE (vector_quad_type_node, XOmode); + TYPE_SIZE (vector_quad_type_node) = bitsize_int (GET_MODE_BITSIZE (XOmode)); + TYPE_PRECISION (vector_quad_type_node) = GET_MODE_BITSIZE (XOmode); + TYPE_SIZE_UNIT (vector_quad_type_node) = size_int (GET_MODE_SIZE (XOmode)); + SET_TYPE_ALIGN (vector_quad_type_node, 512); + TYPE_USER_ALIGN (vector_quad_type_node) = 0; + lang_hooks.types.register_builtin_type (vector_quad_type_node, + "__vector_quad"); + t = build_qualified_type (vector_quad_type_node, TYPE_QUAL_CONST); + ptr_vector_quad_type_node = build_pointer_type (t); + + tdecl = add_builtin_type ("__bool char", bool_char_type_node); + TYPE_NAME (bool_char_type_node) = tdecl; + + tdecl = add_builtin_type ("__bool short", bool_short_type_node); + TYPE_NAME (bool_short_type_node) = tdecl; + + tdecl = add_builtin_type ("__bool int", bool_int_type_node); + TYPE_NAME (bool_int_type_node) = tdecl; + + tdecl = add_builtin_type ("__pixel", pixel_type_node); + TYPE_NAME (pixel_type_node) = tdecl; + + bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char", + bool_char_type_node, 16); + ptr_bool_V16QI_type_node + = build_pointer_type (build_qualified_type (bool_V16QI_type_node, + TYPE_QUAL_CONST)); + + bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short", + bool_short_type_node, 8); + ptr_bool_V8HI_type_node + = build_pointer_type (build_qualified_type (bool_V8HI_type_node, + TYPE_QUAL_CONST)); + + bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int", + bool_int_type_node, 4); + ptr_bool_V4SI_type_node + = build_pointer_type (build_qualified_type (bool_V4SI_type_node, + TYPE_QUAL_CONST)); + + bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 + ? "__vector __bool long" + : "__vector __bool long long", + bool_long_long_type_node, 2); + ptr_bool_V2DI_type_node + = build_pointer_type (build_qualified_type (bool_V2DI_type_node, + TYPE_QUAL_CONST)); + + bool_V1TI_type_node = rs6000_vector_type ("__vector __bool __int128", + intTI_type_node, 1); + ptr_bool_V1TI_type_node + = build_pointer_type (build_qualified_type (bool_V1TI_type_node, + TYPE_QUAL_CONST)); + + pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel", + pixel_type_node, 8); + ptr_pixel_V8HI_type_node + = build_pointer_type (build_qualified_type (pixel_V8HI_type_node, + TYPE_QUAL_CONST)); + pcvoid_type_node + = build_pointer_type (build_qualified_type (void_type_node, + TYPE_QUAL_CONST)); + + /* Execute the autogenerated initialization code for builtins. 
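A brief illustration of why the distinct bool, pixel and opaque type nodes built above matter (a sketch assuming the usual altivec.h spellings and -mcpu=power10 for the opaque types): these declarations all name different types, which is what the built-in overload machinery and C++ overload resolution rely on.

#include <altivec.h>

vector bool int       vbi;   /* distinct from vector unsigned int    */
vector unsigned int   vui;
vector pixel          vpx;   /* distinct from vector unsigned short  */
vector unsigned short vus;
__vector_pair         vp;    /* opaque 256-bit type, OOmode          */
__vector_quad         vq;    /* opaque 512-bit type, XOmode          */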
*/ + rs6000_init_generated_builtins (); + + if (TARGET_DEBUG_BUILTIN) + { + fprintf (stderr, "\nAutogenerated built-in functions:\n\n"); + for (int i = 1; i < (int) RS6000_BIF_MAX; i++) + { + bif_enable e = rs6000_builtin_info[i].enable; + if (e == ENB_P5 && !TARGET_POPCNTB) + continue; + if (e == ENB_P6 && !TARGET_CMPB) + continue; + if (e == ENB_P6_64 && !(TARGET_CMPB && TARGET_POWERPC64)) + continue; + if (e == ENB_ALTIVEC && !TARGET_ALTIVEC) + continue; + if (e == ENB_VSX && !TARGET_VSX) + continue; + if (e == ENB_P7 && !TARGET_POPCNTD) + continue; + if (e == ENB_P7_64 && !(TARGET_POPCNTD && TARGET_POWERPC64)) + continue; + if (e == ENB_P8 && !TARGET_DIRECT_MOVE) + continue; + if (e == ENB_P8V && !TARGET_P8_VECTOR) + continue; + if (e == ENB_P9 && !TARGET_MODULO) + continue; + if (e == ENB_P9_64 && !(TARGET_MODULO && TARGET_POWERPC64)) + continue; + if (e == ENB_P9V && !TARGET_P9_VECTOR) + continue; + if (e == ENB_IEEE128_HW && !TARGET_FLOAT128_HW) + continue; + if (e == ENB_DFP && !TARGET_DFP) + continue; + if (e == ENB_CRYPTO && !TARGET_CRYPTO) + continue; + if (e == ENB_HTM && !TARGET_HTM) + continue; + if (e == ENB_P10 && !TARGET_POWER10) + continue; + if (e == ENB_P10_64 && !(TARGET_POWER10 && TARGET_POWERPC64)) + continue; + if (e == ENB_MMA && !TARGET_MMA) + continue; + tree fntype = rs6000_builtin_info[i].fntype; + tree t = TREE_TYPE (fntype); + fprintf (stderr, "%s %s (", rs6000_type_string (t), + rs6000_builtin_info[i].bifname); + t = TYPE_ARG_TYPES (fntype); + while (t && TREE_VALUE (t) != void_type_node) + { + fprintf (stderr, "%s", + rs6000_type_string (TREE_VALUE (t))); + t = TREE_CHAIN (t); + if (t && TREE_VALUE (t) != void_type_node) + fprintf (stderr, ", "); + } + fprintf (stderr, "); %s [%4d]\n", + rs6000_builtin_info[i].attr_string, (int) i); + } + fprintf (stderr, "\nEnd autogenerated built-in functions.\n\n\n"); + } + + if (TARGET_XCOFF) + { + /* AIX libm provides clog as __clog. */ + if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE) + set_user_assembler_name (tdecl, "__clog"); + + /* When long double is 64 bit, some long double builtins of libc + functions (like __builtin_frexpl) must call the double version + (frexp) not the long double version (frexpl) that expects a 128 bit + argument. */ + if (! TARGET_LONG_DOUBLE_128) + { + if ((tdecl = builtin_decl_explicit (BUILT_IN_FMODL)) != NULL_TREE) + set_user_assembler_name (tdecl, "fmod"); + if ((tdecl = builtin_decl_explicit (BUILT_IN_FREXPL)) != NULL_TREE) + set_user_assembler_name (tdecl, "frexp"); + if ((tdecl = builtin_decl_explicit (BUILT_IN_LDEXPL)) != NULL_TREE) + set_user_assembler_name (tdecl, "ldexp"); + if ((tdecl = builtin_decl_explicit (BUILT_IN_MODFL)) != NULL_TREE) + set_user_assembler_name (tdecl, "modf"); + } + } + + altivec_builtin_mask_for_load + = rs6000_builtin_decls[RS6000_BIF_MASK_FOR_LOAD]; + +#ifdef SUBTARGET_INIT_BUILTINS + SUBTARGET_INIT_BUILTINS; +#endif + + return; +} + +/* **** GIMPLE folding support **** */ + +/* Helper function to handle the gimple folding of a vector compare + operation. This sets up true/false vectors, and uses the + VEC_COND_EXPR operation. + CODE indicates which comparison is to be made. (EQ, GT, ...). + TYPE indicates the type of the result. + Code is inserted before GSI. 
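A small sketch of what the set_user_assembler_name remapping above means in practice (assuming AIX/XCOFF with a 64-bit long double, i.e. without -mlong-double-128):

/* The call below is emitted as a call to the double routine "frexp",
   since a 64-bit long double matches double rather than the 128-bit
   frexpl that AIX libm would otherwise expect.  */
long double
split_exponent (long double x, int *exp)
{
  return __builtin_frexpl (x, exp);
}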
*/ +static tree +fold_build_vec_cmp (tree_code code, tree type, tree arg0, tree arg1, + gimple_stmt_iterator *gsi) +{ + tree cmp_type = truth_type_for (type); + tree zero_vec = build_zero_cst (type); + tree minus_one_vec = build_minus_one_cst (type); + tree temp = create_tmp_reg_or_ssa_name (cmp_type); + gimple *g = gimple_build_assign (temp, code, arg0, arg1); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + return fold_build3 (VEC_COND_EXPR, type, temp, minus_one_vec, zero_vec); +} + +/* Helper function to handle the in-between steps for the + vector compare built-ins. */ +static void +fold_compare_helper (gimple_stmt_iterator *gsi, tree_code code, gimple *stmt) +{ + tree arg0 = gimple_call_arg (stmt, 0); + tree arg1 = gimple_call_arg (stmt, 1); + tree lhs = gimple_call_lhs (stmt); + tree cmp = fold_build_vec_cmp (code, TREE_TYPE (lhs), arg0, arg1, gsi); + gimple *g = gimple_build_assign (lhs, cmp); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); +} + +/* Helper function to map V2DF and V4SF types to their + integral equivalents (V2DI and V4SI). */ +tree map_to_integral_tree_type (tree input_tree_type) +{ + if (INTEGRAL_TYPE_P (TREE_TYPE (input_tree_type))) + return input_tree_type; + else + { + if (types_compatible_p (TREE_TYPE (input_tree_type), + TREE_TYPE (V2DF_type_node))) + return V2DI_type_node; + else if (types_compatible_p (TREE_TYPE (input_tree_type), + TREE_TYPE (V4SF_type_node))) + return V4SI_type_node; + else + gcc_unreachable (); + } +} + +/* Helper function to handle the vector merge[hl] built-ins. The + implementation difference between h and l versions for this code are in + the values used when building of the permute vector for high word versus + low word merge. The variance is keyed off the use_high parameter. */ +static void +fold_mergehl_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_high) +{ + tree arg0 = gimple_call_arg (stmt, 0); + tree arg1 = gimple_call_arg (stmt, 1); + tree lhs = gimple_call_lhs (stmt); + tree lhs_type = TREE_TYPE (lhs); + int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type); + int midpoint = n_elts / 2; + int offset = 0; + + if (use_high == 1) + offset = midpoint; + + /* The permute_type will match the lhs for integral types. For double and + float types, the permute type needs to map to the V2 or V4 type that + matches size. */ + tree permute_type; + permute_type = map_to_integral_tree_type (lhs_type); + tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1); + + for (int i = 0; i < midpoint; i++) + { + elts.safe_push (build_int_cst (TREE_TYPE (permute_type), + offset + i)); + elts.safe_push (build_int_cst (TREE_TYPE (permute_type), + offset + n_elts + i)); + } + + tree permute = elts.build (); + + gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); +} + +/* Helper function to handle the vector merge[eo] built-ins. */ +static void +fold_mergeeo_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_odd) +{ + tree arg0 = gimple_call_arg (stmt, 0); + tree arg1 = gimple_call_arg (stmt, 1); + tree lhs = gimple_call_lhs (stmt); + tree lhs_type = TREE_TYPE (lhs); + int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type); + + /* The permute_type will match the lhs for integral types. For double and + float types, the permute type needs to map to the V2 or V4 type that + matches size. 
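A worked example of the selector fold_mergehl_helper constructs, shown as a sketch with GNU C __builtin_shuffle rather than the internal VEC_PERM_EXPR: for V4SI, the use_high == 0 call produces selector {0, 4, 1, 5} and the use_high == 1 call produces {2, 6, 3, 7}, interleaving the two inputs.

typedef int v4si __attribute__ ((vector_size (16)));

v4si
merge_offset0_v4si (v4si a, v4si b)
{
  v4si sel = { 0, 4, 1, 5 };            /* use_high == 0: offset 0     */
  return __builtin_shuffle (a, b, sel);
}

v4si
merge_offset2_v4si (v4si a, v4si b)
{
  v4si sel = { 2, 6, 3, 7 };            /* use_high == 1: offset n/2   */
  return __builtin_shuffle (a, b, sel);
}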
*/ + tree permute_type; + permute_type = map_to_integral_tree_type (lhs_type); + + tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1); + + /* Build the permute vector. */ + for (int i = 0; i < n_elts / 2; i++) + { + elts.safe_push (build_int_cst (TREE_TYPE (permute_type), + 2*i + use_odd)); + elts.safe_push (build_int_cst (TREE_TYPE (permute_type), + 2*i + use_odd + n_elts)); + } + + tree permute = elts.build (); + + gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); +} + +/* Helper function to sort out which built-ins may be valid without having + a LHS. */ +static bool +rs6000_builtin_valid_without_lhs (enum rs6000_gen_builtins fn_code, + tree fndecl) +{ + if (TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node) + return true; + + switch (fn_code) + { + case RS6000_BIF_STVX_V16QI: + case RS6000_BIF_STVX_V8HI: + case RS6000_BIF_STVX_V4SI: + case RS6000_BIF_STVX_V4SF: + case RS6000_BIF_STVX_V2DI: + case RS6000_BIF_STVX_V2DF: + case RS6000_BIF_STXVW4X_V16QI: + case RS6000_BIF_STXVW4X_V8HI: + case RS6000_BIF_STXVW4X_V4SF: + case RS6000_BIF_STXVW4X_V4SI: + case RS6000_BIF_STXVD2X_V2DF: + case RS6000_BIF_STXVD2X_V2DI: + return true; + default: + return false; + } +} + +/* Expand the MMA built-ins early, so that we can convert the pass-by-reference + __vector_quad arguments into pass-by-value arguments, leading to more + efficient code generation. */ +static bool +rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi, + rs6000_gen_builtins fn_code) +{ + gimple *stmt = gsi_stmt (*gsi); + size_t fncode = (size_t) fn_code; + + if (!bif_is_mma (rs6000_builtin_info[fncode])) + return false; + + /* Each call that can be gimple-expanded has an associated built-in + function that it will expand into. If this one doesn't, we have + already expanded it! Exceptions: lxvp and stxvp. */ + if (rs6000_builtin_info[fncode].assoc_bif == RS6000_BIF_NONE + && fncode != RS6000_BIF_LXVP + && fncode != RS6000_BIF_STXVP) + return false; + + bifdata *bd = &rs6000_builtin_info[fncode]; + unsigned nopnds = bd->nargs; + gimple_seq new_seq = NULL; + gimple *new_call; + tree new_decl; + + /* Compatibility built-ins; we used to call these + __builtin_mma_{dis,}assemble_pair, but now we call them + __builtin_vsx_{dis,}assemble_pair. Handle the old versions. */ + if (fncode == RS6000_BIF_ASSEMBLE_PAIR) + fncode = RS6000_BIF_ASSEMBLE_PAIR_V; + else if (fncode == RS6000_BIF_DISASSEMBLE_PAIR) + fncode = RS6000_BIF_DISASSEMBLE_PAIR_V; + + if (fncode == RS6000_BIF_DISASSEMBLE_ACC + || fncode == RS6000_BIF_DISASSEMBLE_PAIR_V) + { + /* This is an MMA disassemble built-in function. */ + push_gimplify_context (true); + unsigned nvec = (fncode == RS6000_BIF_DISASSEMBLE_ACC) ? 4 : 2; + tree dst_ptr = gimple_call_arg (stmt, 0); + tree src_ptr = gimple_call_arg (stmt, 1); + tree src_type = TREE_TYPE (src_ptr); + tree src = create_tmp_reg_or_ssa_name (TREE_TYPE (src_type)); + gimplify_assign (src, build_simple_mem_ref (src_ptr), &new_seq); + + /* If we are not disassembling an accumulator/pair or our destination is + another accumulator/pair, then just copy the entire thing as is. 
*/ + if ((fncode == RS6000_BIF_DISASSEMBLE_ACC + && TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_quad_type_node) + || (fncode == RS6000_BIF_DISASSEMBLE_PAIR_V + && TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_pair_type_node)) + { + tree dst = build_simple_mem_ref (build1 (VIEW_CONVERT_EXPR, + src_type, dst_ptr)); + gimplify_assign (dst, src, &new_seq); + pop_gimplify_context (NULL); + gsi_replace_with_seq (gsi, new_seq, true); + return true; + } + + /* If we're disassembling an accumulator into a different type, we need + to emit a xxmfacc instruction now, since we cannot do it later. */ + if (fncode == RS6000_BIF_DISASSEMBLE_ACC) + { + new_decl = rs6000_builtin_decls[RS6000_BIF_XXMFACC_INTERNAL]; + new_call = gimple_build_call (new_decl, 1, src); + src = create_tmp_reg_or_ssa_name (vector_quad_type_node); + gimple_call_set_lhs (new_call, src); + gimple_seq_add_stmt (&new_seq, new_call); + } + + /* Copy the accumulator/pair vector by vector. */ + new_decl + = rs6000_builtin_decls[rs6000_builtin_info[fncode].assoc_bif]; + tree dst_type = build_pointer_type_for_mode (unsigned_V16QI_type_node, + ptr_mode, true); + tree dst_base = build1 (VIEW_CONVERT_EXPR, dst_type, dst_ptr); + for (unsigned i = 0; i < nvec; i++) + { + unsigned index = WORDS_BIG_ENDIAN ? i : nvec - 1 - i; + tree dst = build2 (MEM_REF, unsigned_V16QI_type_node, dst_base, + build_int_cst (dst_type, index * 16)); + tree dstssa = create_tmp_reg_or_ssa_name (unsigned_V16QI_type_node); + new_call = gimple_build_call (new_decl, 2, src, + build_int_cstu (uint16_type_node, i)); + gimple_call_set_lhs (new_call, dstssa); + gimple_seq_add_stmt (&new_seq, new_call); + gimplify_assign (dst, dstssa, &new_seq); + } + pop_gimplify_context (NULL); + gsi_replace_with_seq (gsi, new_seq, true); + return true; + } + + /* TODO: Do some factoring on these two chunks. */ + if (fncode == RS6000_BIF_LXVP) + { + push_gimplify_context (true); + tree offset = gimple_call_arg (stmt, 0); + tree ptr = gimple_call_arg (stmt, 1); + tree lhs = gimple_call_lhs (stmt); + if (TREE_TYPE (TREE_TYPE (ptr)) != vector_pair_type_node) + ptr = build1 (VIEW_CONVERT_EXPR, + build_pointer_type (vector_pair_type_node), ptr); + tree mem = build_simple_mem_ref (build2 (POINTER_PLUS_EXPR, + TREE_TYPE (ptr), ptr, offset)); + gimplify_assign (lhs, mem, &new_seq); + pop_gimplify_context (NULL); + gsi_replace_with_seq (gsi, new_seq, true); + return true; + } + + if (fncode == RS6000_BIF_STXVP) + { + push_gimplify_context (true); + tree src = gimple_call_arg (stmt, 0); + tree offset = gimple_call_arg (stmt, 1); + tree ptr = gimple_call_arg (stmt, 2); + if (TREE_TYPE (TREE_TYPE (ptr)) != vector_pair_type_node) + ptr = build1 (VIEW_CONVERT_EXPR, + build_pointer_type (vector_pair_type_node), ptr); + tree mem = build_simple_mem_ref (build2 (POINTER_PLUS_EXPR, + TREE_TYPE (ptr), ptr, offset)); + gimplify_assign (mem, src, &new_seq); + pop_gimplify_context (NULL); + gsi_replace_with_seq (gsi, new_seq, true); + return true; + } + + /* Convert this built-in into an internal version that uses pass-by-value + arguments. The internal built-in is found in the assoc_bif field. */ + new_decl = rs6000_builtin_decls[rs6000_builtin_info[fncode].assoc_bif]; + tree lhs, op[MAX_MMA_OPERANDS]; + tree acc = gimple_call_arg (stmt, 0); + push_gimplify_context (true); + + if (bif_is_quad (*bd)) + { + /* This built-in has a pass-by-reference accumulator input, so load it + into a temporary accumulator for use as a pass-by-value input. 
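A sketch of the source-level use of the two built-ins folded just above (this assumes the documented __builtin_vsx_lxvp/__builtin_vsx_stxvp interface and -mcpu=power10; the function name is illustrative). After folding they are ordinary __vector_pair memory accesses in GIMPLE:

#include <altivec.h>

void
copy_pair (__vector_pair *dst, __vector_pair *src)
{
  /* Folds to: p = *src; *dst = p;  (offset 0 in both cases).  */
  __vector_pair p = __builtin_vsx_lxvp (0, src);
  __builtin_vsx_stxvp (p, 0, dst);
}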
*/ + op[0] = create_tmp_reg_or_ssa_name (vector_quad_type_node); + for (unsigned i = 1; i < nopnds; i++) + op[i] = gimple_call_arg (stmt, i); + gimplify_assign (op[0], build_simple_mem_ref (acc), &new_seq); + } + else + { + /* This built-in does not use its pass-by-reference accumulator argument + as an input argument, so remove it from the input list. */ + nopnds--; + for (unsigned i = 0; i < nopnds; i++) + op[i] = gimple_call_arg (stmt, i + 1); + } + + switch (nopnds) + { + case 0: + new_call = gimple_build_call (new_decl, 0); + break; + case 1: + new_call = gimple_build_call (new_decl, 1, op[0]); + break; + case 2: + new_call = gimple_build_call (new_decl, 2, op[0], op[1]); + break; + case 3: + new_call = gimple_build_call (new_decl, 3, op[0], op[1], op[2]); + break; + case 4: + new_call = gimple_build_call (new_decl, 4, op[0], op[1], op[2], op[3]); + break; + case 5: + new_call = gimple_build_call (new_decl, 5, op[0], op[1], op[2], op[3], + op[4]); + break; + case 6: + new_call = gimple_build_call (new_decl, 6, op[0], op[1], op[2], op[3], + op[4], op[5]); + break; + case 7: + new_call = gimple_build_call (new_decl, 7, op[0], op[1], op[2], op[3], + op[4], op[5], op[6]); + break; + default: + gcc_unreachable (); + } + + if (fncode == RS6000_BIF_BUILD_PAIR || fncode == RS6000_BIF_ASSEMBLE_PAIR_V) + lhs = create_tmp_reg_or_ssa_name (vector_pair_type_node); + else + lhs = create_tmp_reg_or_ssa_name (vector_quad_type_node); + gimple_call_set_lhs (new_call, lhs); + gimple_seq_add_stmt (&new_seq, new_call); + gimplify_assign (build_simple_mem_ref (acc), lhs, &new_seq); + pop_gimplify_context (NULL); + gsi_replace_with_seq (gsi, new_seq, true); + + return true; +} + +/* Fold a machine-dependent built-in in GIMPLE. (For folding into + a constant, use rs6000_fold_builtin.) */ +bool +rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) +{ + gimple *stmt = gsi_stmt (*gsi); + tree fndecl = gimple_call_fndecl (stmt); + gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD); + enum rs6000_gen_builtins fn_code + = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl); + tree arg0, arg1, lhs, temp; + enum tree_code bcode; + gimple *g; + + size_t uns_fncode = (size_t) fn_code; + enum insn_code icode = rs6000_builtin_info[uns_fncode].icode; + const char *fn_name1 = rs6000_builtin_info[uns_fncode].bifname; + const char *fn_name2 = (icode != CODE_FOR_nothing) + ? get_insn_name ((int) icode) + : "nothing"; + + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "rs6000_gimple_fold_builtin %d %s %s\n", + fn_code, fn_name1, fn_name2); + + /* Prevent gimple folding for code that does not have a LHS, unless it is + allowed per the rs6000_builtin_valid_without_lhs helper function. */ + if (!gimple_call_lhs (stmt) + && !rs6000_builtin_valid_without_lhs (fn_code, fndecl)) + return false; + + /* Don't fold invalid builtins, let rs6000_expand_builtin diagnose it. */ + if (!rs6000_builtin_is_supported (fn_code)) + return false; + + if (rs6000_gimple_fold_mma_builtin (gsi, fn_code)) + return true; + + switch (fn_code) + { + /* Flavors of vec_add. We deliberately don't expand + RS6000_BIF_VADDUQM as it gets lowered from V1TImode to + TImode, resulting in much poorer code generation. 
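For context, a sketch of the user-level shape of the MMA built-ins rewritten above (using the documented interface with -mcpu=power10; the function name is illustrative): the accumulator is passed by reference in source, and the rewrite turns it into pass-by-value SSA form.

#include <altivec.h>

void
f32_outer_product (__vector_quad *acc, vector unsigned char a,
                   vector unsigned char b, vector float result[4])
{
  __builtin_mma_xvf32ger (acc, a, b);     /* acc  = a outer-product b  */
  __builtin_mma_xvf32gerpp (acc, a, b);   /* acc += a outer-product b  */
  __builtin_mma_disassemble_acc (result, acc);
}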
*/ + case RS6000_BIF_VADDUBM: + case RS6000_BIF_VADDUHM: + case RS6000_BIF_VADDUWM: + case RS6000_BIF_VADDUDM: + case RS6000_BIF_VADDFP: + case RS6000_BIF_XVADDDP: + case RS6000_BIF_XVADDSP: + bcode = PLUS_EXPR; + do_binary: + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (lhs))) + && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (lhs)))) + { + /* Ensure the binary operation is performed in a type + that wraps if it is integral type. */ + gimple_seq stmts = NULL; + tree type = unsigned_type_for (TREE_TYPE (lhs)); + tree uarg0 = gimple_build (&stmts, VIEW_CONVERT_EXPR, + type, arg0); + tree uarg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR, + type, arg1); + tree res = gimple_build (&stmts, gimple_location (stmt), bcode, + type, uarg0, uarg1); + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); + g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, + build1 (VIEW_CONVERT_EXPR, + TREE_TYPE (lhs), res)); + gsi_replace (gsi, g, true); + return true; + } + g = gimple_build_assign (lhs, bcode, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + /* Flavors of vec_sub. We deliberately don't expand + RS6000_BIF_VSUBUQM. */ + case RS6000_BIF_VSUBUBM: + case RS6000_BIF_VSUBUHM: + case RS6000_BIF_VSUBUWM: + case RS6000_BIF_VSUBUDM: + case RS6000_BIF_VSUBFP: + case RS6000_BIF_XVSUBDP: + case RS6000_BIF_XVSUBSP: + bcode = MINUS_EXPR; + goto do_binary; + case RS6000_BIF_XVMULSP: + case RS6000_BIF_XVMULDP: + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + /* Even element flavors of vec_mul (signed). */ + case RS6000_BIF_VMULESB: + case RS6000_BIF_VMULESH: + case RS6000_BIF_VMULESW: + /* Even element flavors of vec_mul (unsigned). */ + case RS6000_BIF_VMULEUB: + case RS6000_BIF_VMULEUH: + case RS6000_BIF_VMULEUW: + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + /* Odd element flavors of vec_mul (signed). */ + case RS6000_BIF_VMULOSB: + case RS6000_BIF_VMULOSH: + case RS6000_BIF_VMULOSW: + /* Odd element flavors of vec_mul (unsigned). */ + case RS6000_BIF_VMULOUB: + case RS6000_BIF_VMULOUH: + case RS6000_BIF_VMULOUW: + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + /* Flavors of vec_div (Integer). */ + case RS6000_BIF_DIV_V2DI: + case RS6000_BIF_UDIV_V2DI: + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + /* Flavors of vec_div (Float). 
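The do_binary path above deliberately performs signed integer vector arithmetic in the corresponding unsigned type so that it wraps. A minimal GNU C sketch of the transform:

typedef int v4si __attribute__ ((vector_size (16)));
typedef unsigned int v4ui __attribute__ ((vector_size (16)));

v4si
wrapping_vadd (v4si a, v4si b)
{
  v4ui ua = (v4ui) a;        /* VIEW_CONVERT_EXPR to the unsigned type  */
  v4ui ub = (v4ui) b;
  return (v4si) (ua + ub);   /* PLUS_EXPR that wraps, converted back    */
}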
*/ + case RS6000_BIF_XVDIVSP: + case RS6000_BIF_XVDIVDP: + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + /* Flavors of vec_and. */ + case RS6000_BIF_VAND_V16QI_UNS: + case RS6000_BIF_VAND_V16QI: + case RS6000_BIF_VAND_V8HI_UNS: + case RS6000_BIF_VAND_V8HI: + case RS6000_BIF_VAND_V4SI_UNS: + case RS6000_BIF_VAND_V4SI: + case RS6000_BIF_VAND_V2DI_UNS: + case RS6000_BIF_VAND_V2DI: + case RS6000_BIF_VAND_V4SF: + case RS6000_BIF_VAND_V2DF: + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + /* Flavors of vec_andc. */ + case RS6000_BIF_VANDC_V16QI_UNS: + case RS6000_BIF_VANDC_V16QI: + case RS6000_BIF_VANDC_V8HI_UNS: + case RS6000_BIF_VANDC_V8HI: + case RS6000_BIF_VANDC_V4SI_UNS: + case RS6000_BIF_VANDC_V4SI: + case RS6000_BIF_VANDC_V2DI_UNS: + case RS6000_BIF_VANDC_V2DI: + case RS6000_BIF_VANDC_V4SF: + case RS6000_BIF_VANDC_V2DF: + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); + g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + /* Flavors of vec_nand. */ + case RS6000_BIF_NAND_V16QI_UNS: + case RS6000_BIF_NAND_V16QI: + case RS6000_BIF_NAND_V8HI_UNS: + case RS6000_BIF_NAND_V8HI: + case RS6000_BIF_NAND_V4SI_UNS: + case RS6000_BIF_NAND_V4SI: + case RS6000_BIF_NAND_V2DI_UNS: + case RS6000_BIF_NAND_V2DI: + case RS6000_BIF_NAND_V4SF: + case RS6000_BIF_NAND_V2DF: + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); + g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + /* Flavors of vec_or. */ + case RS6000_BIF_VOR_V16QI_UNS: + case RS6000_BIF_VOR_V16QI: + case RS6000_BIF_VOR_V8HI_UNS: + case RS6000_BIF_VOR_V8HI: + case RS6000_BIF_VOR_V4SI_UNS: + case RS6000_BIF_VOR_V4SI: + case RS6000_BIF_VOR_V2DI_UNS: + case RS6000_BIF_VOR_V2DI: + case RS6000_BIF_VOR_V4SF: + case RS6000_BIF_VOR_V2DF: + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + /* flavors of vec_orc. 
*/ + case RS6000_BIF_ORC_V16QI_UNS: + case RS6000_BIF_ORC_V16QI: + case RS6000_BIF_ORC_V8HI_UNS: + case RS6000_BIF_ORC_V8HI: + case RS6000_BIF_ORC_V4SI_UNS: + case RS6000_BIF_ORC_V4SI: + case RS6000_BIF_ORC_V2DI_UNS: + case RS6000_BIF_ORC_V2DI: + case RS6000_BIF_ORC_V4SF: + case RS6000_BIF_ORC_V2DF: + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); + g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + /* Flavors of vec_xor. */ + case RS6000_BIF_VXOR_V16QI_UNS: + case RS6000_BIF_VXOR_V16QI: + case RS6000_BIF_VXOR_V8HI_UNS: + case RS6000_BIF_VXOR_V8HI: + case RS6000_BIF_VXOR_V4SI_UNS: + case RS6000_BIF_VXOR_V4SI: + case RS6000_BIF_VXOR_V2DI_UNS: + case RS6000_BIF_VXOR_V2DI: + case RS6000_BIF_VXOR_V4SF: + case RS6000_BIF_VXOR_V2DF: + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + /* Flavors of vec_nor. */ + case RS6000_BIF_VNOR_V16QI_UNS: + case RS6000_BIF_VNOR_V16QI: + case RS6000_BIF_VNOR_V8HI_UNS: + case RS6000_BIF_VNOR_V8HI: + case RS6000_BIF_VNOR_V4SI_UNS: + case RS6000_BIF_VNOR_V4SI: + case RS6000_BIF_VNOR_V2DI_UNS: + case RS6000_BIF_VNOR_V2DI: + case RS6000_BIF_VNOR_V4SF: + case RS6000_BIF_VNOR_V2DF: + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); + g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + /* flavors of vec_abs. */ + case RS6000_BIF_ABS_V16QI: + case RS6000_BIF_ABS_V8HI: + case RS6000_BIF_ABS_V4SI: + case RS6000_BIF_ABS_V4SF: + case RS6000_BIF_ABS_V2DI: + case RS6000_BIF_XVABSDP: + case RS6000_BIF_XVABSSP: + arg0 = gimple_call_arg (stmt, 0); + if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0))) + && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0)))) + return false; + lhs = gimple_call_lhs (stmt); + g = gimple_build_assign (lhs, ABS_EXPR, arg0); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + /* flavors of vec_min. */ + case RS6000_BIF_XVMINDP: + case RS6000_BIF_XVMINSP: + case RS6000_BIF_VMINFP: + { + lhs = gimple_call_lhs (stmt); + tree type = TREE_TYPE (lhs); + if (HONOR_NANS (type)) + return false; + gcc_fallthrough (); + } + case RS6000_BIF_VMINSD: + case RS6000_BIF_VMINUD: + case RS6000_BIF_VMINSB: + case RS6000_BIF_VMINSH: + case RS6000_BIF_VMINSW: + case RS6000_BIF_VMINUB: + case RS6000_BIF_VMINUH: + case RS6000_BIF_VMINUW: + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + g = gimple_build_assign (lhs, MIN_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + /* flavors of vec_max. 
*/ + case RS6000_BIF_XVMAXDP: + case RS6000_BIF_XVMAXSP: + case RS6000_BIF_VMAXFP: + { + lhs = gimple_call_lhs (stmt); + tree type = TREE_TYPE (lhs); + if (HONOR_NANS (type)) + return false; + gcc_fallthrough (); + } + case RS6000_BIF_VMAXSD: + case RS6000_BIF_VMAXUD: + case RS6000_BIF_VMAXSB: + case RS6000_BIF_VMAXSH: + case RS6000_BIF_VMAXSW: + case RS6000_BIF_VMAXUB: + case RS6000_BIF_VMAXUH: + case RS6000_BIF_VMAXUW: + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + g = gimple_build_assign (lhs, MAX_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + /* Flavors of vec_eqv. */ + case RS6000_BIF_EQV_V16QI: + case RS6000_BIF_EQV_V8HI: + case RS6000_BIF_EQV_V4SI: + case RS6000_BIF_EQV_V4SF: + case RS6000_BIF_EQV_V2DF: + case RS6000_BIF_EQV_V2DI: + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); + g = gimple_build_assign (temp, BIT_XOR_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + /* Flavors of vec_rotate_left. */ + case RS6000_BIF_VRLB: + case RS6000_BIF_VRLH: + case RS6000_BIF_VRLW: + case RS6000_BIF_VRLD: + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + g = gimple_build_assign (lhs, LROTATE_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + /* Flavors of vector shift right algebraic. + vec_sra{b,h,w} -> vsra{b,h,w}. */ + case RS6000_BIF_VSRAB: + case RS6000_BIF_VSRAH: + case RS6000_BIF_VSRAW: + case RS6000_BIF_VSRAD: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + tree arg1_type = TREE_TYPE (arg1); + tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1)); + tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type)); + location_t loc = gimple_location (stmt); + /* Force arg1 into the range valid matching the arg0 type. */ + /* Build a vector consisting of the max valid bit-size values. */ + int n_elts = VECTOR_CST_NELTS (arg1); + tree element_size = build_int_cst (unsigned_element_type, + 128 / n_elts); + tree_vector_builder elts (unsigned_arg1_type, n_elts, 1); + for (int i = 0; i < n_elts; i++) + elts.safe_push (element_size); + tree modulo_tree = elts.build (); + /* Modulo the provided shift value against that vector. */ + gimple_seq stmts = NULL; + tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR, + unsigned_arg1_type, arg1); + tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR, + unsigned_arg1_type, unsigned_arg1, + modulo_tree); + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); + /* And finally, do the shift. */ + g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, new_arg1); + gimple_set_location (g, loc); + gsi_replace (gsi, g, true); + return true; + } + /* Flavors of vector shift left. + builtin_altivec_vsl{b,h,w} -> vsl{b,h,w}. 
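The shift folds above first reduce the shift count modulo the element width (128 / n_elts) and only then emit an ordinary shift, matching the vsra/vsl/vsr hardware behaviour. A GNU C sketch for the word case (the function name is illustrative):

typedef int v4si __attribute__ ((vector_size (16)));
typedef unsigned int v4ui __attribute__ ((vector_size (16)));

v4si
vsraw_like (v4si a, v4ui count)
{
  v4ui n = count % 32;       /* TRUNC_MOD by 128 / 4 = 32 bits per lane */
  return a >> (v4si) n;      /* arithmetic right shift, element-wise    */
}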
*/ + case RS6000_BIF_VSLB: + case RS6000_BIF_VSLH: + case RS6000_BIF_VSLW: + case RS6000_BIF_VSLD: + { + location_t loc; + gimple_seq stmts = NULL; + arg0 = gimple_call_arg (stmt, 0); + tree arg0_type = TREE_TYPE (arg0); + if (INTEGRAL_TYPE_P (TREE_TYPE (arg0_type)) + && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg0_type))) + return false; + arg1 = gimple_call_arg (stmt, 1); + tree arg1_type = TREE_TYPE (arg1); + tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1)); + tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type)); + loc = gimple_location (stmt); + lhs = gimple_call_lhs (stmt); + /* Force arg1 into the range valid matching the arg0 type. */ + /* Build a vector consisting of the max valid bit-size values. */ + int n_elts = VECTOR_CST_NELTS (arg1); + int tree_size_in_bits = TREE_INT_CST_LOW (size_in_bytes (arg1_type)) + * BITS_PER_UNIT; + tree element_size = build_int_cst (unsigned_element_type, + tree_size_in_bits / n_elts); + tree_vector_builder elts (unsigned_type_for (arg1_type), n_elts, 1); + for (int i = 0; i < n_elts; i++) + elts.safe_push (element_size); + tree modulo_tree = elts.build (); + /* Modulo the provided shift value against that vector. */ + tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR, + unsigned_arg1_type, arg1); + tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR, + unsigned_arg1_type, unsigned_arg1, + modulo_tree); + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); + /* And finally, do the shift. */ + g = gimple_build_assign (lhs, LSHIFT_EXPR, arg0, new_arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + /* Flavors of vector shift right. */ + case RS6000_BIF_VSRB: + case RS6000_BIF_VSRH: + case RS6000_BIF_VSRW: + case RS6000_BIF_VSRD: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + tree arg1_type = TREE_TYPE (arg1); + tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1)); + tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type)); + location_t loc = gimple_location (stmt); + gimple_seq stmts = NULL; + /* Convert arg0 to unsigned. */ + tree arg0_unsigned + = gimple_build (&stmts, VIEW_CONVERT_EXPR, + unsigned_type_for (TREE_TYPE (arg0)), arg0); + /* Force arg1 into the range valid matching the arg0 type. */ + /* Build a vector consisting of the max valid bit-size values. */ + int n_elts = VECTOR_CST_NELTS (arg1); + tree element_size = build_int_cst (unsigned_element_type, + 128 / n_elts); + tree_vector_builder elts (unsigned_arg1_type, n_elts, 1); + for (int i = 0; i < n_elts; i++) + elts.safe_push (element_size); + tree modulo_tree = elts.build (); + /* Modulo the provided shift value against that vector. */ + tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR, + unsigned_arg1_type, arg1); + tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR, + unsigned_arg1_type, unsigned_arg1, + modulo_tree); + /* Do the shift. */ + tree res + = gimple_build (&stmts, RSHIFT_EXPR, + TREE_TYPE (arg0_unsigned), arg0_unsigned, new_arg1); + /* Convert result back to the lhs type. */ + res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res); + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); + replace_call_with_value (gsi, res); + return true; + } + /* Vector loads. 
*/ + case RS6000_BIF_LVX_V16QI: + case RS6000_BIF_LVX_V8HI: + case RS6000_BIF_LVX_V4SI: + case RS6000_BIF_LVX_V4SF: + case RS6000_BIF_LVX_V2DI: + case RS6000_BIF_LVX_V2DF: + case RS6000_BIF_LVX_V1TI: + { + arg0 = gimple_call_arg (stmt, 0); // offset + arg1 = gimple_call_arg (stmt, 1); // address + lhs = gimple_call_lhs (stmt); + location_t loc = gimple_location (stmt); + /* Since arg1 may be cast to a different type, just use ptr_type_node + here instead of trying to enforce TBAA on pointer types. */ + tree arg1_type = ptr_type_node; + tree lhs_type = TREE_TYPE (lhs); + /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create + the tree using the value from arg0. The resulting type will match + the type of arg1. */ + gimple_seq stmts = NULL; + tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0); + tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR, + arg1_type, arg1, temp_offset); + /* Mask off any lower bits from the address. */ + tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR, + arg1_type, temp_addr, + build_int_cst (arg1_type, -16)); + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); + if (!is_gimple_mem_ref_addr (aligned_addr)) + { + tree t = make_ssa_name (TREE_TYPE (aligned_addr)); + gimple *g = gimple_build_assign (t, aligned_addr); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + aligned_addr = t; + } + /* Use the build2 helper to set up the mem_ref. The MEM_REF could also + take an offset, but since we've already incorporated the offset + above, here we just pass in a zero. */ + gimple *g + = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr, + build_int_cst (arg1_type, 0))); + gimple_set_location (g, loc); + gsi_replace (gsi, g, true); + return true; + } + /* Vector stores. */ + case RS6000_BIF_STVX_V16QI: + case RS6000_BIF_STVX_V8HI: + case RS6000_BIF_STVX_V4SI: + case RS6000_BIF_STVX_V4SF: + case RS6000_BIF_STVX_V2DI: + case RS6000_BIF_STVX_V2DF: + { + arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */ + arg1 = gimple_call_arg (stmt, 1); /* Offset. */ + tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */ + location_t loc = gimple_location (stmt); + tree arg0_type = TREE_TYPE (arg0); + /* Use ptr_type_node (no TBAA) for the arg2_type. + FIXME: (Richard) "A proper fix would be to transition this type as + seen from the frontend to GIMPLE, for example in a similar way we + do for MEM_REFs by piggy-backing that on an extra argument, a + constant zero pointer of the alias pointer type to use (which would + also serve as a type indicator of the store itself). I'd use a + target specific internal function for this (not sure if we can have + those target specific, but I guess if it's folded away then that's + fine) and get away with the overload set." */ + tree arg2_type = ptr_type_node; + /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create + the tree using the value from arg0. The resulting type will match + the type of arg2. */ + gimple_seq stmts = NULL; + tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1); + tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR, + arg2_type, arg2, temp_offset); + /* Mask off any lower bits from the address. 
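A source-level reminder of the semantics the lvx fold above implements (a sketch): vec_ld truncates the effective address to a 16-byte boundary, which is exactly the BIT_AND_EXPR with -16 seen in the code.

#include <altivec.h>

vector signed int
load_from (int offset, const signed int *p)
{
  /* Loads from ((char *) p + offset) with the low four address bits
     cleared; a misaligned address is rounded down, not faulted on.  */
  return vec_ld (offset, p);
}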
*/ + tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR, + arg2_type, temp_addr, + build_int_cst (arg2_type, -16)); + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); + if (!is_gimple_mem_ref_addr (aligned_addr)) + { + tree t = make_ssa_name (TREE_TYPE (aligned_addr)); + gimple *g = gimple_build_assign (t, aligned_addr); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + aligned_addr = t; + } + /* The desired gimple result should be similar to: + MEM[(__vector floatD.1407 *)_1] = vf1D.2697; */ + gimple *g + = gimple_build_assign (build2 (MEM_REF, arg0_type, aligned_addr, + build_int_cst (arg2_type, 0)), arg0); + gimple_set_location (g, loc); + gsi_replace (gsi, g, true); + return true; + } + + /* unaligned Vector loads. */ + case RS6000_BIF_LXVW4X_V16QI: + case RS6000_BIF_LXVW4X_V8HI: + case RS6000_BIF_LXVW4X_V4SF: + case RS6000_BIF_LXVW4X_V4SI: + case RS6000_BIF_LXVD2X_V2DF: + case RS6000_BIF_LXVD2X_V2DI: + { + arg0 = gimple_call_arg (stmt, 0); // offset + arg1 = gimple_call_arg (stmt, 1); // address + lhs = gimple_call_lhs (stmt); + location_t loc = gimple_location (stmt); + /* Since arg1 may be cast to a different type, just use ptr_type_node + here instead of trying to enforce TBAA on pointer types. */ + tree arg1_type = ptr_type_node; + tree lhs_type = TREE_TYPE (lhs); + /* In GIMPLE the type of the MEM_REF specifies the alignment. The + required alignment (power) is 4 bytes regardless of data type. */ + tree align_ltype = build_aligned_type (lhs_type, 4); + /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create + the tree using the value from arg0. The resulting type will match + the type of arg1. */ + gimple_seq stmts = NULL; + tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0); + tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR, + arg1_type, arg1, temp_offset); + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); + if (!is_gimple_mem_ref_addr (temp_addr)) + { + tree t = make_ssa_name (TREE_TYPE (temp_addr)); + gimple *g = gimple_build_assign (t, temp_addr); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + temp_addr = t; + } + /* Use the build2 helper to set up the mem_ref. The MEM_REF could also + take an offset, but since we've already incorporated the offset + above, here we just pass in a zero. */ + gimple *g; + g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr, + build_int_cst (arg1_type, 0))); + gimple_set_location (g, loc); + gsi_replace (gsi, g, true); + return true; + } + + /* unaligned Vector stores. */ + case RS6000_BIF_STXVW4X_V16QI: + case RS6000_BIF_STXVW4X_V8HI: + case RS6000_BIF_STXVW4X_V4SF: + case RS6000_BIF_STXVW4X_V4SI: + case RS6000_BIF_STXVD2X_V2DF: + case RS6000_BIF_STXVD2X_V2DI: + { + arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */ + arg1 = gimple_call_arg (stmt, 1); /* Offset. */ + tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */ + location_t loc = gimple_location (stmt); + tree arg0_type = TREE_TYPE (arg0); + /* Use ptr_type_node (no TBAA) for the arg2_type. */ + tree arg2_type = ptr_type_node; + /* In GIMPLE the type of the MEM_REF specifies the alignment. The + required alignment (power) is 4 bytes regardless of data type. */ + tree align_stype = build_aligned_type (arg0_type, 4); + /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create + the tree using the value from arg1. 
*/ + gimple_seq stmts = NULL; + tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1); + tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR, + arg2_type, arg2, temp_offset); + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); + if (!is_gimple_mem_ref_addr (temp_addr)) + { + tree t = make_ssa_name (TREE_TYPE (temp_addr)); + gimple *g = gimple_build_assign (t, temp_addr); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + temp_addr = t; + } + gimple *g; + g = gimple_build_assign (build2 (MEM_REF, align_stype, temp_addr, + build_int_cst (arg2_type, 0)), arg0); + gimple_set_location (g, loc); + gsi_replace (gsi, g, true); + return true; + } + + /* Vector Fused multiply-add (fma). */ + case RS6000_BIF_VMADDFP: + case RS6000_BIF_XVMADDDP: + case RS6000_BIF_XVMADDSP: + case RS6000_BIF_VMLADDUHM: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + tree arg2 = gimple_call_arg (stmt, 2); + lhs = gimple_call_lhs (stmt); + gcall *g = gimple_build_call_internal (IFN_FMA, 3, arg0, arg1, arg2); + gimple_call_set_lhs (g, lhs); + gimple_call_set_nothrow (g, true); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + + /* Vector compares; EQ, NE, GE, GT, LE. */ + case RS6000_BIF_VCMPEQUB: + case RS6000_BIF_VCMPEQUH: + case RS6000_BIF_VCMPEQUW: + case RS6000_BIF_VCMPEQUD: + /* We deliberately omit RS6000_BIF_VCMPEQUT for now, because gimple + folding produces worse code for 128-bit compares. */ + fold_compare_helper (gsi, EQ_EXPR, stmt); + return true; + + case RS6000_BIF_VCMPNEB: + case RS6000_BIF_VCMPNEH: + case RS6000_BIF_VCMPNEW: + /* We deliberately omit RS6000_BIF_VCMPNET for now, because gimple + folding produces worse code for 128-bit compares. */ + fold_compare_helper (gsi, NE_EXPR, stmt); + return true; + + case RS6000_BIF_CMPGE_16QI: + case RS6000_BIF_CMPGE_U16QI: + case RS6000_BIF_CMPGE_8HI: + case RS6000_BIF_CMPGE_U8HI: + case RS6000_BIF_CMPGE_4SI: + case RS6000_BIF_CMPGE_U4SI: + case RS6000_BIF_CMPGE_2DI: + case RS6000_BIF_CMPGE_U2DI: + /* We deliberately omit RS6000_BIF_CMPGE_1TI and RS6000_BIF_CMPGE_U1TI + for now, because gimple folding produces worse code for 128-bit + compares. */ + fold_compare_helper (gsi, GE_EXPR, stmt); + return true; + + case RS6000_BIF_VCMPGTSB: + case RS6000_BIF_VCMPGTUB: + case RS6000_BIF_VCMPGTSH: + case RS6000_BIF_VCMPGTUH: + case RS6000_BIF_VCMPGTSW: + case RS6000_BIF_VCMPGTUW: + case RS6000_BIF_VCMPGTUD: + case RS6000_BIF_VCMPGTSD: + /* We deliberately omit RS6000_BIF_VCMPGTUT and RS6000_BIF_VCMPGTST + for now, because gimple folding produces worse code for 128-bit + compares. */ + fold_compare_helper (gsi, GT_EXPR, stmt); + return true; + + case RS6000_BIF_CMPLE_16QI: + case RS6000_BIF_CMPLE_U16QI: + case RS6000_BIF_CMPLE_8HI: + case RS6000_BIF_CMPLE_U8HI: + case RS6000_BIF_CMPLE_4SI: + case RS6000_BIF_CMPLE_U4SI: + case RS6000_BIF_CMPLE_2DI: + case RS6000_BIF_CMPLE_U2DI: + /* We deliberately omit RS6000_BIF_CMPLE_1TI and RS6000_BIF_CMPLE_U1TI + for now, because gimple folding produces worse code for 128-bit + compares. */ + fold_compare_helper (gsi, LE_EXPR, stmt); + return true; + + /* flavors of vec_splat_[us]{8,16,32}. */ + case RS6000_BIF_VSPLTISB: + case RS6000_BIF_VSPLTISH: + case RS6000_BIF_VSPLTISW: + { + arg0 = gimple_call_arg (stmt, 0); + lhs = gimple_call_lhs (stmt); + + /* Only fold the vec_splat_*() if the lower bits of arg 0 is a + 5-bit signed constant in range -16 to +15. 
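The multiply-add fold above maps these built-ins onto the generic FMA internal function; at source level (a sketch) this is the familiar vec_madd:

#include <altivec.h>

vector float
fused_madd (vector float a, vector float b, vector float c)
{
  return vec_madd (a, b, c);   /* becomes IFN_FMA (a, b, c) after folding */
}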
*/ + if (TREE_CODE (arg0) != INTEGER_CST + || !IN_RANGE (TREE_INT_CST_LOW (arg0), -16, 15)) + return false; + gimple_seq stmts = NULL; + location_t loc = gimple_location (stmt); + tree splat_value = gimple_convert (&stmts, loc, + TREE_TYPE (TREE_TYPE (lhs)), arg0); + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); + tree splat_tree = build_vector_from_val (TREE_TYPE (lhs), splat_value); + g = gimple_build_assign (lhs, splat_tree); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + + /* Flavors of vec_splat. */ + /* a = vec_splat (b, 0x3) becomes a = { b[3],b[3],b[3],...}; */ + case RS6000_BIF_VSPLTB: + case RS6000_BIF_VSPLTH: + case RS6000_BIF_VSPLTW: + case RS6000_BIF_XXSPLTD_V2DI: + case RS6000_BIF_XXSPLTD_V2DF: + { + arg0 = gimple_call_arg (stmt, 0); /* input vector. */ + arg1 = gimple_call_arg (stmt, 1); /* index into arg0. */ + /* Only fold the vec_splat_*() if arg1 is both a constant value and + is a valid index into the arg0 vector. */ + unsigned int n_elts = VECTOR_CST_NELTS (arg0); + if (TREE_CODE (arg1) != INTEGER_CST + || TREE_INT_CST_LOW (arg1) > (n_elts -1)) + return false; + lhs = gimple_call_lhs (stmt); + tree lhs_type = TREE_TYPE (lhs); + tree arg0_type = TREE_TYPE (arg0); + tree splat; + if (TREE_CODE (arg0) == VECTOR_CST) + splat = VECTOR_CST_ELT (arg0, TREE_INT_CST_LOW (arg1)); + else + { + /* Determine (in bits) the length and start location of the + splat value for a call to the tree_vec_extract helper. */ + int splat_elem_size = TREE_INT_CST_LOW (size_in_bytes (arg0_type)) + * BITS_PER_UNIT / n_elts; + int splat_start_bit = TREE_INT_CST_LOW (arg1) * splat_elem_size; + tree len = build_int_cst (bitsizetype, splat_elem_size); + tree start = build_int_cst (bitsizetype, splat_start_bit); + splat = tree_vec_extract (gsi, TREE_TYPE (lhs_type), arg0, + len, start); + } + /* And finally, build the new vector. */ + tree splat_tree = build_vector_from_val (lhs_type, splat); + g = gimple_build_assign (lhs, splat_tree); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + + /* vec_mergel (integrals). */ + case RS6000_BIF_VMRGLH: + case RS6000_BIF_VMRGLW: + case RS6000_BIF_XXMRGLW_4SI: + case RS6000_BIF_VMRGLB: + case RS6000_BIF_VEC_MERGEL_V2DI: + case RS6000_BIF_XXMRGLW_4SF: + case RS6000_BIF_VEC_MERGEL_V2DF: + fold_mergehl_helper (gsi, stmt, 1); + return true; + /* vec_mergeh (integrals). */ + case RS6000_BIF_VMRGHH: + case RS6000_BIF_VMRGHW: + case RS6000_BIF_XXMRGHW_4SI: + case RS6000_BIF_VMRGHB: + case RS6000_BIF_VEC_MERGEH_V2DI: + case RS6000_BIF_XXMRGHW_4SF: + case RS6000_BIF_VEC_MERGEH_V2DF: + fold_mergehl_helper (gsi, stmt, 0); + return true; + + /* Flavors of vec_mergee. */ + case RS6000_BIF_VMRGEW_V4SI: + case RS6000_BIF_VMRGEW_V2DI: + case RS6000_BIF_VMRGEW_V4SF: + case RS6000_BIF_VMRGEW_V2DF: + fold_mergeeo_helper (gsi, stmt, 0); + return true; + /* Flavors of vec_mergeo. 
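A short sketch of the two splat folds above: both end up as a single build_vector_from_val, i.e. a vector whose elements are all equal.

#include <altivec.h>

vector signed int
splats_demo (vector signed int b)
{
  vector signed int k = vec_splat_s32 (5);   /* {5, 5, 5, 5}             */
  vector signed int s = vec_splat (b, 3);    /* {b[3], b[3], b[3], b[3]} */
  return vec_add (k, s);
}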
*/ + case RS6000_BIF_VMRGOW_V4SI: + case RS6000_BIF_VMRGOW_V2DI: + case RS6000_BIF_VMRGOW_V4SF: + case RS6000_BIF_VMRGOW_V2DF: + fold_mergeeo_helper (gsi, stmt, 1); + return true; + + /* d = vec_pack (a, b) */ + case RS6000_BIF_VPKUDUM: + case RS6000_BIF_VPKUHUM: + case RS6000_BIF_VPKUWUM: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + lhs = gimple_call_lhs (stmt); + gimple *g = gimple_build_assign (lhs, VEC_PACK_TRUNC_EXPR, arg0, arg1); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + + /* d = vec_unpackh (a) */ + /* Note that the UNPACK_{HI,LO}_EXPR used in the gimple_build_assign call + in this code is sensitive to endian-ness, and needs to be inverted to + handle both LE and BE targets. */ + case RS6000_BIF_VUPKHSB: + case RS6000_BIF_VUPKHSH: + case RS6000_BIF_VUPKHSW: + { + arg0 = gimple_call_arg (stmt, 0); + lhs = gimple_call_lhs (stmt); + if (BYTES_BIG_ENDIAN) + g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0); + else + g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + /* d = vec_unpackl (a) */ + case RS6000_BIF_VUPKLSB: + case RS6000_BIF_VUPKLSH: + case RS6000_BIF_VUPKLSW: + { + arg0 = gimple_call_arg (stmt, 0); + lhs = gimple_call_lhs (stmt); + if (BYTES_BIG_ENDIAN) + g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0); + else + g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0); + gimple_set_location (g, gimple_location (stmt)); + gsi_replace (gsi, g, true); + return true; + } + /* There is no gimple type corresponding with pixel, so just return. */ + case RS6000_BIF_VUPKHPX: + case RS6000_BIF_VUPKLPX: + return false; + + /* vec_perm. */ + case RS6000_BIF_VPERM_16QI: + case RS6000_BIF_VPERM_8HI: + case RS6000_BIF_VPERM_4SI: + case RS6000_BIF_VPERM_2DI: + case RS6000_BIF_VPERM_4SF: + case RS6000_BIF_VPERM_2DF: + case RS6000_BIF_VPERM_16QI_UNS: + case RS6000_BIF_VPERM_8HI_UNS: + case RS6000_BIF_VPERM_4SI_UNS: + case RS6000_BIF_VPERM_2DI_UNS: + { + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + tree permute = gimple_call_arg (stmt, 2); + lhs = gimple_call_lhs (stmt); + location_t loc = gimple_location (stmt); + gimple_seq stmts = NULL; + // convert arg0 and arg1 to match the type of the permute + // for the VEC_PERM_EXPR operation. + tree permute_type = (TREE_TYPE (permute)); + tree arg0_ptype = gimple_build (&stmts, loc, VIEW_CONVERT_EXPR, + permute_type, arg0); + tree arg1_ptype = gimple_build (&stmts, loc, VIEW_CONVERT_EXPR, + permute_type, arg1); + tree lhs_ptype = gimple_build (&stmts, loc, VEC_PERM_EXPR, + permute_type, arg0_ptype, arg1_ptype, + permute); + // Convert the result back to the desired lhs type upon completion. 
+ tree temp = gimple_build (&stmts, loc, VIEW_CONVERT_EXPR, + TREE_TYPE (lhs), lhs_ptype); + gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); + g = gimple_build_assign (lhs, temp); + gimple_set_location (g, loc); + gsi_replace (gsi, g, true); + return true; + } + + default: + if (TARGET_DEBUG_BUILTIN) + fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n", + fn_code, fn_name1, fn_name2); + break; + } + + return false; +} + +/* **** Expansion support **** */ + +static rtx +altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target) +{ + rtx pat, scratch; + tree cr6_form = CALL_EXPR_ARG (exp, 0); + tree arg0 = CALL_EXPR_ARG (exp, 1); + tree arg1 = CALL_EXPR_ARG (exp, 2); + rtx op0 = expand_normal (arg0); + rtx op1 = expand_normal (arg1); + machine_mode tmode = SImode; + machine_mode mode0 = insn_data[icode].operand[1].mode; + machine_mode mode1 = insn_data[icode].operand[2].mode; + int cr6_form_int; + + if (TREE_CODE (cr6_form) != INTEGER_CST) + { + error ("argument 1 of %qs must be a constant", + "__builtin_altivec_predicate"); + return const0_rtx; + } + else + cr6_form_int = TREE_INT_CST_LOW (cr6_form); + + gcc_assert (mode0 == mode1); + + /* If we have invalid arguments, bail out before generating bad rtl. */ + if (arg0 == error_mark_node || arg1 == error_mark_node) + return const0_rtx; + + if (target == 0 + || GET_MODE (target) != tmode + || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + + if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) + op0 = copy_to_mode_reg (mode0, op0); + if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) + op1 = copy_to_mode_reg (mode1, op1); + + /* Note that for many of the relevant operations (e.g. cmpne or + cmpeq) with float or double operands, it makes more sense for the + mode of the allocated scratch register to select a vector of + integer. But the choice to copy the mode of operand 0 was made + long ago and there are no plans to change it. */ + scratch = gen_reg_rtx (mode0); + + pat = GEN_FCN (icode) (scratch, op0, op1); + if (! pat) + return 0; + emit_insn (pat); + + /* The vec_any* and vec_all* predicates use the same opcodes for two + different operations, but the bits in CR6 will be different + depending on what information we want. So we have to play tricks + with CR6 to get the right bits out. + + If you think this is disgusting, look at the specs for the + AltiVec predicates. */ + + switch (cr6_form_int) + { + case 0: + emit_insn (gen_cr6_test_for_zero (target)); + break; + case 1: + emit_insn (gen_cr6_test_for_zero_reverse (target)); + break; + case 2: + emit_insn (gen_cr6_test_for_lt (target)); + break; + case 3: + emit_insn (gen_cr6_test_for_lt_reverse (target)); + break; + default: + error ("argument 1 of %qs is out of range", + "__builtin_altivec_predicate"); + break; + } + + return target; +} + +/* Expand vec_init builtin. */ +static rtx +altivec_expand_vec_init_builtin (tree type, tree exp, rtx target) +{ + machine_mode tmode = TYPE_MODE (type); + machine_mode inner_mode = GET_MODE_INNER (tmode); + int i, n_elt = GET_MODE_NUNITS (tmode); + + gcc_assert (VECTOR_MODE_P (tmode)); + gcc_assert (n_elt == call_expr_nargs (exp)); + + if (!target || !register_operand (target, tmode)) + target = gen_reg_rtx (tmode); + + /* If we have a vector compromised of a single element, such as V1TImode, do + the initialization directly. 
*/ + if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode)) + { + rtx x = expand_normal (CALL_EXPR_ARG (exp, 0)); + emit_move_insn (target, gen_lowpart (tmode, x)); + } + else + { + rtvec v = rtvec_alloc (n_elt); + + for (i = 0; i < n_elt; ++i) + { + rtx x = expand_normal (CALL_EXPR_ARG (exp, i)); + RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x); + } + + rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v)); + } + + return target; +} + +/* Return the integer constant in ARG. Constrain it to be in the range + of the subparts of VEC_TYPE; issue an error if not. */ + +static int +get_element_number (tree vec_type, tree arg) +{ + unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1; + + if (!tree_fits_uhwi_p (arg) + || (elt = tree_to_uhwi (arg), elt > max)) + { + error ("selector must be an integer constant in the range [0, %wi]", max); + return 0; + } + + return elt; +} + +/* Expand vec_set builtin. */ +static rtx +altivec_expand_vec_set_builtin (tree exp) +{ + machine_mode tmode, mode1; + tree arg0, arg1, arg2; + int elt; + rtx op0, op1; + + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); + + tmode = TYPE_MODE (TREE_TYPE (arg0)); + mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); + gcc_assert (VECTOR_MODE_P (tmode)); + + op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL); + op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL); + elt = get_element_number (TREE_TYPE (arg0), arg2); + + if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode) + op1 = convert_modes (mode1, GET_MODE (op1), op1, true); + + op0 = force_reg (tmode, op0); + op1 = force_reg (mode1, op1); + + rs6000_expand_vector_set (op0, op1, GEN_INT (elt)); + + return op0; +} + +/* Expand vec_ext builtin. */ +static rtx +altivec_expand_vec_ext_builtin (tree exp, rtx target) +{ + machine_mode tmode, mode0; + tree arg0, arg1; + rtx op0; + rtx op1; + + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + + op0 = expand_normal (arg0); + op1 = expand_normal (arg1); + + if (TREE_CODE (arg1) == INTEGER_CST) + { + unsigned HOST_WIDE_INT elt; + unsigned HOST_WIDE_INT size = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); + unsigned int truncated_selector; + /* Even if !tree_fits_uhwi_p (arg1)), TREE_INT_CST_LOW (arg0) + returns low-order bits of INTEGER_CST for modulo indexing. */ + elt = TREE_INT_CST_LOW (arg1); + truncated_selector = elt % size; + op1 = GEN_INT (truncated_selector); + } + + tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); + mode0 = TYPE_MODE (TREE_TYPE (arg0)); + gcc_assert (VECTOR_MODE_P (mode0)); + + op0 = force_reg (mode0, op0); + + if (optimize || !target || !register_operand (target, tmode)) + target = gen_reg_rtx (tmode); + + rs6000_expand_vector_extract (target, op0, op1); + + return target; +} + +/* Expand ALTIVEC_BUILTIN_MASK_FOR_LOAD. */ +rtx +rs6000_expand_ldst_mask (rtx target, tree arg0) +{ + int icode2 = BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct + : (int) CODE_FOR_altivec_lvsl_direct; + machine_mode tmode = insn_data[icode2].operand[0].mode; + machine_mode mode = insn_data[icode2].operand[1].mode; + + gcc_assert (TARGET_ALTIVEC); + + gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg0))); + rtx op = expand_expr (arg0, NULL_RTX, Pmode, EXPAND_NORMAL); + rtx addr = memory_address (mode, op); + /* We need to negate the address. 
*/ + op = gen_reg_rtx (GET_MODE (addr)); + emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr))); + op = gen_rtx_MEM (mode, op); + + if (target == 0 + || GET_MODE (target) != tmode + || !insn_data[icode2].operand[0].predicate (target, tmode)) + target = gen_reg_rtx (tmode); + + rtx pat = GEN_FCN (icode2) (target, op); + if (!pat) + return 0; + emit_insn (pat); + + return target; +} + +/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values. */ +static const struct +{ + const char *cpu; + unsigned int cpuid; +} cpu_is_info[] = { + { "power10", PPC_PLATFORM_POWER10 }, + { "power9", PPC_PLATFORM_POWER9 }, + { "power8", PPC_PLATFORM_POWER8 }, + { "power7", PPC_PLATFORM_POWER7 }, + { "power6x", PPC_PLATFORM_POWER6X }, + { "power6", PPC_PLATFORM_POWER6 }, + { "power5+", PPC_PLATFORM_POWER5_PLUS }, + { "power5", PPC_PLATFORM_POWER5 }, + { "ppc970", PPC_PLATFORM_PPC970 }, + { "power4", PPC_PLATFORM_POWER4 }, + { "ppca2", PPC_PLATFORM_PPCA2 }, + { "ppc476", PPC_PLATFORM_PPC476 }, + { "ppc464", PPC_PLATFORM_PPC464 }, + { "ppc440", PPC_PLATFORM_PPC440 }, + { "ppc405", PPC_PLATFORM_PPC405 }, + { "ppc-cell-be", PPC_PLATFORM_CELL_BE } +}; + +/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks. */ +static const struct +{ + const char *hwcap; + int mask; + unsigned int id; +} cpu_supports_info[] = { + /* AT_HWCAP masks. */ + { "4xxmac", PPC_FEATURE_HAS_4xxMAC, 0 }, + { "altivec", PPC_FEATURE_HAS_ALTIVEC, 0 }, + { "arch_2_05", PPC_FEATURE_ARCH_2_05, 0 }, + { "arch_2_06", PPC_FEATURE_ARCH_2_06, 0 }, + { "archpmu", PPC_FEATURE_PERFMON_COMPAT, 0 }, + { "booke", PPC_FEATURE_BOOKE, 0 }, + { "cellbe", PPC_FEATURE_CELL_BE, 0 }, + { "dfp", PPC_FEATURE_HAS_DFP, 0 }, + { "efpdouble", PPC_FEATURE_HAS_EFP_DOUBLE, 0 }, + { "efpsingle", PPC_FEATURE_HAS_EFP_SINGLE, 0 }, + { "fpu", PPC_FEATURE_HAS_FPU, 0 }, + { "ic_snoop", PPC_FEATURE_ICACHE_SNOOP, 0 }, + { "mmu", PPC_FEATURE_HAS_MMU, 0 }, + { "notb", PPC_FEATURE_NO_TB, 0 }, + { "pa6t", PPC_FEATURE_PA6T, 0 }, + { "power4", PPC_FEATURE_POWER4, 0 }, + { "power5", PPC_FEATURE_POWER5, 0 }, + { "power5+", PPC_FEATURE_POWER5_PLUS, 0 }, + { "power6x", PPC_FEATURE_POWER6_EXT, 0 }, + { "ppc32", PPC_FEATURE_32, 0 }, + { "ppc601", PPC_FEATURE_601_INSTR, 0 }, + { "ppc64", PPC_FEATURE_64, 0 }, + { "ppcle", PPC_FEATURE_PPC_LE, 0 }, + { "smt", PPC_FEATURE_SMT, 0 }, + { "spe", PPC_FEATURE_HAS_SPE, 0 }, + { "true_le", PPC_FEATURE_TRUE_LE, 0 }, + { "ucache", PPC_FEATURE_UNIFIED_CACHE, 0 }, + { "vsx", PPC_FEATURE_HAS_VSX, 0 }, + + /* AT_HWCAP2 masks. */ + { "arch_2_07", PPC_FEATURE2_ARCH_2_07, 1 }, + { "dscr", PPC_FEATURE2_HAS_DSCR, 1 }, + { "ebb", PPC_FEATURE2_HAS_EBB, 1 }, + { "htm", PPC_FEATURE2_HAS_HTM, 1 }, + { "htm-nosc", PPC_FEATURE2_HTM_NOSC, 1 }, + { "htm-no-suspend", PPC_FEATURE2_HTM_NO_SUSPEND, 1 }, + { "isel", PPC_FEATURE2_HAS_ISEL, 1 }, + { "tar", PPC_FEATURE2_HAS_TAR, 1 }, + { "vcrypto", PPC_FEATURE2_HAS_VEC_CRYPTO, 1 }, + { "arch_3_00", PPC_FEATURE2_ARCH_3_00, 1 }, + { "ieee128", PPC_FEATURE2_HAS_IEEE128, 1 }, + { "darn", PPC_FEATURE2_DARN, 1 }, + { "scv", PPC_FEATURE2_SCV, 1 }, + { "arch_3_1", PPC_FEATURE2_ARCH_3_1, 1 }, + { "mma", PPC_FEATURE2_MMA, 1 }, +}; + +/* Expand the CPU builtin in FCODE and store the result in TARGET. */ +static rtx +cpu_expand_builtin (enum rs6000_gen_builtins fcode, + tree exp ATTRIBUTE_UNUSED, rtx target) +{ + /* __builtin_cpu_init () is a nop, so expand to nothing. 
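   A minimal usage sketch for the three CPU built-ins handled here (this assumes
   a glibc new enough to export the platform and HWCAP words in the TCB; see the
   warning for older libcs further down):

     int
     pick_path (void)
     {
       __builtin_cpu_init ();
       if (__builtin_cpu_is ("power10"))
         return 10;
       if (__builtin_cpu_supports ("vsx"))
         return 7;
       return 0;
     }

   The string arguments must be compile-time literals drawn from the
   cpu_is_info and cpu_supports_info tables above; anything else is rejected
   with the errors emitted below.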
*/ + if (fcode == RS6000_BIF_CPU_INIT) + return const0_rtx; + + if (target == 0 || GET_MODE (target) != SImode) + target = gen_reg_rtx (SImode); + + /* TODO: Factor the #ifdef'd code into a separate function. */ +#ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB + tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0); + /* Target clones creates an ARRAY_REF instead of STRING_CST, convert it back + to a STRING_CST. */ + if (TREE_CODE (arg) == ARRAY_REF + && TREE_CODE (TREE_OPERAND (arg, 0)) == STRING_CST + && TREE_CODE (TREE_OPERAND (arg, 1)) == INTEGER_CST + && compare_tree_int (TREE_OPERAND (arg, 1), 0) == 0) + arg = TREE_OPERAND (arg, 0); + + if (TREE_CODE (arg) != STRING_CST) + { + error ("builtin %qs only accepts a string argument", + rs6000_builtin_info[(size_t) fcode].bifname); + return const0_rtx; + } + + if (fcode == RS6000_BIF_CPU_IS) + { + const char *cpu = TREE_STRING_POINTER (arg); + rtx cpuid = NULL_RTX; + for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++) + if (strcmp (cpu, cpu_is_info[i].cpu) == 0) + { + /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */ + cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM); + break; + } + if (cpuid == NULL_RTX) + { + /* Invalid CPU argument. */ + error ("cpu %qs is an invalid argument to builtin %qs", + cpu, rs6000_builtin_info[(size_t) fcode].bifname); + return const0_rtx; + } + + rtx platform = gen_reg_rtx (SImode); + rtx address = gen_rtx_PLUS (Pmode, + gen_rtx_REG (Pmode, TLS_REGNUM), + GEN_INT (TCB_PLATFORM_OFFSET)); + rtx tcbmem = gen_const_mem (SImode, address); + emit_move_insn (platform, tcbmem); + emit_insn (gen_eqsi3 (target, platform, cpuid)); + } + else if (fcode == RS6000_BIF_CPU_SUPPORTS) + { + const char *hwcap = TREE_STRING_POINTER (arg); + rtx mask = NULL_RTX; + int hwcap_offset; + for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++) + if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0) + { + mask = GEN_INT (cpu_supports_info[i].mask); + hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id); + break; + } + if (mask == NULL_RTX) + { + /* Invalid HWCAP argument. */ + error ("%s %qs is an invalid argument to builtin %qs", + "hwcap", hwcap, + rs6000_builtin_info[(size_t) fcode].bifname); + return const0_rtx; + } + + rtx tcb_hwcap = gen_reg_rtx (SImode); + rtx address = gen_rtx_PLUS (Pmode, + gen_rtx_REG (Pmode, TLS_REGNUM), + GEN_INT (hwcap_offset)); + rtx tcbmem = gen_const_mem (SImode, address); + emit_move_insn (tcb_hwcap, tcbmem); + rtx scratch1 = gen_reg_rtx (SImode); + emit_insn (gen_rtx_SET (scratch1, + gen_rtx_AND (SImode, tcb_hwcap, mask))); + rtx scratch2 = gen_reg_rtx (SImode); + emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx)); + emit_insn (gen_rtx_SET (target, + gen_rtx_XOR (SImode, scratch2, const1_rtx))); + } + else + gcc_unreachable (); + + /* Record that we have expanded a CPU builtin, so that we can later + emit a reference to the special symbol exported by LIBC to ensure we + do not link against an old LIBC that doesn't support this feature. */ + cpu_builtin_p = true; + +#else + warning (0, "builtin %qs needs GLIBC (2.23 and newer) that exports hardware " + "capability bits", rs6000_builtin_info[(size_t) fcode].bifname); + + /* For old LIBCs, always return FALSE. */ + emit_move_insn (target, GEN_INT (0)); +#endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */ + + return target; +} + +/* For the element-reversing load/store built-ins, produce the correct + insn_code depending on the target endianness. 
*/ +static insn_code +elemrev_icode (rs6000_gen_builtins fcode) +{ + switch (fcode) + { + case RS6000_BIF_ST_ELEMREV_V1TI: + return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v1ti + : CODE_FOR_vsx_st_elemrev_v1ti; + + case RS6000_BIF_ST_ELEMREV_V2DF: + return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df + : CODE_FOR_vsx_st_elemrev_v2df; + + case RS6000_BIF_ST_ELEMREV_V2DI: + return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di + : CODE_FOR_vsx_st_elemrev_v2di; + + case RS6000_BIF_ST_ELEMREV_V4SF: + return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf + : CODE_FOR_vsx_st_elemrev_v4sf; + + case RS6000_BIF_ST_ELEMREV_V4SI: + return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si + : CODE_FOR_vsx_st_elemrev_v4si; + + case RS6000_BIF_ST_ELEMREV_V8HI: + return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi + : CODE_FOR_vsx_st_elemrev_v8hi; + + case RS6000_BIF_ST_ELEMREV_V16QI: + return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi + : CODE_FOR_vsx_st_elemrev_v16qi; + + case RS6000_BIF_LD_ELEMREV_V2DF: + return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df + : CODE_FOR_vsx_ld_elemrev_v2df; + + case RS6000_BIF_LD_ELEMREV_V1TI: + return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v1ti + : CODE_FOR_vsx_ld_elemrev_v1ti; + + case RS6000_BIF_LD_ELEMREV_V2DI: + return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di + : CODE_FOR_vsx_ld_elemrev_v2di; + + case RS6000_BIF_LD_ELEMREV_V4SF: + return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf + : CODE_FOR_vsx_ld_elemrev_v4sf; + + case RS6000_BIF_LD_ELEMREV_V4SI: + return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si + : CODE_FOR_vsx_ld_elemrev_v4si; + + case RS6000_BIF_LD_ELEMREV_V8HI: + return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi + : CODE_FOR_vsx_ld_elemrev_v8hi; + + case RS6000_BIF_LD_ELEMREV_V16QI: + return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi + : CODE_FOR_vsx_ld_elemrev_v16qi; + default: + ; + } + + gcc_unreachable (); +} + +/* Expand an AltiVec vector load builtin, and return the expanded rtx. */ +static rtx +ldv_expand_builtin (rtx target, insn_code icode, rtx *op, machine_mode tmode) +{ + if (target == 0 + || GET_MODE (target) != tmode + || !insn_data[icode].operand[0].predicate (target, tmode)) + target = gen_reg_rtx (tmode); + + op[1] = copy_to_mode_reg (Pmode, op[1]); + + /* These CELL built-ins use BLKmode instead of tmode for historical + (i.e., unknown) reasons. TODO: Is this necessary? */ + bool blk = (icode == CODE_FOR_altivec_lvlx + || icode == CODE_FOR_altivec_lvlxl + || icode == CODE_FOR_altivec_lvrx + || icode == CODE_FOR_altivec_lvrxl); + + /* For LVX, express the RTL accurately by ANDing the address with -16. + LVXL and LVE*X expand to use UNSPECs to hide their special behavior, + so the raw address is fine. */ + /* TODO: That statement seems wrong, as the UNSPECs don't surround the + memory expression, so a latent bug may lie here. The &-16 is likely + needed for all VMX-style loads. */ + if (icode == CODE_FOR_altivec_lvx_v1ti + || icode == CODE_FOR_altivec_lvx_v2df + || icode == CODE_FOR_altivec_lvx_v2di + || icode == CODE_FOR_altivec_lvx_v4sf + || icode == CODE_FOR_altivec_lvx_v4si + || icode == CODE_FOR_altivec_lvx_v8hi + || icode == CODE_FOR_altivec_lvx_v16qi) + { + rtx rawaddr; + if (op[0] == const0_rtx) + rawaddr = op[1]; + else + { + op[0] = copy_to_mode_reg (Pmode, op[0]); + rawaddr = gen_rtx_PLUS (Pmode, op[1], op[0]); + } + rtx addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16)); + addr = gen_rtx_MEM (blk ? 
BLKmode : tmode, addr); + + emit_insn (gen_rtx_SET (target, addr)); + } + else + { + rtx addr; + if (op[0] == const0_rtx) + addr = gen_rtx_MEM (blk ? BLKmode : tmode, op[1]); + else + { + op[0] = copy_to_mode_reg (Pmode, op[0]); + addr = gen_rtx_MEM (blk ? BLKmode : tmode, + gen_rtx_PLUS (Pmode, op[1], op[0])); + } + + rtx pat = GEN_FCN (icode) (target, addr); + if (!pat) + return 0; + emit_insn (pat); + } + + return target; +} + +/* Expand a builtin function that loads a scalar into a vector register + with sign extension, and return the expanded rtx. */ +static rtx +lxvrse_expand_builtin (rtx target, insn_code icode, rtx *op, + machine_mode tmode, machine_mode smode) +{ + rtx pat, addr; + op[1] = copy_to_mode_reg (Pmode, op[1]); + + if (op[0] == const0_rtx) + addr = gen_rtx_MEM (tmode, op[1]); + else + { + op[0] = copy_to_mode_reg (Pmode, op[0]); + addr = gen_rtx_MEM (smode, + gen_rtx_PLUS (Pmode, op[1], op[0])); + } + + rtx discratch = gen_reg_rtx (V2DImode); + rtx tiscratch = gen_reg_rtx (TImode); + + /* Emit the lxvr*x insn. */ + pat = GEN_FCN (icode) (tiscratch, addr); + if (!pat) + return 0; + emit_insn (pat); + + /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI. */ + rtx temp1; + if (icode == CODE_FOR_vsx_lxvrbx) + { + temp1 = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0); + emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1)); + } + else if (icode == CODE_FOR_vsx_lxvrhx) + { + temp1 = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0); + emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1)); + } + else if (icode == CODE_FOR_vsx_lxvrwx) + { + temp1 = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0); + emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1)); + } + else if (icode == CODE_FOR_vsx_lxvrdx) + discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0); + else + gcc_unreachable (); + + /* Emit the sign extension from V2DI (double) to TI (quad). */ + rtx temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0); + emit_insn (gen_extendditi2_vector (target, temp2)); + + return target; +} + +/* Expand a builtin function that loads a scalar into a vector register + with zero extension, and return the expanded rtx. */ +static rtx +lxvrze_expand_builtin (rtx target, insn_code icode, rtx *op, + machine_mode tmode, machine_mode smode) +{ + rtx pat, addr; + op[1] = copy_to_mode_reg (Pmode, op[1]); + + if (op[0] == const0_rtx) + addr = gen_rtx_MEM (tmode, op[1]); + else + { + op[0] = copy_to_mode_reg (Pmode, op[0]); + addr = gen_rtx_MEM (smode, + gen_rtx_PLUS (Pmode, op[1], op[0])); + } + + pat = GEN_FCN (icode) (target, addr); + if (!pat) + return 0; + emit_insn (pat); + return target; +} + +/* Expand an AltiVec vector store builtin, and return the expanded rtx. */ +static rtx +stv_expand_builtin (insn_code icode, rtx *op, + machine_mode tmode, machine_mode smode) +{ + op[2] = copy_to_mode_reg (Pmode, op[2]); + + /* For STVX, express the RTL accurately by ANDing the address with -16. + STVXL and STVE*X expand to use UNSPECs to hide their special behavior, + so the raw address is fine. */ + /* TODO: That statement seems wrong, as the UNSPECs don't surround the + memory expression, so a latent bug may lie here. The &-16 is likely + needed for all VMX-style stores. 
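   For reference, the VMX lvx/stvx instructions ignore the low four bits of the
   effective address, so the access always hits the enclosing 16-byte block; the
   AND with -16 below makes that explicit in the RTL.  A rough C model of the
   address actually used (the helper name is made up for illustration):

     #include <stdint.h>

     static inline void *
     vmx_block (void *ea)
     {
       return (void *) ((uintptr_t) ea & ~(uintptr_t) 15);
     }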
*/ + if (icode == CODE_FOR_altivec_stvx_v2df + || icode == CODE_FOR_altivec_stvx_v2di + || icode == CODE_FOR_altivec_stvx_v4sf + || icode == CODE_FOR_altivec_stvx_v4si + || icode == CODE_FOR_altivec_stvx_v8hi + || icode == CODE_FOR_altivec_stvx_v16qi) + { + rtx rawaddr; + if (op[1] == const0_rtx) + rawaddr = op[2]; + else + { + op[1] = copy_to_mode_reg (Pmode, op[1]); + rawaddr = gen_rtx_PLUS (Pmode, op[2], op[1]); + } + + rtx addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16)); + addr = gen_rtx_MEM (tmode, addr); + op[0] = copy_to_mode_reg (tmode, op[0]); + emit_insn (gen_rtx_SET (addr, op[0])); + } + else if (icode == CODE_FOR_vsx_stxvrbx + || icode == CODE_FOR_vsx_stxvrhx + || icode == CODE_FOR_vsx_stxvrwx + || icode == CODE_FOR_vsx_stxvrdx) + { + rtx truncrtx = gen_rtx_TRUNCATE (tmode, op[0]); + op[0] = copy_to_mode_reg (E_TImode, truncrtx); + + rtx addr; + if (op[1] == const0_rtx) + addr = gen_rtx_MEM (Pmode, op[2]); + else + { + op[1] = copy_to_mode_reg (Pmode, op[1]); + addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op[2], op[1])); + } + rtx pat = GEN_FCN (icode) (addr, op[0]); + if (pat) + emit_insn (pat); + } + else + { + if (!insn_data[icode].operand[1].predicate (op[0], smode)) + op[0] = copy_to_mode_reg (smode, op[0]); + + rtx addr; + if (op[1] == const0_rtx) + addr = gen_rtx_MEM (tmode, op[2]); + else + { + op[1] = copy_to_mode_reg (Pmode, op[1]); + addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op[2], op[1])); + } + + rtx pat = GEN_FCN (icode) (addr, op[0]); + if (pat) + emit_insn (pat); + } + + return NULL_RTX; +} + +/* Expand the MMA built-in in EXP, and return it. */ +static rtx +mma_expand_builtin (tree exp, rtx target, insn_code icode, + rs6000_gen_builtins fcode) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + bool void_func = TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node; + machine_mode tmode = VOIDmode; + rtx op[MAX_MMA_OPERANDS]; + unsigned nopnds = 0; + + if (!void_func) + { + tmode = insn_data[icode].operand[0].mode; + if (!(target + && GET_MODE (target) == tmode + && insn_data[icode].operand[0].predicate (target, tmode))) + target = gen_reg_rtx (tmode); + op[nopnds++] = target; + } + else + target = const0_rtx; + + call_expr_arg_iterator iter; + tree arg; + FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) + { + if (arg == error_mark_node) + return const0_rtx; + + rtx opnd; + const struct insn_operand_data *insn_op; + insn_op = &insn_data[icode].operand[nopnds]; + if (TREE_CODE (arg) == ADDR_EXPR + && MEM_P (DECL_RTL (TREE_OPERAND (arg, 0)))) + opnd = DECL_RTL (TREE_OPERAND (arg, 0)); + else + opnd = expand_normal (arg); + + if (!insn_op->predicate (opnd, insn_op->mode)) + { + /* TODO: This use of constraints needs explanation. */ + if (!strcmp (insn_op->constraint, "n")) + { + if (!CONST_INT_P (opnd)) + error ("argument %d must be an unsigned literal", nopnds); + else + error ("argument %d is an unsigned literal that is " + "out of range", nopnds); + return const0_rtx; + } + opnd = copy_to_mode_reg (insn_op->mode, opnd); + } + + /* Some MMA instructions have INOUT accumulator operands, so force + their target register to be the same as their input register. 
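   The *gerpp-style built-ins are the typical case: the accumulator is both read
   and updated.  A usage sketch (assumes compilation with -mcpu=power10 or -mmma
   so the MMA built-ins and the __vector_quad type are available):

     typedef __vector unsigned char vec_t;

     void
     accum_step (__vector_quad *acc, vec_t x, vec_t y)
     {
       __builtin_mma_xvf32gerpp (acc, x, y);
     }

   When the expander sees the "0" constraint on such an operand, it simply makes
   the output operand the same register, as done just below.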
*/ + if (!void_func + && nopnds == 1 + && !strcmp (insn_op->constraint, "0") + && insn_op->mode == tmode + && REG_P (opnd) + && insn_data[icode].operand[0].predicate (opnd, tmode)) + target = op[0] = opnd; + + op[nopnds++] = opnd; + } + + rtx pat; + switch (nopnds) + { + case 1: + pat = GEN_FCN (icode) (op[0]); + break; + case 2: + pat = GEN_FCN (icode) (op[0], op[1]); + break; + case 3: + /* The ASSEMBLE builtin source operands are reversed in little-endian + mode, so reorder them. */ + if (fcode == RS6000_BIF_ASSEMBLE_PAIR_V_INTERNAL && !WORDS_BIG_ENDIAN) + std::swap (op[1], op[2]); + pat = GEN_FCN (icode) (op[0], op[1], op[2]); + break; + case 4: + pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); + break; + case 5: + /* The ASSEMBLE builtin source operands are reversed in little-endian + mode, so reorder them. */ + if (fcode == RS6000_BIF_ASSEMBLE_ACC_INTERNAL && !WORDS_BIG_ENDIAN) + { + std::swap (op[1], op[4]); + std::swap (op[2], op[3]); + } + pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]); + break; + case 6: + pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]); + break; + case 7: + pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5], op[6]); + break; + default: + gcc_unreachable (); + } + + if (!pat) + return NULL_RTX; + + emit_insn (pat); + return target; +} + +/* Return the correct ICODE value depending on whether we are + setting or reading the HTM SPRs. */ +static inline enum insn_code +rs6000_htm_spr_icode (bool nonvoid) +{ + if (nonvoid) + return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si; + else + return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si; +} + +/* Return the appropriate SPR number associated with the given builtin. */ +static inline HOST_WIDE_INT +htm_spr_num (enum rs6000_gen_builtins code) +{ + if (code == RS6000_BIF_GET_TFHAR + || code == RS6000_BIF_SET_TFHAR) + return TFHAR_SPR; + else if (code == RS6000_BIF_GET_TFIAR + || code == RS6000_BIF_SET_TFIAR) + return TFIAR_SPR; + else if (code == RS6000_BIF_GET_TEXASR + || code == RS6000_BIF_SET_TEXASR) + return TEXASR_SPR; + gcc_assert (code == RS6000_BIF_GET_TEXASRU + || code == RS6000_BIF_SET_TEXASRU); + return TEXASRU_SPR; +} + +/* Expand the HTM builtin in EXP and store the result in TARGET. + Return the expanded rtx. */ +static rtx +htm_expand_builtin (bifdata *bifaddr, rs6000_gen_builtins fcode, + tree exp, rtx target) +{ + if (!TARGET_POWERPC64 + && (fcode == RS6000_BIF_TABORTDC + || fcode == RS6000_BIF_TABORTDCI)) + { + error ("builtin %qs is only valid in 64-bit mode", bifaddr->bifname); + return const0_rtx; + } + + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; + bool uses_spr = bif_is_htmspr (*bifaddr); + insn_code icode = bifaddr->icode; + + if (uses_spr) + icode = rs6000_htm_spr_icode (nonvoid); + + rtx op[MAX_HTM_OPERANDS]; + int nopnds = 0; + const insn_operand_data *insn_op = &insn_data[icode].operand[0]; + + if (nonvoid) + { + machine_mode tmode = (uses_spr) ? 
insn_op->mode : E_SImode; + if (!target + || GET_MODE (target) != tmode + || (uses_spr && !insn_op->predicate (target, tmode))) + target = gen_reg_rtx (tmode); + if (uses_spr) + op[nopnds++] = target; + } + + tree arg; + call_expr_arg_iterator iter; + + FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) + { + if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS) + return const0_rtx; + + insn_op = &insn_data[icode].operand[nopnds]; + op[nopnds] = expand_normal (arg); + + if (!insn_op->predicate (op[nopnds], insn_op->mode)) + { + /* TODO: This use of constraints could use explanation. + This happens a couple of places, perhaps make that a + function to document what's happening. */ + if (!strcmp (insn_op->constraint, "n")) + { + int arg_num = nonvoid ? nopnds : nopnds + 1; + if (!CONST_INT_P (op[nopnds])) + error ("argument %d must be an unsigned literal", arg_num); + else + error ("argument %d is an unsigned literal that is " + "out of range", arg_num); + return const0_rtx; + } + op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]); + } + + nopnds++; + } + + /* Handle the builtins for extended mnemonics. These accept + no arguments, but map to builtins that take arguments. */ + switch (fcode) + { + case RS6000_BIF_TENDALL: /* Alias for: tend. 1 */ + case RS6000_BIF_TRESUME: /* Alias for: tsr. 1 */ + op[nopnds++] = GEN_INT (1); + break; + case RS6000_BIF_TSUSPEND: /* Alias for: tsr. 0 */ + op[nopnds++] = GEN_INT (0); + break; + default: + break; + } + + /* If this builtin accesses SPRs, then pass in the appropriate + SPR number and SPR regno as the last two operands. */ + rtx cr = NULL_RTX; + if (uses_spr) + { + machine_mode mode = TARGET_POWERPC64 ? DImode : SImode; + op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode)); + } + /* If this builtin accesses a CR field, then pass in a scratch + CR field as the last operand. */ + else if (bif_is_htmcr (*bifaddr)) + { + cr = gen_reg_rtx (CCmode); + op[nopnds++] = cr; + } + + rtx pat; + switch (nopnds) + { + case 1: + pat = GEN_FCN (icode) (op[0]); + break; + case 2: + pat = GEN_FCN (icode) (op[0], op[1]); + break; + case 3: + pat = GEN_FCN (icode) (op[0], op[1], op[2]); + break; + case 4: + pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); + break; + default: + gcc_unreachable (); + } + if (!pat) + return NULL_RTX; + emit_insn (pat); + + if (bif_is_htmcr (*bifaddr)) + { + if (fcode == RS6000_BIF_TBEGIN) + { + /* Emit code to set TARGET to true or false depending on + whether the tbegin. instruction succeeded or failed + to start a transaction. We do this by placing the 1's + complement of CR's EQ bit into TARGET. */ + rtx scratch = gen_reg_rtx (SImode); + emit_insn (gen_rtx_SET (scratch, + gen_rtx_EQ (SImode, cr, + const0_rtx))); + emit_insn (gen_rtx_SET (target, + gen_rtx_XOR (SImode, scratch, + GEN_INT (1)))); + } + else + { + /* Emit code to copy the 4-bit condition register field + CR into the least significant end of register TARGET. */ + rtx scratch1 = gen_reg_rtx (SImode); + rtx scratch2 = gen_reg_rtx (SImode); + rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0); + emit_insn (gen_movcc (subreg, cr)); + emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28))); + emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf))); + } + } + + if (nonvoid) + return target; + return const0_rtx; +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. 
+ IGNORE is nonzero if the value is to be ignored. + Use the new builtin infrastructure. */ +rtx +rs6000_expand_builtin (tree exp, rtx target, rtx /* subtarget */, + machine_mode /* mode */, int ignore) +{ + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + enum rs6000_gen_builtins fcode + = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl); + size_t uns_fcode = (size_t)fcode; + enum insn_code icode = rs6000_builtin_info[uns_fcode].icode; + + /* TODO: The following commentary and code is inherited from the original + builtin processing code. The commentary is a bit confusing, with the + intent being that KFmode is always IEEE-128, IFmode is always IBM + double-double, and TFmode is the current long double. The code is + confusing in that it converts from KFmode to TFmode pattern names, + when the other direction is more intuitive. Try to address this. */ + + /* We have two different modes (KFmode, TFmode) that are the IEEE + 128-bit floating point type, depending on whether long double is the + IBM extended double (KFmode) or long double is IEEE 128-bit (TFmode). + It is simpler if we only define one variant of the built-in function, + and switch the code when defining it, rather than defining two built- + ins and using the overload table in rs6000-c.cc to switch between the + two. If we don't have the proper assembler, don't do this switch + because CODE_FOR_*kf* and CODE_FOR_*tf* will be CODE_FOR_nothing. */ + if (FLOAT128_IEEE_P (TFmode)) + switch (icode) + { + case CODE_FOR_sqrtkf2_odd: + icode = CODE_FOR_sqrttf2_odd; + break; + case CODE_FOR_trunckfdf2_odd: + icode = CODE_FOR_trunctfdf2_odd; + break; + case CODE_FOR_addkf3_odd: + icode = CODE_FOR_addtf3_odd; + break; + case CODE_FOR_subkf3_odd: + icode = CODE_FOR_subtf3_odd; + break; + case CODE_FOR_mulkf3_odd: + icode = CODE_FOR_multf3_odd; + break; + case CODE_FOR_divkf3_odd: + icode = CODE_FOR_divtf3_odd; + break; + case CODE_FOR_fmakf4_odd: + icode = CODE_FOR_fmatf4_odd; + break; + case CODE_FOR_xsxexpqp_kf: + icode = CODE_FOR_xsxexpqp_tf; + break; + case CODE_FOR_xsxsigqp_kf: + icode = CODE_FOR_xsxsigqp_tf; + break; + case CODE_FOR_xststdcnegqp_kf: + icode = CODE_FOR_xststdcnegqp_tf; + break; + case CODE_FOR_xsiexpqp_kf: + icode = CODE_FOR_xsiexpqp_tf; + break; + case CODE_FOR_xsiexpqpf_kf: + icode = CODE_FOR_xsiexpqpf_tf; + break; + case CODE_FOR_xststdcqp_kf: + icode = CODE_FOR_xststdcqp_tf; + break; + case CODE_FOR_xscmpexpqp_eq_kf: + icode = CODE_FOR_xscmpexpqp_eq_tf; + break; + case CODE_FOR_xscmpexpqp_lt_kf: + icode = CODE_FOR_xscmpexpqp_lt_tf; + break; + case CODE_FOR_xscmpexpqp_gt_kf: + icode = CODE_FOR_xscmpexpqp_gt_tf; + break; + case CODE_FOR_xscmpexpqp_unordered_kf: + icode = CODE_FOR_xscmpexpqp_unordered_tf; + break; + default: + break; + } + + /* In case of "#pragma target" changes, we initialize all builtins + but check for actual availability now, during expand time. For + invalid builtins, generate a normal call. 
*/ + bifdata *bifaddr = &rs6000_builtin_info[uns_fcode]; + bif_enable e = bifaddr->enable; + + if (!(e == ENB_ALWAYS + || (e == ENB_P5 && TARGET_POPCNTB) + || (e == ENB_P6 && TARGET_CMPB) + || (e == ENB_P6_64 && TARGET_CMPB && TARGET_POWERPC64) + || (e == ENB_ALTIVEC && TARGET_ALTIVEC) + || (e == ENB_CELL && TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL) + || (e == ENB_VSX && TARGET_VSX) + || (e == ENB_P7 && TARGET_POPCNTD) + || (e == ENB_P7_64 && TARGET_POPCNTD && TARGET_POWERPC64) + || (e == ENB_P8 && TARGET_DIRECT_MOVE) + || (e == ENB_P8V && TARGET_P8_VECTOR) + || (e == ENB_P9 && TARGET_MODULO) + || (e == ENB_P9_64 && TARGET_MODULO && TARGET_POWERPC64) + || (e == ENB_P9V && TARGET_P9_VECTOR) + || (e == ENB_IEEE128_HW && TARGET_FLOAT128_HW) + || (e == ENB_DFP && TARGET_DFP) + || (e == ENB_CRYPTO && TARGET_CRYPTO) + || (e == ENB_HTM && TARGET_HTM) + || (e == ENB_P10 && TARGET_POWER10) + || (e == ENB_P10_64 && TARGET_POWER10 && TARGET_POWERPC64) + || (e == ENB_MMA && TARGET_MMA))) + { + rs6000_invalid_builtin (fcode); + return expand_call (exp, target, ignore); + } + + if (bif_is_nosoft (*bifaddr) + && rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT) + { + error ("%qs not supported with %<-msoft-float%>", + bifaddr->bifname); + return const0_rtx; + } + + if (bif_is_no32bit (*bifaddr) && TARGET_32BIT) + { + error ("%qs is not supported in 32-bit mode", bifaddr->bifname); + return const0_rtx; + } + + if (bif_is_ibmld (*bifaddr) && !FLOAT128_2REG_P (TFmode)) + { + error ("%qs requires %<long double%> to be IBM 128-bit format", + bifaddr->bifname); + return const0_rtx; + } + + if (bif_is_cpu (*bifaddr)) + return cpu_expand_builtin (fcode, exp, target); + + if (bif_is_init (*bifaddr)) + return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target); + + if (bif_is_set (*bifaddr)) + return altivec_expand_vec_set_builtin (exp); + + if (bif_is_extract (*bifaddr)) + return altivec_expand_vec_ext_builtin (exp, target); + + if (bif_is_predicate (*bifaddr)) + return altivec_expand_predicate_builtin (icode, exp, target); + + if (bif_is_htm (*bifaddr)) + return htm_expand_builtin (bifaddr, fcode, exp, target); + + if (bif_is_32bit (*bifaddr) && TARGET_32BIT) + { + if (fcode == RS6000_BIF_MFTB) + icode = CODE_FOR_rs6000_mftb_si; + else if (fcode == RS6000_BIF_BPERMD) + icode = CODE_FOR_bpermd_si; + else if (fcode == RS6000_BIF_DARN) + icode = CODE_FOR_darn_64_si; + else if (fcode == RS6000_BIF_DARN_32) + icode = CODE_FOR_darn_32_si; + else if (fcode == RS6000_BIF_DARN_RAW) + icode = CODE_FOR_darn_raw_si; + else + gcc_unreachable (); + } + + if (bif_is_endian (*bifaddr) && BYTES_BIG_ENDIAN) + { + if (fcode == RS6000_BIF_LD_ELEMREV_V1TI) + icode = CODE_FOR_vsx_load_v1ti; + else if (fcode == RS6000_BIF_LD_ELEMREV_V2DF) + icode = CODE_FOR_vsx_load_v2df; + else if (fcode == RS6000_BIF_LD_ELEMREV_V2DI) + icode = CODE_FOR_vsx_load_v2di; + else if (fcode == RS6000_BIF_LD_ELEMREV_V4SF) + icode = CODE_FOR_vsx_load_v4sf; + else if (fcode == RS6000_BIF_LD_ELEMREV_V4SI) + icode = CODE_FOR_vsx_load_v4si; + else if (fcode == RS6000_BIF_LD_ELEMREV_V8HI) + icode = CODE_FOR_vsx_load_v8hi; + else if (fcode == RS6000_BIF_LD_ELEMREV_V16QI) + icode = CODE_FOR_vsx_load_v16qi; + else if (fcode == RS6000_BIF_ST_ELEMREV_V1TI) + icode = CODE_FOR_vsx_store_v1ti; + else if (fcode == RS6000_BIF_ST_ELEMREV_V2DF) + icode = CODE_FOR_vsx_store_v2df; + else if (fcode == RS6000_BIF_ST_ELEMREV_V2DI) + icode = CODE_FOR_vsx_store_v2di; + else if (fcode == RS6000_BIF_ST_ELEMREV_V4SF) + icode = CODE_FOR_vsx_store_v4sf; + else if (fcode == 
RS6000_BIF_ST_ELEMREV_V4SI) + icode = CODE_FOR_vsx_store_v4si; + else if (fcode == RS6000_BIF_ST_ELEMREV_V8HI) + icode = CODE_FOR_vsx_store_v8hi; + else if (fcode == RS6000_BIF_ST_ELEMREV_V16QI) + icode = CODE_FOR_vsx_store_v16qi; + else if (fcode == RS6000_BIF_VCLZLSBB_V16QI) + icode = CODE_FOR_vclzlsbb_v16qi; + else if (fcode == RS6000_BIF_VCLZLSBB_V4SI) + icode = CODE_FOR_vclzlsbb_v4si; + else if (fcode == RS6000_BIF_VCLZLSBB_V8HI) + icode = CODE_FOR_vclzlsbb_v8hi; + else if (fcode == RS6000_BIF_VCTZLSBB_V16QI) + icode = CODE_FOR_vctzlsbb_v16qi; + else if (fcode == RS6000_BIF_VCTZLSBB_V4SI) + icode = CODE_FOR_vctzlsbb_v4si; + else if (fcode == RS6000_BIF_VCTZLSBB_V8HI) + icode = CODE_FOR_vctzlsbb_v8hi; + else + gcc_unreachable (); + } + + + /* TRUE iff the built-in function returns void. */ + bool void_func = TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node; + /* Position of first argument (0 for void-returning functions, else 1). */ + int k; + /* Modes for the return value, if any, and arguments. */ + const int MAX_BUILTIN_ARGS = 6; + machine_mode mode[MAX_BUILTIN_ARGS + 1]; + + if (void_func) + k = 0; + else + { + k = 1; + mode[0] = insn_data[icode].operand[0].mode; + } + + /* Tree expressions for each argument. */ + tree arg[MAX_BUILTIN_ARGS]; + /* RTL expressions for each argument. */ + rtx op[MAX_BUILTIN_ARGS]; + + int nargs = bifaddr->nargs; + gcc_assert (nargs <= MAX_BUILTIN_ARGS); + + + for (int i = 0; i < nargs; i++) + { + arg[i] = CALL_EXPR_ARG (exp, i); + if (arg[i] == error_mark_node) + return const0_rtx; + STRIP_NOPS (arg[i]); + op[i] = expand_normal (arg[i]); + /* We have a couple of pesky patterns that don't specify the mode... */ + mode[i+k] = insn_data[icode].operand[i+k].mode; + if (!mode[i+k]) + mode[i+k] = Pmode; + } + + /* Check for restricted constant arguments. 
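   For instance, an operand declared const int<4> in rs6000-builtins.def (the
   vec_sld shift count is assumed here to be one such case) is handled by the
   RES_BITS arm below: only literal values 0..15 are accepted, and anything else
   draws the "must be a literal between 0 and 15, inclusive" error, with the
   call expanding to a zero constant.  Roughly:

     #include <altivec.h>

     vector signed int
     ok (vector signed int a, vector signed int b)
     {
       return vec_sld (a, b, 3);
     }

   is accepted, while passing a run-time variable as the third argument is
   rejected.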
*/ + for (int i = 0; i < 2; i++) + { + switch (bifaddr->restr[i]) + { + case RES_BITS: + { + size_t mask = 1; + mask <<= bifaddr->restr_val1[i]; + mask--; + tree restr_arg = arg[bifaddr->restr_opnd[i] - 1]; + STRIP_NOPS (restr_arg); + if (!(TREE_CODE (restr_arg) == INTEGER_CST + && (TREE_INT_CST_LOW (restr_arg) & ~mask) == 0)) + { + unsigned p = (1U << bifaddr->restr_val1[i]) - 1; + error ("argument %d must be a literal between 0 and %d," + " inclusive", + bifaddr->restr_opnd[i], p); + return CONST0_RTX (mode[0]); + } + break; + } + case RES_RANGE: + { + tree restr_arg = arg[bifaddr->restr_opnd[i] - 1]; + STRIP_NOPS (restr_arg); + if (!(TREE_CODE (restr_arg) == INTEGER_CST + && IN_RANGE (tree_to_shwi (restr_arg), + bifaddr->restr_val1[i], + bifaddr->restr_val2[i]))) + { + error ("argument %d must be a literal between %d and %d," + " inclusive", + bifaddr->restr_opnd[i], bifaddr->restr_val1[i], + bifaddr->restr_val2[i]); + return CONST0_RTX (mode[0]); + } + break; + } + case RES_VAR_RANGE: + { + tree restr_arg = arg[bifaddr->restr_opnd[i] - 1]; + STRIP_NOPS (restr_arg); + if (TREE_CODE (restr_arg) == INTEGER_CST + && !IN_RANGE (tree_to_shwi (restr_arg), + bifaddr->restr_val1[i], + bifaddr->restr_val2[i])) + { + error ("argument %d must be a variable or a literal " + "between %d and %d, inclusive", + bifaddr->restr_opnd[i], bifaddr->restr_val1[i], + bifaddr->restr_val2[i]); + return CONST0_RTX (mode[0]); + } + break; + } + case RES_VALUES: + { + tree restr_arg = arg[bifaddr->restr_opnd[i] - 1]; + STRIP_NOPS (restr_arg); + if (!(TREE_CODE (restr_arg) == INTEGER_CST + && (tree_to_shwi (restr_arg) == bifaddr->restr_val1[i] + || tree_to_shwi (restr_arg) == bifaddr->restr_val2[i]))) + { + error ("argument %d must be either a literal %d or a " + "literal %d", + bifaddr->restr_opnd[i], bifaddr->restr_val1[i], + bifaddr->restr_val2[i]); + return CONST0_RTX (mode[0]); + } + break; + } + default: + case RES_NONE: + break; + } + } + + if (bif_is_ldstmask (*bifaddr)) + return rs6000_expand_ldst_mask (target, arg[0]); + + if (bif_is_stvec (*bifaddr)) + { + if (bif_is_reve (*bifaddr)) + icode = elemrev_icode (fcode); + return stv_expand_builtin (icode, op, mode[0], mode[1]); + } + + if (bif_is_ldvec (*bifaddr)) + { + if (bif_is_reve (*bifaddr)) + icode = elemrev_icode (fcode); + return ldv_expand_builtin (target, icode, op, mode[0]); + } + + if (bif_is_lxvrse (*bifaddr)) + return lxvrse_expand_builtin (target, icode, op, mode[0], mode[1]); + + if (bif_is_lxvrze (*bifaddr)) + return lxvrze_expand_builtin (target, icode, op, mode[0], mode[1]); + + if (bif_is_mma (*bifaddr)) + return mma_expand_builtin (exp, target, icode, fcode); + + if (fcode == RS6000_BIF_PACK_IF + && TARGET_LONG_DOUBLE_128 + && !TARGET_IEEEQUAD) + { + icode = CODE_FOR_packtf; + fcode = RS6000_BIF_PACK_TF; + uns_fcode = (size_t) fcode; + } + else if (fcode == RS6000_BIF_UNPACK_IF + && TARGET_LONG_DOUBLE_128 + && !TARGET_IEEEQUAD) + { + icode = CODE_FOR_unpacktf; + fcode = RS6000_BIF_UNPACK_TF; + uns_fcode = (size_t) fcode; + } + + if (TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node) + target = NULL_RTX; + else if (target == 0 + || GET_MODE (target) != mode[0] + || !insn_data[icode].operand[0].predicate (target, mode[0])) + target = gen_reg_rtx (mode[0]); + + for (int i = 0; i < nargs; i++) + if (!insn_data[icode].operand[i+k].predicate (op[i], mode[i+k])) + op[i] = copy_to_mode_reg (mode[i+k], op[i]); + + rtx pat; + + switch (nargs) + { + case 0: + pat = (void_func + ? 
GEN_FCN (icode) () + : GEN_FCN (icode) (target)); + break; + case 1: + pat = (void_func + ? GEN_FCN (icode) (op[0]) + : GEN_FCN (icode) (target, op[0])); + break; + case 2: + pat = (void_func + ? GEN_FCN (icode) (op[0], op[1]) + : GEN_FCN (icode) (target, op[0], op[1])); + break; + case 3: + pat = (void_func + ? GEN_FCN (icode) (op[0], op[1], op[2]) + : GEN_FCN (icode) (target, op[0], op[1], op[2])); + break; + case 4: + pat = (void_func + ? GEN_FCN (icode) (op[0], op[1], op[2], op[3]) + : GEN_FCN (icode) (target, op[0], op[1], op[2], op[3])); + break; + case 5: + pat = (void_func + ? GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]) + : GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4])); + break; + case 6: + pat = (void_func + ? GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]) + : GEN_FCN (icode) (target, op[0], op[1], + op[2], op[3], op[4], op[5])); + break; + default: + gcc_assert (MAX_BUILTIN_ARGS == 6); + gcc_unreachable (); + } + + if (!pat) + return 0; + + emit_insn (pat); + return target; +} diff --git a/gcc/config/rs6000/rs6000-builtins.def b/gcc/config/rs6000/rs6000-builtins.def index a8ebb4a..ae2760c 100644 --- a/gcc/config/rs6000/rs6000-builtins.def +++ b/gcc/config/rs6000/rs6000-builtins.def @@ -2550,13 +2550,13 @@ VBPERMD altivec_vbpermd {} const signed int __builtin_altivec_vclzlsbb_v16qi (vsc); - VCLZLSBB_V16QI vclzlsbb_v16qi {} + VCLZLSBB_V16QI vctzlsbb_v16qi {endian} const signed int __builtin_altivec_vclzlsbb_v4si (vsi); - VCLZLSBB_V4SI vclzlsbb_v4si {} + VCLZLSBB_V4SI vctzlsbb_v4si {endian} const signed int __builtin_altivec_vclzlsbb_v8hi (vss); - VCLZLSBB_V8HI vclzlsbb_v8hi {} + VCLZLSBB_V8HI vctzlsbb_v8hi {endian} const vsc __builtin_altivec_vctzb (vsc); VCTZB ctzv16qi2 {} @@ -2571,13 +2571,13 @@ VCTZW ctzv4si2 {} const signed int __builtin_altivec_vctzlsbb_v16qi (vsc); - VCTZLSBB_V16QI vctzlsbb_v16qi {} + VCTZLSBB_V16QI vclzlsbb_v16qi {endian} const signed int __builtin_altivec_vctzlsbb_v4si (vsi); - VCTZLSBB_V4SI vctzlsbb_v4si {} + VCTZLSBB_V4SI vclzlsbb_v4si {endian} const signed int __builtin_altivec_vctzlsbb_v8hi (vss); - VCTZLSBB_V8HI vctzlsbb_v8hi {} + VCTZLSBB_V8HI vclzlsbb_v8hi {endian} const signed int __builtin_altivec_vcmpaeb_p (vsc, vsc); VCMPAEB_P vector_ae_v16qi_p {} @@ -3387,25 +3387,25 @@ const vull __builtin_altivec_vpextd (vull, vull); VPEXTD vpextd {} - const vull __builtin_altivec_vreplace_un_uv2di (vull, unsigned long long, \ - const int<4>); + const vuc __builtin_altivec_vreplace_un_uv2di (vull, unsigned long long, \ + const int<4>); VREPLACE_UN_UV2DI vreplace_un_v2di {} - const vui __builtin_altivec_vreplace_un_uv4si (vui, unsigned int, \ + const vuc __builtin_altivec_vreplace_un_uv4si (vui, unsigned int, \ const int<4>); VREPLACE_UN_UV4SI vreplace_un_v4si {} - const vd __builtin_altivec_vreplace_un_v2df (vd, double, const int<4>); + const vuc __builtin_altivec_vreplace_un_v2df (vd, double, const int<4>); VREPLACE_UN_V2DF vreplace_un_v2df {} - const vsll __builtin_altivec_vreplace_un_v2di (vsll, signed long long, \ - const int<4>); + const vuc __builtin_altivec_vreplace_un_v2di (vsll, signed long long, \ + const int<4>); VREPLACE_UN_V2DI vreplace_un_v2di {} - const vf __builtin_altivec_vreplace_un_v4sf (vf, float, const int<4>); + const vuc __builtin_altivec_vreplace_un_v4sf (vf, float, const int<4>); VREPLACE_UN_V4SF vreplace_un_v4sf {} - const vsi __builtin_altivec_vreplace_un_v4si (vsi, signed int, const int<4>); + const vuc __builtin_altivec_vreplace_un_v4si (vsi, signed int, const int<4>); VREPLACE_UN_V4SI 
vreplace_un_v4si {} const vull __builtin_altivec_vreplace_uv2di (vull, unsigned long long, \ @@ -3497,6 +3497,9 @@ const signed int __builtin_altivec_vstrihr_p (vss); VSTRIHR_P vstrir_p_v8hi {} + const vuq __builtin_vsx_vmsumcud (vull, vull, vuq); + VMSUMCUD vmsumcud {} + const signed int __builtin_vsx_xvtlsbb_all_ones (vsc); XVTLSBB_ONES xvtlsbbo {} diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc index 145421a..d2e480a 100644 --- a/gcc/config/rs6000/rs6000-c.cc +++ b/gcc/config/rs6000/rs6000-c.cc @@ -623,7 +623,11 @@ rs6000_cpu_cpp_builtins (cpp_reader *pfile) if (TARGET_FRSQRTES) builtin_define ("__RSQRTEF__"); if (TARGET_FLOAT128_TYPE) - builtin_define ("__FLOAT128_TYPE__"); + builtin_define ("__FLOAT128_TYPE__"); + if (ibm128_float_type_node) + builtin_define ("__SIZEOF_IBM128__=16"); + if (ieee128_float_type_node) + builtin_define ("__SIZEOF_FLOAT128__=16"); #ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB builtin_define ("__BUILTIN_CPU_SUPPORTS__"); #endif @@ -939,37 +943,25 @@ altivec_build_resolved_builtin (tree *args, int n, tree fntype, tree ret_type, enum resolution { unresolved, resolved, resolved_bad }; /* Resolve an overloaded vec_mul call and return a tree expression for the - resolved call if successful. NARGS is the number of arguments to the call. - ARGLIST contains the arguments. RES must be set to indicate the status of + resolved call if successful. ARGS contains the arguments to the call. + TYPES contains their types. RES must be set to indicate the status of the resolution attempt. LOC contains statement location information. */ static tree -resolve_vec_mul (resolution *res, vec<tree, va_gc> *arglist, unsigned nargs, - location_t loc) +resolve_vec_mul (resolution *res, tree *args, tree *types, location_t loc) { /* vec_mul needs to be special cased because there are no instructions for it for the {un}signed char, {un}signed short, and {un}signed int types. */ - if (nargs != 2) - { - error ("builtin %qs only accepts 2 arguments", "vec_mul"); - *res = resolved; - return error_mark_node; - } - - tree arg0 = (*arglist)[0]; - tree arg0_type = TREE_TYPE (arg0); - tree arg1 = (*arglist)[1]; - tree arg1_type = TREE_TYPE (arg1); /* Both arguments must be vectors and the types must be compatible. */ - if (TREE_CODE (arg0_type) != VECTOR_TYPE - || !lang_hooks.types_compatible_p (arg0_type, arg1_type)) + if (TREE_CODE (types[0]) != VECTOR_TYPE + || !lang_hooks.types_compatible_p (types[0], types[1])) { *res = resolved_bad; return error_mark_node; } - switch (TYPE_MODE (TREE_TYPE (arg0_type))) + switch (TYPE_MODE (TREE_TYPE (types[0]))) { case E_QImode: case E_HImode: @@ -978,21 +970,21 @@ resolve_vec_mul (resolution *res, vec<tree, va_gc> *arglist, unsigned nargs, case E_TImode: /* For scalar types just use a multiply expression. */ *res = resolved; - return fold_build2_loc (loc, MULT_EXPR, TREE_TYPE (arg0), arg0, - fold_convert (TREE_TYPE (arg0), arg1)); + return fold_build2_loc (loc, MULT_EXPR, types[0], args[0], + fold_convert (types[0], args[1])); case E_SFmode: { /* For floats use the xvmulsp instruction directly. */ *res = resolved; tree call = rs6000_builtin_decls[RS6000_BIF_XVMULSP]; - return build_call_expr (call, 2, arg0, arg1); + return build_call_expr (call, 2, args[0], args[1]); } case E_DFmode: { /* For doubles use the xvmuldp instruction directly. 
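   At the source level the resolution amounts to (a sketch):

     #include <altivec.h>

     vector float  mulf (vector float a,  vector float b)  { return vec_mul (a, b); }
     vector double muld (vector double a, vector double b) { return vec_mul (a, b); }
     vector int    muli (vector int a,    vector int b)    { return vec_mul (a, b); }

   The float and double variants become direct calls to the XVMULSP and XVMULDP
   built-ins, while the integer variants are rewritten into a plain MULT_EXPR
   (a * b), since there is no single vector-multiply instruction for those
   element types.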
*/ *res = resolved; tree call = rs6000_builtin_decls[RS6000_BIF_XVMULDP]; - return build_call_expr (call, 2, arg0, arg1); + return build_call_expr (call, 2, args[0], args[1]); } /* Other types are errors. */ default: @@ -1002,37 +994,25 @@ resolve_vec_mul (resolution *res, vec<tree, va_gc> *arglist, unsigned nargs, } /* Resolve an overloaded vec_cmpne call and return a tree expression for the - resolved call if successful. NARGS is the number of arguments to the call. - ARGLIST contains the arguments. RES must be set to indicate the status of + resolved call if successful. ARGS contains the arguments to the call. + TYPES contains their types. RES must be set to indicate the status of the resolution attempt. LOC contains statement location information. */ static tree -resolve_vec_cmpne (resolution *res, vec<tree, va_gc> *arglist, unsigned nargs, - location_t loc) +resolve_vec_cmpne (resolution *res, tree *args, tree *types, location_t loc) { /* vec_cmpne needs to be special cased because there are no instructions for it (prior to power 9). */ - if (nargs != 2) - { - error ("builtin %qs only accepts 2 arguments", "vec_cmpne"); - *res = resolved; - return error_mark_node; - } - - tree arg0 = (*arglist)[0]; - tree arg0_type = TREE_TYPE (arg0); - tree arg1 = (*arglist)[1]; - tree arg1_type = TREE_TYPE (arg1); /* Both arguments must be vectors and the types must be compatible. */ - if (TREE_CODE (arg0_type) != VECTOR_TYPE - || !lang_hooks.types_compatible_p (arg0_type, arg1_type)) + if (TREE_CODE (types[0]) != VECTOR_TYPE + || !lang_hooks.types_compatible_p (types[0], types[1])) { *res = resolved_bad; return error_mark_node; } - machine_mode arg0_elt_mode = TYPE_MODE (TREE_TYPE (arg0_type)); + machine_mode arg0_elt_mode = TYPE_MODE (TREE_TYPE (types[0])); /* Power9 instructions provide the most efficient implementation of ALTIVEC_BUILTIN_VEC_CMPNE if the mode is not DImode or TImode @@ -1060,8 +1040,8 @@ resolve_vec_cmpne (resolution *res, vec<tree, va_gc> *arglist, unsigned nargs, /* call = vec_cmpeq (va, vb) result = vec_nor (call, call). */ vec<tree, va_gc> *params = make_tree_vector (); - vec_safe_push (params, arg0); - vec_safe_push (params, arg1); + vec_safe_push (params, args[0]); + vec_safe_push (params, args[1]); tree decl = rs6000_builtin_decls[RS6000_OVLD_VEC_CMPEQ]; tree call = altivec_resolve_overloaded_builtin (loc, decl, params); /* Use save_expr to ensure that operands used more than once @@ -1088,46 +1068,30 @@ resolve_vec_cmpne (resolution *res, vec<tree, va_gc> *arglist, unsigned nargs, return error_mark_node; } -/* Resolve an overloaded vec_adde or vec_sube call and return a tree - expression for the resolved call if successful. NARGS is the number of - arguments to the call. ARGLIST contains the arguments. RES must be set - to indicate the status of the resolution attempt. LOC contains statement - location information. */ +/* Resolve an overloaded vec_adde or vec_sube call and return a tree expression + for the resolved call if successful. ARGS contains the arguments to the + call. TYPES contains their arguments. RES must be set to indicate the + status of the resolution attempt. LOC contains statement location + information. */ static tree resolve_vec_adde_sube (resolution *res, rs6000_gen_builtins fcode, - vec<tree, va_gc> *arglist, unsigned nargs, - location_t loc) + tree *args, tree *types, location_t loc) { /* vec_adde needs to be special cased because there is no instruction for the {un}signed int version. 
*/ - if (nargs != 3) - { - const char *name; - name = fcode == RS6000_OVLD_VEC_ADDE ? "vec_adde" : "vec_sube"; - error ("builtin %qs only accepts 3 arguments", name); - *res = resolved; - return error_mark_node; - } - - tree arg0 = (*arglist)[0]; - tree arg0_type = TREE_TYPE (arg0); - tree arg1 = (*arglist)[1]; - tree arg1_type = TREE_TYPE (arg1); - tree arg2 = (*arglist)[2]; - tree arg2_type = TREE_TYPE (arg2); /* All 3 arguments must be vectors of (signed or unsigned) (int or __int128) and the types must be compatible. */ - if (TREE_CODE (arg0_type) != VECTOR_TYPE - || !lang_hooks.types_compatible_p (arg0_type, arg1_type) - || !lang_hooks.types_compatible_p (arg1_type, arg2_type)) + if (TREE_CODE (types[0]) != VECTOR_TYPE + || !lang_hooks.types_compatible_p (types[0], types[1]) + || !lang_hooks.types_compatible_p (types[1], types[2])) { *res = resolved_bad; return error_mark_node; } - switch (TYPE_MODE (TREE_TYPE (arg0_type))) + switch (TYPE_MODE (TREE_TYPE (types[0]))) { /* For {un}signed ints, vec_adde (va, vb, carryv) == vec_add (vec_add (va, vb), @@ -1137,8 +1101,8 @@ resolve_vec_adde_sube (resolution *res, rs6000_gen_builtins fcode, case E_SImode: { vec<tree, va_gc> *params = make_tree_vector (); - vec_safe_push (params, arg0); - vec_safe_push (params, arg1); + vec_safe_push (params, args[0]); + vec_safe_push (params, args[1]); tree add_sub_builtin; if (fcode == RS6000_OVLD_VEC_ADDE) @@ -1148,10 +1112,10 @@ resolve_vec_adde_sube (resolution *res, rs6000_gen_builtins fcode, tree call = altivec_resolve_overloaded_builtin (loc, add_sub_builtin, params); - tree const1 = build_int_cstu (TREE_TYPE (arg0_type), 1); - tree ones_vector = build_vector_from_val (arg0_type, const1); - tree and_expr = fold_build2_loc (loc, BIT_AND_EXPR, arg0_type, - arg2, ones_vector); + tree const1 = build_int_cstu (TREE_TYPE (types[0]), 1); + tree ones_vector = build_vector_from_val (types[0], const1); + tree and_expr = fold_build2_loc (loc, BIT_AND_EXPR, types[0], + args[2], ones_vector); params = make_tree_vector (); vec_safe_push (params, call); vec_safe_push (params, and_expr); @@ -1175,45 +1139,29 @@ resolve_vec_adde_sube (resolution *res, rs6000_gen_builtins fcode, } /* Resolve an overloaded vec_addec or vec_subec call and return a tree - expression for the resolved call if successful. NARGS is the number of - arguments to the call. ARGLIST contains the arguments. RES must be set - to indicate the status of the resolution attempt. LOC contains statement - location information. */ + expression for the resolved call if successful. ARGS contains the arguments + to the call. TYPES contains their types. RES must be set to indicate the + status of the resolution attempt. LOC contains statement location + information. */ static tree resolve_vec_addec_subec (resolution *res, rs6000_gen_builtins fcode, - vec<tree, va_gc> *arglist, unsigned nargs, - location_t loc) + tree *args, tree *types, location_t loc) { /* vec_addec and vec_subec needs to be special cased because there is no instruction for the (un)signed int version. */ - if (nargs != 3) - { - const char *name; - name = fcode == RS6000_OVLD_VEC_ADDEC ? 
"vec_addec" : "vec_subec"; - error ("builtin %qs only accepts 3 arguments", name); - *res = resolved; - return error_mark_node; - } - - tree arg0 = (*arglist)[0]; - tree arg0_type = TREE_TYPE (arg0); - tree arg1 = (*arglist)[1]; - tree arg1_type = TREE_TYPE (arg1); - tree arg2 = (*arglist)[2]; - tree arg2_type = TREE_TYPE (arg2); /* All 3 arguments must be vectors of (signed or unsigned) (int or __int128) and the types must be compatible. */ - if (TREE_CODE (arg0_type) != VECTOR_TYPE - || !lang_hooks.types_compatible_p (arg0_type, arg1_type) - || !lang_hooks.types_compatible_p (arg1_type, arg2_type)) + if (TREE_CODE (types[0]) != VECTOR_TYPE + || !lang_hooks.types_compatible_p (types[0], types[1]) + || !lang_hooks.types_compatible_p (types[1], types[2])) { *res = resolved_bad; return error_mark_node; } - switch (TYPE_MODE (TREE_TYPE (arg0_type))) + switch (TYPE_MODE (TREE_TYPE (types[0]))) { /* For {un}signed ints, vec_addec (va, vb, carryv) == @@ -1224,11 +1172,11 @@ resolve_vec_addec_subec (resolution *res, rs6000_gen_builtins fcode, { /* Use save_expr to ensure that operands used more than once that may have side effects (like calls) are only evaluated once. */ - arg0 = save_expr (arg0); - arg1 = save_expr (arg1); + args[0] = save_expr (args[0]); + args[1] = save_expr (args[1]); vec<tree, va_gc> *params = make_tree_vector (); - vec_safe_push (params, arg0); - vec_safe_push (params, arg1); + vec_safe_push (params, args[0]); + vec_safe_push (params, args[1]); tree as_c_builtin; if (fcode == RS6000_OVLD_VEC_ADDEC) @@ -1239,8 +1187,8 @@ resolve_vec_addec_subec (resolution *res, rs6000_gen_builtins fcode, tree call1 = altivec_resolve_overloaded_builtin (loc, as_c_builtin, params); params = make_tree_vector (); - vec_safe_push (params, arg0); - vec_safe_push (params, arg1); + vec_safe_push (params, args[0]); + vec_safe_push (params, args[1]); tree as_builtin; if (fcode == RS6000_OVLD_VEC_ADDEC) @@ -1250,10 +1198,10 @@ resolve_vec_addec_subec (resolution *res, rs6000_gen_builtins fcode, tree call2 = altivec_resolve_overloaded_builtin (loc, as_builtin, params); - tree const1 = build_int_cstu (TREE_TYPE (arg0_type), 1); - tree ones_vector = build_vector_from_val (arg0_type, const1); - tree and_expr = fold_build2_loc (loc, BIT_AND_EXPR, arg0_type, - arg2, ones_vector); + tree const1 = build_int_cstu (TREE_TYPE (types[0]), 1); + tree ones_vector = build_vector_from_val (types[0], const1); + tree and_expr = fold_build2_loc (loc, BIT_AND_EXPR, types[0], + args[2], ones_vector); params = make_tree_vector (); vec_safe_push (params, call2); vec_safe_push (params, and_expr); @@ -1783,78 +1731,22 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, "%<vec_lvsr%> is deprecated for little endian; use " "assignment for unaligned loads and stores"); - /* Some overloads require special handling. */ - /* FIXME: Could we simplify the helper functions if we gathered arguments - and types into arrays first? 
*/ - tree returned_expr = NULL; - resolution res = unresolved; - vec<tree, va_gc> *arglist = static_cast<vec<tree, va_gc> *> (passed_arglist); - unsigned int nargs = vec_safe_length (arglist); - - switch (fcode) - { - case RS6000_OVLD_VEC_MUL: - returned_expr = resolve_vec_mul (&res, arglist, nargs, loc); - break; - - case RS6000_OVLD_VEC_CMPNE: - returned_expr = resolve_vec_cmpne (&res, arglist, nargs, loc); - break; - - case RS6000_OVLD_VEC_ADDE: - case RS6000_OVLD_VEC_SUBE: - returned_expr = resolve_vec_adde_sube (&res, fcode, arglist, nargs, loc); - break; - - case RS6000_OVLD_VEC_ADDEC: - case RS6000_OVLD_VEC_SUBEC: - returned_expr = resolve_vec_addec_subec (&res, fcode, arglist, nargs, - loc); - break; - - case RS6000_OVLD_VEC_SPLATS: - case RS6000_OVLD_VEC_PROMOTE: - returned_expr = resolve_vec_splats (&res, fcode, arglist, nargs); - break; - - case RS6000_OVLD_VEC_EXTRACT: - returned_expr = resolve_vec_extract (&res, arglist, nargs, loc); - break; - - case RS6000_OVLD_VEC_INSERT: - returned_expr = resolve_vec_insert (&res, arglist, nargs, loc); - break; - - case RS6000_OVLD_VEC_STEP: - returned_expr = resolve_vec_step (&res, arglist, nargs); - break; - - default: - ; - } - - if (res == resolved) - return returned_expr; - - /* "Regular" built-in functions and overloaded functions share a namespace - for some arrays, like rs6000_builtin_decls. But rs6000_overload_info - only has information for the overloaded functions, so we need an - adjusted index for that. */ - unsigned int adj_fcode = fcode - RS6000_OVLD_NONE; - - if (res == resolved_bad) - { - const char *name = rs6000_overload_info[adj_fcode].ovld_name; - error ("invalid parameter combination for AltiVec intrinsic %qs", name); - return error_mark_node; - } - /* Gather the arguments and their types into arrays for easier handling. */ tree fnargs = TYPE_ARG_TYPES (TREE_TYPE (fndecl)); tree types[MAX_OVLD_ARGS]; tree args[MAX_OVLD_ARGS]; unsigned int n; + /* Count the number of expected arguments. */ + unsigned expected_args = 0; + for (tree chain = fnargs; + chain && !VOID_TYPE_P (TREE_VALUE (chain)); + chain = TREE_CHAIN (chain)) + expected_args++; + + vec<tree, va_gc> *arglist = static_cast<vec<tree, va_gc> *> (passed_arglist); + unsigned int nargs = vec_safe_length (arglist); + for (n = 0; !VOID_TYPE_P (TREE_VALUE (fnargs)) && n < nargs; fnargs = TREE_CHAIN (fnargs), n++) @@ -1915,10 +1807,72 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl, } /* If the number of arguments did not match the prototype, return NULL - and the generic code will issue the appropriate error message. */ - if (!VOID_TYPE_P (TREE_VALUE (fnargs)) || n < nargs) + and the generic code will issue the appropriate error message. Skip + this test for functions where we don't fully describe all the possible + overload signatures in rs6000-overload.def (because they aren't relevant + to the expansion here). If we don't, we get confusing error messages. */ + /* As an example, for vec_splats we have: + +; There are no actual builtins for vec_splats. There is special handling for +; this in altivec_resolve_overloaded_builtin in rs6000-c.cc, where the call +; is replaced by a constructor. The single overload here causes +; __builtin_vec_splats to be registered with the front end so that can happen. +[VEC_SPLATS, vec_splats, __builtin_vec_splats] + vsi __builtin_vec_splats (vsi); + ABS_V4SI SPLATS_FAKERY + + So even though __builtin_vec_splats accepts all vector types, the + infrastructure cheats and just records one prototype. 
We end up getting + an error message that refers to this specific prototype even when we + are handling a different argument type. That is completely confusing + to the user, so it's best to let these cases be handled individually + in the resolve_vec_splats, etc., helper functions. */ + + if (n != expected_args + && !(fcode == RS6000_OVLD_VEC_PROMOTE + || fcode == RS6000_OVLD_VEC_SPLATS + || fcode == RS6000_OVLD_VEC_EXTRACT + || fcode == RS6000_OVLD_VEC_INSERT + || fcode == RS6000_OVLD_VEC_STEP)) return NULL; + /* Some overloads require special handling. */ + tree returned_expr = NULL; + resolution res = unresolved; + + if (fcode == RS6000_OVLD_VEC_MUL) + returned_expr = resolve_vec_mul (&res, args, types, loc); + else if (fcode == RS6000_OVLD_VEC_CMPNE) + returned_expr = resolve_vec_cmpne (&res, args, types, loc); + else if (fcode == RS6000_OVLD_VEC_ADDE || fcode == RS6000_OVLD_VEC_SUBE) + returned_expr = resolve_vec_adde_sube (&res, fcode, args, types, loc); + else if (fcode == RS6000_OVLD_VEC_ADDEC || fcode == RS6000_OVLD_VEC_SUBEC) + returned_expr = resolve_vec_addec_subec (&res, fcode, args, types, loc); + else if (fcode == RS6000_OVLD_VEC_SPLATS || fcode == RS6000_OVLD_VEC_PROMOTE) + returned_expr = resolve_vec_splats (&res, fcode, arglist, nargs); + else if (fcode == RS6000_OVLD_VEC_EXTRACT) + returned_expr = resolve_vec_extract (&res, arglist, nargs, loc); + else if (fcode == RS6000_OVLD_VEC_INSERT) + returned_expr = resolve_vec_insert (&res, arglist, nargs, loc); + else if (fcode == RS6000_OVLD_VEC_STEP) + returned_expr = resolve_vec_step (&res, arglist, nargs); + + if (res == resolved) + return returned_expr; + + /* "Regular" built-in functions and overloaded functions share a namespace + for some arrays, like rs6000_builtin_decls. But rs6000_overload_info + only has information for the overloaded functions, so we need an + adjusted index for that. */ + unsigned int adj_fcode = fcode - RS6000_OVLD_NONE; + + if (res == resolved_bad) + { + const char *name = rs6000_overload_info[adj_fcode].ovld_name; + error ("invalid parameter combination for AltiVec intrinsic %qs", name); + return error_mark_node; + } + bool unsupported_builtin = false; rs6000_gen_builtins instance_code; bool supported = false; diff --git a/gcc/config/rs6000/rs6000-call.cc b/gcc/config/rs6000/rs6000-call.cc index 5c870d4..f06c692 100644 --- a/gcc/config/rs6000/rs6000-call.cc +++ b/gcc/config/rs6000/rs6000-call.cc @@ -89,85 +89,6 @@ #define TARGET_NO_PROTOTYPE 0 #endif -/* Used by __builtin_cpu_is(), mapping from PLATFORM names to values. */ -static const struct -{ - const char *cpu; - unsigned int cpuid; -} cpu_is_info[] = { - { "power10", PPC_PLATFORM_POWER10 }, - { "power9", PPC_PLATFORM_POWER9 }, - { "power8", PPC_PLATFORM_POWER8 }, - { "power7", PPC_PLATFORM_POWER7 }, - { "power6x", PPC_PLATFORM_POWER6X }, - { "power6", PPC_PLATFORM_POWER6 }, - { "power5+", PPC_PLATFORM_POWER5_PLUS }, - { "power5", PPC_PLATFORM_POWER5 }, - { "ppc970", PPC_PLATFORM_PPC970 }, - { "power4", PPC_PLATFORM_POWER4 }, - { "ppca2", PPC_PLATFORM_PPCA2 }, - { "ppc476", PPC_PLATFORM_PPC476 }, - { "ppc464", PPC_PLATFORM_PPC464 }, - { "ppc440", PPC_PLATFORM_PPC440 }, - { "ppc405", PPC_PLATFORM_PPC405 }, - { "ppc-cell-be", PPC_PLATFORM_CELL_BE } -}; - -/* Used by __builtin_cpu_supports(), mapping from HWCAP names to masks. */ -static const struct -{ - const char *hwcap; - int mask; - unsigned int id; -} cpu_supports_info[] = { - /* AT_HWCAP masks. 
*/ - { "4xxmac", PPC_FEATURE_HAS_4xxMAC, 0 }, - { "altivec", PPC_FEATURE_HAS_ALTIVEC, 0 }, - { "arch_2_05", PPC_FEATURE_ARCH_2_05, 0 }, - { "arch_2_06", PPC_FEATURE_ARCH_2_06, 0 }, - { "archpmu", PPC_FEATURE_PERFMON_COMPAT, 0 }, - { "booke", PPC_FEATURE_BOOKE, 0 }, - { "cellbe", PPC_FEATURE_CELL_BE, 0 }, - { "dfp", PPC_FEATURE_HAS_DFP, 0 }, - { "efpdouble", PPC_FEATURE_HAS_EFP_DOUBLE, 0 }, - { "efpsingle", PPC_FEATURE_HAS_EFP_SINGLE, 0 }, - { "fpu", PPC_FEATURE_HAS_FPU, 0 }, - { "ic_snoop", PPC_FEATURE_ICACHE_SNOOP, 0 }, - { "mmu", PPC_FEATURE_HAS_MMU, 0 }, - { "notb", PPC_FEATURE_NO_TB, 0 }, - { "pa6t", PPC_FEATURE_PA6T, 0 }, - { "power4", PPC_FEATURE_POWER4, 0 }, - { "power5", PPC_FEATURE_POWER5, 0 }, - { "power5+", PPC_FEATURE_POWER5_PLUS, 0 }, - { "power6x", PPC_FEATURE_POWER6_EXT, 0 }, - { "ppc32", PPC_FEATURE_32, 0 }, - { "ppc601", PPC_FEATURE_601_INSTR, 0 }, - { "ppc64", PPC_FEATURE_64, 0 }, - { "ppcle", PPC_FEATURE_PPC_LE, 0 }, - { "smt", PPC_FEATURE_SMT, 0 }, - { "spe", PPC_FEATURE_HAS_SPE, 0 }, - { "true_le", PPC_FEATURE_TRUE_LE, 0 }, - { "ucache", PPC_FEATURE_UNIFIED_CACHE, 0 }, - { "vsx", PPC_FEATURE_HAS_VSX, 0 }, - - /* AT_HWCAP2 masks. */ - { "arch_2_07", PPC_FEATURE2_ARCH_2_07, 1 }, - { "dscr", PPC_FEATURE2_HAS_DSCR, 1 }, - { "ebb", PPC_FEATURE2_HAS_EBB, 1 }, - { "htm", PPC_FEATURE2_HAS_HTM, 1 }, - { "htm-nosc", PPC_FEATURE2_HTM_NOSC, 1 }, - { "htm-no-suspend", PPC_FEATURE2_HTM_NO_SUSPEND, 1 }, - { "isel", PPC_FEATURE2_HAS_ISEL, 1 }, - { "tar", PPC_FEATURE2_HAS_TAR, 1 }, - { "vcrypto", PPC_FEATURE2_HAS_VEC_CRYPTO, 1 }, - { "arch_3_00", PPC_FEATURE2_ARCH_3_00, 1 }, - { "ieee128", PPC_FEATURE2_HAS_IEEE128, 1 }, - { "darn", PPC_FEATURE2_DARN, 1 }, - { "scv", PPC_FEATURE2_SCV, 1 }, - { "arch_3_1", PPC_FEATURE2_ARCH_3_1, 1 }, - { "mma", PPC_FEATURE2_MMA, 1 }, -}; - /* Nonzero if we can use a floating-point register to pass this arg. */ #define USE_FP_FOR_ARG_P(CUM,MODE) \ (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \ @@ -2880,188 +2801,6 @@ rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, return build_va_arg_indirect_ref (addr); } -/* Debug utility to translate a type node to a single textual token. 
*/ -static -const char *rs6000_type_string (tree type_node) -{ - if (type_node == void_type_node) - return "void"; - else if (type_node == long_integer_type_node) - return "long"; - else if (type_node == long_unsigned_type_node) - return "ulong"; - else if (type_node == long_long_integer_type_node) - return "longlong"; - else if (type_node == long_long_unsigned_type_node) - return "ulonglong"; - else if (type_node == bool_V2DI_type_node) - return "vbll"; - else if (type_node == bool_V4SI_type_node) - return "vbi"; - else if (type_node == bool_V8HI_type_node) - return "vbs"; - else if (type_node == bool_V16QI_type_node) - return "vbc"; - else if (type_node == bool_int_type_node) - return "bool"; - else if (type_node == dfloat64_type_node) - return "_Decimal64"; - else if (type_node == double_type_node) - return "double"; - else if (type_node == intDI_type_node) - return "sll"; - else if (type_node == intHI_type_node) - return "ss"; - else if (type_node == ibm128_float_type_node) - return "__ibm128"; - else if (type_node == opaque_V4SI_type_node) - return "opaque"; - else if (POINTER_TYPE_P (type_node)) - return "void*"; - else if (type_node == intQI_type_node || type_node == char_type_node) - return "sc"; - else if (type_node == dfloat32_type_node) - return "_Decimal32"; - else if (type_node == float_type_node) - return "float"; - else if (type_node == intSI_type_node || type_node == integer_type_node) - return "si"; - else if (type_node == dfloat128_type_node) - return "_Decimal128"; - else if (type_node == long_double_type_node) - return "longdouble"; - else if (type_node == intTI_type_node) - return "sq"; - else if (type_node == unsigned_intDI_type_node) - return "ull"; - else if (type_node == unsigned_intHI_type_node) - return "us"; - else if (type_node == unsigned_intQI_type_node) - return "uc"; - else if (type_node == unsigned_intSI_type_node) - return "ui"; - else if (type_node == unsigned_intTI_type_node) - return "uq"; - else if (type_node == unsigned_V1TI_type_node) - return "vuq"; - else if (type_node == unsigned_V2DI_type_node) - return "vull"; - else if (type_node == unsigned_V4SI_type_node) - return "vui"; - else if (type_node == unsigned_V8HI_type_node) - return "vus"; - else if (type_node == unsigned_V16QI_type_node) - return "vuc"; - else if (type_node == V16QI_type_node) - return "vsc"; - else if (type_node == V1TI_type_node) - return "vsq"; - else if (type_node == V2DF_type_node) - return "vd"; - else if (type_node == V2DI_type_node) - return "vsll"; - else if (type_node == V4SF_type_node) - return "vf"; - else if (type_node == V4SI_type_node) - return "vsi"; - else if (type_node == V8HI_type_node) - return "vss"; - else if (type_node == pixel_V8HI_type_node) - return "vp"; - else if (type_node == pcvoid_type_node) - return "voidc*"; - else if (type_node == float128_type_node) - return "_Float128"; - else if (type_node == vector_pair_type_node) - return "__vector_pair"; - else if (type_node == vector_quad_type_node) - return "__vector_quad"; - - return "unknown"; -} - -static rtx -altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target) -{ - rtx pat, scratch; - tree cr6_form = CALL_EXPR_ARG (exp, 0); - tree arg0 = CALL_EXPR_ARG (exp, 1); - tree arg1 = CALL_EXPR_ARG (exp, 2); - rtx op0 = expand_normal (arg0); - rtx op1 = expand_normal (arg1); - machine_mode tmode = SImode; - machine_mode mode0 = insn_data[icode].operand[1].mode; - machine_mode mode1 = insn_data[icode].operand[2].mode; - int cr6_form_int; - - if (TREE_CODE (cr6_form) != INTEGER_CST) - { 
- error ("argument 1 of %qs must be a constant", - "__builtin_altivec_predicate"); - return const0_rtx; - } - else - cr6_form_int = TREE_INT_CST_LOW (cr6_form); - - gcc_assert (mode0 == mode1); - - /* If we have invalid arguments, bail out before generating bad rtl. */ - if (arg0 == error_mark_node || arg1 == error_mark_node) - return const0_rtx; - - if (target == 0 - || GET_MODE (target) != tmode - || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) - target = gen_reg_rtx (tmode); - - if (! (*insn_data[icode].operand[1].predicate) (op0, mode0)) - op0 = copy_to_mode_reg (mode0, op0); - if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) - op1 = copy_to_mode_reg (mode1, op1); - - /* Note that for many of the relevant operations (e.g. cmpne or - cmpeq) with float or double operands, it makes more sense for the - mode of the allocated scratch register to select a vector of - integer. But the choice to copy the mode of operand 0 was made - long ago and there are no plans to change it. */ - scratch = gen_reg_rtx (mode0); - - pat = GEN_FCN (icode) (scratch, op0, op1); - if (! pat) - return 0; - emit_insn (pat); - - /* The vec_any* and vec_all* predicates use the same opcodes for two - different operations, but the bits in CR6 will be different - depending on what information we want. So we have to play tricks - with CR6 to get the right bits out. - - If you think this is disgusting, look at the specs for the - AltiVec predicates. */ - - switch (cr6_form_int) - { - case 0: - emit_insn (gen_cr6_test_for_zero (target)); - break; - case 1: - emit_insn (gen_cr6_test_for_zero_reverse (target)); - break; - case 2: - emit_insn (gen_cr6_test_for_lt (target)); - break; - case 3: - emit_insn (gen_cr6_test_for_lt_reverse (target)); - break; - default: - error ("argument 1 of %qs is out of range", - "__builtin_altivec_predicate"); - break; - } - - return target; -} - rtx swap_endian_selector_for_mode (machine_mode mode) { @@ -3100,3269 +2839,6 @@ swap_endian_selector_for_mode (machine_mode mode) gen_rtvec_v (16, perm))); } -/* Return the correct ICODE value depending on whether we are - setting or reading the HTM SPRs. */ -static inline enum insn_code -rs6000_htm_spr_icode (bool nonvoid) -{ - if (nonvoid) - return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si; - else - return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si; -} - -/* Expand vec_init builtin. */ -static rtx -altivec_expand_vec_init_builtin (tree type, tree exp, rtx target) -{ - machine_mode tmode = TYPE_MODE (type); - machine_mode inner_mode = GET_MODE_INNER (tmode); - int i, n_elt = GET_MODE_NUNITS (tmode); - - gcc_assert (VECTOR_MODE_P (tmode)); - gcc_assert (n_elt == call_expr_nargs (exp)); - - if (!target || !register_operand (target, tmode)) - target = gen_reg_rtx (tmode); - - /* If we have a vector compromised of a single element, such as V1TImode, do - the initialization directly. */ - if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode)) - { - rtx x = expand_normal (CALL_EXPR_ARG (exp, 0)); - emit_move_insn (target, gen_lowpart (tmode, x)); - } - else - { - rtvec v = rtvec_alloc (n_elt); - - for (i = 0; i < n_elt; ++i) - { - rtx x = expand_normal (CALL_EXPR_ARG (exp, i)); - RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x); - } - - rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v)); - } - - return target; -} - -/* Return the integer constant in ARG. Constrain it to be in the range - of the subparts of VEC_TYPE; issue an error if not. 
*/ - -static int -get_element_number (tree vec_type, tree arg) -{ - unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1; - - if (!tree_fits_uhwi_p (arg) - || (elt = tree_to_uhwi (arg), elt > max)) - { - error ("selector must be an integer constant in the range [0, %wi]", max); - return 0; - } - - return elt; -} - -/* Expand vec_set builtin. */ -static rtx -altivec_expand_vec_set_builtin (tree exp) -{ - machine_mode tmode, mode1; - tree arg0, arg1, arg2; - int elt; - rtx op0, op1; - - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - arg2 = CALL_EXPR_ARG (exp, 2); - - tmode = TYPE_MODE (TREE_TYPE (arg0)); - mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); - gcc_assert (VECTOR_MODE_P (tmode)); - - op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL); - op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL); - elt = get_element_number (TREE_TYPE (arg0), arg2); - - if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode) - op1 = convert_modes (mode1, GET_MODE (op1), op1, true); - - op0 = force_reg (tmode, op0); - op1 = force_reg (mode1, op1); - - rs6000_expand_vector_set (op0, op1, GEN_INT (elt)); - - return op0; -} - -/* Expand vec_ext builtin. */ -static rtx -altivec_expand_vec_ext_builtin (tree exp, rtx target) -{ - machine_mode tmode, mode0; - tree arg0, arg1; - rtx op0; - rtx op1; - - arg0 = CALL_EXPR_ARG (exp, 0); - arg1 = CALL_EXPR_ARG (exp, 1); - - op0 = expand_normal (arg0); - op1 = expand_normal (arg1); - - if (TREE_CODE (arg1) == INTEGER_CST) - { - unsigned HOST_WIDE_INT elt; - unsigned HOST_WIDE_INT size = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)); - unsigned int truncated_selector; - /* Even if !tree_fits_uhwi_p (arg1)), TREE_INT_CST_LOW (arg0) - returns low-order bits of INTEGER_CST for modulo indexing. */ - elt = TREE_INT_CST_LOW (arg1); - truncated_selector = elt % size; - op1 = GEN_INT (truncated_selector); - } - - tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); - mode0 = TYPE_MODE (TREE_TYPE (arg0)); - gcc_assert (VECTOR_MODE_P (mode0)); - - op0 = force_reg (mode0, op0); - - if (optimize || !target || !register_operand (target, tmode)) - target = gen_reg_rtx (tmode); - - rs6000_expand_vector_extract (target, op0, op1); - - return target; -} - -/* Raise an error message for a builtin function that is called without the - appropriate target options being set. 
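get_element_number and altivec_expand_vec_ext_builtin above reduce a constant selector modulo the element count (truncated_selector = elt % size), so an out-of-range vec_extract index wraps instead of being diagnosed. A minimal sketch of that behaviour, assuming -maltivec:

/* Sketch: constant vec_extract selectors are taken modulo the number of
   elements, per the truncated_selector logic above.  */
#include <altivec.h>
#include <stdio.h>

int
main (void)
{
  vector signed int v = {10, 11, 12, 13};

  int a = vec_extract (v, 1);   /* Element 1 -> 11.  */
  int b = vec_extract (v, 5);   /* 5 % 4 == 1 -> also 11.  */

  printf ("%d %d\n", a, b);
  return 0;
}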
*/ - -void -rs6000_invalid_builtin (enum rs6000_gen_builtins fncode) -{ - size_t j = (size_t) fncode; - const char *name = rs6000_builtin_info[j].bifname; - - switch (rs6000_builtin_info[j].enable) - { - case ENB_P5: - error ("%qs requires the %qs option", name, "-mcpu=power5"); - break; - case ENB_P6: - error ("%qs requires the %qs option", name, "-mcpu=power6"); - break; - case ENB_P6_64: - error ("%qs requires the %qs option and either the %qs or %qs option", - name, "-mcpu=power6", "-m64", "-mpowerpc64"); - break; - case ENB_ALTIVEC: - error ("%qs requires the %qs option", name, "-maltivec"); - break; - case ENB_CELL: - error ("%qs requires the %qs option", name, "-mcpu=cell"); - break; - case ENB_VSX: - error ("%qs requires the %qs option", name, "-mvsx"); - break; - case ENB_P7: - error ("%qs requires the %qs option", name, "-mcpu=power7"); - break; - case ENB_P7_64: - error ("%qs requires the %qs option and either the %qs or %qs option", - name, "-mcpu=power7", "-m64", "-mpowerpc64"); - break; - case ENB_P8: - error ("%qs requires the %qs option", name, "-mcpu=power8"); - break; - case ENB_P8V: - error ("%qs requires the %qs and %qs options", name, "-mcpu=power8", - "-mvsx"); - break; - case ENB_P9: - error ("%qs requires the %qs option", name, "-mcpu=power9"); - break; - case ENB_P9_64: - error ("%qs requires the %qs option and either the %qs or %qs option", - name, "-mcpu=power9", "-m64", "-mpowerpc64"); - break; - case ENB_P9V: - error ("%qs requires the %qs and %qs options", name, "-mcpu=power9", - "-mvsx"); - break; - case ENB_IEEE128_HW: - error ("%qs requires quad-precision floating-point arithmetic", name); - break; - case ENB_DFP: - error ("%qs requires the %qs option", name, "-mhard-dfp"); - break; - case ENB_CRYPTO: - error ("%qs requires the %qs option", name, "-mcrypto"); - break; - case ENB_HTM: - error ("%qs requires the %qs option", name, "-mhtm"); - break; - case ENB_P10: - error ("%qs requires the %qs option", name, "-mcpu=power10"); - break; - case ENB_P10_64: - error ("%qs requires the %qs option and either the %qs or %qs option", - name, "-mcpu=power10", "-m64", "-mpowerpc64"); - break; - case ENB_MMA: - error ("%qs requires the %qs option", name, "-mmma"); - break; - default: - case ENB_ALWAYS: - gcc_unreachable (); - } -} - -/* Target hook for early folding of built-ins, shamelessly stolen - from ia64.cc. */ - -tree -rs6000_fold_builtin (tree fndecl ATTRIBUTE_UNUSED, - int n_args ATTRIBUTE_UNUSED, - tree *args ATTRIBUTE_UNUSED, - bool ignore ATTRIBUTE_UNUSED) -{ -#ifdef SUBTARGET_FOLD_BUILTIN - return SUBTARGET_FOLD_BUILTIN (fndecl, n_args, args, ignore); -#else - return NULL_TREE; -#endif -} - -/* Helper function to handle the gimple folding of a vector compare - operation. This sets up true/false vectors, and uses the - VEC_COND_EXPR operation. - CODE indicates which comparison is to be made. (EQ, GT, ...). - TYPE indicates the type of the result. - Code is inserted before GSI. */ -static tree -fold_build_vec_cmp (tree_code code, tree type, tree arg0, tree arg1, - gimple_stmt_iterator *gsi) -{ - tree cmp_type = truth_type_for (type); - tree zero_vec = build_zero_cst (type); - tree minus_one_vec = build_minus_one_cst (type); - tree temp = create_tmp_reg_or_ssa_name (cmp_type); - gimple *g = gimple_build_assign (temp, code, arg0, arg1); - gsi_insert_before (gsi, g, GSI_SAME_STMT); - return fold_build3 (VEC_COND_EXPR, type, temp, minus_one_vec, zero_vec); -} - -/* Helper function to handle the in-between steps for the - vector compare built-ins. 
*/ -static void -fold_compare_helper (gimple_stmt_iterator *gsi, tree_code code, gimple *stmt) -{ - tree arg0 = gimple_call_arg (stmt, 0); - tree arg1 = gimple_call_arg (stmt, 1); - tree lhs = gimple_call_lhs (stmt); - tree cmp = fold_build_vec_cmp (code, TREE_TYPE (lhs), arg0, arg1, gsi); - gimple *g = gimple_build_assign (lhs, cmp); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); -} - -/* Helper function to map V2DF and V4SF types to their - integral equivalents (V2DI and V4SI). */ -tree map_to_integral_tree_type (tree input_tree_type) -{ - if (INTEGRAL_TYPE_P (TREE_TYPE (input_tree_type))) - return input_tree_type; - else - { - if (types_compatible_p (TREE_TYPE (input_tree_type), - TREE_TYPE (V2DF_type_node))) - return V2DI_type_node; - else if (types_compatible_p (TREE_TYPE (input_tree_type), - TREE_TYPE (V4SF_type_node))) - return V4SI_type_node; - else - gcc_unreachable (); - } -} - -/* Helper function to handle the vector merge[hl] built-ins. The - implementation difference between h and l versions for this code are in - the values used when building of the permute vector for high word versus - low word merge. The variance is keyed off the use_high parameter. */ -static void -fold_mergehl_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_high) -{ - tree arg0 = gimple_call_arg (stmt, 0); - tree arg1 = gimple_call_arg (stmt, 1); - tree lhs = gimple_call_lhs (stmt); - tree lhs_type = TREE_TYPE (lhs); - int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type); - int midpoint = n_elts / 2; - int offset = 0; - - if (use_high == 1) - offset = midpoint; - - /* The permute_type will match the lhs for integral types. For double and - float types, the permute type needs to map to the V2 or V4 type that - matches size. */ - tree permute_type; - permute_type = map_to_integral_tree_type (lhs_type); - tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1); - - for (int i = 0; i < midpoint; i++) - { - elts.safe_push (build_int_cst (TREE_TYPE (permute_type), - offset + i)); - elts.safe_push (build_int_cst (TREE_TYPE (permute_type), - offset + n_elts + i)); - } - - tree permute = elts.build (); - - gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); -} - -/* Helper function to handle the vector merge[eo] built-ins. */ -static void -fold_mergeeo_helper (gimple_stmt_iterator *gsi, gimple *stmt, int use_odd) -{ - tree arg0 = gimple_call_arg (stmt, 0); - tree arg1 = gimple_call_arg (stmt, 1); - tree lhs = gimple_call_lhs (stmt); - tree lhs_type = TREE_TYPE (lhs); - int n_elts = TYPE_VECTOR_SUBPARTS (lhs_type); - - /* The permute_type will match the lhs for integral types. For double and - float types, the permute type needs to map to the V2 or V4 type that - matches size. */ - tree permute_type; - permute_type = map_to_integral_tree_type (lhs_type); - - tree_vector_builder elts (permute_type, VECTOR_CST_NELTS (arg0), 1); - - /* Build the permute vector. */ - for (int i = 0; i < n_elts / 2; i++) - { - elts.safe_push (build_int_cst (TREE_TYPE (permute_type), - 2*i + use_odd)); - elts.safe_push (build_int_cst (TREE_TYPE (permute_type), - 2*i + use_odd + n_elts)); - } - - tree permute = elts.build (); - - gimple *g = gimple_build_assign (lhs, VEC_PERM_EXPR, arg0, arg1, permute); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); -} - -/* Helper function to sort out which built-ins may be valid without having - a LHS. 
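fold_build_vec_cmp and fold_compare_helper above lower the compare built-ins to a VEC_COND_EXPR that selects between an all-ones and an all-zeros vector, i.e. the usual per-lane mask. A short sketch of that shape at the source level, assuming -maltivec:

/* Sketch: a vector compare yields per-lane all-ones / all-zeros masks
   (the minus_one_vec / zero_vec selection built above), which can then
   feed vec_sel.  */
#include <altivec.h>
#include <stdio.h>

int
main (void)
{
  vector signed int a = {1, 5, 3, 7};
  vector signed int b = {4, 4, 4, 4};

  vector bool int gt = vec_cmpgt (a, b);          /* 0 or ~0 per lane.  */
  vector signed int mx = vec_sel (b, a, gt);      /* Take a where a > b.  */
  vector unsigned int mask = (vector unsigned int) gt;

  for (int i = 0; i < 4; i++)
    printf ("lane %d: mask=0x%08x max=%d\n", i, mask[i], mx[i]);
  return 0;
}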
*/ -static bool -rs6000_builtin_valid_without_lhs (enum rs6000_gen_builtins fn_code, - tree fndecl) -{ - if (TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node) - return true; - - switch (fn_code) - { - case RS6000_BIF_STVX_V16QI: - case RS6000_BIF_STVX_V8HI: - case RS6000_BIF_STVX_V4SI: - case RS6000_BIF_STVX_V4SF: - case RS6000_BIF_STVX_V2DI: - case RS6000_BIF_STVX_V2DF: - case RS6000_BIF_STXVW4X_V16QI: - case RS6000_BIF_STXVW4X_V8HI: - case RS6000_BIF_STXVW4X_V4SF: - case RS6000_BIF_STXVW4X_V4SI: - case RS6000_BIF_STXVD2X_V2DF: - case RS6000_BIF_STXVD2X_V2DI: - return true; - default: - return false; - } -} - -/* Check whether a builtin function is supported in this target - configuration. */ -bool -rs6000_builtin_is_supported (enum rs6000_gen_builtins fncode) -{ - switch (rs6000_builtin_info[(size_t) fncode].enable) - { - case ENB_ALWAYS: - return true; - case ENB_P5: - return TARGET_POPCNTB; - case ENB_P6: - return TARGET_CMPB; - case ENB_P6_64: - return TARGET_CMPB && TARGET_POWERPC64; - case ENB_P7: - return TARGET_POPCNTD; - case ENB_P7_64: - return TARGET_POPCNTD && TARGET_POWERPC64; - case ENB_P8: - return TARGET_DIRECT_MOVE; - case ENB_P8V: - return TARGET_P8_VECTOR; - case ENB_P9: - return TARGET_MODULO; - case ENB_P9_64: - return TARGET_MODULO && TARGET_POWERPC64; - case ENB_P9V: - return TARGET_P9_VECTOR; - case ENB_P10: - return TARGET_POWER10; - case ENB_P10_64: - return TARGET_POWER10 && TARGET_POWERPC64; - case ENB_ALTIVEC: - return TARGET_ALTIVEC; - case ENB_VSX: - return TARGET_VSX; - case ENB_CELL: - return TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL; - case ENB_IEEE128_HW: - return TARGET_FLOAT128_HW; - case ENB_DFP: - return TARGET_DFP; - case ENB_CRYPTO: - return TARGET_CRYPTO; - case ENB_HTM: - return TARGET_HTM; - case ENB_MMA: - return TARGET_MMA; - default: - gcc_unreachable (); - } - gcc_unreachable (); -} - -/* Expand the MMA built-ins early, so that we can convert the pass-by-reference - __vector_quad arguments into pass-by-value arguments, leading to more - efficient code generation. */ -static bool -rs6000_gimple_fold_mma_builtin (gimple_stmt_iterator *gsi, - rs6000_gen_builtins fn_code) -{ - gimple *stmt = gsi_stmt (*gsi); - size_t fncode = (size_t) fn_code; - - if (!bif_is_mma (rs6000_builtin_info[fncode])) - return false; - - /* Each call that can be gimple-expanded has an associated built-in - function that it will expand into. If this one doesn't, we have - already expanded it! Exceptions: lxvp and stxvp. */ - if (rs6000_builtin_info[fncode].assoc_bif == RS6000_BIF_NONE - && fncode != RS6000_BIF_LXVP - && fncode != RS6000_BIF_STXVP) - return false; - - bifdata *bd = &rs6000_builtin_info[fncode]; - unsigned nopnds = bd->nargs; - gimple_seq new_seq = NULL; - gimple *new_call; - tree new_decl; - - /* Compatibility built-ins; we used to call these - __builtin_mma_{dis,}assemble_pair, but now we call them - __builtin_vsx_{dis,}assemble_pair. Handle the old versions. */ - if (fncode == RS6000_BIF_ASSEMBLE_PAIR) - fncode = RS6000_BIF_ASSEMBLE_PAIR_V; - else if (fncode == RS6000_BIF_DISASSEMBLE_PAIR) - fncode = RS6000_BIF_DISASSEMBLE_PAIR_V; - - if (fncode == RS6000_BIF_DISASSEMBLE_ACC - || fncode == RS6000_BIF_DISASSEMBLE_PAIR_V) - { - /* This is an MMA disassemble built-in function. */ - push_gimplify_context (true); - unsigned nvec = (fncode == RS6000_BIF_DISASSEMBLE_ACC) ? 
4 : 2; - tree dst_ptr = gimple_call_arg (stmt, 0); - tree src_ptr = gimple_call_arg (stmt, 1); - tree src_type = TREE_TYPE (src_ptr); - tree src = create_tmp_reg_or_ssa_name (TREE_TYPE (src_type)); - gimplify_assign (src, build_simple_mem_ref (src_ptr), &new_seq); - - /* If we are not disassembling an accumulator/pair or our destination is - another accumulator/pair, then just copy the entire thing as is. */ - if ((fncode == RS6000_BIF_DISASSEMBLE_ACC - && TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_quad_type_node) - || (fncode == RS6000_BIF_DISASSEMBLE_PAIR_V - && TREE_TYPE (TREE_TYPE (dst_ptr)) == vector_pair_type_node)) - { - tree dst = build_simple_mem_ref (build1 (VIEW_CONVERT_EXPR, - src_type, dst_ptr)); - gimplify_assign (dst, src, &new_seq); - pop_gimplify_context (NULL); - gsi_replace_with_seq (gsi, new_seq, true); - return true; - } - - /* If we're disassembling an accumulator into a different type, we need - to emit a xxmfacc instruction now, since we cannot do it later. */ - if (fncode == RS6000_BIF_DISASSEMBLE_ACC) - { - new_decl = rs6000_builtin_decls[RS6000_BIF_XXMFACC_INTERNAL]; - new_call = gimple_build_call (new_decl, 1, src); - src = create_tmp_reg_or_ssa_name (vector_quad_type_node); - gimple_call_set_lhs (new_call, src); - gimple_seq_add_stmt (&new_seq, new_call); - } - - /* Copy the accumulator/pair vector by vector. */ - new_decl - = rs6000_builtin_decls[rs6000_builtin_info[fncode].assoc_bif]; - tree dst_type = build_pointer_type_for_mode (unsigned_V16QI_type_node, - ptr_mode, true); - tree dst_base = build1 (VIEW_CONVERT_EXPR, dst_type, dst_ptr); - for (unsigned i = 0; i < nvec; i++) - { - unsigned index = WORDS_BIG_ENDIAN ? i : nvec - 1 - i; - tree dst = build2 (MEM_REF, unsigned_V16QI_type_node, dst_base, - build_int_cst (dst_type, index * 16)); - tree dstssa = create_tmp_reg_or_ssa_name (unsigned_V16QI_type_node); - new_call = gimple_build_call (new_decl, 2, src, - build_int_cstu (uint16_type_node, i)); - gimple_call_set_lhs (new_call, dstssa); - gimple_seq_add_stmt (&new_seq, new_call); - gimplify_assign (dst, dstssa, &new_seq); - } - pop_gimplify_context (NULL); - gsi_replace_with_seq (gsi, new_seq, true); - return true; - } - - /* TODO: Do some factoring on these two chunks. */ - if (fncode == RS6000_BIF_LXVP) - { - push_gimplify_context (true); - tree offset = gimple_call_arg (stmt, 0); - tree ptr = gimple_call_arg (stmt, 1); - tree lhs = gimple_call_lhs (stmt); - if (TREE_TYPE (TREE_TYPE (ptr)) != vector_pair_type_node) - ptr = build1 (VIEW_CONVERT_EXPR, - build_pointer_type (vector_pair_type_node), ptr); - tree mem = build_simple_mem_ref (build2 (POINTER_PLUS_EXPR, - TREE_TYPE (ptr), ptr, offset)); - gimplify_assign (lhs, mem, &new_seq); - pop_gimplify_context (NULL); - gsi_replace_with_seq (gsi, new_seq, true); - return true; - } - - if (fncode == RS6000_BIF_STXVP) - { - push_gimplify_context (true); - tree src = gimple_call_arg (stmt, 0); - tree offset = gimple_call_arg (stmt, 1); - tree ptr = gimple_call_arg (stmt, 2); - if (TREE_TYPE (TREE_TYPE (ptr)) != vector_pair_type_node) - ptr = build1 (VIEW_CONVERT_EXPR, - build_pointer_type (vector_pair_type_node), ptr); - tree mem = build_simple_mem_ref (build2 (POINTER_PLUS_EXPR, - TREE_TYPE (ptr), ptr, offset)); - gimplify_assign (mem, src, &new_seq); - pop_gimplify_context (NULL); - gsi_replace_with_seq (gsi, new_seq, true); - return true; - } - - /* Convert this built-in into an internal version that uses pass-by-value - arguments. The internal built-in is found in the assoc_bif field. 
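The code above rewrites __builtin_mma_disassemble_acc (and the __builtin_vsx_disassemble_pair variant) so the __vector_quad travels by value internally. A minimal user-level sketch of the built-in being handled, assuming MMA support (Power10, e.g. -mcpu=power10); __builtin_mma_xxsetaccz is used here only as a simple way to give the accumulator a defined value:

/* Sketch: round-trip through the accumulator built-ins whose
   pass-by-reference arguments the gimple folding above converts to
   pass-by-value form.  */
#include <altivec.h>
#include <stdio.h>

int
main (void)
{
  __vector_quad acc;
  vector unsigned char rows[4];

  __builtin_mma_xxsetaccz (&acc);              /* Zero the accumulator.  */
  __builtin_mma_disassemble_acc (rows, &acc);  /* Split into four VSX vectors.  */

  printf ("row 0, byte 0: %u\n", (unsigned int) rows[0][0]);
  return 0;
}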
*/ - new_decl = rs6000_builtin_decls[rs6000_builtin_info[fncode].assoc_bif]; - tree lhs, op[MAX_MMA_OPERANDS]; - tree acc = gimple_call_arg (stmt, 0); - push_gimplify_context (true); - - if (bif_is_quad (*bd)) - { - /* This built-in has a pass-by-reference accumulator input, so load it - into a temporary accumulator for use as a pass-by-value input. */ - op[0] = create_tmp_reg_or_ssa_name (vector_quad_type_node); - for (unsigned i = 1; i < nopnds; i++) - op[i] = gimple_call_arg (stmt, i); - gimplify_assign (op[0], build_simple_mem_ref (acc), &new_seq); - } - else - { - /* This built-in does not use its pass-by-reference accumulator argument - as an input argument, so remove it from the input list. */ - nopnds--; - for (unsigned i = 0; i < nopnds; i++) - op[i] = gimple_call_arg (stmt, i + 1); - } - - switch (nopnds) - { - case 0: - new_call = gimple_build_call (new_decl, 0); - break; - case 1: - new_call = gimple_build_call (new_decl, 1, op[0]); - break; - case 2: - new_call = gimple_build_call (new_decl, 2, op[0], op[1]); - break; - case 3: - new_call = gimple_build_call (new_decl, 3, op[0], op[1], op[2]); - break; - case 4: - new_call = gimple_build_call (new_decl, 4, op[0], op[1], op[2], op[3]); - break; - case 5: - new_call = gimple_build_call (new_decl, 5, op[0], op[1], op[2], op[3], - op[4]); - break; - case 6: - new_call = gimple_build_call (new_decl, 6, op[0], op[1], op[2], op[3], - op[4], op[5]); - break; - case 7: - new_call = gimple_build_call (new_decl, 7, op[0], op[1], op[2], op[3], - op[4], op[5], op[6]); - break; - default: - gcc_unreachable (); - } - - if (fncode == RS6000_BIF_BUILD_PAIR || fncode == RS6000_BIF_ASSEMBLE_PAIR_V) - lhs = create_tmp_reg_or_ssa_name (vector_pair_type_node); - else - lhs = create_tmp_reg_or_ssa_name (vector_quad_type_node); - gimple_call_set_lhs (new_call, lhs); - gimple_seq_add_stmt (&new_seq, new_call); - gimplify_assign (build_simple_mem_ref (acc), lhs, &new_seq); - pop_gimplify_context (NULL); - gsi_replace_with_seq (gsi, new_seq, true); - - return true; -} - -/* Fold a machine-dependent built-in in GIMPLE. (For folding into - a constant, use rs6000_fold_builtin.) */ -bool -rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi) -{ - gimple *stmt = gsi_stmt (*gsi); - tree fndecl = gimple_call_fndecl (stmt); - gcc_checking_assert (fndecl && DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD); - enum rs6000_gen_builtins fn_code - = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl); - tree arg0, arg1, lhs, temp; - enum tree_code bcode; - gimple *g; - - size_t uns_fncode = (size_t) fn_code; - enum insn_code icode = rs6000_builtin_info[uns_fncode].icode; - const char *fn_name1 = rs6000_builtin_info[uns_fncode].bifname; - const char *fn_name2 = (icode != CODE_FOR_nothing) - ? get_insn_name ((int) icode) - : "nothing"; - - if (TARGET_DEBUG_BUILTIN) - fprintf (stderr, "rs6000_gimple_fold_builtin %d %s %s\n", - fn_code, fn_name1, fn_name2); - - if (!rs6000_fold_gimple) - return false; - - /* Prevent gimple folding for code that does not have a LHS, unless it is - allowed per the rs6000_builtin_valid_without_lhs helper function. */ - if (!gimple_call_lhs (stmt) - && !rs6000_builtin_valid_without_lhs (fn_code, fndecl)) - return false; - - /* Don't fold invalid builtins, let rs6000_expand_builtin diagnose it. */ - if (!rs6000_builtin_is_supported (fn_code)) - return false; - - if (rs6000_gimple_fold_mma_builtin (gsi, fn_code)) - return true; - - switch (fn_code) - { - /* Flavors of vec_add. 
We deliberately don't expand - RS6000_BIF_VADDUQM as it gets lowered from V1TImode to - TImode, resulting in much poorer code generation. */ - case RS6000_BIF_VADDUBM: - case RS6000_BIF_VADDUHM: - case RS6000_BIF_VADDUWM: - case RS6000_BIF_VADDUDM: - case RS6000_BIF_VADDFP: - case RS6000_BIF_XVADDDP: - case RS6000_BIF_XVADDSP: - bcode = PLUS_EXPR; - do_binary: - arg0 = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - lhs = gimple_call_lhs (stmt); - if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (lhs))) - && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (lhs)))) - { - /* Ensure the binary operation is performed in a type - that wraps if it is integral type. */ - gimple_seq stmts = NULL; - tree type = unsigned_type_for (TREE_TYPE (lhs)); - tree uarg0 = gimple_build (&stmts, VIEW_CONVERT_EXPR, - type, arg0); - tree uarg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR, - type, arg1); - tree res = gimple_build (&stmts, gimple_location (stmt), bcode, - type, uarg0, uarg1); - gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); - g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, - build1 (VIEW_CONVERT_EXPR, - TREE_TYPE (lhs), res)); - gsi_replace (gsi, g, true); - return true; - } - g = gimple_build_assign (lhs, bcode, arg0, arg1); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - /* Flavors of vec_sub. We deliberately don't expand - RS6000_BIF_VSUBUQM. */ - case RS6000_BIF_VSUBUBM: - case RS6000_BIF_VSUBUHM: - case RS6000_BIF_VSUBUWM: - case RS6000_BIF_VSUBUDM: - case RS6000_BIF_VSUBFP: - case RS6000_BIF_XVSUBDP: - case RS6000_BIF_XVSUBSP: - bcode = MINUS_EXPR; - goto do_binary; - case RS6000_BIF_XVMULSP: - case RS6000_BIF_XVMULDP: - arg0 = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - lhs = gimple_call_lhs (stmt); - g = gimple_build_assign (lhs, MULT_EXPR, arg0, arg1); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - /* Even element flavors of vec_mul (signed). */ - case RS6000_BIF_VMULESB: - case RS6000_BIF_VMULESH: - case RS6000_BIF_VMULESW: - /* Even element flavors of vec_mul (unsigned). */ - case RS6000_BIF_VMULEUB: - case RS6000_BIF_VMULEUH: - case RS6000_BIF_VMULEUW: - arg0 = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - lhs = gimple_call_lhs (stmt); - g = gimple_build_assign (lhs, VEC_WIDEN_MULT_EVEN_EXPR, arg0, arg1); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - /* Odd element flavors of vec_mul (signed). */ - case RS6000_BIF_VMULOSB: - case RS6000_BIF_VMULOSH: - case RS6000_BIF_VMULOSW: - /* Odd element flavors of vec_mul (unsigned). */ - case RS6000_BIF_VMULOUB: - case RS6000_BIF_VMULOUH: - case RS6000_BIF_VMULOUW: - arg0 = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - lhs = gimple_call_lhs (stmt); - g = gimple_build_assign (lhs, VEC_WIDEN_MULT_ODD_EXPR, arg0, arg1); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - /* Flavors of vec_div (Integer). */ - case RS6000_BIF_DIV_V2DI: - case RS6000_BIF_UDIV_V2DI: - arg0 = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - lhs = gimple_call_lhs (stmt); - g = gimple_build_assign (lhs, TRUNC_DIV_EXPR, arg0, arg1); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - /* Flavors of vec_div (Float). 
*/ - case RS6000_BIF_XVDIVSP: - case RS6000_BIF_XVDIVDP: - arg0 = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - lhs = gimple_call_lhs (stmt); - g = gimple_build_assign (lhs, RDIV_EXPR, arg0, arg1); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - /* Flavors of vec_and. */ - case RS6000_BIF_VAND_V16QI_UNS: - case RS6000_BIF_VAND_V16QI: - case RS6000_BIF_VAND_V8HI_UNS: - case RS6000_BIF_VAND_V8HI: - case RS6000_BIF_VAND_V4SI_UNS: - case RS6000_BIF_VAND_V4SI: - case RS6000_BIF_VAND_V2DI_UNS: - case RS6000_BIF_VAND_V2DI: - case RS6000_BIF_VAND_V4SF: - case RS6000_BIF_VAND_V2DF: - arg0 = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - lhs = gimple_call_lhs (stmt); - g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, arg1); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - /* Flavors of vec_andc. */ - case RS6000_BIF_VANDC_V16QI_UNS: - case RS6000_BIF_VANDC_V16QI: - case RS6000_BIF_VANDC_V8HI_UNS: - case RS6000_BIF_VANDC_V8HI: - case RS6000_BIF_VANDC_V4SI_UNS: - case RS6000_BIF_VANDC_V4SI: - case RS6000_BIF_VANDC_V2DI_UNS: - case RS6000_BIF_VANDC_V2DI: - case RS6000_BIF_VANDC_V4SF: - case RS6000_BIF_VANDC_V2DF: - arg0 = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - lhs = gimple_call_lhs (stmt); - temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); - g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1); - gimple_set_location (g, gimple_location (stmt)); - gsi_insert_before (gsi, g, GSI_SAME_STMT); - g = gimple_build_assign (lhs, BIT_AND_EXPR, arg0, temp); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - /* Flavors of vec_nand. */ - case RS6000_BIF_NAND_V16QI_UNS: - case RS6000_BIF_NAND_V16QI: - case RS6000_BIF_NAND_V8HI_UNS: - case RS6000_BIF_NAND_V8HI: - case RS6000_BIF_NAND_V4SI_UNS: - case RS6000_BIF_NAND_V4SI: - case RS6000_BIF_NAND_V2DI_UNS: - case RS6000_BIF_NAND_V2DI: - case RS6000_BIF_NAND_V4SF: - case RS6000_BIF_NAND_V2DF: - arg0 = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - lhs = gimple_call_lhs (stmt); - temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); - g = gimple_build_assign (temp, BIT_AND_EXPR, arg0, arg1); - gimple_set_location (g, gimple_location (stmt)); - gsi_insert_before (gsi, g, GSI_SAME_STMT); - g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - /* Flavors of vec_or. */ - case RS6000_BIF_VOR_V16QI_UNS: - case RS6000_BIF_VOR_V16QI: - case RS6000_BIF_VOR_V8HI_UNS: - case RS6000_BIF_VOR_V8HI: - case RS6000_BIF_VOR_V4SI_UNS: - case RS6000_BIF_VOR_V4SI: - case RS6000_BIF_VOR_V2DI_UNS: - case RS6000_BIF_VOR_V2DI: - case RS6000_BIF_VOR_V4SF: - case RS6000_BIF_VOR_V2DF: - arg0 = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - lhs = gimple_call_lhs (stmt); - g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, arg1); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - /* flavors of vec_orc. 
*/ - case RS6000_BIF_ORC_V16QI_UNS: - case RS6000_BIF_ORC_V16QI: - case RS6000_BIF_ORC_V8HI_UNS: - case RS6000_BIF_ORC_V8HI: - case RS6000_BIF_ORC_V4SI_UNS: - case RS6000_BIF_ORC_V4SI: - case RS6000_BIF_ORC_V2DI_UNS: - case RS6000_BIF_ORC_V2DI: - case RS6000_BIF_ORC_V4SF: - case RS6000_BIF_ORC_V2DF: - arg0 = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - lhs = gimple_call_lhs (stmt); - temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); - g = gimple_build_assign (temp, BIT_NOT_EXPR, arg1); - gimple_set_location (g, gimple_location (stmt)); - gsi_insert_before (gsi, g, GSI_SAME_STMT); - g = gimple_build_assign (lhs, BIT_IOR_EXPR, arg0, temp); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - /* Flavors of vec_xor. */ - case RS6000_BIF_VXOR_V16QI_UNS: - case RS6000_BIF_VXOR_V16QI: - case RS6000_BIF_VXOR_V8HI_UNS: - case RS6000_BIF_VXOR_V8HI: - case RS6000_BIF_VXOR_V4SI_UNS: - case RS6000_BIF_VXOR_V4SI: - case RS6000_BIF_VXOR_V2DI_UNS: - case RS6000_BIF_VXOR_V2DI: - case RS6000_BIF_VXOR_V4SF: - case RS6000_BIF_VXOR_V2DF: - arg0 = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - lhs = gimple_call_lhs (stmt); - g = gimple_build_assign (lhs, BIT_XOR_EXPR, arg0, arg1); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - /* Flavors of vec_nor. */ - case RS6000_BIF_VNOR_V16QI_UNS: - case RS6000_BIF_VNOR_V16QI: - case RS6000_BIF_VNOR_V8HI_UNS: - case RS6000_BIF_VNOR_V8HI: - case RS6000_BIF_VNOR_V4SI_UNS: - case RS6000_BIF_VNOR_V4SI: - case RS6000_BIF_VNOR_V2DI_UNS: - case RS6000_BIF_VNOR_V2DI: - case RS6000_BIF_VNOR_V4SF: - case RS6000_BIF_VNOR_V2DF: - arg0 = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - lhs = gimple_call_lhs (stmt); - temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); - g = gimple_build_assign (temp, BIT_IOR_EXPR, arg0, arg1); - gimple_set_location (g, gimple_location (stmt)); - gsi_insert_before (gsi, g, GSI_SAME_STMT); - g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - /* flavors of vec_abs. */ - case RS6000_BIF_ABS_V16QI: - case RS6000_BIF_ABS_V8HI: - case RS6000_BIF_ABS_V4SI: - case RS6000_BIF_ABS_V4SF: - case RS6000_BIF_ABS_V2DI: - case RS6000_BIF_XVABSDP: - case RS6000_BIF_XVABSSP: - arg0 = gimple_call_arg (stmt, 0); - if (INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (arg0))) - && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (TREE_TYPE (arg0)))) - return false; - lhs = gimple_call_lhs (stmt); - g = gimple_build_assign (lhs, ABS_EXPR, arg0); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - /* flavors of vec_min. */ - case RS6000_BIF_XVMINDP: - case RS6000_BIF_XVMINSP: - case RS6000_BIF_VMINFP: - { - lhs = gimple_call_lhs (stmt); - tree type = TREE_TYPE (lhs); - if (HONOR_NANS (type)) - return false; - gcc_fallthrough (); - } - case RS6000_BIF_VMINSD: - case RS6000_BIF_VMINUD: - case RS6000_BIF_VMINSB: - case RS6000_BIF_VMINSH: - case RS6000_BIF_VMINSW: - case RS6000_BIF_VMINUB: - case RS6000_BIF_VMINUH: - case RS6000_BIF_VMINUW: - arg0 = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - lhs = gimple_call_lhs (stmt); - g = gimple_build_assign (lhs, MIN_EXPR, arg0, arg1); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - /* flavors of vec_max. 
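The float and double min/max cases here are folded to MIN_EXPR/MAX_EXPR only when NaNs are not honoured (the HONOR_NANS bail-out), because generic MIN_EXPR/MAX_EXPR make no promises about NaN operands. A small sketch of the intrinsic itself, assuming -mvsx; whether it gets folded in GIMPLE depends on the floating-point options, as that check implies:

/* Sketch: vec_max on doubles.  Under default FP rules the HONOR_NANS
   check above keeps this as the machine built-in; with
   -ffinite-math-only it may instead be folded to a plain MAX_EXPR.  */
#include <altivec.h>
#include <stdio.h>

int
main (void)
{
  vector double a = {1.0, 4.0};
  vector double b = {3.0, 2.0};

  vector double m = vec_max (a, b);
  printf ("%f %f\n", m[0], m[1]);
  return 0;
}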
*/ - case RS6000_BIF_XVMAXDP: - case RS6000_BIF_XVMAXSP: - case RS6000_BIF_VMAXFP: - { - lhs = gimple_call_lhs (stmt); - tree type = TREE_TYPE (lhs); - if (HONOR_NANS (type)) - return false; - gcc_fallthrough (); - } - case RS6000_BIF_VMAXSD: - case RS6000_BIF_VMAXUD: - case RS6000_BIF_VMAXSB: - case RS6000_BIF_VMAXSH: - case RS6000_BIF_VMAXSW: - case RS6000_BIF_VMAXUB: - case RS6000_BIF_VMAXUH: - case RS6000_BIF_VMAXUW: - arg0 = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - lhs = gimple_call_lhs (stmt); - g = gimple_build_assign (lhs, MAX_EXPR, arg0, arg1); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - /* Flavors of vec_eqv. */ - case RS6000_BIF_EQV_V16QI: - case RS6000_BIF_EQV_V8HI: - case RS6000_BIF_EQV_V4SI: - case RS6000_BIF_EQV_V4SF: - case RS6000_BIF_EQV_V2DF: - case RS6000_BIF_EQV_V2DI: - arg0 = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - lhs = gimple_call_lhs (stmt); - temp = create_tmp_reg_or_ssa_name (TREE_TYPE (arg1)); - g = gimple_build_assign (temp, BIT_XOR_EXPR, arg0, arg1); - gimple_set_location (g, gimple_location (stmt)); - gsi_insert_before (gsi, g, GSI_SAME_STMT); - g = gimple_build_assign (lhs, BIT_NOT_EXPR, temp); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - /* Flavors of vec_rotate_left. */ - case RS6000_BIF_VRLB: - case RS6000_BIF_VRLH: - case RS6000_BIF_VRLW: - case RS6000_BIF_VRLD: - arg0 = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - lhs = gimple_call_lhs (stmt); - g = gimple_build_assign (lhs, LROTATE_EXPR, arg0, arg1); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - /* Flavors of vector shift right algebraic. - vec_sra{b,h,w} -> vsra{b,h,w}. */ - case RS6000_BIF_VSRAB: - case RS6000_BIF_VSRAH: - case RS6000_BIF_VSRAW: - case RS6000_BIF_VSRAD: - { - arg0 = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - lhs = gimple_call_lhs (stmt); - tree arg1_type = TREE_TYPE (arg1); - tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1)); - tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type)); - location_t loc = gimple_location (stmt); - /* Force arg1 into the range valid matching the arg0 type. */ - /* Build a vector consisting of the max valid bit-size values. */ - int n_elts = VECTOR_CST_NELTS (arg1); - tree element_size = build_int_cst (unsigned_element_type, - 128 / n_elts); - tree_vector_builder elts (unsigned_arg1_type, n_elts, 1); - for (int i = 0; i < n_elts; i++) - elts.safe_push (element_size); - tree modulo_tree = elts.build (); - /* Modulo the provided shift value against that vector. */ - gimple_seq stmts = NULL; - tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR, - unsigned_arg1_type, arg1); - tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR, - unsigned_arg1_type, unsigned_arg1, - modulo_tree); - gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); - /* And finally, do the shift. */ - g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, new_arg1); - gimple_set_location (g, loc); - gsi_replace (gsi, g, true); - return true; - } - /* Flavors of vector shift left. - builtin_altivec_vsl{b,h,w} -> vsl{b,h,w}. 
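The vsra* fold above, and the vsl*/vsr* folds that follow, force each shift count into range with a TRUNC_MOD_EXPR against a vector of element bit-widths, matching the hardware rule that only the low bits of each count are used. A small sketch of that modulo behaviour via vec_sl, assuming -maltivec:

/* Sketch: per-element shift counts act modulo the element width, which
   is what the modulo_tree construction above makes explicit in GIMPLE.  */
#include <altivec.h>
#include <stdio.h>

int
main (void)
{
  vector unsigned int v  = {1, 1, 1, 1};
  vector unsigned int sh = {1, 31, 32, 33};   /* 32 and 33 behave like 0 and 1.  */

  vector unsigned int r = vec_sl (v, sh);

  for (int i = 0; i < 4; i++)
    printf ("lane %d: 0x%08x\n", i, r[i]);
  return 0;
}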
*/ - case RS6000_BIF_VSLB: - case RS6000_BIF_VSLH: - case RS6000_BIF_VSLW: - case RS6000_BIF_VSLD: - { - location_t loc; - gimple_seq stmts = NULL; - arg0 = gimple_call_arg (stmt, 0); - tree arg0_type = TREE_TYPE (arg0); - if (INTEGRAL_TYPE_P (TREE_TYPE (arg0_type)) - && !TYPE_OVERFLOW_WRAPS (TREE_TYPE (arg0_type))) - return false; - arg1 = gimple_call_arg (stmt, 1); - tree arg1_type = TREE_TYPE (arg1); - tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1)); - tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type)); - loc = gimple_location (stmt); - lhs = gimple_call_lhs (stmt); - /* Force arg1 into the range valid matching the arg0 type. */ - /* Build a vector consisting of the max valid bit-size values. */ - int n_elts = VECTOR_CST_NELTS (arg1); - int tree_size_in_bits = TREE_INT_CST_LOW (size_in_bytes (arg1_type)) - * BITS_PER_UNIT; - tree element_size = build_int_cst (unsigned_element_type, - tree_size_in_bits / n_elts); - tree_vector_builder elts (unsigned_type_for (arg1_type), n_elts, 1); - for (int i = 0; i < n_elts; i++) - elts.safe_push (element_size); - tree modulo_tree = elts.build (); - /* Modulo the provided shift value against that vector. */ - tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR, - unsigned_arg1_type, arg1); - tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR, - unsigned_arg1_type, unsigned_arg1, - modulo_tree); - gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); - /* And finally, do the shift. */ - g = gimple_build_assign (lhs, LSHIFT_EXPR, arg0, new_arg1); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - } - /* Flavors of vector shift right. */ - case RS6000_BIF_VSRB: - case RS6000_BIF_VSRH: - case RS6000_BIF_VSRW: - case RS6000_BIF_VSRD: - { - arg0 = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - lhs = gimple_call_lhs (stmt); - tree arg1_type = TREE_TYPE (arg1); - tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1)); - tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type)); - location_t loc = gimple_location (stmt); - gimple_seq stmts = NULL; - /* Convert arg0 to unsigned. */ - tree arg0_unsigned - = gimple_build (&stmts, VIEW_CONVERT_EXPR, - unsigned_type_for (TREE_TYPE (arg0)), arg0); - /* Force arg1 into the range valid matching the arg0 type. */ - /* Build a vector consisting of the max valid bit-size values. */ - int n_elts = VECTOR_CST_NELTS (arg1); - tree element_size = build_int_cst (unsigned_element_type, - 128 / n_elts); - tree_vector_builder elts (unsigned_arg1_type, n_elts, 1); - for (int i = 0; i < n_elts; i++) - elts.safe_push (element_size); - tree modulo_tree = elts.build (); - /* Modulo the provided shift value against that vector. */ - tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR, - unsigned_arg1_type, arg1); - tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR, - unsigned_arg1_type, unsigned_arg1, - modulo_tree); - /* Do the shift. */ - tree res - = gimple_build (&stmts, RSHIFT_EXPR, - TREE_TYPE (arg0_unsigned), arg0_unsigned, new_arg1); - /* Convert result back to the lhs type. */ - res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res); - gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); - replace_call_with_value (gsi, res); - return true; - } - /* Vector loads. 
*/ - case RS6000_BIF_LVX_V16QI: - case RS6000_BIF_LVX_V8HI: - case RS6000_BIF_LVX_V4SI: - case RS6000_BIF_LVX_V4SF: - case RS6000_BIF_LVX_V2DI: - case RS6000_BIF_LVX_V2DF: - case RS6000_BIF_LVX_V1TI: - { - arg0 = gimple_call_arg (stmt, 0); // offset - arg1 = gimple_call_arg (stmt, 1); // address - lhs = gimple_call_lhs (stmt); - location_t loc = gimple_location (stmt); - /* Since arg1 may be cast to a different type, just use ptr_type_node - here instead of trying to enforce TBAA on pointer types. */ - tree arg1_type = ptr_type_node; - tree lhs_type = TREE_TYPE (lhs); - /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create - the tree using the value from arg0. The resulting type will match - the type of arg1. */ - gimple_seq stmts = NULL; - tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0); - tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR, - arg1_type, arg1, temp_offset); - /* Mask off any lower bits from the address. */ - tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR, - arg1_type, temp_addr, - build_int_cst (arg1_type, -16)); - gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); - if (!is_gimple_mem_ref_addr (aligned_addr)) - { - tree t = make_ssa_name (TREE_TYPE (aligned_addr)); - gimple *g = gimple_build_assign (t, aligned_addr); - gsi_insert_before (gsi, g, GSI_SAME_STMT); - aligned_addr = t; - } - /* Use the build2 helper to set up the mem_ref. The MEM_REF could also - take an offset, but since we've already incorporated the offset - above, here we just pass in a zero. */ - gimple *g - = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr, - build_int_cst (arg1_type, 0))); - gimple_set_location (g, loc); - gsi_replace (gsi, g, true); - return true; - } - /* Vector stores. */ - case RS6000_BIF_STVX_V16QI: - case RS6000_BIF_STVX_V8HI: - case RS6000_BIF_STVX_V4SI: - case RS6000_BIF_STVX_V4SF: - case RS6000_BIF_STVX_V2DI: - case RS6000_BIF_STVX_V2DF: - { - arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */ - arg1 = gimple_call_arg (stmt, 1); /* Offset. */ - tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */ - location_t loc = gimple_location (stmt); - tree arg0_type = TREE_TYPE (arg0); - /* Use ptr_type_node (no TBAA) for the arg2_type. - FIXME: (Richard) "A proper fix would be to transition this type as - seen from the frontend to GIMPLE, for example in a similar way we - do for MEM_REFs by piggy-backing that on an extra argument, a - constant zero pointer of the alias pointer type to use (which would - also serve as a type indicator of the store itself). I'd use a - target specific internal function for this (not sure if we can have - those target specific, but I guess if it's folded away then that's - fine) and get away with the overload set." */ - tree arg2_type = ptr_type_node; - /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create - the tree using the value from arg0. The resulting type will match - the type of arg2. */ - gimple_seq stmts = NULL; - tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1); - tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR, - arg2_type, arg2, temp_offset); - /* Mask off any lower bits from the address. 
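The lvx/stvx folds here AND the computed address with -16 because those instructions ignore the low four address bits, so vec_ld/vec_st are always 16-byte-aligned accesses (unlike the vec_xl/vec_xst family). A small sketch, assuming -maltivec; the over-aligned buffer just keeps the example well defined:

/* Sketch: vec_ld masks the low 4 bits of (offset + address), mirroring
   the BIT_AND_EXPR with -16 built above, so loading at base + 4 still
   fetches the aligned block at base.  */
#include <altivec.h>
#include <stdio.h>

int
main (void)
{
  static int buf[8] __attribute__ ((aligned (32))) =
    {0, 1, 2, 3, 4, 5, 6, 7};

  vector signed int at0 = vec_ld (0, buf);   /* buf[0..3].  */
  vector signed int at4 = vec_ld (4, buf);   /* Address masked: still buf[0..3].  */

  printf ("%d %d\n", at0[0], at4[0]);
  return 0;
}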
*/ - tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR, - arg2_type, temp_addr, - build_int_cst (arg2_type, -16)); - gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); - if (!is_gimple_mem_ref_addr (aligned_addr)) - { - tree t = make_ssa_name (TREE_TYPE (aligned_addr)); - gimple *g = gimple_build_assign (t, aligned_addr); - gsi_insert_before (gsi, g, GSI_SAME_STMT); - aligned_addr = t; - } - /* The desired gimple result should be similar to: - MEM[(__vector floatD.1407 *)_1] = vf1D.2697; */ - gimple *g - = gimple_build_assign (build2 (MEM_REF, arg0_type, aligned_addr, - build_int_cst (arg2_type, 0)), arg0); - gimple_set_location (g, loc); - gsi_replace (gsi, g, true); - return true; - } - - /* unaligned Vector loads. */ - case RS6000_BIF_LXVW4X_V16QI: - case RS6000_BIF_LXVW4X_V8HI: - case RS6000_BIF_LXVW4X_V4SF: - case RS6000_BIF_LXVW4X_V4SI: - case RS6000_BIF_LXVD2X_V2DF: - case RS6000_BIF_LXVD2X_V2DI: - { - arg0 = gimple_call_arg (stmt, 0); // offset - arg1 = gimple_call_arg (stmt, 1); // address - lhs = gimple_call_lhs (stmt); - location_t loc = gimple_location (stmt); - /* Since arg1 may be cast to a different type, just use ptr_type_node - here instead of trying to enforce TBAA on pointer types. */ - tree arg1_type = ptr_type_node; - tree lhs_type = TREE_TYPE (lhs); - /* In GIMPLE the type of the MEM_REF specifies the alignment. The - required alignment (power) is 4 bytes regardless of data type. */ - tree align_ltype = build_aligned_type (lhs_type, 4); - /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create - the tree using the value from arg0. The resulting type will match - the type of arg1. */ - gimple_seq stmts = NULL; - tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0); - tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR, - arg1_type, arg1, temp_offset); - gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); - if (!is_gimple_mem_ref_addr (temp_addr)) - { - tree t = make_ssa_name (TREE_TYPE (temp_addr)); - gimple *g = gimple_build_assign (t, temp_addr); - gsi_insert_before (gsi, g, GSI_SAME_STMT); - temp_addr = t; - } - /* Use the build2 helper to set up the mem_ref. The MEM_REF could also - take an offset, but since we've already incorporated the offset - above, here we just pass in a zero. */ - gimple *g; - g = gimple_build_assign (lhs, build2 (MEM_REF, align_ltype, temp_addr, - build_int_cst (arg1_type, 0))); - gimple_set_location (g, loc); - gsi_replace (gsi, g, true); - return true; - } - - /* unaligned Vector stores. */ - case RS6000_BIF_STXVW4X_V16QI: - case RS6000_BIF_STXVW4X_V8HI: - case RS6000_BIF_STXVW4X_V4SF: - case RS6000_BIF_STXVW4X_V4SI: - case RS6000_BIF_STXVD2X_V2DF: - case RS6000_BIF_STXVD2X_V2DI: - { - arg0 = gimple_call_arg (stmt, 0); /* Value to be stored. */ - arg1 = gimple_call_arg (stmt, 1); /* Offset. */ - tree arg2 = gimple_call_arg (stmt, 2); /* Store-to address. */ - location_t loc = gimple_location (stmt); - tree arg0_type = TREE_TYPE (arg0); - /* Use ptr_type_node (no TBAA) for the arg2_type. */ - tree arg2_type = ptr_type_node; - /* In GIMPLE the type of the MEM_REF specifies the alignment. The - required alignment (power) is 4 bytes regardless of data type. */ - tree align_stype = build_aligned_type (arg0_type, 4); - /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'. Create - the tree using the value from arg1. 
*/ - gimple_seq stmts = NULL; - tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg1); - tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR, - arg2_type, arg2, temp_offset); - gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); - if (!is_gimple_mem_ref_addr (temp_addr)) - { - tree t = make_ssa_name (TREE_TYPE (temp_addr)); - gimple *g = gimple_build_assign (t, temp_addr); - gsi_insert_before (gsi, g, GSI_SAME_STMT); - temp_addr = t; - } - gimple *g; - g = gimple_build_assign (build2 (MEM_REF, align_stype, temp_addr, - build_int_cst (arg2_type, 0)), arg0); - gimple_set_location (g, loc); - gsi_replace (gsi, g, true); - return true; - } - - /* Vector Fused multiply-add (fma). */ - case RS6000_BIF_VMADDFP: - case RS6000_BIF_XVMADDDP: - case RS6000_BIF_XVMADDSP: - case RS6000_BIF_VMLADDUHM: - { - arg0 = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - tree arg2 = gimple_call_arg (stmt, 2); - lhs = gimple_call_lhs (stmt); - gcall *g = gimple_build_call_internal (IFN_FMA, 3, arg0, arg1, arg2); - gimple_call_set_lhs (g, lhs); - gimple_call_set_nothrow (g, true); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - } - - /* Vector compares; EQ, NE, GE, GT, LE. */ - case RS6000_BIF_VCMPEQUB: - case RS6000_BIF_VCMPEQUH: - case RS6000_BIF_VCMPEQUW: - case RS6000_BIF_VCMPEQUD: - /* We deliberately omit RS6000_BIF_VCMPEQUT for now, because gimple - folding produces worse code for 128-bit compares. */ - fold_compare_helper (gsi, EQ_EXPR, stmt); - return true; - - case RS6000_BIF_VCMPNEB: - case RS6000_BIF_VCMPNEH: - case RS6000_BIF_VCMPNEW: - /* We deliberately omit RS6000_BIF_VCMPNET for now, because gimple - folding produces worse code for 128-bit compares. */ - fold_compare_helper (gsi, NE_EXPR, stmt); - return true; - - case RS6000_BIF_CMPGE_16QI: - case RS6000_BIF_CMPGE_U16QI: - case RS6000_BIF_CMPGE_8HI: - case RS6000_BIF_CMPGE_U8HI: - case RS6000_BIF_CMPGE_4SI: - case RS6000_BIF_CMPGE_U4SI: - case RS6000_BIF_CMPGE_2DI: - case RS6000_BIF_CMPGE_U2DI: - /* We deliberately omit RS6000_BIF_CMPGE_1TI and RS6000_BIF_CMPGE_U1TI - for now, because gimple folding produces worse code for 128-bit - compares. */ - fold_compare_helper (gsi, GE_EXPR, stmt); - return true; - - case RS6000_BIF_VCMPGTSB: - case RS6000_BIF_VCMPGTUB: - case RS6000_BIF_VCMPGTSH: - case RS6000_BIF_VCMPGTUH: - case RS6000_BIF_VCMPGTSW: - case RS6000_BIF_VCMPGTUW: - case RS6000_BIF_VCMPGTUD: - case RS6000_BIF_VCMPGTSD: - /* We deliberately omit RS6000_BIF_VCMPGTUT and RS6000_BIF_VCMPGTST - for now, because gimple folding produces worse code for 128-bit - compares. */ - fold_compare_helper (gsi, GT_EXPR, stmt); - return true; - - case RS6000_BIF_CMPLE_16QI: - case RS6000_BIF_CMPLE_U16QI: - case RS6000_BIF_CMPLE_8HI: - case RS6000_BIF_CMPLE_U8HI: - case RS6000_BIF_CMPLE_4SI: - case RS6000_BIF_CMPLE_U4SI: - case RS6000_BIF_CMPLE_2DI: - case RS6000_BIF_CMPLE_U2DI: - /* We deliberately omit RS6000_BIF_CMPLE_1TI and RS6000_BIF_CMPLE_U1TI - for now, because gimple folding produces worse code for 128-bit - compares. */ - fold_compare_helper (gsi, LE_EXPR, stmt); - return true; - - /* flavors of vec_splat_[us]{8,16,32}. */ - case RS6000_BIF_VSPLTISB: - case RS6000_BIF_VSPLTISH: - case RS6000_BIF_VSPLTISW: - { - arg0 = gimple_call_arg (stmt, 0); - lhs = gimple_call_lhs (stmt); - - /* Only fold the vec_splat_*() if the lower bits of arg 0 is a - 5-bit signed constant in range -16 to +15. 
*/ - if (TREE_CODE (arg0) != INTEGER_CST - || !IN_RANGE (TREE_INT_CST_LOW (arg0), -16, 15)) - return false; - gimple_seq stmts = NULL; - location_t loc = gimple_location (stmt); - tree splat_value = gimple_convert (&stmts, loc, - TREE_TYPE (TREE_TYPE (lhs)), arg0); - gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); - tree splat_tree = build_vector_from_val (TREE_TYPE (lhs), splat_value); - g = gimple_build_assign (lhs, splat_tree); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - } - - /* Flavors of vec_splat. */ - /* a = vec_splat (b, 0x3) becomes a = { b[3],b[3],b[3],...}; */ - case RS6000_BIF_VSPLTB: - case RS6000_BIF_VSPLTH: - case RS6000_BIF_VSPLTW: - case RS6000_BIF_XXSPLTD_V2DI: - case RS6000_BIF_XXSPLTD_V2DF: - { - arg0 = gimple_call_arg (stmt, 0); /* input vector. */ - arg1 = gimple_call_arg (stmt, 1); /* index into arg0. */ - /* Only fold the vec_splat_*() if arg1 is both a constant value and - is a valid index into the arg0 vector. */ - unsigned int n_elts = VECTOR_CST_NELTS (arg0); - if (TREE_CODE (arg1) != INTEGER_CST - || TREE_INT_CST_LOW (arg1) > (n_elts -1)) - return false; - lhs = gimple_call_lhs (stmt); - tree lhs_type = TREE_TYPE (lhs); - tree arg0_type = TREE_TYPE (arg0); - tree splat; - if (TREE_CODE (arg0) == VECTOR_CST) - splat = VECTOR_CST_ELT (arg0, TREE_INT_CST_LOW (arg1)); - else - { - /* Determine (in bits) the length and start location of the - splat value for a call to the tree_vec_extract helper. */ - int splat_elem_size = TREE_INT_CST_LOW (size_in_bytes (arg0_type)) - * BITS_PER_UNIT / n_elts; - int splat_start_bit = TREE_INT_CST_LOW (arg1) * splat_elem_size; - tree len = build_int_cst (bitsizetype, splat_elem_size); - tree start = build_int_cst (bitsizetype, splat_start_bit); - splat = tree_vec_extract (gsi, TREE_TYPE (lhs_type), arg0, - len, start); - } - /* And finally, build the new vector. */ - tree splat_tree = build_vector_from_val (lhs_type, splat); - g = gimple_build_assign (lhs, splat_tree); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - } - - /* vec_mergel (integrals). */ - case RS6000_BIF_VMRGLH: - case RS6000_BIF_VMRGLW: - case RS6000_BIF_XXMRGLW_4SI: - case RS6000_BIF_VMRGLB: - case RS6000_BIF_VEC_MERGEL_V2DI: - case RS6000_BIF_XXMRGLW_4SF: - case RS6000_BIF_VEC_MERGEL_V2DF: - fold_mergehl_helper (gsi, stmt, 1); - return true; - /* vec_mergeh (integrals). */ - case RS6000_BIF_VMRGHH: - case RS6000_BIF_VMRGHW: - case RS6000_BIF_XXMRGHW_4SI: - case RS6000_BIF_VMRGHB: - case RS6000_BIF_VEC_MERGEH_V2DI: - case RS6000_BIF_XXMRGHW_4SF: - case RS6000_BIF_VEC_MERGEH_V2DF: - fold_mergehl_helper (gsi, stmt, 0); - return true; - - /* Flavors of vec_mergee. */ - case RS6000_BIF_VMRGEW_V4SI: - case RS6000_BIF_VMRGEW_V2DI: - case RS6000_BIF_VMRGEW_V4SF: - case RS6000_BIF_VMRGEW_V2DF: - fold_mergeeo_helper (gsi, stmt, 0); - return true; - /* Flavors of vec_mergeo. 
*/ - case RS6000_BIF_VMRGOW_V4SI: - case RS6000_BIF_VMRGOW_V2DI: - case RS6000_BIF_VMRGOW_V4SF: - case RS6000_BIF_VMRGOW_V2DF: - fold_mergeeo_helper (gsi, stmt, 1); - return true; - - /* d = vec_pack (a, b) */ - case RS6000_BIF_VPKUDUM: - case RS6000_BIF_VPKUHUM: - case RS6000_BIF_VPKUWUM: - { - arg0 = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - lhs = gimple_call_lhs (stmt); - gimple *g = gimple_build_assign (lhs, VEC_PACK_TRUNC_EXPR, arg0, arg1); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - } - - /* d = vec_unpackh (a) */ - /* Note that the UNPACK_{HI,LO}_EXPR used in the gimple_build_assign call - in this code is sensitive to endian-ness, and needs to be inverted to - handle both LE and BE targets. */ - case RS6000_BIF_VUPKHSB: - case RS6000_BIF_VUPKHSH: - case RS6000_BIF_VUPKHSW: - { - arg0 = gimple_call_arg (stmt, 0); - lhs = gimple_call_lhs (stmt); - if (BYTES_BIG_ENDIAN) - g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0); - else - g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - } - /* d = vec_unpackl (a) */ - case RS6000_BIF_VUPKLSB: - case RS6000_BIF_VUPKLSH: - case RS6000_BIF_VUPKLSW: - { - arg0 = gimple_call_arg (stmt, 0); - lhs = gimple_call_lhs (stmt); - if (BYTES_BIG_ENDIAN) - g = gimple_build_assign (lhs, VEC_UNPACK_LO_EXPR, arg0); - else - g = gimple_build_assign (lhs, VEC_UNPACK_HI_EXPR, arg0); - gimple_set_location (g, gimple_location (stmt)); - gsi_replace (gsi, g, true); - return true; - } - /* There is no gimple type corresponding with pixel, so just return. */ - case RS6000_BIF_VUPKHPX: - case RS6000_BIF_VUPKLPX: - return false; - - /* vec_perm. */ - case RS6000_BIF_VPERM_16QI: - case RS6000_BIF_VPERM_8HI: - case RS6000_BIF_VPERM_4SI: - case RS6000_BIF_VPERM_2DI: - case RS6000_BIF_VPERM_4SF: - case RS6000_BIF_VPERM_2DF: - case RS6000_BIF_VPERM_16QI_UNS: - case RS6000_BIF_VPERM_8HI_UNS: - case RS6000_BIF_VPERM_4SI_UNS: - case RS6000_BIF_VPERM_2DI_UNS: - { - arg0 = gimple_call_arg (stmt, 0); - arg1 = gimple_call_arg (stmt, 1); - tree permute = gimple_call_arg (stmt, 2); - lhs = gimple_call_lhs (stmt); - location_t loc = gimple_location (stmt); - gimple_seq stmts = NULL; - // convert arg0 and arg1 to match the type of the permute - // for the VEC_PERM_EXPR operation. - tree permute_type = (TREE_TYPE (permute)); - tree arg0_ptype = gimple_build (&stmts, loc, VIEW_CONVERT_EXPR, - permute_type, arg0); - tree arg1_ptype = gimple_build (&stmts, loc, VIEW_CONVERT_EXPR, - permute_type, arg1); - tree lhs_ptype = gimple_build (&stmts, loc, VEC_PERM_EXPR, - permute_type, arg0_ptype, arg1_ptype, - permute); - // Convert the result back to the desired lhs type upon completion. - tree temp = gimple_build (&stmts, loc, VIEW_CONVERT_EXPR, - TREE_TYPE (lhs), lhs_ptype); - gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT); - g = gimple_build_assign (lhs, temp); - gimple_set_location (g, loc); - gsi_replace (gsi, g, true); - return true; - } - - default: - if (TARGET_DEBUG_BUILTIN) - fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n", - fn_code, fn_name1, fn_name2); - break; - } - - return false; -} - -/* Expand ALTIVEC_BUILTIN_MASK_FOR_LOAD. */ -rtx -rs6000_expand_ldst_mask (rtx target, tree arg0) -{ - int icode2 = BYTES_BIG_ENDIAN ? 
(int) CODE_FOR_altivec_lvsr_direct - : (int) CODE_FOR_altivec_lvsl_direct; - machine_mode tmode = insn_data[icode2].operand[0].mode; - machine_mode mode = insn_data[icode2].operand[1].mode; - - gcc_assert (TARGET_ALTIVEC); - - gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg0))); - rtx op = expand_expr (arg0, NULL_RTX, Pmode, EXPAND_NORMAL); - rtx addr = memory_address (mode, op); - /* We need to negate the address. */ - op = gen_reg_rtx (GET_MODE (addr)); - emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr))); - op = gen_rtx_MEM (mode, op); - - if (target == 0 - || GET_MODE (target) != tmode - || !insn_data[icode2].operand[0].predicate (target, tmode)) - target = gen_reg_rtx (tmode); - - rtx pat = GEN_FCN (icode2) (target, op); - if (!pat) - return 0; - emit_insn (pat); - - return target; -} - -/* Expand the CPU builtin in FCODE and store the result in TARGET. */ -static rtx -cpu_expand_builtin (enum rs6000_gen_builtins fcode, - tree exp ATTRIBUTE_UNUSED, rtx target) -{ - /* __builtin_cpu_init () is a nop, so expand to nothing. */ - if (fcode == RS6000_BIF_CPU_INIT) - return const0_rtx; - - if (target == 0 || GET_MODE (target) != SImode) - target = gen_reg_rtx (SImode); - - /* TODO: Factor the #ifdef'd code into a separate function. */ -#ifdef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB - tree arg = TREE_OPERAND (CALL_EXPR_ARG (exp, 0), 0); - /* Target clones creates an ARRAY_REF instead of STRING_CST, convert it back - to a STRING_CST. */ - if (TREE_CODE (arg) == ARRAY_REF - && TREE_CODE (TREE_OPERAND (arg, 0)) == STRING_CST - && TREE_CODE (TREE_OPERAND (arg, 1)) == INTEGER_CST - && compare_tree_int (TREE_OPERAND (arg, 1), 0) == 0) - arg = TREE_OPERAND (arg, 0); - - if (TREE_CODE (arg) != STRING_CST) - { - error ("builtin %qs only accepts a string argument", - rs6000_builtin_info[(size_t) fcode].bifname); - return const0_rtx; - } - - if (fcode == RS6000_BIF_CPU_IS) - { - const char *cpu = TREE_STRING_POINTER (arg); - rtx cpuid = NULL_RTX; - for (size_t i = 0; i < ARRAY_SIZE (cpu_is_info); i++) - if (strcmp (cpu, cpu_is_info[i].cpu) == 0) - { - /* The CPUID value in the TCB is offset by _DL_FIRST_PLATFORM. */ - cpuid = GEN_INT (cpu_is_info[i].cpuid + _DL_FIRST_PLATFORM); - break; - } - if (cpuid == NULL_RTX) - { - /* Invalid CPU argument. */ - error ("cpu %qs is an invalid argument to builtin %qs", - cpu, rs6000_builtin_info[(size_t) fcode].bifname); - return const0_rtx; - } - - rtx platform = gen_reg_rtx (SImode); - rtx address = gen_rtx_PLUS (Pmode, - gen_rtx_REG (Pmode, TLS_REGNUM), - GEN_INT (TCB_PLATFORM_OFFSET)); - rtx tcbmem = gen_const_mem (SImode, address); - emit_move_insn (platform, tcbmem); - emit_insn (gen_eqsi3 (target, platform, cpuid)); - } - else if (fcode == RS6000_BIF_CPU_SUPPORTS) - { - const char *hwcap = TREE_STRING_POINTER (arg); - rtx mask = NULL_RTX; - int hwcap_offset; - for (size_t i = 0; i < ARRAY_SIZE (cpu_supports_info); i++) - if (strcmp (hwcap, cpu_supports_info[i].hwcap) == 0) - { - mask = GEN_INT (cpu_supports_info[i].mask); - hwcap_offset = TCB_HWCAP_OFFSET (cpu_supports_info[i].id); - break; - } - if (mask == NULL_RTX) - { - /* Invalid HWCAP argument. 
*/ - error ("%s %qs is an invalid argument to builtin %qs", - "hwcap", hwcap, - rs6000_builtin_info[(size_t) fcode].bifname); - return const0_rtx; - } - - rtx tcb_hwcap = gen_reg_rtx (SImode); - rtx address = gen_rtx_PLUS (Pmode, - gen_rtx_REG (Pmode, TLS_REGNUM), - GEN_INT (hwcap_offset)); - rtx tcbmem = gen_const_mem (SImode, address); - emit_move_insn (tcb_hwcap, tcbmem); - rtx scratch1 = gen_reg_rtx (SImode); - emit_insn (gen_rtx_SET (scratch1, - gen_rtx_AND (SImode, tcb_hwcap, mask))); - rtx scratch2 = gen_reg_rtx (SImode); - emit_insn (gen_eqsi3 (scratch2, scratch1, const0_rtx)); - emit_insn (gen_rtx_SET (target, - gen_rtx_XOR (SImode, scratch2, const1_rtx))); - } - else - gcc_unreachable (); - - /* Record that we have expanded a CPU builtin, so that we can later - emit a reference to the special symbol exported by LIBC to ensure we - do not link against an old LIBC that doesn't support this feature. */ - cpu_builtin_p = true; - -#else - warning (0, "builtin %qs needs GLIBC (2.23 and newer) that exports hardware " - "capability bits", rs6000_builtin_info[(size_t) fcode].bifname); - - /* For old LIBCs, always return FALSE. */ - emit_move_insn (target, GEN_INT (0)); -#endif /* TARGET_LIBC_PROVIDES_HWCAP_IN_TCB */ - - return target; -} - -/* For the element-reversing load/store built-ins, produce the correct - insn_code depending on the target endianness. */ -static insn_code -elemrev_icode (rs6000_gen_builtins fcode) -{ - switch (fcode) - { - case RS6000_BIF_ST_ELEMREV_V1TI: - return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v1ti - : CODE_FOR_vsx_st_elemrev_v1ti; - - case RS6000_BIF_ST_ELEMREV_V2DF: - return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2df - : CODE_FOR_vsx_st_elemrev_v2df; - - case RS6000_BIF_ST_ELEMREV_V2DI: - return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v2di - : CODE_FOR_vsx_st_elemrev_v2di; - - case RS6000_BIF_ST_ELEMREV_V4SF: - return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4sf - : CODE_FOR_vsx_st_elemrev_v4sf; - - case RS6000_BIF_ST_ELEMREV_V4SI: - return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v4si - : CODE_FOR_vsx_st_elemrev_v4si; - - case RS6000_BIF_ST_ELEMREV_V8HI: - return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v8hi - : CODE_FOR_vsx_st_elemrev_v8hi; - - case RS6000_BIF_ST_ELEMREV_V16QI: - return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_store_v16qi - : CODE_FOR_vsx_st_elemrev_v16qi; - - case RS6000_BIF_LD_ELEMREV_V2DF: - return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2df - : CODE_FOR_vsx_ld_elemrev_v2df; - - case RS6000_BIF_LD_ELEMREV_V1TI: - return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v1ti - : CODE_FOR_vsx_ld_elemrev_v1ti; - - case RS6000_BIF_LD_ELEMREV_V2DI: - return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v2di - : CODE_FOR_vsx_ld_elemrev_v2di; - - case RS6000_BIF_LD_ELEMREV_V4SF: - return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4sf - : CODE_FOR_vsx_ld_elemrev_v4sf; - - case RS6000_BIF_LD_ELEMREV_V4SI: - return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v4si - : CODE_FOR_vsx_ld_elemrev_v4si; - - case RS6000_BIF_LD_ELEMREV_V8HI: - return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v8hi - : CODE_FOR_vsx_ld_elemrev_v8hi; - - case RS6000_BIF_LD_ELEMREV_V16QI: - return BYTES_BIG_ENDIAN ? CODE_FOR_vsx_load_v16qi - : CODE_FOR_vsx_ld_elemrev_v16qi; - default: - ; - } - - gcc_unreachable (); -} - -/* Expand an AltiVec vector load builtin, and return the expanded rtx. 
*/ -static rtx -ldv_expand_builtin (rtx target, insn_code icode, rtx *op, machine_mode tmode) -{ - if (target == 0 - || GET_MODE (target) != tmode - || !insn_data[icode].operand[0].predicate (target, tmode)) - target = gen_reg_rtx (tmode); - - op[1] = copy_to_mode_reg (Pmode, op[1]); - - /* These CELL built-ins use BLKmode instead of tmode for historical - (i.e., unknown) reasons. TODO: Is this necessary? */ - bool blk = (icode == CODE_FOR_altivec_lvlx - || icode == CODE_FOR_altivec_lvlxl - || icode == CODE_FOR_altivec_lvrx - || icode == CODE_FOR_altivec_lvrxl); - - /* For LVX, express the RTL accurately by ANDing the address with -16. - LVXL and LVE*X expand to use UNSPECs to hide their special behavior, - so the raw address is fine. */ - /* TODO: That statement seems wrong, as the UNSPECs don't surround the - memory expression, so a latent bug may lie here. The &-16 is likely - needed for all VMX-style loads. */ - if (icode == CODE_FOR_altivec_lvx_v1ti - || icode == CODE_FOR_altivec_lvx_v2df - || icode == CODE_FOR_altivec_lvx_v2di - || icode == CODE_FOR_altivec_lvx_v4sf - || icode == CODE_FOR_altivec_lvx_v4si - || icode == CODE_FOR_altivec_lvx_v8hi - || icode == CODE_FOR_altivec_lvx_v16qi) - { - rtx rawaddr; - if (op[0] == const0_rtx) - rawaddr = op[1]; - else - { - op[0] = copy_to_mode_reg (Pmode, op[0]); - rawaddr = gen_rtx_PLUS (Pmode, op[1], op[0]); - } - rtx addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16)); - addr = gen_rtx_MEM (blk ? BLKmode : tmode, addr); - - emit_insn (gen_rtx_SET (target, addr)); - } - else - { - rtx addr; - if (op[0] == const0_rtx) - addr = gen_rtx_MEM (blk ? BLKmode : tmode, op[1]); - else - { - op[0] = copy_to_mode_reg (Pmode, op[0]); - addr = gen_rtx_MEM (blk ? BLKmode : tmode, - gen_rtx_PLUS (Pmode, op[1], op[0])); - } - - rtx pat = GEN_FCN (icode) (target, addr); - if (!pat) - return 0; - emit_insn (pat); - } - - return target; -} - -/* Expand a builtin function that loads a scalar into a vector register - with sign extension, and return the expanded rtx. */ -static rtx -lxvrse_expand_builtin (rtx target, insn_code icode, rtx *op, - machine_mode tmode, machine_mode smode) -{ - rtx pat, addr; - op[1] = copy_to_mode_reg (Pmode, op[1]); - - if (op[0] == const0_rtx) - addr = gen_rtx_MEM (tmode, op[1]); - else - { - op[0] = copy_to_mode_reg (Pmode, op[0]); - addr = gen_rtx_MEM (smode, - gen_rtx_PLUS (Pmode, op[1], op[0])); - } - - rtx discratch = gen_reg_rtx (V2DImode); - rtx tiscratch = gen_reg_rtx (TImode); - - /* Emit the lxvr*x insn. */ - pat = GEN_FCN (icode) (tiscratch, addr); - if (!pat) - return 0; - emit_insn (pat); - - /* Emit a sign extension from V16QI,V8HI,V4SI to V2DI. */ - rtx temp1; - if (icode == CODE_FOR_vsx_lxvrbx) - { - temp1 = simplify_gen_subreg (V16QImode, tiscratch, TImode, 0); - emit_insn (gen_vsx_sign_extend_qi_v2di (discratch, temp1)); - } - else if (icode == CODE_FOR_vsx_lxvrhx) - { - temp1 = simplify_gen_subreg (V8HImode, tiscratch, TImode, 0); - emit_insn (gen_vsx_sign_extend_hi_v2di (discratch, temp1)); - } - else if (icode == CODE_FOR_vsx_lxvrwx) - { - temp1 = simplify_gen_subreg (V4SImode, tiscratch, TImode, 0); - emit_insn (gen_vsx_sign_extend_si_v2di (discratch, temp1)); - } - else if (icode == CODE_FOR_vsx_lxvrdx) - discratch = simplify_gen_subreg (V2DImode, tiscratch, TImode, 0); - else - gcc_unreachable (); - - /* Emit the sign extension from V2DI (double) to TI (quad). 
*/ - rtx temp2 = simplify_gen_subreg (TImode, discratch, V2DImode, 0); - emit_insn (gen_extendditi2_vector (target, temp2)); - - return target; -} - -/* Expand a builtin function that loads a scalar into a vector register - with zero extension, and return the expanded rtx. */ -static rtx -lxvrze_expand_builtin (rtx target, insn_code icode, rtx *op, - machine_mode tmode, machine_mode smode) -{ - rtx pat, addr; - op[1] = copy_to_mode_reg (Pmode, op[1]); - - if (op[0] == const0_rtx) - addr = gen_rtx_MEM (tmode, op[1]); - else - { - op[0] = copy_to_mode_reg (Pmode, op[0]); - addr = gen_rtx_MEM (smode, - gen_rtx_PLUS (Pmode, op[1], op[0])); - } - - pat = GEN_FCN (icode) (target, addr); - if (!pat) - return 0; - emit_insn (pat); - return target; -} - -/* Expand an AltiVec vector store builtin, and return the expanded rtx. */ -static rtx -stv_expand_builtin (insn_code icode, rtx *op, - machine_mode tmode, machine_mode smode) -{ - op[2] = copy_to_mode_reg (Pmode, op[2]); - - /* For STVX, express the RTL accurately by ANDing the address with -16. - STVXL and STVE*X expand to use UNSPECs to hide their special behavior, - so the raw address is fine. */ - /* TODO: That statement seems wrong, as the UNSPECs don't surround the - memory expression, so a latent bug may lie here. The &-16 is likely - needed for all VMX-style stores. */ - if (icode == CODE_FOR_altivec_stvx_v2df - || icode == CODE_FOR_altivec_stvx_v2di - || icode == CODE_FOR_altivec_stvx_v4sf - || icode == CODE_FOR_altivec_stvx_v4si - || icode == CODE_FOR_altivec_stvx_v8hi - || icode == CODE_FOR_altivec_stvx_v16qi) - { - rtx rawaddr; - if (op[1] == const0_rtx) - rawaddr = op[2]; - else - { - op[1] = copy_to_mode_reg (Pmode, op[1]); - rawaddr = gen_rtx_PLUS (Pmode, op[2], op[1]); - } - - rtx addr = gen_rtx_AND (Pmode, rawaddr, gen_rtx_CONST_INT (Pmode, -16)); - addr = gen_rtx_MEM (tmode, addr); - op[0] = copy_to_mode_reg (tmode, op[0]); - emit_insn (gen_rtx_SET (addr, op[0])); - } - else if (icode == CODE_FOR_vsx_stxvrbx - || icode == CODE_FOR_vsx_stxvrhx - || icode == CODE_FOR_vsx_stxvrwx - || icode == CODE_FOR_vsx_stxvrdx) - { - rtx truncrtx = gen_rtx_TRUNCATE (tmode, op[0]); - op[0] = copy_to_mode_reg (E_TImode, truncrtx); - - rtx addr; - if (op[1] == const0_rtx) - addr = gen_rtx_MEM (Pmode, op[2]); - else - { - op[1] = copy_to_mode_reg (Pmode, op[1]); - addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op[2], op[1])); - } - rtx pat = GEN_FCN (icode) (addr, op[0]); - if (pat) - emit_insn (pat); - } - else - { - if (!insn_data[icode].operand[1].predicate (op[0], smode)) - op[0] = copy_to_mode_reg (smode, op[0]); - - rtx addr; - if (op[1] == const0_rtx) - addr = gen_rtx_MEM (tmode, op[2]); - else - { - op[1] = copy_to_mode_reg (Pmode, op[1]); - addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op[2], op[1])); - } - - rtx pat = GEN_FCN (icode) (addr, op[0]); - if (pat) - emit_insn (pat); - } - - return NULL_RTX; -} - -/* Expand the MMA built-in in EXP, and return it. 
*/ -static rtx -mma_expand_builtin (tree exp, rtx target, insn_code icode, - rs6000_gen_builtins fcode) -{ - tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); - bool void_func = TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node; - machine_mode tmode = VOIDmode; - rtx op[MAX_MMA_OPERANDS]; - unsigned nopnds = 0; - - if (!void_func) - { - tmode = insn_data[icode].operand[0].mode; - if (!(target - && GET_MODE (target) == tmode - && insn_data[icode].operand[0].predicate (target, tmode))) - target = gen_reg_rtx (tmode); - op[nopnds++] = target; - } - else - target = const0_rtx; - - call_expr_arg_iterator iter; - tree arg; - FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) - { - if (arg == error_mark_node) - return const0_rtx; - - rtx opnd; - const struct insn_operand_data *insn_op; - insn_op = &insn_data[icode].operand[nopnds]; - if (TREE_CODE (arg) == ADDR_EXPR - && MEM_P (DECL_RTL (TREE_OPERAND (arg, 0)))) - opnd = DECL_RTL (TREE_OPERAND (arg, 0)); - else - opnd = expand_normal (arg); - - if (!insn_op->predicate (opnd, insn_op->mode)) - { - /* TODO: This use of constraints needs explanation. */ - if (!strcmp (insn_op->constraint, "n")) - { - if (!CONST_INT_P (opnd)) - error ("argument %d must be an unsigned literal", nopnds); - else - error ("argument %d is an unsigned literal that is " - "out of range", nopnds); - return const0_rtx; - } - opnd = copy_to_mode_reg (insn_op->mode, opnd); - } - - /* Some MMA instructions have INOUT accumulator operands, so force - their target register to be the same as their input register. */ - if (!void_func - && nopnds == 1 - && !strcmp (insn_op->constraint, "0") - && insn_op->mode == tmode - && REG_P (opnd) - && insn_data[icode].operand[0].predicate (opnd, tmode)) - target = op[0] = opnd; - - op[nopnds++] = opnd; - } - - rtx pat; - switch (nopnds) - { - case 1: - pat = GEN_FCN (icode) (op[0]); - break; - case 2: - pat = GEN_FCN (icode) (op[0], op[1]); - break; - case 3: - /* The ASSEMBLE builtin source operands are reversed in little-endian - mode, so reorder them. */ - if (fcode == RS6000_BIF_ASSEMBLE_PAIR_V_INTERNAL && !WORDS_BIG_ENDIAN) - std::swap (op[1], op[2]); - pat = GEN_FCN (icode) (op[0], op[1], op[2]); - break; - case 4: - pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); - break; - case 5: - /* The ASSEMBLE builtin source operands are reversed in little-endian - mode, so reorder them. */ - if (fcode == RS6000_BIF_ASSEMBLE_ACC_INTERNAL && !WORDS_BIG_ENDIAN) - { - std::swap (op[1], op[4]); - std::swap (op[2], op[3]); - } - pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]); - break; - case 6: - pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]); - break; - case 7: - pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5], op[6]); - break; - default: - gcc_unreachable (); - } - - if (!pat) - return NULL_RTX; - - emit_insn (pat); - return target; -} - -/* Return the appropriate SPR number associated with the given builtin. */ -static inline HOST_WIDE_INT -htm_spr_num (enum rs6000_gen_builtins code) -{ - if (code == RS6000_BIF_GET_TFHAR - || code == RS6000_BIF_SET_TFHAR) - return TFHAR_SPR; - else if (code == RS6000_BIF_GET_TFIAR - || code == RS6000_BIF_SET_TFIAR) - return TFIAR_SPR; - else if (code == RS6000_BIF_GET_TEXASR - || code == RS6000_BIF_SET_TEXASR) - return TEXASR_SPR; - gcc_assert (code == RS6000_BIF_GET_TEXASRU - || code == RS6000_BIF_SET_TEXASRU); - return TEXASRU_SPR; -} - -/* Expand the HTM builtin in EXP and store the result in TARGET. - Return the expanded rtx. 
*/ -static rtx -htm_expand_builtin (bifdata *bifaddr, rs6000_gen_builtins fcode, - tree exp, rtx target) -{ - if (!TARGET_POWERPC64 - && (fcode == RS6000_BIF_TABORTDC - || fcode == RS6000_BIF_TABORTDCI)) - { - error ("builtin %qs is only valid in 64-bit mode", bifaddr->bifname); - return const0_rtx; - } - - tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); - bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; - bool uses_spr = bif_is_htmspr (*bifaddr); - insn_code icode = bifaddr->icode; - - if (uses_spr) - icode = rs6000_htm_spr_icode (nonvoid); - - rtx op[MAX_HTM_OPERANDS]; - int nopnds = 0; - const insn_operand_data *insn_op = &insn_data[icode].operand[0]; - - if (nonvoid) - { - machine_mode tmode = (uses_spr) ? insn_op->mode : E_SImode; - if (!target - || GET_MODE (target) != tmode - || (uses_spr && !insn_op->predicate (target, tmode))) - target = gen_reg_rtx (tmode); - if (uses_spr) - op[nopnds++] = target; - } - - tree arg; - call_expr_arg_iterator iter; - - FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) - { - if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS) - return const0_rtx; - - insn_op = &insn_data[icode].operand[nopnds]; - op[nopnds] = expand_normal (arg); - - if (!insn_op->predicate (op[nopnds], insn_op->mode)) - { - /* TODO: This use of constraints could use explanation. - This happens a couple of places, perhaps make that a - function to document what's happening. */ - if (!strcmp (insn_op->constraint, "n")) - { - int arg_num = nonvoid ? nopnds : nopnds + 1; - if (!CONST_INT_P (op[nopnds])) - error ("argument %d must be an unsigned literal", arg_num); - else - error ("argument %d is an unsigned literal that is " - "out of range", arg_num); - return const0_rtx; - } - op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]); - } - - nopnds++; - } - - /* Handle the builtins for extended mnemonics. These accept - no arguments, but map to builtins that take arguments. */ - switch (fcode) - { - case RS6000_BIF_TENDALL: /* Alias for: tend. 1 */ - case RS6000_BIF_TRESUME: /* Alias for: tsr. 1 */ - op[nopnds++] = GEN_INT (1); - break; - case RS6000_BIF_TSUSPEND: /* Alias for: tsr. 0 */ - op[nopnds++] = GEN_INT (0); - break; - default: - break; - } - - /* If this builtin accesses SPRs, then pass in the appropriate - SPR number and SPR regno as the last two operands. */ - rtx cr = NULL_RTX; - if (uses_spr) - { - machine_mode mode = TARGET_POWERPC64 ? DImode : SImode; - op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode)); - } - /* If this builtin accesses a CR field, then pass in a scratch - CR field as the last operand. */ - else if (bif_is_htmcr (*bifaddr)) - { - cr = gen_reg_rtx (CCmode); - op[nopnds++] = cr; - } - - rtx pat; - switch (nopnds) - { - case 1: - pat = GEN_FCN (icode) (op[0]); - break; - case 2: - pat = GEN_FCN (icode) (op[0], op[1]); - break; - case 3: - pat = GEN_FCN (icode) (op[0], op[1], op[2]); - break; - case 4: - pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]); - break; - default: - gcc_unreachable (); - } - if (!pat) - return NULL_RTX; - emit_insn (pat); - - if (bif_is_htmcr (*bifaddr)) - { - if (fcode == RS6000_BIF_TBEGIN) - { - /* Emit code to set TARGET to true or false depending on - whether the tbegin. instruction succeeded or failed - to start a transaction. We do this by placing the 1's - complement of CR's EQ bit into TARGET. 
*/ - rtx scratch = gen_reg_rtx (SImode); - emit_insn (gen_rtx_SET (scratch, - gen_rtx_EQ (SImode, cr, - const0_rtx))); - emit_insn (gen_rtx_SET (target, - gen_rtx_XOR (SImode, scratch, - GEN_INT (1)))); - } - else - { - /* Emit code to copy the 4-bit condition register field - CR into the least significant end of register TARGET. */ - rtx scratch1 = gen_reg_rtx (SImode); - rtx scratch2 = gen_reg_rtx (SImode); - rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0); - emit_insn (gen_movcc (subreg, cr)); - emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28))); - emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf))); - } - } - - if (nonvoid) - return target; - return const0_rtx; -} - -/* Expand an expression EXP that calls a built-in function, - with result going to TARGET if that's convenient - (and in mode MODE if that's convenient). - SUBTARGET may be used as the target for computing one of EXP's operands. - IGNORE is nonzero if the value is to be ignored. - Use the new builtin infrastructure. */ -rtx -rs6000_expand_builtin (tree exp, rtx target, rtx /* subtarget */, - machine_mode /* mode */, int ignore) -{ - tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); - enum rs6000_gen_builtins fcode - = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl); - size_t uns_fcode = (size_t)fcode; - enum insn_code icode = rs6000_builtin_info[uns_fcode].icode; - - /* TODO: The following commentary and code is inherited from the original - builtin processing code. The commentary is a bit confusing, with the - intent being that KFmode is always IEEE-128, IFmode is always IBM - double-double, and TFmode is the current long double. The code is - confusing in that it converts from KFmode to TFmode pattern names, - when the other direction is more intuitive. Try to address this. */ - - /* We have two different modes (KFmode, TFmode) that are the IEEE - 128-bit floating point type, depending on whether long double is the - IBM extended double (KFmode) or long double is IEEE 128-bit (TFmode). - It is simpler if we only define one variant of the built-in function, - and switch the code when defining it, rather than defining two built- - ins and using the overload table in rs6000-c.cc to switch between the - two. If we don't have the proper assembler, don't do this switch - because CODE_FOR_*kf* and CODE_FOR_*tf* will be CODE_FOR_nothing. 
*/ - if (FLOAT128_IEEE_P (TFmode)) - switch (icode) - { - case CODE_FOR_sqrtkf2_odd: - icode = CODE_FOR_sqrttf2_odd; - break; - case CODE_FOR_trunckfdf2_odd: - icode = CODE_FOR_trunctfdf2_odd; - break; - case CODE_FOR_addkf3_odd: - icode = CODE_FOR_addtf3_odd; - break; - case CODE_FOR_subkf3_odd: - icode = CODE_FOR_subtf3_odd; - break; - case CODE_FOR_mulkf3_odd: - icode = CODE_FOR_multf3_odd; - break; - case CODE_FOR_divkf3_odd: - icode = CODE_FOR_divtf3_odd; - break; - case CODE_FOR_fmakf4_odd: - icode = CODE_FOR_fmatf4_odd; - break; - case CODE_FOR_xsxexpqp_kf: - icode = CODE_FOR_xsxexpqp_tf; - break; - case CODE_FOR_xsxsigqp_kf: - icode = CODE_FOR_xsxsigqp_tf; - break; - case CODE_FOR_xststdcnegqp_kf: - icode = CODE_FOR_xststdcnegqp_tf; - break; - case CODE_FOR_xsiexpqp_kf: - icode = CODE_FOR_xsiexpqp_tf; - break; - case CODE_FOR_xsiexpqpf_kf: - icode = CODE_FOR_xsiexpqpf_tf; - break; - case CODE_FOR_xststdcqp_kf: - icode = CODE_FOR_xststdcqp_tf; - break; - case CODE_FOR_xscmpexpqp_eq_kf: - icode = CODE_FOR_xscmpexpqp_eq_tf; - break; - case CODE_FOR_xscmpexpqp_lt_kf: - icode = CODE_FOR_xscmpexpqp_lt_tf; - break; - case CODE_FOR_xscmpexpqp_gt_kf: - icode = CODE_FOR_xscmpexpqp_gt_tf; - break; - case CODE_FOR_xscmpexpqp_unordered_kf: - icode = CODE_FOR_xscmpexpqp_unordered_tf; - break; - default: - break; - } - - /* In case of "#pragma target" changes, we initialize all builtins - but check for actual availability now, during expand time. For - invalid builtins, generate a normal call. */ - bifdata *bifaddr = &rs6000_builtin_info[uns_fcode]; - bif_enable e = bifaddr->enable; - - if (!(e == ENB_ALWAYS - || (e == ENB_P5 && TARGET_POPCNTB) - || (e == ENB_P6 && TARGET_CMPB) - || (e == ENB_P6_64 && TARGET_CMPB && TARGET_POWERPC64) - || (e == ENB_ALTIVEC && TARGET_ALTIVEC) - || (e == ENB_CELL && TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL) - || (e == ENB_VSX && TARGET_VSX) - || (e == ENB_P7 && TARGET_POPCNTD) - || (e == ENB_P7_64 && TARGET_POPCNTD && TARGET_POWERPC64) - || (e == ENB_P8 && TARGET_DIRECT_MOVE) - || (e == ENB_P8V && TARGET_P8_VECTOR) - || (e == ENB_P9 && TARGET_MODULO) - || (e == ENB_P9_64 && TARGET_MODULO && TARGET_POWERPC64) - || (e == ENB_P9V && TARGET_P9_VECTOR) - || (e == ENB_IEEE128_HW && TARGET_FLOAT128_HW) - || (e == ENB_DFP && TARGET_DFP) - || (e == ENB_CRYPTO && TARGET_CRYPTO) - || (e == ENB_HTM && TARGET_HTM) - || (e == ENB_P10 && TARGET_POWER10) - || (e == ENB_P10_64 && TARGET_POWER10 && TARGET_POWERPC64) - || (e == ENB_MMA && TARGET_MMA))) - { - rs6000_invalid_builtin (fcode); - return expand_call (exp, target, ignore); - } - - if (bif_is_nosoft (*bifaddr) - && rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT) - { - error ("%qs not supported with %<-msoft-float%>", - bifaddr->bifname); - return const0_rtx; - } - - if (bif_is_no32bit (*bifaddr) && TARGET_32BIT) - { - error ("%qs is not supported in 32-bit mode", bifaddr->bifname); - return const0_rtx; - } - - if (bif_is_ibmld (*bifaddr) && !FLOAT128_2REG_P (TFmode)) - { - error ("%qs requires %<long double%> to be IBM 128-bit format", - bifaddr->bifname); - return const0_rtx; - } - - if (bif_is_cpu (*bifaddr)) - return cpu_expand_builtin (fcode, exp, target); - - if (bif_is_init (*bifaddr)) - return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target); - - if (bif_is_set (*bifaddr)) - return altivec_expand_vec_set_builtin (exp); - - if (bif_is_extract (*bifaddr)) - return altivec_expand_vec_ext_builtin (exp, target); - - if (bif_is_predicate (*bifaddr)) - return altivec_expand_predicate_builtin (icode, exp, 
target); - - if (bif_is_htm (*bifaddr)) - return htm_expand_builtin (bifaddr, fcode, exp, target); - - if (bif_is_32bit (*bifaddr) && TARGET_32BIT) - { - if (fcode == RS6000_BIF_MFTB) - icode = CODE_FOR_rs6000_mftb_si; - else if (fcode == RS6000_BIF_BPERMD) - icode = CODE_FOR_bpermd_si; - else if (fcode == RS6000_BIF_DARN) - icode = CODE_FOR_darn_64_si; - else if (fcode == RS6000_BIF_DARN_32) - icode = CODE_FOR_darn_32_si; - else if (fcode == RS6000_BIF_DARN_RAW) - icode = CODE_FOR_darn_raw_si; - else - gcc_unreachable (); - } - - if (bif_is_endian (*bifaddr) && BYTES_BIG_ENDIAN) - { - if (fcode == RS6000_BIF_LD_ELEMREV_V1TI) - icode = CODE_FOR_vsx_load_v1ti; - else if (fcode == RS6000_BIF_LD_ELEMREV_V2DF) - icode = CODE_FOR_vsx_load_v2df; - else if (fcode == RS6000_BIF_LD_ELEMREV_V2DI) - icode = CODE_FOR_vsx_load_v2di; - else if (fcode == RS6000_BIF_LD_ELEMREV_V4SF) - icode = CODE_FOR_vsx_load_v4sf; - else if (fcode == RS6000_BIF_LD_ELEMREV_V4SI) - icode = CODE_FOR_vsx_load_v4si; - else if (fcode == RS6000_BIF_LD_ELEMREV_V8HI) - icode = CODE_FOR_vsx_load_v8hi; - else if (fcode == RS6000_BIF_LD_ELEMREV_V16QI) - icode = CODE_FOR_vsx_load_v16qi; - else if (fcode == RS6000_BIF_ST_ELEMREV_V1TI) - icode = CODE_FOR_vsx_store_v1ti; - else if (fcode == RS6000_BIF_ST_ELEMREV_V2DF) - icode = CODE_FOR_vsx_store_v2df; - else if (fcode == RS6000_BIF_ST_ELEMREV_V2DI) - icode = CODE_FOR_vsx_store_v2di; - else if (fcode == RS6000_BIF_ST_ELEMREV_V4SF) - icode = CODE_FOR_vsx_store_v4sf; - else if (fcode == RS6000_BIF_ST_ELEMREV_V4SI) - icode = CODE_FOR_vsx_store_v4si; - else if (fcode == RS6000_BIF_ST_ELEMREV_V8HI) - icode = CODE_FOR_vsx_store_v8hi; - else if (fcode == RS6000_BIF_ST_ELEMREV_V16QI) - icode = CODE_FOR_vsx_store_v16qi; - else - gcc_unreachable (); - } - - - /* TRUE iff the built-in function returns void. */ - bool void_func = TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node; - /* Position of first argument (0 for void-returning functions, else 1). */ - int k; - /* Modes for the return value, if any, and arguments. */ - const int MAX_BUILTIN_ARGS = 6; - machine_mode mode[MAX_BUILTIN_ARGS + 1]; - - if (void_func) - k = 0; - else - { - k = 1; - mode[0] = insn_data[icode].operand[0].mode; - } - - /* Tree expressions for each argument. */ - tree arg[MAX_BUILTIN_ARGS]; - /* RTL expressions for each argument. */ - rtx op[MAX_BUILTIN_ARGS]; - - int nargs = bifaddr->nargs; - gcc_assert (nargs <= MAX_BUILTIN_ARGS); - - - for (int i = 0; i < nargs; i++) - { - arg[i] = CALL_EXPR_ARG (exp, i); - if (arg[i] == error_mark_node) - return const0_rtx; - STRIP_NOPS (arg[i]); - op[i] = expand_normal (arg[i]); - /* We have a couple of pesky patterns that don't specify the mode... */ - mode[i+k] = insn_data[icode].operand[i+k].mode; - if (!mode[i+k]) - mode[i+k] = Pmode; - } - - /* Check for restricted constant arguments. 
*/ - for (int i = 0; i < 2; i++) - { - switch (bifaddr->restr[i]) - { - case RES_BITS: - { - size_t mask = 1; - mask <<= bifaddr->restr_val1[i]; - mask--; - tree restr_arg = arg[bifaddr->restr_opnd[i] - 1]; - STRIP_NOPS (restr_arg); - if (!(TREE_CODE (restr_arg) == INTEGER_CST - && (TREE_INT_CST_LOW (restr_arg) & ~mask) == 0)) - { - error ("argument %d must be a %d-bit unsigned literal", - bifaddr->restr_opnd[i], bifaddr->restr_val1[i]); - return CONST0_RTX (mode[0]); - } - break; - } - case RES_RANGE: - { - tree restr_arg = arg[bifaddr->restr_opnd[i] - 1]; - STRIP_NOPS (restr_arg); - if (!(TREE_CODE (restr_arg) == INTEGER_CST - && IN_RANGE (tree_to_shwi (restr_arg), - bifaddr->restr_val1[i], - bifaddr->restr_val2[i]))) - { - error ("argument %d must be a literal between %d and %d," - " inclusive", - bifaddr->restr_opnd[i], bifaddr->restr_val1[i], - bifaddr->restr_val2[i]); - return CONST0_RTX (mode[0]); - } - break; - } - case RES_VAR_RANGE: - { - tree restr_arg = arg[bifaddr->restr_opnd[i] - 1]; - STRIP_NOPS (restr_arg); - if (TREE_CODE (restr_arg) == INTEGER_CST - && !IN_RANGE (tree_to_shwi (restr_arg), - bifaddr->restr_val1[i], - bifaddr->restr_val2[i])) - { - error ("argument %d must be a variable or a literal " - "between %d and %d, inclusive", - bifaddr->restr_opnd[i], bifaddr->restr_val1[i], - bifaddr->restr_val2[i]); - return CONST0_RTX (mode[0]); - } - break; - } - case RES_VALUES: - { - tree restr_arg = arg[bifaddr->restr_opnd[i] - 1]; - STRIP_NOPS (restr_arg); - if (!(TREE_CODE (restr_arg) == INTEGER_CST - && (tree_to_shwi (restr_arg) == bifaddr->restr_val1[i] - || tree_to_shwi (restr_arg) == bifaddr->restr_val2[i]))) - { - error ("argument %d must be either a literal %d or a " - "literal %d", - bifaddr->restr_opnd[i], bifaddr->restr_val1[i], - bifaddr->restr_val2[i]); - return CONST0_RTX (mode[0]); - } - break; - } - default: - case RES_NONE: - break; - } - } - - if (bif_is_ldstmask (*bifaddr)) - return rs6000_expand_ldst_mask (target, arg[0]); - - if (bif_is_stvec (*bifaddr)) - { - if (bif_is_reve (*bifaddr)) - icode = elemrev_icode (fcode); - return stv_expand_builtin (icode, op, mode[0], mode[1]); - } - - if (bif_is_ldvec (*bifaddr)) - { - if (bif_is_reve (*bifaddr)) - icode = elemrev_icode (fcode); - return ldv_expand_builtin (target, icode, op, mode[0]); - } - - if (bif_is_lxvrse (*bifaddr)) - return lxvrse_expand_builtin (target, icode, op, mode[0], mode[1]); - - if (bif_is_lxvrze (*bifaddr)) - return lxvrze_expand_builtin (target, icode, op, mode[0], mode[1]); - - if (bif_is_mma (*bifaddr)) - return mma_expand_builtin (exp, target, icode, fcode); - - if (fcode == RS6000_BIF_PACK_IF - && TARGET_LONG_DOUBLE_128 - && !TARGET_IEEEQUAD) - { - icode = CODE_FOR_packtf; - fcode = RS6000_BIF_PACK_TF; - uns_fcode = (size_t) fcode; - } - else if (fcode == RS6000_BIF_UNPACK_IF - && TARGET_LONG_DOUBLE_128 - && !TARGET_IEEEQUAD) - { - icode = CODE_FOR_unpacktf; - fcode = RS6000_BIF_UNPACK_TF; - uns_fcode = (size_t) fcode; - } - - if (TREE_TYPE (TREE_TYPE (fndecl)) == void_type_node) - target = NULL_RTX; - else if (target == 0 - || GET_MODE (target) != mode[0] - || !insn_data[icode].operand[0].predicate (target, mode[0])) - target = gen_reg_rtx (mode[0]); - - for (int i = 0; i < nargs; i++) - if (!insn_data[icode].operand[i+k].predicate (op[i], mode[i+k])) - op[i] = copy_to_mode_reg (mode[i+k], op[i]); - - rtx pat; - - switch (nargs) - { - case 0: - pat = (void_func - ? GEN_FCN (icode) () - : GEN_FCN (icode) (target)); - break; - case 1: - pat = (void_func - ? 
GEN_FCN (icode) (op[0]) - : GEN_FCN (icode) (target, op[0])); - break; - case 2: - pat = (void_func - ? GEN_FCN (icode) (op[0], op[1]) - : GEN_FCN (icode) (target, op[0], op[1])); - break; - case 3: - pat = (void_func - ? GEN_FCN (icode) (op[0], op[1], op[2]) - : GEN_FCN (icode) (target, op[0], op[1], op[2])); - break; - case 4: - pat = (void_func - ? GEN_FCN (icode) (op[0], op[1], op[2], op[3]) - : GEN_FCN (icode) (target, op[0], op[1], op[2], op[3])); - break; - case 5: - pat = (void_func - ? GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]) - : GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4])); - break; - case 6: - pat = (void_func - ? GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]) - : GEN_FCN (icode) (target, op[0], op[1], - op[2], op[3], op[4], op[5])); - break; - default: - gcc_assert (MAX_BUILTIN_ARGS == 6); - gcc_unreachable (); - } - - if (!pat) - return 0; - - emit_insn (pat); - return target; -} - -/* Create a builtin vector type with a name. Taking care not to give - the canonical type a name. */ - -static tree -rs6000_vector_type (const char *name, tree elt_type, unsigned num_elts) -{ - tree result = build_vector_type (elt_type, num_elts); - - /* Copy so we don't give the canonical type a name. */ - result = build_variant_type_copy (result); - - add_builtin_type (name, result); - - return result; -} - -void -rs6000_init_builtins (void) -{ - tree tdecl; - tree t; - - if (TARGET_DEBUG_BUILTIN) - fprintf (stderr, "rs6000_init_builtins%s%s\n", - (TARGET_ALTIVEC) ? ", altivec" : "", - (TARGET_VSX) ? ", vsx" : ""); - - V2DI_type_node = rs6000_vector_type ("__vector long long", - long_long_integer_type_node, 2); - ptr_V2DI_type_node - = build_pointer_type (build_qualified_type (V2DI_type_node, - TYPE_QUAL_CONST)); - - V2DF_type_node = rs6000_vector_type ("__vector double", double_type_node, 2); - ptr_V2DF_type_node - = build_pointer_type (build_qualified_type (V2DF_type_node, - TYPE_QUAL_CONST)); - - V4SI_type_node = rs6000_vector_type ("__vector signed int", - intSI_type_node, 4); - ptr_V4SI_type_node - = build_pointer_type (build_qualified_type (V4SI_type_node, - TYPE_QUAL_CONST)); - - V4SF_type_node = rs6000_vector_type ("__vector float", float_type_node, 4); - ptr_V4SF_type_node - = build_pointer_type (build_qualified_type (V4SF_type_node, - TYPE_QUAL_CONST)); - - V8HI_type_node = rs6000_vector_type ("__vector signed short", - intHI_type_node, 8); - ptr_V8HI_type_node - = build_pointer_type (build_qualified_type (V8HI_type_node, - TYPE_QUAL_CONST)); - - V16QI_type_node = rs6000_vector_type ("__vector signed char", - intQI_type_node, 16); - ptr_V16QI_type_node - = build_pointer_type (build_qualified_type (V16QI_type_node, - TYPE_QUAL_CONST)); - - unsigned_V16QI_type_node = rs6000_vector_type ("__vector unsigned char", - unsigned_intQI_type_node, 16); - ptr_unsigned_V16QI_type_node - = build_pointer_type (build_qualified_type (unsigned_V16QI_type_node, - TYPE_QUAL_CONST)); - - unsigned_V8HI_type_node = rs6000_vector_type ("__vector unsigned short", - unsigned_intHI_type_node, 8); - ptr_unsigned_V8HI_type_node - = build_pointer_type (build_qualified_type (unsigned_V8HI_type_node, - TYPE_QUAL_CONST)); - - unsigned_V4SI_type_node = rs6000_vector_type ("__vector unsigned int", - unsigned_intSI_type_node, 4); - ptr_unsigned_V4SI_type_node - = build_pointer_type (build_qualified_type (unsigned_V4SI_type_node, - TYPE_QUAL_CONST)); - - unsigned_V2DI_type_node - = rs6000_vector_type ("__vector unsigned long long", - long_long_unsigned_type_node, 2); - - 
ptr_unsigned_V2DI_type_node - = build_pointer_type (build_qualified_type (unsigned_V2DI_type_node, - TYPE_QUAL_CONST)); - - opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4); - - const_str_type_node - = build_pointer_type (build_qualified_type (char_type_node, - TYPE_QUAL_CONST)); - - /* We use V1TI mode as a special container to hold __int128_t items that - must live in VSX registers. */ - if (intTI_type_node) - { - V1TI_type_node = rs6000_vector_type ("__vector __int128", - intTI_type_node, 1); - ptr_V1TI_type_node - = build_pointer_type (build_qualified_type (V1TI_type_node, - TYPE_QUAL_CONST)); - unsigned_V1TI_type_node - = rs6000_vector_type ("__vector unsigned __int128", - unsigned_intTI_type_node, 1); - ptr_unsigned_V1TI_type_node - = build_pointer_type (build_qualified_type (unsigned_V1TI_type_node, - TYPE_QUAL_CONST)); - } - - /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...' - types, especially in C++ land. Similarly, 'vector pixel' is distinct from - 'vector unsigned short'. */ - - bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node); - bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node); - bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node); - bool_long_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node); - pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node); - - long_integer_type_internal_node = long_integer_type_node; - long_unsigned_type_internal_node = long_unsigned_type_node; - long_long_integer_type_internal_node = long_long_integer_type_node; - long_long_unsigned_type_internal_node = long_long_unsigned_type_node; - intQI_type_internal_node = intQI_type_node; - uintQI_type_internal_node = unsigned_intQI_type_node; - intHI_type_internal_node = intHI_type_node; - uintHI_type_internal_node = unsigned_intHI_type_node; - intSI_type_internal_node = intSI_type_node; - uintSI_type_internal_node = unsigned_intSI_type_node; - intDI_type_internal_node = intDI_type_node; - uintDI_type_internal_node = unsigned_intDI_type_node; - intTI_type_internal_node = intTI_type_node; - uintTI_type_internal_node = unsigned_intTI_type_node; - float_type_internal_node = float_type_node; - double_type_internal_node = double_type_node; - long_double_type_internal_node = long_double_type_node; - dfloat64_type_internal_node = dfloat64_type_node; - dfloat128_type_internal_node = dfloat128_type_node; - void_type_internal_node = void_type_node; - - ptr_intQI_type_node - = build_pointer_type (build_qualified_type (intQI_type_internal_node, - TYPE_QUAL_CONST)); - ptr_uintQI_type_node - = build_pointer_type (build_qualified_type (uintQI_type_internal_node, - TYPE_QUAL_CONST)); - ptr_intHI_type_node - = build_pointer_type (build_qualified_type (intHI_type_internal_node, - TYPE_QUAL_CONST)); - ptr_uintHI_type_node - = build_pointer_type (build_qualified_type (uintHI_type_internal_node, - TYPE_QUAL_CONST)); - ptr_intSI_type_node - = build_pointer_type (build_qualified_type (intSI_type_internal_node, - TYPE_QUAL_CONST)); - ptr_uintSI_type_node - = build_pointer_type (build_qualified_type (uintSI_type_internal_node, - TYPE_QUAL_CONST)); - ptr_intDI_type_node - = build_pointer_type (build_qualified_type (intDI_type_internal_node, - TYPE_QUAL_CONST)); - ptr_uintDI_type_node - = build_pointer_type (build_qualified_type (uintDI_type_internal_node, - TYPE_QUAL_CONST)); - ptr_intTI_type_node - = build_pointer_type (build_qualified_type (intTI_type_internal_node, - 
TYPE_QUAL_CONST)); - ptr_uintTI_type_node - = build_pointer_type (build_qualified_type (uintTI_type_internal_node, - TYPE_QUAL_CONST)); - - t = build_qualified_type (long_integer_type_internal_node, TYPE_QUAL_CONST); - ptr_long_integer_type_node = build_pointer_type (t); - - t = build_qualified_type (long_unsigned_type_internal_node, TYPE_QUAL_CONST); - ptr_long_unsigned_type_node = build_pointer_type (t); - - ptr_float_type_node - = build_pointer_type (build_qualified_type (float_type_internal_node, - TYPE_QUAL_CONST)); - ptr_double_type_node - = build_pointer_type (build_qualified_type (double_type_internal_node, - TYPE_QUAL_CONST)); - ptr_long_double_type_node - = build_pointer_type (build_qualified_type (long_double_type_internal_node, - TYPE_QUAL_CONST)); - if (dfloat64_type_node) - { - t = build_qualified_type (dfloat64_type_internal_node, TYPE_QUAL_CONST); - ptr_dfloat64_type_node = build_pointer_type (t); - } - else - ptr_dfloat64_type_node = NULL; - - if (dfloat128_type_node) - { - t = build_qualified_type (dfloat128_type_internal_node, TYPE_QUAL_CONST); - ptr_dfloat128_type_node = build_pointer_type (t); - } - else - ptr_dfloat128_type_node = NULL; - - t = build_qualified_type (long_long_integer_type_internal_node, - TYPE_QUAL_CONST); - ptr_long_long_integer_type_node = build_pointer_type (t); - - t = build_qualified_type (long_long_unsigned_type_internal_node, - TYPE_QUAL_CONST); - ptr_long_long_unsigned_type_node = build_pointer_type (t); - - /* 128-bit floating point support. KFmode is IEEE 128-bit floating point. - IFmode is the IBM extended 128-bit format that is a pair of doubles. - TFmode will be either IEEE 128-bit floating point or the IBM double-double - format that uses a pair of doubles, depending on the switches and - defaults. - - If we don't support for either 128-bit IBM double double or IEEE 128-bit - floating point, we need make sure the type is non-zero or else self-test - fails during bootstrap. - - Always create __ibm128 as a separate type, even if the current long double - format is IBM extended double. - - For IEEE 128-bit floating point, always create the type __ieee128. If the - user used -mfloat128, rs6000-c.cc will create a define from __float128 to - __ieee128. */ - if (TARGET_FLOAT128_TYPE) - { - if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128) - ibm128_float_type_node = long_double_type_node; - else - { - ibm128_float_type_node = make_node (REAL_TYPE); - TYPE_PRECISION (ibm128_float_type_node) = 128; - SET_TYPE_MODE (ibm128_float_type_node, IFmode); - layout_type (ibm128_float_type_node); - } - t = build_qualified_type (ibm128_float_type_node, TYPE_QUAL_CONST); - ptr_ibm128_float_type_node = build_pointer_type (t); - lang_hooks.types.register_builtin_type (ibm128_float_type_node, - "__ibm128"); - - if (TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128) - ieee128_float_type_node = long_double_type_node; - else - ieee128_float_type_node = float128_type_node; - t = build_qualified_type (ieee128_float_type_node, TYPE_QUAL_CONST); - ptr_ieee128_float_type_node = build_pointer_type (t); - lang_hooks.types.register_builtin_type (ieee128_float_type_node, - "__ieee128"); - } - - else - ieee128_float_type_node = ibm128_float_type_node = long_double_type_node; - - /* Vector pair and vector quad support. 
*/ - vector_pair_type_node = make_node (OPAQUE_TYPE); - SET_TYPE_MODE (vector_pair_type_node, OOmode); - TYPE_SIZE (vector_pair_type_node) = bitsize_int (GET_MODE_BITSIZE (OOmode)); - TYPE_PRECISION (vector_pair_type_node) = GET_MODE_BITSIZE (OOmode); - TYPE_SIZE_UNIT (vector_pair_type_node) = size_int (GET_MODE_SIZE (OOmode)); - SET_TYPE_ALIGN (vector_pair_type_node, 256); - TYPE_USER_ALIGN (vector_pair_type_node) = 0; - lang_hooks.types.register_builtin_type (vector_pair_type_node, - "__vector_pair"); - t = build_qualified_type (vector_pair_type_node, TYPE_QUAL_CONST); - ptr_vector_pair_type_node = build_pointer_type (t); - - vector_quad_type_node = make_node (OPAQUE_TYPE); - SET_TYPE_MODE (vector_quad_type_node, XOmode); - TYPE_SIZE (vector_quad_type_node) = bitsize_int (GET_MODE_BITSIZE (XOmode)); - TYPE_PRECISION (vector_quad_type_node) = GET_MODE_BITSIZE (XOmode); - TYPE_SIZE_UNIT (vector_quad_type_node) = size_int (GET_MODE_SIZE (XOmode)); - SET_TYPE_ALIGN (vector_quad_type_node, 512); - TYPE_USER_ALIGN (vector_quad_type_node) = 0; - lang_hooks.types.register_builtin_type (vector_quad_type_node, - "__vector_quad"); - t = build_qualified_type (vector_quad_type_node, TYPE_QUAL_CONST); - ptr_vector_quad_type_node = build_pointer_type (t); - - /* Initialize the modes for builtin_function_type, mapping a machine mode to - tree type node. */ - builtin_mode_to_type[QImode][0] = integer_type_node; - builtin_mode_to_type[QImode][1] = unsigned_intSI_type_node; - builtin_mode_to_type[HImode][0] = integer_type_node; - builtin_mode_to_type[HImode][1] = unsigned_intSI_type_node; - builtin_mode_to_type[SImode][0] = intSI_type_node; - builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node; - builtin_mode_to_type[DImode][0] = intDI_type_node; - builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node; - builtin_mode_to_type[TImode][0] = intTI_type_node; - builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node; - builtin_mode_to_type[SFmode][0] = float_type_node; - builtin_mode_to_type[DFmode][0] = double_type_node; - builtin_mode_to_type[IFmode][0] = ibm128_float_type_node; - builtin_mode_to_type[KFmode][0] = ieee128_float_type_node; - builtin_mode_to_type[TFmode][0] = long_double_type_node; - builtin_mode_to_type[DDmode][0] = dfloat64_type_node; - builtin_mode_to_type[TDmode][0] = dfloat128_type_node; - builtin_mode_to_type[V1TImode][0] = V1TI_type_node; - builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node; - builtin_mode_to_type[V2DImode][0] = V2DI_type_node; - builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node; - builtin_mode_to_type[V2DFmode][0] = V2DF_type_node; - builtin_mode_to_type[V4SImode][0] = V4SI_type_node; - builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node; - builtin_mode_to_type[V4SFmode][0] = V4SF_type_node; - builtin_mode_to_type[V8HImode][0] = V8HI_type_node; - builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node; - builtin_mode_to_type[V16QImode][0] = V16QI_type_node; - builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node; - builtin_mode_to_type[OOmode][1] = vector_pair_type_node; - builtin_mode_to_type[XOmode][1] = vector_quad_type_node; - - tdecl = add_builtin_type ("__bool char", bool_char_type_node); - TYPE_NAME (bool_char_type_node) = tdecl; - - tdecl = add_builtin_type ("__bool short", bool_short_type_node); - TYPE_NAME (bool_short_type_node) = tdecl; - - tdecl = add_builtin_type ("__bool int", bool_int_type_node); - TYPE_NAME (bool_int_type_node) = tdecl; - - tdecl = add_builtin_type ("__pixel", pixel_type_node); - 
TYPE_NAME (pixel_type_node) = tdecl; - - bool_V16QI_type_node = rs6000_vector_type ("__vector __bool char", - bool_char_type_node, 16); - ptr_bool_V16QI_type_node - = build_pointer_type (build_qualified_type (bool_V16QI_type_node, - TYPE_QUAL_CONST)); - - bool_V8HI_type_node = rs6000_vector_type ("__vector __bool short", - bool_short_type_node, 8); - ptr_bool_V8HI_type_node - = build_pointer_type (build_qualified_type (bool_V8HI_type_node, - TYPE_QUAL_CONST)); - - bool_V4SI_type_node = rs6000_vector_type ("__vector __bool int", - bool_int_type_node, 4); - ptr_bool_V4SI_type_node - = build_pointer_type (build_qualified_type (bool_V4SI_type_node, - TYPE_QUAL_CONST)); - - bool_V2DI_type_node = rs6000_vector_type (TARGET_POWERPC64 - ? "__vector __bool long" - : "__vector __bool long long", - bool_long_long_type_node, 2); - ptr_bool_V2DI_type_node - = build_pointer_type (build_qualified_type (bool_V2DI_type_node, - TYPE_QUAL_CONST)); - - bool_V1TI_type_node = rs6000_vector_type ("__vector __bool __int128", - intTI_type_node, 1); - ptr_bool_V1TI_type_node - = build_pointer_type (build_qualified_type (bool_V1TI_type_node, - TYPE_QUAL_CONST)); - - pixel_V8HI_type_node = rs6000_vector_type ("__vector __pixel", - pixel_type_node, 8); - ptr_pixel_V8HI_type_node - = build_pointer_type (build_qualified_type (pixel_V8HI_type_node, - TYPE_QUAL_CONST)); - pcvoid_type_node - = build_pointer_type (build_qualified_type (void_type_node, - TYPE_QUAL_CONST)); - - /* Execute the autogenerated initialization code for builtins. */ - rs6000_init_generated_builtins (); - - if (TARGET_DEBUG_BUILTIN) - { - fprintf (stderr, "\nAutogenerated built-in functions:\n\n"); - for (int i = 1; i < (int) RS6000_BIF_MAX; i++) - { - bif_enable e = rs6000_builtin_info[i].enable; - if (e == ENB_P5 && !TARGET_POPCNTB) - continue; - if (e == ENB_P6 && !TARGET_CMPB) - continue; - if (e == ENB_P6_64 && !(TARGET_CMPB && TARGET_POWERPC64)) - continue; - if (e == ENB_ALTIVEC && !TARGET_ALTIVEC) - continue; - if (e == ENB_VSX && !TARGET_VSX) - continue; - if (e == ENB_P7 && !TARGET_POPCNTD) - continue; - if (e == ENB_P7_64 && !(TARGET_POPCNTD && TARGET_POWERPC64)) - continue; - if (e == ENB_P8 && !TARGET_DIRECT_MOVE) - continue; - if (e == ENB_P8V && !TARGET_P8_VECTOR) - continue; - if (e == ENB_P9 && !TARGET_MODULO) - continue; - if (e == ENB_P9_64 && !(TARGET_MODULO && TARGET_POWERPC64)) - continue; - if (e == ENB_P9V && !TARGET_P9_VECTOR) - continue; - if (e == ENB_IEEE128_HW && !TARGET_FLOAT128_HW) - continue; - if (e == ENB_DFP && !TARGET_DFP) - continue; - if (e == ENB_CRYPTO && !TARGET_CRYPTO) - continue; - if (e == ENB_HTM && !TARGET_HTM) - continue; - if (e == ENB_P10 && !TARGET_POWER10) - continue; - if (e == ENB_P10_64 && !(TARGET_POWER10 && TARGET_POWERPC64)) - continue; - if (e == ENB_MMA && !TARGET_MMA) - continue; - tree fntype = rs6000_builtin_info[i].fntype; - tree t = TREE_TYPE (fntype); - fprintf (stderr, "%s %s (", rs6000_type_string (t), - rs6000_builtin_info[i].bifname); - t = TYPE_ARG_TYPES (fntype); - while (t && TREE_VALUE (t) != void_type_node) - { - fprintf (stderr, "%s", - rs6000_type_string (TREE_VALUE (t))); - t = TREE_CHAIN (t); - if (t && TREE_VALUE (t) != void_type_node) - fprintf (stderr, ", "); - } - fprintf (stderr, "); %s [%4d]\n", - rs6000_builtin_info[i].attr_string, (int) i); - } - fprintf (stderr, "\nEnd autogenerated built-in functions.\n\n\n"); - } - - if (TARGET_XCOFF) - { - /* AIX libm provides clog as __clog. 
*/ - if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE) - set_user_assembler_name (tdecl, "__clog"); - - /* When long double is 64 bit, some long double builtins of libc - functions (like __builtin_frexpl) must call the double version - (frexp) not the long double version (frexpl) that expects a 128 bit - argument. */ - if (! TARGET_LONG_DOUBLE_128) - { - if ((tdecl = builtin_decl_explicit (BUILT_IN_FMODL)) != NULL_TREE) - set_user_assembler_name (tdecl, "fmod"); - if ((tdecl = builtin_decl_explicit (BUILT_IN_FREXPL)) != NULL_TREE) - set_user_assembler_name (tdecl, "frexp"); - if ((tdecl = builtin_decl_explicit (BUILT_IN_LDEXPL)) != NULL_TREE) - set_user_assembler_name (tdecl, "ldexp"); - if ((tdecl = builtin_decl_explicit (BUILT_IN_MODFL)) != NULL_TREE) - set_user_assembler_name (tdecl, "modf"); - } - } - - altivec_builtin_mask_for_load - = rs6000_builtin_decls[RS6000_BIF_MASK_FOR_LOAD]; - -#ifdef SUBTARGET_INIT_BUILTINS - SUBTARGET_INIT_BUILTINS; -#endif - - return; -} - -tree -rs6000_builtin_decl (unsigned code, bool /* initialize_p */) -{ - rs6000_gen_builtins fcode = (rs6000_gen_builtins) code; - - if (fcode >= RS6000_OVLD_MAX) - return error_mark_node; - - return rs6000_builtin_decls[code]; -} - /* Return the internal arg pointer used for function incoming arguments. When -fsplit-stack, the arg pointer is r12 so we need to copy it to a pseudo in order for it to be preserved over calls diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def index 325b219..963947f 100644 --- a/gcc/config/rs6000/rs6000-cpus.def +++ b/gcc/config/rs6000/rs6000-cpus.def @@ -85,13 +85,7 @@ #define ISA_3_1_MASKS_SERVER (ISA_3_0_MASKS_SERVER \ | OPTION_MASK_POWER10 \ | OTHER_POWER10_MASKS \ - | OPTION_MASK_P10_FUSION \ - | OPTION_MASK_P10_FUSION_LD_CMPI \ - | OPTION_MASK_P10_FUSION_2LOGICAL \ - | OPTION_MASK_P10_FUSION_LOGADD \ - | OPTION_MASK_P10_FUSION_ADDLOG \ - | OPTION_MASK_P10_FUSION_2ADD \ - | OPTION_MASK_P10_FUSION_2STORE) + | OPTION_MASK_P10_FUSION) /* Flags that need to be turned off if -mno-power9-vector. 
*/ #define OTHER_P9_VECTOR_MASKS (OPTION_MASK_FLOAT128_HW \ @@ -139,12 +133,6 @@ | OPTION_MASK_FPRND \ | OPTION_MASK_POWER10 \ | OPTION_MASK_P10_FUSION \ - | OPTION_MASK_P10_FUSION_LD_CMPI \ - | OPTION_MASK_P10_FUSION_2LOGICAL \ - | OPTION_MASK_P10_FUSION_LOGADD \ - | OPTION_MASK_P10_FUSION_ADDLOG \ - | OPTION_MASK_P10_FUSION_2ADD \ - | OPTION_MASK_P10_FUSION_2STORE \ | OPTION_MASK_HTM \ | OPTION_MASK_ISEL \ | OPTION_MASK_MFCRF \ diff --git a/gcc/config/rs6000/rs6000-gen-builtins.cc b/gcc/config/rs6000/rs6000-gen-builtins.cc index 6a0858a..629ead9 100644 --- a/gcc/config/rs6000/rs6000-gen-builtins.cc +++ b/gcc/config/rs6000/rs6000-gen-builtins.cc @@ -2255,20 +2255,20 @@ write_decls (void) fprintf (header_file, "};\n\n"); fprintf (header_file, "#define PPC_MAXRESTROPNDS 3\n"); - fprintf (header_file, "struct GTY((user)) bifdata\n"); + fprintf (header_file, "struct GTY(()) bifdata\n"); fprintf (header_file, "{\n"); - fprintf (header_file, " const char *bifname;\n"); - fprintf (header_file, " bif_enable enable;\n"); + fprintf (header_file, " const char *GTY((skip(\"\"))) bifname;\n"); + fprintf (header_file, " bif_enable GTY((skip(\"\"))) enable;\n"); fprintf (header_file, " tree fntype;\n"); - fprintf (header_file, " insn_code icode;\n"); + fprintf (header_file, " insn_code GTY((skip(\"\"))) icode;\n"); fprintf (header_file, " int nargs;\n"); fprintf (header_file, " int bifattrs;\n"); fprintf (header_file, " int restr_opnd[PPC_MAXRESTROPNDS];\n"); - fprintf (header_file, " restriction restr[PPC_MAXRESTROPNDS];\n"); + fprintf (header_file, " restriction GTY((skip(\"\"))) restr[PPC_MAXRESTROPNDS];\n"); fprintf (header_file, " int restr_val1[PPC_MAXRESTROPNDS];\n"); fprintf (header_file, " int restr_val2[PPC_MAXRESTROPNDS];\n"); - fprintf (header_file, " const char *attr_string;\n"); - fprintf (header_file, " rs6000_gen_builtins assoc_bif;\n"); + fprintf (header_file, " const char *GTY((skip(\"\"))) attr_string;\n"); + fprintf (header_file, " rs6000_gen_builtins GTY((skip(\"\"))) assoc_bif;\n"); fprintf (header_file, "};\n\n"); fprintf (header_file, "#define bif_init_bit\t\t(0x00000001)\n"); @@ -2343,21 +2343,15 @@ write_decls (void) "#define bif_is_ibmld(x)\t((x).bifattrs & bif_ibmld_bit)\n"); fprintf (header_file, "\n"); - /* #### Cannot mark this as a GC root because only pointer types can - be marked as GTY((user)) and be GC roots. All trees in here are - kept alive by other globals, so not a big deal. Alternatively, - we could change the enum fields to ints and cast them in and out - to avoid requiring a GTY((user)) designation, but that seems - unnecessarily gross. 
*/ fprintf (header_file, - "extern bifdata rs6000_builtin_info[RS6000_BIF_MAX];\n\n"); + "extern GTY(()) bifdata rs6000_builtin_info[RS6000_BIF_MAX];\n\n"); - fprintf (header_file, "struct GTY((user)) ovlddata\n"); + fprintf (header_file, "struct GTY(()) ovlddata\n"); fprintf (header_file, "{\n"); - fprintf (header_file, " const char *bifname;\n"); - fprintf (header_file, " rs6000_gen_builtins bifid;\n"); + fprintf (header_file, " const char *GTY((skip(\"\"))) bifname;\n"); + fprintf (header_file, " rs6000_gen_builtins GTY((skip(\"\"))) bifid;\n"); fprintf (header_file, " tree fntype;\n"); - fprintf (header_file, " ovlddata *next;\n"); + fprintf (header_file, " ovlddata *GTY((skip(\"\"))) next;\n"); fprintf (header_file, "};\n\n"); fprintf (header_file, "struct ovldrecord\n"); @@ -2367,14 +2361,7 @@ write_decls (void) fprintf (header_file, "};\n\n"); fprintf (header_file, - "/* #### Cannot mark this as a GC root because only pointer\n" - " types can be marked as GTY((user)) and be GC roots. All\n" - " trees in here are kept alive by other globals, so not a big\n" - " deal. Alternatively, we could change the enum fields to ints\n" - " and cast them in and out to avoid requiring a GTY((user))\n" - " designation, but that seems unnecessarily gross. */\n"); - fprintf (header_file, - "extern ovlddata rs6000_instance_info[RS6000_INST_MAX];\n"); + "extern GTY(()) ovlddata rs6000_instance_info[RS6000_INST_MAX];\n"); fprintf (header_file, "extern ovldrecord rs6000_overload_info[];\n\n"); fprintf (header_file, "extern void rs6000_init_generated_builtins ();\n\n"); @@ -2383,33 +2370,6 @@ write_decls (void) fprintf (header_file, "extern tree rs6000_builtin_decl (unsigned, " "bool ATTRIBUTE_UNUSED);\n\n"); - fprintf (header_file, - "extern void gt_ggc_mx (bifdata *bd);\n"); - fprintf (header_file, - "extern void gt_pch_nx (bifdata *bd);\n"); - fprintf (header_file, - "extern void gt_pch_nx (bifdata *bd, gt_pointer_operator op, " - "void *cookie);\n"); - fprintf (header_file, - "extern void gt_ggc_mx (ovlddata *od);\n"); - fprintf (header_file, - "extern void gt_pch_nx (ovlddata *od);\n"); - fprintf (header_file, - "extern void gt_pch_nx (ovlddata *od, gt_pointer_operator op, " - "void *cookie);\n"); -} - -/* Callback functions used for generating trees for function types. */ -void -write_extern_fntype (char *str) -{ - fprintf (header_file, "extern GTY(()) tree %s;\n", str); -} - -void -write_fntype (char *str) -{ - fprintf (init_file, "tree %s;\n", str); } /* Comparator for bsearch on the type map. */ @@ -2452,12 +2412,17 @@ write_fntype_init (char *str) /* Avoid side effects of strtok on the original string by using a copy. */ char *buf = strdup (str); + if (tf_found || dfp_found) + fprintf (init_file, " tree %s = NULL_TREE;\n", buf); + else + fprintf (init_file, " tree "); + if (tf_found) - fprintf (init_file, " if (float128_type_node)\n "); + fprintf (init_file, " if (float128_type_node)\n "); else if (dfp_found) - fprintf (init_file, " if (dfloat64_type_node)\n "); + fprintf (init_file, " if (dfloat64_type_node)\n "); - fprintf (init_file, " %s\n = build_function_type_list (", buf); + fprintf (init_file, "%s\n = build_function_type_list (", buf); tok = strtok (buf, "_"); write_type_node (tok, tf_found || dfp_found); tok = strtok (0, "_"); @@ -2491,8 +2456,6 @@ write_header_file (void) write_decls (); - /* Write function type list declarators to the header file. 
*/ - rbt_inorder_callback (&fntype_rbt, fntype_rbt.rbt_root, write_extern_fntype); fprintf (header_file, "\n"); fprintf (header_file, "\n#endif\n"); @@ -2846,9 +2809,6 @@ write_init_file (void) write_bif_static_init (); write_ovld_static_init (); - rbt_inorder_callback (&fntype_rbt, fntype_rbt.rbt_root, write_fntype); - fprintf (init_file, "\n"); - fprintf (init_file, "void\n"); fprintf (init_file, "rs6000_init_generated_builtins ()\n"); fprintf (init_file, "{\n"); @@ -2868,33 +2828,6 @@ write_init_file (void) fprintf (init_file, "}\n\n"); - fprintf (init_file, - "void gt_ggc_mx (bifdata *bd)\n"); - fprintf (init_file, - "{\n gt_ggc_mx (bd->fntype);\n}\n\n"); - fprintf (init_file, - "void gt_pch_nx (bifdata *bd)\n"); - fprintf (init_file, - "{\n gt_pch_nx (bd->fntype);\n}\n\n"); - fprintf (init_file, - "void gt_pch_nx (bifdata *bd, gt_pointer_operator op, " - "void *cookie)\n"); - fprintf (init_file, - "{\n op(&(bd->fntype), NULL, cookie);\n}\n\n"); - fprintf (init_file, - "void gt_ggc_mx (ovlddata *od)\n"); - fprintf (init_file, - "{\n gt_ggc_mx (od->fntype);\n}\n\n"); - fprintf (init_file, - "void gt_pch_nx (ovlddata *od)\n"); - fprintf (init_file, - "{\n gt_pch_nx (od->fntype);\n}\n\n"); - fprintf (init_file, - "void gt_pch_nx (ovlddata *od, gt_pointer_operator op, " - "void *cookie)\n"); - fprintf (init_file, - "{\n op(&(od->fntype), NULL, cookie);\n}\n"); - return 1; } diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def index 7d030ab..44e2945 100644 --- a/gcc/config/rs6000/rs6000-overload.def +++ b/gcc/config/rs6000/rs6000-overload.def @@ -34,6 +34,10 @@ ; in rs6000-vecdefines.h. If no #define is desired, the <abi-name> should ; be replaced with the token SKIP. ; +; The <ifdef> token should be used sparingly, because a #define can't be +; overridden by __attribute__((target)). It is appropriate for cases +; where a target override isn't a possibility, like __PPU__. +; ; Each function entry has two lines. The first line is a prototype line. ; See rs6000-builtin-new.def for a description of the prototype line. 
; A prototype line in this file differs in that it doesn't have an @@ -205,7 +209,7 @@ vd __builtin_vec_abs (vd); XVABSDP -[VEC_ABSD, vec_absd, __builtin_vec_vadu, _ARCH_PWR9] +[VEC_ABSD, vec_absd, __builtin_vec_vadu] vuc __builtin_vec_vadu (vuc, vuc); VADUB vus __builtin_vec_vadu (vus, vus); @@ -503,7 +507,7 @@ vui __builtin_vec_avg (vui, vui); VAVGUW -[VEC_BLENDV, vec_blendv, __builtin_vec_xxblend, _ARCH_PWR10] +[VEC_BLENDV, vec_blendv, __builtin_vec_xxblend] vsc __builtin_vec_xxblend (vsc, vsc, vuc); VXXBLEND_V16QI VXXBLEND_VSC vuc __builtin_vec_xxblend (vuc, vuc, vuc); @@ -525,7 +529,7 @@ vd __builtin_vec_xxblend (vd, vd, vull); VXXBLEND_V2DF -[VEC_BPERM, vec_bperm, __builtin_vec_vbperm_api, _ARCH_PWR8] +[VEC_BPERM, vec_bperm, __builtin_vec_vbperm_api] vull __builtin_vec_vbperm_api (vull, vuc); VBPERMD VBPERMD_VULL vull __builtin_vec_vbperm_api (vuq, vuc); @@ -541,25 +545,25 @@ vd __builtin_vec_ceil (vd); XVRDPIP -[VEC_CFUGE, vec_cfuge, __builtin_vec_cfuge, _ARCH_PWR10] +[VEC_CFUGE, vec_cfuge, __builtin_vec_cfuge] vull __builtin_vec_cfuge (vull, vull); VCFUGED -[VEC_CIPHER_BE, vec_cipher_be, __builtin_vec_vcipher_be, _ARCH_PWR8] +[VEC_CIPHER_BE, vec_cipher_be, __builtin_vec_vcipher_be] vuc __builtin_vec_vcipher_be (vuc, vuc); VCIPHER_BE -[VEC_CIPHERLAST_BE, vec_cipherlast_be, __builtin_vec_vcipherlast_be, _ARCH_PWR8] +[VEC_CIPHERLAST_BE, vec_cipherlast_be, __builtin_vec_vcipherlast_be] vuc __builtin_vec_vcipherlast_be (vuc, vuc); VCIPHERLAST_BE -[VEC_CLRL, vec_clrl, __builtin_vec_clrl, _ARCH_PWR10] +[VEC_CLRL, vec_clrl, __builtin_vec_clrl] vsc __builtin_vec_clrl (vsc, unsigned int); VCLRLB VCLRLB_S vuc __builtin_vec_clrl (vuc, unsigned int); VCLRLB VCLRLB_U -[VEC_CLRR, vec_clrr, __builtin_vec_clrr, _ARCH_PWR10] +[VEC_CLRR, vec_clrr, __builtin_vec_clrr] vsc __builtin_vec_clrr (vsc, unsigned int); VCLRRB VCLRRB_S vuc __builtin_vec_clrr (vuc, unsigned int); @@ -1026,7 +1030,7 @@ signed int __builtin_vec_vcmpne_p (signed int, vbll, vsll); VCMPNED_P VCMPNED_P_SB -[VEC_CMPNEZ, vec_cmpnez, __builtin_vec_vcmpnez, _ARCH_PWR9] +[VEC_CMPNEZ, vec_cmpnez, __builtin_vec_vcmpnez] vbc __builtin_vec_cmpnez (vsc, vsc); CMPNEZB CMPNEZB_S vbc __builtin_vec_cmpnez (vuc, vuc); @@ -1064,7 +1068,7 @@ signed int __builtin_byte_in_range (unsigned int, unsigned int); CMPRB2 -[VEC_CNTLZ, vec_cntlz, __builtin_vec_vclz, _ARCH_PWR8] +[VEC_CNTLZ, vec_cntlz, __builtin_vec_vclz] vsc __builtin_vec_vclz (vsc); VCLZB VCLZB_S vuc __builtin_vec_vclz (vuc); @@ -1082,15 +1086,15 @@ vull __builtin_vec_vclz (vull); VCLZD VCLZD_U -[VEC_CNTLZM, vec_cntlzm, __builtin_vec_vclzdm, _ARCH_PWR10] +[VEC_CNTLZM, vec_cntlzm, __builtin_vec_vclzdm] vull __builtin_vec_vclzdm (vull, vull); VCLZDM -[VEC_CNTTZM, vec_cnttzm, __builtin_vec_vctzdm, _ARCH_PWR10] +[VEC_CNTTZM, vec_cnttzm, __builtin_vec_vctzdm] vull __builtin_vec_vctzdm (vull, vull); VCTZDM -[VEC_CNTLZ_LSBB, vec_cntlz_lsbb, __builtin_vec_vclzlsbb, _ARCH_PWR9] +[VEC_CNTLZ_LSBB, vec_cntlz_lsbb, __builtin_vec_vclzlsbb] signed int __builtin_vec_vclzlsbb (vsc); VCLZLSBB_V16QI VCLZLSBB_VSC signed int __builtin_vec_vclzlsbb (vuc); @@ -1104,7 +1108,7 @@ signed int __builtin_vec_vclzlsbb (vui); VCLZLSBB_V4SI VCLZLSBB_VUI -[VEC_CNTM, vec_cntm, __builtin_vec_cntm, _ARCH_PWR10] +[VEC_CNTM, vec_cntm, __builtin_vec_cntm] unsigned long long __builtin_vec_cntm (vuc, const int); VCNTMBB unsigned long long __builtin_vec_cntm (vus, const int); @@ -1114,7 +1118,7 @@ unsigned long long __builtin_vec_cntm (vull, const int); VCNTMBD -[VEC_CNTTZ, vec_cnttz, __builtin_vec_vctz, _ARCH_PWR9] +[VEC_CNTTZ, 
vec_cnttz, __builtin_vec_vctz] vsc __builtin_vec_vctz (vsc); VCTZB VCTZB_S vuc __builtin_vec_vctz (vuc); @@ -1132,7 +1136,7 @@ vull __builtin_vec_vctz (vull); VCTZD VCTZD_U -[VEC_CNTTZ_LSBB, vec_cnttz_lsbb, __builtin_vec_vctzlsbb, _ARCH_PWR9] +[VEC_CNTTZ_LSBB, vec_cnttz_lsbb, __builtin_vec_vctzlsbb] signed int __builtin_vec_vctzlsbb (vsc); VCTZLSBB_V16QI VCTZLSBB_VSC signed int __builtin_vec_vctzlsbb (vuc); @@ -1150,7 +1154,7 @@ vus __builtin_vec_convert_4f32_8i16 (vf, vf); CONVERT_4F32_8I16 -[VEC_CONVERT_4F32_8F16, vec_pack_to_short_fp32, __builtin_vec_convert_4f32_8f16, _ARCH_PWR9] +[VEC_CONVERT_4F32_8F16, vec_pack_to_short_fp32, __builtin_vec_convert_4f32_8f16] vus __builtin_vec_convert_4f32_8f16 (vf, vf); CONVERT_4F32_8F16 @@ -1182,7 +1186,7 @@ vull __builtin_vec_ctu (vd, const int); XVCVDPUXDS_SCALE -[VEC_DIV, vec_div, __builtin_vec_div, __VSX__] +[VEC_DIV, vec_div, __builtin_vec_div] vsi __builtin_vec_div (vsi, vsi); VDIVSW vui __builtin_vec_div (vui, vui); @@ -1200,7 +1204,7 @@ vd __builtin_vec_div (vd, vd); XVDIVDP -[VEC_DIVE, vec_dive, __builtin_vec_dive, _ARCH_PWR10] +[VEC_DIVE, vec_dive, __builtin_vec_dive] vsi __builtin_vec_dive (vsi, vsi); VDIVESW vui __builtin_vec_dive (vui, vui); @@ -1436,7 +1440,7 @@ void __builtin_vec_dstt (vf *, const int, const int); DSTT DSTT_VF -[VEC_EQV, vec_eqv, __builtin_vec_eqv, _ARCH_PWR8] +[VEC_EQV, vec_eqv, __builtin_vec_eqv] vsc __builtin_vec_eqv (vsc, vsc); EQV_V16QI vuc __builtin_vec_eqv (vuc, vuc); @@ -1499,7 +1503,7 @@ vull __builtin_vec_eqv (vull, vbll); EQV_V2DI_UNS EQV_VULL_VBLL -[VEC_EXPANDM, vec_expandm, __builtin_vec_vexpandm, _ARCH_PWR10] +[VEC_EXPANDM, vec_expandm, __builtin_vec_vexpandm] vuc __builtin_vec_vexpandm (vuc); VEXPANDMB vus __builtin_vec_vexpandm (vus); @@ -1524,15 +1528,15 @@ vsi __builtin_vec_extract (vsi, signed int); VSPLTW EXTRACT_FAKERY -[VEC_EXTRACT_FP_FROM_SHORTH, vec_extract_fp32_from_shorth, __builtin_vec_vextract_fp_from_shorth, _ARCH_PWR9] +[VEC_EXTRACT_FP_FROM_SHORTH, vec_extract_fp32_from_shorth, __builtin_vec_vextract_fp_from_shorth] vf __builtin_vec_vextract_fp_from_shorth (vus); VEXTRACT_FP_FROM_SHORTH -[VEC_EXTRACT_FP_FROM_SHORTL, vec_extract_fp32_from_shortl, __builtin_vec_vextract_fp_from_shortl, _ARCH_PWR9] +[VEC_EXTRACT_FP_FROM_SHORTL, vec_extract_fp32_from_shortl, __builtin_vec_vextract_fp_from_shortl] vf __builtin_vec_vextract_fp_from_shortl (vus); VEXTRACT_FP_FROM_SHORTL -[VEC_EXTRACTH, vec_extracth, __builtin_vec_extracth, _ARCH_PWR10] +[VEC_EXTRACTH, vec_extracth, __builtin_vec_extracth] vull __builtin_vec_extracth (vuc, vuc, unsigned char); VEXTRACTBR vull __builtin_vec_extracth (vus, vus, unsigned char); @@ -1542,7 +1546,7 @@ vull __builtin_vec_extracth (vull, vull, unsigned char); VEXTRACTDR -[VEC_EXTRACTL, vec_extractl, __builtin_vec_extractl, _ARCH_PWR10] +[VEC_EXTRACTL, vec_extractl, __builtin_vec_extractl] vull __builtin_vec_extractl (vuc, vuc, unsigned char); VEXTRACTBL vull __builtin_vec_extractl (vus, vus, unsigned char); @@ -1552,7 +1556,7 @@ vull __builtin_vec_extractl (vull, vull, unsigned char); VEXTRACTDL -[VEC_EXTRACTM, vec_extractm, __builtin_vec_vextractm, _ARCH_PWR10] +[VEC_EXTRACTM, vec_extractm, __builtin_vec_vextractm] signed int __builtin_vec_vextractm (vuc); VEXTRACTMB signed int __builtin_vec_vextractm (vus); @@ -1564,11 +1568,11 @@ signed int __builtin_vec_vextractm (vuq); VEXTRACTMQ -[VEC_EXTRACT4B, vec_extract4b, __builtin_vec_extract4b, _ARCH_PWR9] +[VEC_EXTRACT4B, vec_extract4b, __builtin_vec_extract4b] vull __builtin_vec_extract4b (vuc, const int); EXTRACT4B 
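
Annotation: the hunks around this point drop the explicit _ARCH_PWR8/_ARCH_PWR9/_ARCH_PWR10 <ifdef> tokens from the overload stanzas, matching the note added at the top of this file's diff that a #define guard cannot follow a per-function __attribute__((target)) override. A minimal user-level sketch of the situation this is meant to support; the function name is illustrative, the prototype follows the VEC_EXTRACTL stanza above, and it assumes a GCC that accepts cpu=power10 in a target attribute:

#include <altivec.h>

/* Built with, say, -mcpu=power8 (so vector types exist) but not power10:
   _ARCH_PWR10 is not predefined for the TU, so the overload must not be
   hidden behind an #ifdef in altivec.h for this override to work.  */
__attribute__ ((target ("cpu=power10")))
vector unsigned long long
extract_low_bytes (vector unsigned char a, vector unsigned char b)
{
  return vec_extractl (a, b, 0);
}
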
-[VEC_EXTULX, vec_xlx, __builtin_vec_vextulx, _ARCH_PWR9] +[VEC_EXTULX, vec_xlx, __builtin_vec_vextulx] signed char __builtin_vec_vextulx (unsigned int, vsc); VEXTUBLX VEXTUBLX_S unsigned char __builtin_vec_vextulx (unsigned int, vuc); @@ -1584,7 +1588,7 @@ float __builtin_vec_vextulx (unsigned int, vf); VEXTUWLX VEXTUWLX_F -[VEC_EXTURX, vec_xrx, __builtin_vec_vexturx, _ARCH_PWR9] +[VEC_EXTURX, vec_xrx, __builtin_vec_vexturx] signed char __builtin_vec_vexturx (unsigned int, vsc); VEXTUBRX VEXTUBRX_S unsigned char __builtin_vec_vexturx (unsigned int, vuc); @@ -1600,7 +1604,7 @@ float __builtin_vec_vexturx (unsigned int, vf); VEXTUWRX VEXTUWRX_F -[VEC_FIRSTMATCHINDEX, vec_first_match_index, __builtin_vec_first_match_index, _ARCH_PWR9] +[VEC_FIRSTMATCHINDEX, vec_first_match_index, __builtin_vec_first_match_index] unsigned int __builtin_vec_first_match_index (vsc, vsc); VFIRSTMATCHINDEX_V16QI FIRSTMATCHINDEX_VSC unsigned int __builtin_vec_first_match_index (vuc, vuc); @@ -1614,7 +1618,7 @@ unsigned int __builtin_vec_first_match_index (vui, vui); VFIRSTMATCHINDEX_V4SI FIRSTMATCHINDEX_VUI -[VEC_FIRSTMATCHOREOSINDEX, vec_first_match_or_eos_index, __builtin_vec_first_match_or_eos_index, _ARCH_PWR9] +[VEC_FIRSTMATCHOREOSINDEX, vec_first_match_or_eos_index, __builtin_vec_first_match_or_eos_index] unsigned int __builtin_vec_first_match_or_eos_index (vsc, vsc); VFIRSTMATCHOREOSINDEX_V16QI FIRSTMATCHOREOSINDEX_VSC unsigned int __builtin_vec_first_match_or_eos_index (vuc, vuc); @@ -1628,7 +1632,7 @@ unsigned int __builtin_vec_first_match_or_eos_index (vui, vui); VFIRSTMATCHOREOSINDEX_V4SI FIRSTMATCHOREOSINDEX_VUI -[VEC_FIRSTMISMATCHINDEX, vec_first_mismatch_index, __builtin_vec_first_mismatch_index, _ARCH_PWR9] +[VEC_FIRSTMISMATCHINDEX, vec_first_mismatch_index, __builtin_vec_first_mismatch_index] unsigned int __builtin_vec_first_mismatch_index (vsc, vsc); VFIRSTMISMATCHINDEX_V16QI FIRSTMISMATCHINDEX_VSC unsigned int __builtin_vec_first_mismatch_index (vuc, vuc); @@ -1642,7 +1646,7 @@ unsigned int __builtin_vec_first_mismatch_index (vui, vui); VFIRSTMISMATCHINDEX_V4SI FIRSTMISMATCHINDEX_VUI -[VEC_FIRSTMISMATCHOREOSINDEX, vec_first_mismatch_or_eos_index, __builtin_vec_first_mismatch_or_eos_index, _ARCH_PWR9] +[VEC_FIRSTMISMATCHOREOSINDEX, vec_first_mismatch_or_eos_index, __builtin_vec_first_mismatch_or_eos_index] unsigned int __builtin_vec_first_mismatch_or_eos_index (vsc, vsc); VFIRSTMISMATCHOREOSINDEX_V16QI FIRSTMISMATCHOREOSINDEX_VSC unsigned int __builtin_vec_first_mismatch_or_eos_index (vuc, vuc); @@ -1692,33 +1696,33 @@ vd __builtin_vec_floor (vd); XVRDPIM -[VEC_GB, vec_gb, __builtin_vec_vgbbd, _ARCH_PWR8] +[VEC_GB, vec_gb, __builtin_vec_vgbbd] vsc __builtin_vec_vgbbd (vsc); VGBBD VGBBD_S vuc __builtin_vec_vgbbd (vuc); VGBBD VGBBD_U -[VEC_GENBM, vec_genbm, __builtin_vec_mtvsrbm, _ARCH_PWR10] +[VEC_GENBM, vec_genbm, __builtin_vec_mtvsrbm] vuc __builtin_vec_mtvsrbm (unsigned long long); MTVSRBM -[VEC_GENHM, vec_genhm, __builtin_vec_mtvsrhm, _ARCH_PWR10] +[VEC_GENHM, vec_genhm, __builtin_vec_mtvsrhm] vus __builtin_vec_mtvsrhm (unsigned long long); MTVSRHM -[VEC_GENWM, vec_genwm, __builtin_vec_mtvsrwm, _ARCH_PWR10] +[VEC_GENWM, vec_genwm, __builtin_vec_mtvsrwm] vui __builtin_vec_mtvsrwm (unsigned long long); MTVSRWM -[VEC_GENDM, vec_gendm, __builtin_vec_mtvsrdm, _ARCH_PWR10] +[VEC_GENDM, vec_gendm, __builtin_vec_mtvsrdm] vull __builtin_vec_mtvsrdm (unsigned long long); MTVSRDM -[VEC_GENQM, vec_genqm, __builtin_vec_mtvsrqm, _ARCH_PWR10] +[VEC_GENQM, vec_genqm, __builtin_vec_mtvsrqm] vuq 
__builtin_vec_mtvsrqm (unsigned long long); MTVSRQM -[VEC_GENPCVM, vec_genpcvm, __builtin_vec_xxgenpcvm, _ARCH_PWR10] +[VEC_GENPCVM, vec_genpcvm, __builtin_vec_xxgenpcvm] vuc __builtin_vec_xxgenpcvm (vuc, const int); XXGENPCVM_V16QI vus __builtin_vec_xxgenpcvm (vus, const int); @@ -1728,7 +1732,7 @@ vull __builtin_vec_xxgenpcvm (vull, const int); XXGENPCVM_V2DI -[VEC_GNB, vec_gnb, __builtin_vec_gnb, _ARCH_PWR10] +[VEC_GNB, vec_gnb, __builtin_vec_gnb] unsigned long long __builtin_vec_gnb (vuq, const int); VGNB @@ -1740,7 +1744,7 @@ vsi __builtin_vec_insert (vsi, vsi, signed int); XXPERMDI_4SI INSERT_FAKERY -[VEC_INSERTH, vec_inserth, __builtin_vec_inserth, _ARCH_PWR10] +[VEC_INSERTH, vec_inserth, __builtin_vec_inserth] vuc __builtin_vec_inserth (unsigned char, vuc, unsigned int); VINSERTGPRBR vuc __builtin_vec_inserth (vuc, vuc, unsigned int); @@ -1756,7 +1760,7 @@ vull __builtin_vec_inserth (unsigned long long, vull, unsigned int); VINSERTGPRDR -[VEC_INSERTL, vec_insertl, __builtin_vec_insertl, _ARCH_PWR10] +[VEC_INSERTL, vec_insertl, __builtin_vec_insertl] vuc __builtin_vec_insertl (unsigned char, vuc, unsigned int); VINSERTGPRBL vuc __builtin_vec_insertl (vuc, vuc, unsigned int); @@ -1772,7 +1776,7 @@ vull __builtin_vec_insertl (unsigned long long, vull, unsigned int); VINSERTGPRDL -[VEC_INSERT4B, vec_insert4b, __builtin_vec_insert4b, _ARCH_PWR9] +[VEC_INSERT4B, vec_insert4b, __builtin_vec_insert4b] vuc __builtin_vec_insert4b (vsi, vuc, const int); INSERT4B INSERT4B_S vuc __builtin_vec_insert4b (vui, vuc, const int); @@ -2128,7 +2132,7 @@ vuc __builtin_vec_lvsr (signed long, const double *); LVSR LVSR_D -[VEC_LXVL, vec_xl_len, __builtin_vec_lxvl, _ARCH_PPC64_PWR9] +[VEC_LXVL, vec_xl_len, __builtin_vec_lxvl] vsc __builtin_vec_lxvl (const signed char *, unsigned int); LXVL LXVL_VSC vuc __builtin_vec_lxvl (const unsigned char *, unsigned int); @@ -2227,7 +2231,7 @@ vull __builtin_vec_max (vbll, vull); VMAXUD VMAXUD_BU -[VEC_MERGEE, vec_mergee, __builtin_vec_vmrgew, _ARCH_PWR8] +[VEC_MERGEE, vec_mergee, __builtin_vec_vmrgew] vsi __builtin_vec_vmrgew (vsi, vsi); VMRGEW_V4SI VMRGEW_VSI vui __builtin_vec_vmrgew (vui, vui); @@ -2327,7 +2331,7 @@ vull __builtin_vec_mergel (vbll, vull); VEC_MERGEL_V2DI VEC_MERGEL_VBLL_VULL -[VEC_MERGEO, vec_mergeo, __builtin_vec_vmrgow, _ARCH_PWR8] +[VEC_MERGEO, vec_mergeo, __builtin_vec_vmrgow] vsi __builtin_vec_vmrgow (vsi, vsi); VMRGOW_V4SI VMRGOW_VSI vui __builtin_vec_vmrgow (vui, vui); @@ -2414,7 +2418,7 @@ vus __builtin_vec_mladd (vus, vus, vus); VMLADDUHM VMLADDUHM_VUS2 -[VEC_MOD, vec_mod, __builtin_vec_mod, _ARCH_PWR10] +[VEC_MOD, vec_mod, __builtin_vec_mod] vsi __builtin_vec_mod (vsi, vsi); VMODSW vui __builtin_vec_mod (vui, vui); @@ -2432,7 +2436,7 @@ vss __builtin_vec_mradds (vss, vss, vss); VMHRADDSHS -[VEC_MSUB, vec_msub, __builtin_vec_msub, __VSX__] +[VEC_MSUB, vec_msub, __builtin_vec_msub] vf __builtin_vec_msub (vf, vf, vf); XVMSUBSP vd __builtin_vec_msub (vd, vd, vd); @@ -2452,6 +2456,10 @@ vuq __builtin_vec_msum (vull, vull, vuq); VMSUMUDM VMSUMUDM_U +[VEC_MSUMC, vec_msumc, __builtin_vec_msumc] + vuq __builtin_vec_msumc (vull, vull, vuq); + VMSUMCUD + [VEC_MSUMS, vec_msums, __builtin_vec_msums] vui __builtin_vec_msums (vus, vus, vui); VMSUMUHS @@ -2511,7 +2519,7 @@ vuq __builtin_vec_mule (vull, vull); VMULEUD -[VEC_MULH, vec_mulh, __builtin_vec_mulh, _ARCH_PWR10] +[VEC_MULH, vec_mulh, __builtin_vec_mulh] vsi __builtin_vec_mulh (vsi, vsi); VMULHSW vui __builtin_vec_mulh (vui, vui); @@ -2553,7 +2561,7 @@ vd __builtin_vec_nabs (vd); NABS_V2DF 
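
Annotation: the only newly added stanza in this region is VEC_MSUMC, mapping vec_msumc to VMSUMCUD with the single signature shown. A short sketch of a call matching that prototype (the function name is illustrative; assumes a power10-enabled compile such as -mcpu=power10):

#include <altivec.h>

vector unsigned __int128
msum_carryout (vector unsigned long long a, vector unsigned long long b,
               vector unsigned __int128 c)
{
  /* vuq __builtin_vec_msumc (vull, vull, vuq), per the stanza above.  */
  return vec_msumc (a, b, c);
}
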
-[VEC_NAND, vec_nand, __builtin_vec_nand, _ARCH_PWR8] +[VEC_NAND, vec_nand, __builtin_vec_nand] vsc __builtin_vec_nand (vsc, vsc); NAND_V16QI vuc __builtin_vec_nand (vuc, vuc); @@ -2616,15 +2624,15 @@ vull __builtin_vec_nand (vull, vbll); NAND_V2DI_UNS NAND_VULL_VBLL -[VEC_NCIPHER_BE, vec_ncipher_be, __builtin_vec_vncipher_be, _ARCH_PWR8] +[VEC_NCIPHER_BE, vec_ncipher_be, __builtin_vec_vncipher_be] vuc __builtin_vec_vncipher_be (vuc, vuc); VNCIPHER_BE -[VEC_NCIPHERLAST_BE, vec_ncipherlast_be, __builtin_vec_vncipherlast_be, _ARCH_PWR8] +[VEC_NCIPHERLAST_BE, vec_ncipherlast_be, __builtin_vec_vncipherlast_be] vuc __builtin_vec_vncipherlast_be (vuc, vuc); VNCIPHERLAST_BE -[VEC_NEARBYINT, vec_nearbyint, __builtin_vec_nearbyint, __VSX__] +[VEC_NEARBYINT, vec_nearbyint, __builtin_vec_nearbyint] vf __builtin_vec_nearbyint (vf); XVRSPI XVRSPI_NBI vd __builtin_vec_nearbyint (vd); @@ -2644,7 +2652,7 @@ vd __builtin_vec_neg (vd); NEG_V2DF -[VEC_NMADD, vec_nmadd, __builtin_vec_nmadd, __VSX__] +[VEC_NMADD, vec_nmadd, __builtin_vec_nmadd] vf __builtin_vec_nmadd (vf, vf, vf); XVNMADDSP vd __builtin_vec_nmadd (vd, vd, vd); @@ -2778,7 +2786,7 @@ vd __builtin_vec_or (vbll, vd); VOR_V2DF VOR_VBLL_VD -[VEC_ORC, vec_orc, __builtin_vec_orc, _ARCH_PWR8] +[VEC_ORC, vec_orc, __builtin_vec_orc] vsc __builtin_vec_orc (vsc, vsc); ORC_V16QI vuc __builtin_vec_orc (vuc, vuc); @@ -2895,7 +2903,7 @@ vui __builtin_vec_packsu (vsll, vsll); VPKSDUS -[VEC_PDEP, vec_pdep, __builtin_vec_vpdepd, _ARCH_PWR10] +[VEC_PDEP, vec_pdep, __builtin_vec_vpdepd] vull __builtin_vec_vpdepd (vull, vull); VPDEPD @@ -2940,7 +2948,7 @@ vbc __builtin_vec_perm (vbc, vbc, vbc); VPERM_16QI VPERM_VBC_VBC_VBC -[VEC_PERMX, vec_permx, __builtin_vec_xxpermx, _ARCH_PWR10] +[VEC_PERMX, vec_permx, __builtin_vec_xxpermx] vsc __builtin_vec_xxpermx (vsc, vsc, vuc, const int); XXPERMX_UV2DI XXPERMX_VSC vuc __builtin_vec_xxpermx (vuc, vuc, vuc, const int); @@ -2970,7 +2978,7 @@ vbc __builtin_vec_vpermxor (vbc, vbc, vbc); VPERMXOR VPERMXOR_VBC -[VEC_PEXT, vec_pext, __builtin_vec_vpextd, _ARCH_PWR10] +[VEC_PEXT, vec_pext, __builtin_vec_vpextd] vull __builtin_vec_vpextd (vull, vull); VPEXTD @@ -2984,7 +2992,7 @@ vuq __builtin_vec_vpmsum (vull, vull); VPMSUMD VPMSUMD_V -[VEC_POPCNT, vec_popcnt, __builtin_vec_vpopcntu, _ARCH_PWR8] +[VEC_POPCNT, vec_popcnt, __builtin_vec_vpopcntu] vuc __builtin_vec_vpopcntu (vsc); VPOPCNTB vuc __builtin_vec_vpopcntu (vuc); @@ -3002,7 +3010,7 @@ vull __builtin_vec_vpopcntu (vull); VPOPCNTUD -[VEC_PARITY_LSBB, vec_parity_lsbb, __builtin_vec_vparity_lsbb, _ARCH_PWR9] +[VEC_PARITY_LSBB, vec_parity_lsbb, __builtin_vec_vparity_lsbb] vui __builtin_vec_vparity_lsbb (vsi); VPRTYBW VPRTYBW_S vui __builtin_vec_vparity_lsbb (vui); @@ -3036,7 +3044,7 @@ vd __builtin_vec_recipdiv (vd, vd); RECIP_V2DF -[VEC_REPLACE_ELT, vec_replace_elt, __builtin_vec_replace_elt, _ARCH_PWR10] +[VEC_REPLACE_ELT, vec_replace_elt, __builtin_vec_replace_elt] vui __builtin_vec_replace_elt (vui, unsigned int, const int); VREPLACE_ELT_UV4SI vsi __builtin_vec_replace_elt (vsi, signed int, const int); @@ -3050,21 +3058,21 @@ vd __builtin_vec_replace_elt (vd, double, const int); VREPLACE_ELT_V2DF -[VEC_REPLACE_UN, vec_replace_unaligned, __builtin_vec_replace_un, _ARCH_PWR10] - vui __builtin_vec_replace_un (vui, unsigned int, const int); +[VEC_REPLACE_UN, vec_replace_unaligned, __builtin_vec_replace_un] + vuc __builtin_vec_replace_un (vui, unsigned int, const int); VREPLACE_UN_UV4SI - vsi __builtin_vec_replace_un (vsi, signed int, const int); + vuc __builtin_vec_replace_un 
(vsi, signed int, const int); VREPLACE_UN_V4SI - vull __builtin_vec_replace_un (vull, unsigned long long, const int); + vuc __builtin_vec_replace_un (vull, unsigned long long, const int); VREPLACE_UN_UV2DI - vsll __builtin_vec_replace_un (vsll, signed long long, const int); + vuc __builtin_vec_replace_un (vsll, signed long long, const int); VREPLACE_UN_V2DI - vf __builtin_vec_replace_un (vf, float, const int); + vuc __builtin_vec_replace_un (vf, float, const int); VREPLACE_UN_V4SF - vd __builtin_vec_replace_un (vd, double, const int); + vuc __builtin_vec_replace_un (vd, double, const int); VREPLACE_UN_V2DF -[VEC_REVB, vec_revb, __builtin_vec_revb, _ARCH_PWR8] +[VEC_REVB, vec_revb, __builtin_vec_revb] vss __builtin_vec_revb (vss); REVB_V8HI REVB_VSS vus __builtin_vec_revb (vus); @@ -3129,7 +3137,7 @@ vd __builtin_vec_vreve (vd); VREVE_V2DF -[VEC_RINT, vec_rint, __builtin_vec_rint, __VSX__] +[VEC_RINT, vec_rint, __builtin_vec_rint] vf __builtin_vec_rint (vf); XVRSPIC vd __builtin_vec_rint (vd); @@ -3157,7 +3165,7 @@ vuq __builtin_vec_rl (vuq, vuq); VRLQ VRLQ_VUQ -[VEC_RLMI, vec_rlmi, __builtin_vec_rlmi, _ARCH_PWR9] +[VEC_RLMI, vec_rlmi, __builtin_vec_rlmi] vui __builtin_vec_rlmi (vui, vui, vui); VRLWMI vull __builtin_vec_rlmi (vull, vull, vull); @@ -3167,7 +3175,7 @@ vuq __builtin_vec_rlmi (vuq, vuq, vuq); VRLQMI VRLQMI_VUQ -[VEC_RLNM, vec_vrlnm, __builtin_vec_rlnm, _ARCH_PWR9] +[VEC_RLNM, vec_vrlnm, __builtin_vec_rlnm] vui __builtin_vec_rlnm (vui, vui); VRLWNM vull __builtin_vec_rlnm (vull, vull); @@ -3195,7 +3203,7 @@ vd __builtin_vec_rsqrte (vd); XVRSQRTEDP -[VEC_SBOX_BE, vec_sbox_be, __builtin_vec_sbox_be, _ARCH_PWR8] +[VEC_SBOX_BE, vec_sbox_be, __builtin_vec_sbox_be] vuc __builtin_vec_sbox_be (vuc); VSBOX_BE @@ -3294,13 +3302,13 @@ vsi __builtin_vec_vsignedo (vd); VEC_VSIGNEDO_V2DF -[VEC_SIGNEXTI, vec_signexti, __builtin_vec_signexti, _ARCH_PWR9] +[VEC_SIGNEXTI, vec_signexti, __builtin_vec_signexti] vsi __builtin_vec_signexti (vsc); VSIGNEXTSB2W vsi __builtin_vec_signexti (vss); VSIGNEXTSH2W -[VEC_SIGNEXTLL, vec_signextll, __builtin_vec_signextll, _ARCH_PWR9] +[VEC_SIGNEXTLL, vec_signextll, __builtin_vec_signextll] vsll __builtin_vec_signextll (vsc); VSIGNEXTSB2D vsll __builtin_vec_signextll (vss); @@ -3308,7 +3316,7 @@ vsll __builtin_vec_signextll (vsi); VSIGNEXTSW2D -[VEC_SIGNEXTQ, vec_signextq, __builtin_vec_signextq, _ARCH_PWR10] +[VEC_SIGNEXTQ, vec_signextq, __builtin_vec_signextq] vsq __builtin_vec_signextq (vsll); VSIGNEXTSD2Q @@ -3366,7 +3374,7 @@ vd __builtin_vec_sld (vd, vd, const int); VSLDOI_2DF -[VEC_SLDB, vec_sldb, __builtin_vec_sldb, _ARCH_PWR10] +[VEC_SLDB, vec_sldb, __builtin_vec_sldb] vsc __builtin_vec_sldb (vsc, vsc, const int); VSLDB_V16QI VSLDB_VSC vuc __builtin_vec_sldb (vuc, vuc, const int); @@ -3521,7 +3529,7 @@ vf __builtin_vec_slo (vf, vuc); VSLO VSLO_VFU -[VEC_SLV, vec_slv, __builtin_vec_vslv, _ARCH_PWR9] +[VEC_SLV, vec_slv, __builtin_vec_vslv] vuc __builtin_vec_vslv (vuc, vuc); VSLV @@ -3572,17 +3580,17 @@ ; There are no entries for vec_splat_u{8,16,32}. These are handled ; in altivec.h with a #define and a cast. 
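
Annotation: note the retyping in the VEC_REPLACE_UN stanza above: every __builtin_vec_replace_un overload now returns vuc rather than a vector of the element type. A sketch of a caller under that assumption (names and the byte offset are illustrative; the exact range and meaning of the third operand follow the vec_replace_unaligned documentation and are not restated here):

#include <altivec.h>

vector unsigned char
replace_float_unaligned (vector float v, float x)
{
  /* Returns vector unsigned char after this change, per the stanza above.  */
  return vec_replace_unaligned (v, x, 4);
}
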
-[VEC_SPLATI, vec_splati, __builtin_vec_xxspltiw, _ARCH_PWR10] +[VEC_SPLATI, vec_splati, __builtin_vec_xxspltiw] vsi __builtin_vec_xxspltiw (signed int); VXXSPLTIW_V4SI vf __builtin_vec_xxspltiw (float); VXXSPLTIW_V4SF -[VEC_SPLATID, vec_splatid, __builtin_vec_xxspltid, _ARCH_PWR10] +[VEC_SPLATID, vec_splatid, __builtin_vec_xxspltid] vd __builtin_vec_xxspltid (float); VXXSPLTIDP -[VEC_SPLATI_INS, vec_splati_ins, __builtin_vec_xxsplti32dx, _ARCH_PWR10] +[VEC_SPLATI_INS, vec_splati_ins, __builtin_vec_xxsplti32dx] vsi __builtin_vec_xxsplti32dx (vsi, const int, signed int); VXXSPLTI32DX_V4SI VXXSPLTI32DX_VSI vui __builtin_vec_xxsplti32dx (vui, const int, unsigned int); @@ -3598,7 +3606,7 @@ vsi __builtin_vec_splats (vsi); ABS_V4SI SPLATS_FAKERY -[VEC_SQRT, vec_sqrt, __builtin_vec_sqrt, __VSX__] +[VEC_SQRT, vec_sqrt, __builtin_vec_sqrt] vf __builtin_vec_sqrt (vf); XVSQRTSP vd __builtin_vec_sqrt (vd); @@ -3648,7 +3656,7 @@ vuq __builtin_vec_sra (vuq, vuq); VSRAQ VSRAQ_VUQ -[VEC_SRDB, vec_srdb, __builtin_vec_srdb, _ARCH_PWR10] +[VEC_SRDB, vec_srdb, __builtin_vec_srdb] vsc __builtin_vec_srdb (vsc, vsc, const int); VSRDB_V16QI VSRDB_VSC vuc __builtin_vec_srdb (vuc, vuc, const int); @@ -3775,7 +3783,7 @@ vf __builtin_vec_sro (vf, vuc); VSRO VSRO_VFU -[VEC_SRV, vec_srv, __builtin_vec_vsrv, _ARCH_PWR9] +[VEC_SRV, vec_srv, __builtin_vec_vsrv] vuc __builtin_vec_vsrv (vuc, vuc); VSRV @@ -3956,7 +3964,7 @@ void __builtin_vec_stl (vd, signed long long, double *); STVXL_V2DF STVXL_D -[VEC_STRIL, vec_stril, __builtin_vec_stril, _ARCH_PWR10] +[VEC_STRIL, vec_stril, __builtin_vec_stril] vuc __builtin_vec_stril (vuc); VSTRIBL VSTRIBL_U vsc __builtin_vec_stril (vsc); @@ -3966,7 +3974,7 @@ vss __builtin_vec_stril (vss); VSTRIHL VSTRIHL_S -[VEC_STRIL_P, vec_stril_p, __builtin_vec_stril_p, _ARCH_PWR10] +[VEC_STRIL_P, vec_stril_p, __builtin_vec_stril_p] signed int __builtin_vec_stril_p (vuc); VSTRIBL_P VSTRIBL_PU signed int __builtin_vec_stril_p (vsc); @@ -3976,7 +3984,7 @@ signed int __builtin_vec_stril_p (vss); VSTRIHL_P VSTRIHL_PS -[VEC_STRIR, vec_strir, __builtin_vec_strir, _ARCH_PWR10] +[VEC_STRIR, vec_strir, __builtin_vec_strir] vuc __builtin_vec_strir (vuc); VSTRIBR VSTRIBR_U vsc __builtin_vec_strir (vsc); @@ -3986,7 +3994,7 @@ vss __builtin_vec_strir (vss); VSTRIHR VSTRIHR_S -[VEC_STRIR_P, vec_strir_p, __builtin_vec_strir_p, _ARCH_PWR10] +[VEC_STRIR_P, vec_strir_p, __builtin_vec_strir_p] signed int __builtin_vec_strir_p (vuc); VSTRIBR_P VSTRIBR_PU signed int __builtin_vec_strir_p (vsc); @@ -4148,7 +4156,7 @@ void __builtin_vec_stvrxl (vf, signed long long, float *); STVRXL STVRXL_F -[VEC_STXVL, vec_xst_len, __builtin_vec_stxvl, _ARCH_PPC64_PWR9] +[VEC_STXVL, vec_xst_len, __builtin_vec_stxvl] void __builtin_vec_stxvl (vsc, signed char *, unsigned int); STXVL STXVL_VSC void __builtin_vec_stxvl (vuc, unsigned char *, unsigned int); @@ -4316,7 +4324,7 @@ vsi __builtin_vec_sums (vsi, vsi); VSUMSWS -[VEC_TERNARYLOGIC, vec_ternarylogic, __builtin_vec_xxeval, _ARCH_PWR10] +[VEC_TERNARYLOGIC, vec_ternarylogic, __builtin_vec_xxeval] vuc __builtin_vec_xxeval (vuc, vuc, vuc, const int); XXEVAL XXEVAL_VUC vus __builtin_vec_xxeval (vus, vus, vus, const int); @@ -4328,11 +4336,11 @@ vuq __builtin_vec_xxeval (vuq, vuq, vuq, const int); XXEVAL XXEVAL_VUQ -[VEC_TEST_LSBB_ALL_ONES, vec_test_lsbb_all_ones, __builtin_vec_xvtlsbb_all_ones, _ARCH_PWR9] +[VEC_TEST_LSBB_ALL_ONES, vec_test_lsbb_all_ones, __builtin_vec_xvtlsbb_all_ones] signed int __builtin_vec_xvtlsbb_all_ones (vuc); XVTLSBB_ONES -[VEC_TEST_LSBB_ALL_ZEROS, 
vec_test_lsbb_all_zeros, __builtin_vec_xvtlsbb_all_zeros, _ARCH_PWR9] +[VEC_TEST_LSBB_ALL_ZEROS, vec_test_lsbb_all_zeros, __builtin_vec_xvtlsbb_all_zeros] signed int __builtin_vec_xvtlsbb_all_zeros (vuc); XVTLSBB_ZEROS @@ -4420,19 +4428,19 @@ vui __builtin_vec_vunsignedo (vd); VEC_VUNSIGNEDO_V2DF -[VEC_VEE, vec_extract_exp, __builtin_vec_extract_exp, _ARCH_PWR9] +[VEC_VEE, vec_extract_exp, __builtin_vec_extract_exp] vui __builtin_vec_extract_exp (vf); VEESP vull __builtin_vec_extract_exp (vd); VEEDP -[VEC_VES, vec_extract_sig, __builtin_vec_extract_sig, _ARCH_PWR9] +[VEC_VES, vec_extract_sig, __builtin_vec_extract_sig] vui __builtin_vec_extract_sig (vf); VESSP vull __builtin_vec_extract_sig (vd); VESDP -[VEC_VIE, vec_insert_exp, __builtin_vec_insert_exp, _ARCH_PWR9] +[VEC_VIE, vec_insert_exp, __builtin_vec_insert_exp] vf __builtin_vec_insert_exp (vf, vui); VIESP VIESP_VF vf __builtin_vec_insert_exp (vui, vui); @@ -4444,7 +4452,7 @@ ; It is truly unfortunate that vec_vprtyb has an incompatible set of ; interfaces with vec_parity_lsbb. So we can't even deprecate this. -[VEC_VPRTYB, vec_vprtyb, __builtin_vec_vprtyb, _ARCH_PWR9] +[VEC_VPRTYB, vec_vprtyb, __builtin_vec_vprtyb] vsi __builtin_vec_vprtyb (vsi); VPRTYBW VPRTYB_VSI vui __builtin_vec_vprtyb (vui); @@ -4462,43 +4470,43 @@ unsigned __int128 __builtin_vec_vprtyb (unsigned __int128); VPRTYBQ VPRTYB_UQ -[VEC_VSCEEQ, scalar_cmp_exp_eq, __builtin_vec_scalar_cmp_exp_eq, _ARCH_PWR9] +[VEC_VSCEEQ, scalar_cmp_exp_eq, __builtin_vec_scalar_cmp_exp_eq] signed int __builtin_vec_scalar_cmp_exp_eq (double, double); VSCEDPEQ signed int __builtin_vec_scalar_cmp_exp_eq (_Float128, _Float128); VSCEQPEQ -[VEC_VSCEGT, scalar_cmp_exp_gt, __builtin_vec_scalar_cmp_exp_gt, _ARCH_PWR9] +[VEC_VSCEGT, scalar_cmp_exp_gt, __builtin_vec_scalar_cmp_exp_gt] signed int __builtin_vec_scalar_cmp_exp_gt (double, double); VSCEDPGT signed int __builtin_vec_scalar_cmp_exp_gt (_Float128, _Float128); VSCEQPGT -[VEC_VSCELT, scalar_cmp_exp_lt, __builtin_vec_scalar_cmp_exp_lt, _ARCH_PWR9] +[VEC_VSCELT, scalar_cmp_exp_lt, __builtin_vec_scalar_cmp_exp_lt] signed int __builtin_vec_scalar_cmp_exp_lt (double, double); VSCEDPLT signed int __builtin_vec_scalar_cmp_exp_lt (_Float128, _Float128); VSCEQPLT -[VEC_VSCEUO, scalar_cmp_exp_unordered, __builtin_vec_scalar_cmp_exp_unordered, _ARCH_PWR9] +[VEC_VSCEUO, scalar_cmp_exp_unordered, __builtin_vec_scalar_cmp_exp_unordered] signed int __builtin_vec_scalar_cmp_exp_unordered (double, double); VSCEDPUO signed int __builtin_vec_scalar_cmp_exp_unordered (_Float128, _Float128); VSCEQPUO -[VEC_VSEE, scalar_extract_exp, __builtin_vec_scalar_extract_exp, _ARCH_PWR9] +[VEC_VSEE, scalar_extract_exp, __builtin_vec_scalar_extract_exp] unsigned int __builtin_vec_scalar_extract_exp (double); VSEEDP unsigned int __builtin_vec_scalar_extract_exp (_Float128); VSEEQP -[VEC_VSES, scalar_extract_sig, __builtin_vec_scalar_extract_sig, _ARCH_PWR9] +[VEC_VSES, scalar_extract_sig, __builtin_vec_scalar_extract_sig] unsigned long long __builtin_vec_scalar_extract_sig (double); VSESDP unsigned __int128 __builtin_vec_scalar_extract_sig (_Float128); VSESQP -[VEC_VSIE, scalar_insert_exp, __builtin_vec_scalar_insert_exp, _ARCH_PWR9] +[VEC_VSIE, scalar_insert_exp, __builtin_vec_scalar_insert_exp] double __builtin_vec_scalar_insert_exp (unsigned long long, unsigned long long); VSIEDP double __builtin_vec_scalar_insert_exp (double, unsigned long long); @@ -4508,7 +4516,7 @@ _Float128 __builtin_vec_scalar_insert_exp (_Float128, unsigned long long); VSIEQPF -[VEC_VSTDC, 
scalar_test_data_class, __builtin_vec_scalar_test_data_class, _ARCH_PWR9] +[VEC_VSTDC, scalar_test_data_class, __builtin_vec_scalar_test_data_class] unsigned int __builtin_vec_scalar_test_data_class (float, const int); VSTDCSP unsigned int __builtin_vec_scalar_test_data_class (double, const int); @@ -4516,7 +4524,7 @@ unsigned int __builtin_vec_scalar_test_data_class (_Float128, const int); VSTDCQP -[VEC_VSTDCN, scalar_test_neg, __builtin_vec_scalar_test_neg, _ARCH_PWR9] +[VEC_VSTDCN, scalar_test_neg, __builtin_vec_scalar_test_neg] unsigned int __builtin_vec_scalar_test_neg (float); VSTDCNSP unsigned int __builtin_vec_scalar_test_neg (double); @@ -4524,13 +4532,13 @@ unsigned int __builtin_vec_scalar_test_neg (_Float128); VSTDCNQP -[VEC_VTDC, vec_test_data_class, __builtin_vec_test_data_class, _ARCH_PWR9] +[VEC_VTDC, vec_test_data_class, __builtin_vec_test_data_class] vbi __builtin_vec_test_data_class (vf, const int); VTDCSP vbll __builtin_vec_test_data_class (vd, const int); VTDCDP -[VEC_XL, vec_xl, __builtin_vec_vsx_ld, __VSX__] +[VEC_XL, vec_xl, __builtin_vec_vsx_ld] vsc __builtin_vec_vsx_ld (signed long long, const vsc *); LXVW4X_V16QI LXVW4X_VSC vsc __builtin_vec_vsx_ld (signed long long, const signed char *); @@ -4588,7 +4596,7 @@ vd __builtin_vec_vsx_ld (signed long long, const double *); LXVD2X_V2DF LXVD2X_D -[VEC_XL_BE, vec_xl_be, __builtin_vec_xl_be, __VSX__] +[VEC_XL_BE, vec_xl_be, __builtin_vec_xl_be] vsc __builtin_vec_xl_be (signed long long, const vsc *); LD_ELEMREV_V16QI LD_ELEMREV_VSC vsc __builtin_vec_xl_be (signed long long, const signed char *); @@ -4634,11 +4642,11 @@ vd __builtin_vec_xl_be (signed long long, const double *); LD_ELEMREV_V2DF LD_ELEMREV_DD -[VEC_XL_LEN_R, vec_xl_len_r, __builtin_vec_xl_len_r, _ARCH_PPC64_PWR9] +[VEC_XL_LEN_R, vec_xl_len_r, __builtin_vec_xl_len_r] vuc __builtin_vsx_xl_len_r (const unsigned char *, unsigned int); XL_LEN_R -[VEC_XL_SEXT, vec_xl_sext, __builtin_vec_xl_sext, _ARCH_PWR10] +[VEC_XL_SEXT, vec_xl_sext, __builtin_vec_xl_sext] vsq __builtin_vec_xl_sext (signed long long, const signed char *); SE_LXVRBX vsq __builtin_vec_xl_sext (signed long long, const signed short *); @@ -4648,7 +4656,7 @@ vsq __builtin_vec_xl_sext (signed long long, const signed long long *); SE_LXVRDX -[VEC_XL_ZEXT, vec_xl_zext, __builtin_vec_xl_zext, _ARCH_PWR10] +[VEC_XL_ZEXT, vec_xl_zext, __builtin_vec_xl_zext] vuq __builtin_vec_xl_zext (signed long long, const unsigned char *); ZE_LXVRBX vuq __builtin_vec_xl_zext (signed long long, const unsigned short *); @@ -4733,7 +4741,7 @@ vd __builtin_vec_xor (vbll, vd); VXOR_V2DF VXOR_VBLL_VD -[VEC_XST, vec_xst, __builtin_vec_vsx_st, __VSX__] +[VEC_XST, vec_xst, __builtin_vec_vsx_st] void __builtin_vec_vsx_st (vsc, signed long long, vsc *); STXVW4X_V16QI STXVW4X_VSC void __builtin_vec_vsx_st (vsc, signed long long, signed char *); @@ -4801,7 +4809,7 @@ void __builtin_vec_vsx_st (vd, signed long long, double *); STXVD2X_V2DF STXVD2X_D -[VEC_XST_BE, vec_xst_be, __builtin_vec_xst_be, __VSX__] +[VEC_XST_BE, vec_xst_be, __builtin_vec_xst_be] void __builtin_vec_xst_be (vsc, signed long long, vsc *); ST_ELEMREV_V16QI ST_ELEMREV_VSC void __builtin_vec_xst_be (vsc, signed long long, signed char *); @@ -4847,11 +4855,11 @@ void __builtin_vec_xst_be (vd, signed long long, double *); ST_ELEMREV_V2DF ST_ELEMREV_D -[VEC_XST_LEN_R, vec_xst_len_r, __builtin_vec_xst_len_r, _ARCH_PPC64_PWR9] +[VEC_XST_LEN_R, vec_xst_len_r, __builtin_vec_xst_len_r] void __builtin_vsx_xst_len_r (vuc, unsigned char *, unsigned int); XST_LEN_R 
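
Annotation: among the stanzas above that lose their <ifdef> tokens, VEC_XL_SEXT keeps the prototypes shown (an offset plus a pointer, sign-extending one element into a 128-bit lane via the SE_LXVR*X instances). A sketch matching the first listed overload (assumes a power10-enabled compile; names are illustrative):

#include <altivec.h>

vector signed __int128
load_sbyte_sext (const signed char *p)
{
  /* vsq __builtin_vec_xl_sext (signed long long, const signed char *).  */
  return vec_xl_sext (0, p);
}
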
-[VEC_XST_TRUNC, vec_xst_trunc, __builtin_vec_xst_trunc, _ARCH_PWR10] +[VEC_XST_TRUNC, vec_xst_trunc, __builtin_vec_xst_trunc] void __builtin_vec_xst_trunc (vsq, signed long long, signed char *); TR_STXVRBX TR_STXVRBX_S void __builtin_vec_xst_trunc (vuq, signed long long, unsigned char *); @@ -4869,7 +4877,7 @@ void __builtin_vec_xst_trunc (vuq, signed long long, unsigned long long *); TR_STXVRDX TR_STXVRDX_U -[VEC_XXPERMDI, vec_xxpermdi, __builtin_vsx_xxpermdi, __VSX__] +[VEC_XXPERMDI, vec_xxpermdi, __builtin_vsx_xxpermdi] vsc __builtin_vsx_xxpermdi (vsc, vsc, const int); XXPERMDI_16QI XXPERMDI_VSC vuc __builtin_vsx_xxpermdi (vuc, vuc, const int); @@ -4891,7 +4899,7 @@ vd __builtin_vsx_xxpermdi (vd, vd, const int); XXPERMDI_2DF XXPERMDI_VD -[VEC_XXSLDWI, vec_xxsldwi, __builtin_vsx_xxsldwi, __VSX__] +[VEC_XXSLDWI, vec_xxsldwi, __builtin_vsx_xxsldwi] vsc __builtin_vsx_xxsldwi (vsc, vsc, const int); XXSLDWI_16QI XXSLDWI_VSC2 vuc __builtin_vsx_xxsldwi (vuc, vuc, const int); @@ -4990,51 +4998,51 @@ void __builtin_vec_stvewx (vui, signed long, void *); STVEWX STVEWX_DEPR8 -[VEC_TSTSFI_EQ_DD, SKIP, __builtin_dfp_dtstsfi_eq_dd, _ARCH_PWR9] +[VEC_TSTSFI_EQ_DD, SKIP, __builtin_dfp_dtstsfi_eq_dd] signed int __builtin_dfp_dtstsfi_eq_dd (const int, _Decimal64); TSTSFI_EQ_DD TSTSFI_EQ_DD_DEPR1 -[VEC_TSTSFI_EQ_TD, SKIP, __builtin_dfp_dtstsfi_eq_td, _ARCH_PWR9] +[VEC_TSTSFI_EQ_TD, SKIP, __builtin_dfp_dtstsfi_eq_td] signed int __builtin_dfp_dtstsfi_eq_td (const int, _Decimal128); TSTSFI_EQ_TD TSTSFI_EQ_TD_DEPR1 -[VEC_TSTSFI_GT_DD, SKIP, __builtin_dfp_dtstsfi_gt_dd, _ARCH_PWR9] +[VEC_TSTSFI_GT_DD, SKIP, __builtin_dfp_dtstsfi_gt_dd] signed int __builtin_dfp_dtstsfi_gt_dd (const int, _Decimal64); TSTSFI_GT_DD TSTSFI_GT_DD_DEPR1 -[VEC_TSTSFI_GT_TD, SKIP, __builtin_dfp_dtstsfi_gt_td, _ARCH_PWR9] +[VEC_TSTSFI_GT_TD, SKIP, __builtin_dfp_dtstsfi_gt_td] signed int __builtin_dfp_dtstsfi_gt_td (const int, _Decimal128); TSTSFI_GT_TD TSTSFI_GT_TD_DEPR1 -[VEC_TSTSFI_LT_DD, SKIP, __builtin_dfp_dtstsfi_lt_dd, _ARCH_PWR9] +[VEC_TSTSFI_LT_DD, SKIP, __builtin_dfp_dtstsfi_lt_dd] signed int __builtin_dfp_dtstsfi_lt_dd (const int, _Decimal64); TSTSFI_LT_DD TSTSFI_LT_DD_DEPR1 -[VEC_TSTSFI_LT_TD, SKIP, __builtin_dfp_dtstsfi_lt_td, _ARCH_PWR9] +[VEC_TSTSFI_LT_TD, SKIP, __builtin_dfp_dtstsfi_lt_td] signed int __builtin_dfp_dtstsfi_lt_td (const int, _Decimal128); TSTSFI_LT_TD TSTSFI_LT_TD_DEPR1 -[VEC_TSTSFI_OV_DD, SKIP, __builtin_dfp_dtstsfi_ov_dd, _ARCH_PWR9] +[VEC_TSTSFI_OV_DD, SKIP, __builtin_dfp_dtstsfi_ov_dd] signed int __builtin_dfp_dtstsfi_ov_dd (const int, _Decimal64); TSTSFI_OV_DD TSTSFI_OV_DD_DEPR1 -[VEC_TSTSFI_OV_TD, SKIP, __builtin_dfp_dtstsfi_ov_td, _ARCH_PWR9] +[VEC_TSTSFI_OV_TD, SKIP, __builtin_dfp_dtstsfi_ov_td] signed int __builtin_dfp_dtstsfi_ov_td (const int, _Decimal128); TSTSFI_OV_TD TSTSFI_OV_TD_DEPR1 -[VEC_VADDCUQ, vec_vaddcuq, __builtin_vec_vaddcuq, _ARCH_PWR8] +[VEC_VADDCUQ, vec_vaddcuq, __builtin_vec_vaddcuq] vsq __builtin_vec_vaddcuq (vsq, vsq); VADDCUQ VADDCUQ_DEPR1 vuq __builtin_vec_vaddcuq (vuq, vuq); VADDCUQ VADDCUQ_DEPR2 -[VEC_VADDECUQ, vec_vaddecuq, __builtin_vec_vaddecuq, _ARCH_PWR8] +[VEC_VADDECUQ, vec_vaddecuq, __builtin_vec_vaddecuq] vsq __builtin_vec_vaddecuq (vsq, vsq, vsq); VADDECUQ VADDECUQ_DEPR1 vuq __builtin_vec_vaddecuq (vuq, vuq, vuq); VADDECUQ VADDECUQ_DEPR2 -[VEC_VADDEUQM, vec_vaddeuqm, __builtin_vec_vaddeuqm, _ARCH_PWR8] +[VEC_VADDEUQM, vec_vaddeuqm, __builtin_vec_vaddeuqm] vsq __builtin_vec_vaddeuqm (vsq, vsq, vsq); VADDEUQM VADDEUQM_DEPR1 vuq __builtin_vec_vaddeuqm (vuq, vuq, vuq); 
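
Annotation: the VEC_TSTSFI_* stanzas above expose the DFP test-significance builtins directly (their <abi-name> field is SKIP, so no vec_* #define is generated). A sketch calling one of them with the prototype as listed; it assumes a DFP-capable power9 or later target, and the reference significance value 6 is illustrative:

int
significance_eq_6 (_Decimal64 x)
{
  /* signed int __builtin_dfp_dtstsfi_eq_dd (const int, _Decimal64).  */
  return __builtin_dfp_dtstsfi_eq_dd (6, x);
}
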
@@ -5098,7 +5106,7 @@ vuc __builtin_vec_vaddubs (vuc, vbc); VADDUBS VADDUBS_DEPR5 -[VEC_VADDUDM, vec_vaddudm, __builtin_vec_vaddudm, _ARCH_PWR8] +[VEC_VADDUDM, vec_vaddudm, __builtin_vec_vaddudm] vsll __builtin_vec_vaddudm (vbll, vsll); VADDUDM VADDUDM_DEPR1 vsll __builtin_vec_vaddudm (vsll, vbll); @@ -5142,7 +5150,7 @@ vus __builtin_vec_vadduhs (vus, vbs); VADDUHS VADDUHS_DEPR5 -[VEC_VADDUQM, vec_vadduqm, __builtin_vec_vadduqm, _ARCH_PWR8] +[VEC_VADDUQM, vec_vadduqm, __builtin_vec_vadduqm] vsq __builtin_vec_vadduqm (vsq, vsq); VADDUQM VADDUQM_DEPR1 vuq __builtin_vec_vadduqm (vuq, vuq); @@ -5214,7 +5222,7 @@ vui __builtin_vec_vavguw (vui, vui); VAVGUW VAVGUW_DEPR1 -[VEC_VBPERMQ, vec_vbpermq, __builtin_vec_vbpermq, _ARCH_PWR8] +[VEC_VBPERMQ, vec_vbpermq, __builtin_vec_vbpermq] vull __builtin_vec_vbpermq (vull, vuc); VBPERMQ VBPERMQ_DEPR1 vsll __builtin_vec_vbpermq (vsc, vsc); @@ -5232,25 +5240,25 @@ vf __builtin_vec_vcfux (vui, const int); VCFUX VCFUX_DEPR1 -[VEC_VCLZB, vec_vclzb, __builtin_vec_vclzb, _ARCH_PWR8] +[VEC_VCLZB, vec_vclzb, __builtin_vec_vclzb] vsc __builtin_vec_vclzb (vsc); VCLZB VCLZB_DEPR1 vuc __builtin_vec_vclzb (vuc); VCLZB VCLZB_DEPR2 -[VEC_VCLZD, vec_vclzd, __builtin_vec_vclzd, _ARCH_PWR8] +[VEC_VCLZD, vec_vclzd, __builtin_vec_vclzd] vsll __builtin_vec_vclzd (vsll); VCLZD VCLZD_DEPR1 vull __builtin_vec_vclzd (vull); VCLZD VCLZD_DEPR2 -[VEC_VCLZH, vec_vclzh, __builtin_vec_vclzh, _ARCH_PWR8] +[VEC_VCLZH, vec_vclzh, __builtin_vec_vclzh] vss __builtin_vec_vclzh (vss); VCLZH VCLZH_DEPR1 vus __builtin_vec_vclzh (vus); VCLZH VCLZH_DEPR2 -[VEC_VCLZW, vec_vclzw, __builtin_vec_vclzw, _ARCH_PWR8] +[VEC_VCLZW, vec_vclzw, __builtin_vec_vclzw] vsi __builtin_vec_vclzw (vsi); VCLZW VCLZW_DEPR1 vui __builtin_vec_vclzw (vui); @@ -5306,53 +5314,53 @@ vbi __builtin_vec_vcmpgtuw (vui, vui); VCMPGTUW VCMPGTUW_DEPR1 -[VEC_VCTZB, vec_vctzb, __builtin_vec_vctzb, _ARCH_PWR9] +[VEC_VCTZB, vec_vctzb, __builtin_vec_vctzb] vsc __builtin_vec_vctzb (vsc); VCTZB VCTZB_DEPR1 vuc __builtin_vec_vctzb (vuc); VCTZB VCTZB_DEPR2 -[VEC_VCTZD, vec_vctzd, __builtin_vec_vctzd, _ARCH_PWR9] +[VEC_VCTZD, vec_vctzd, __builtin_vec_vctzd] vsll __builtin_vec_vctzd (vsll); VCTZD VCTZD_DEPR1 vull __builtin_vec_vctzd (vull); VCTZD VCTZD_DEPR2 -[VEC_VCTZH, vec_vctzh, __builtin_vec_vctzh, _ARCH_PWR9] +[VEC_VCTZH, vec_vctzh, __builtin_vec_vctzh] vss __builtin_vec_vctzh (vss); VCTZH VCTZH_DEPR1 vus __builtin_vec_vctzh (vus); VCTZH VCTZH_DEPR2 -[VEC_VCTZW, vec_vctzw, __builtin_vec_vctzw, _ARCH_PWR9] +[VEC_VCTZW, vec_vctzw, __builtin_vec_vctzw] vsi __builtin_vec_vctzw (vsi); VCTZW VCTZW_DEPR1 vui __builtin_vec_vctzw (vui); VCTZW VCTZW_DEPR2 -[VEC_VEEDP, vec_extract_exp_dp, __builtin_vec_extract_exp_dp, _ARCH_PWR9] +[VEC_VEEDP, vec_extract_exp_dp, __builtin_vec_extract_exp_dp] vull __builtin_vec_extract_exp_dp (vd); VEEDP VEEDP_DEPR1 -[VEC_VEESP, vec_extract_exp_sp, __builtin_vec_extract_exp_sp, _ARCH_PWR9] +[VEC_VEESP, vec_extract_exp_sp, __builtin_vec_extract_exp_sp] vui __builtin_vec_extract_exp_sp (vf); VEESP VEESP_DEPR1 -[VEC_VESDP, vec_extract_sig_dp, __builtin_vec_extract_sig_dp, _ARCH_PWR9] +[VEC_VESDP, vec_extract_sig_dp, __builtin_vec_extract_sig_dp] vull __builtin_vec_extract_sig_dp (vd); VESDP VESDP_DEPR1 -[VEC_VESSP, vec_extract_sig_sp, __builtin_vec_extract_sig_sp, _ARCH_PWR9] +[VEC_VESSP, vec_extract_sig_sp, __builtin_vec_extract_sig_sp] vui __builtin_vec_extract_sig_sp (vf); VESSP VESSP_DEPR1 -[VEC_VIEDP, vec_insert_exp_dp, __builtin_vec_insert_exp_dp, _ARCH_PWR9] +[VEC_VIEDP, vec_insert_exp_dp, 
__builtin_vec_insert_exp_dp] vd __builtin_vec_insert_exp_dp (vd, vull); VIEDP VIEDP_DEPR1 vd __builtin_vec_insert_exp_dp (vull, vull); VIEDP VIEDP_DEPR2 -[VEC_VIESP, vec_insert_exp_sp, __builtin_vec_insert_exp_sp, _ARCH_PWR9] +[VEC_VIESP, vec_insert_exp_sp, __builtin_vec_insert_exp_sp] vf __builtin_vec_insert_exp_sp (vf, vui); VIESP VIESP_DEPR1 vf __builtin_vec_insert_exp_sp (vui, vui); @@ -5650,11 +5658,11 @@ vull __builtin_vec_vmulouw (vui, vui); VMULOUW VMULOUW_DEPR1 -[VEC_VPKSDSS, vec_vpksdss, __builtin_vec_vpksdss, _ARCH_PWR8] +[VEC_VPKSDSS, vec_vpksdss, __builtin_vec_vpksdss] vsi __builtin_vec_vpksdss (vsll, vsll); VPKSDSS VPKSDSS_DEPR1 -[VEC_VPKSDUS, vec_vpksdus, __builtin_vec_vpksdus, _ARCH_PWR8] +[VEC_VPKSDUS, vec_vpksdus, __builtin_vec_vpksdus] vui __builtin_vec_vpksdus (vsll, vsll); VPKSDUS VPKSDUS_DEPR1 @@ -5674,7 +5682,7 @@ vus __builtin_vec_vpkswus (vsi, vsi); VPKSWUS VPKSWUS_DEPR1 -[VEC_VPKUDUM, vec_vpkudum, __builtin_vec_vpkudum, _ARCH_PWR8] +[VEC_VPKUDUM, vec_vpkudum, __builtin_vec_vpkudum] vsi __builtin_vec_vpkudum (vsll, vsll); VPKUDUM VPKUDUM_DEPR1 vui __builtin_vec_vpkudum (vull, vull); @@ -5682,7 +5690,7 @@ vbi __builtin_vec_vpkudum (vbll, vbll); VPKUDUM VPKUDUM_DEPR3 -[VEC_VPKUDUS, vec_vpkudus, __builtin_vec_vpkudus, _ARCH_PWR8] +[VEC_VPKUDUS, vec_vpkudus, __builtin_vec_vpkudus] vui __builtin_vec_vpkudus (vull, vull); VPKUDUS VPKUDUS_DEPR1 @@ -5710,7 +5718,7 @@ vus __builtin_vec_vpkuwus (vui, vui); VPKUWUS VPKUWUS_DEPR1 -[VEC_VPOPCNT, vec_vpopcnt, __builtin_vec_vpopcnt, _ARCH_PWR8] +[VEC_VPOPCNT, vec_vpopcnt, __builtin_vec_vpopcnt] vsc __builtin_vec_vpopcnt (vsc); VPOPCNTB VPOPCNT_DEPR1 vuc __builtin_vec_vpopcnt (vuc); @@ -5728,37 +5736,37 @@ vull __builtin_vec_vpopcnt (vull); VPOPCNTD VPOPCNT_DEPR8 -[VEC_VPOPCNTB, vec_vpopcntb, __builtin_vec_vpopcntb, _ARCH_PWR8] +[VEC_VPOPCNTB, vec_vpopcntb, __builtin_vec_vpopcntb] vsc __builtin_vec_vpopcntb (vsc); VPOPCNTB VPOPCNTB_DEPR1 vuc __builtin_vec_vpopcntb (vuc); VPOPCNTB VPOPCNTB_DEPR2 -[VEC_VPOPCNTD, vec_vpopcntd, __builtin_vec_vpopcntd, _ARCH_PWR8] +[VEC_VPOPCNTD, vec_vpopcntd, __builtin_vec_vpopcntd] vsll __builtin_vec_vpopcntd (vsll); VPOPCNTD VPOPCNTD_DEPR1 vull __builtin_vec_vpopcntd (vull); VPOPCNTD VPOPCNTD_DEPR2 -[VEC_VPOPCNTH, vec_vpopcnth, __builtin_vec_vpopcnth, _ARCH_PWR8] +[VEC_VPOPCNTH, vec_vpopcnth, __builtin_vec_vpopcnth] vss __builtin_vec_vpopcnth (vss); VPOPCNTH VPOPCNTH_DEPR1 vus __builtin_vec_vpopcnth (vus); VPOPCNTH VPOPCNTH_DEPR2 -[VEC_VPOPCNTW, vec_vpopcntw, __builtin_vec_vpopcntw, _ARCH_PWR8] +[VEC_VPOPCNTW, vec_vpopcntw, __builtin_vec_vpopcntw] vsi __builtin_vec_vpopcntw (vsi); VPOPCNTW VPOPCNTW_DEPR1 vui __builtin_vec_vpopcntw (vui); VPOPCNTW VPOPCNTW_DEPR2 -[VEC_VPRTYBD, vec_vprtybd, __builtin_vec_vprtybd, _ARCH_PWR9] +[VEC_VPRTYBD, vec_vprtybd, __builtin_vec_vprtybd] vsll __builtin_vec_vprtybd (vsll); VPRTYBD VPRTYBD_DEPR1 vull __builtin_vec_vprtybd (vull); VPRTYBD VPRTYBD_DEPR2 -[VEC_VPRTYBQ, vec_vprtybq, __builtin_vec_vprtybq, _ARCH_PPC64_PWR9] +[VEC_VPRTYBQ, vec_vprtybq, __builtin_vec_vprtybq] vsq __builtin_vec_vprtybq (vsq); VPRTYBQ VPRTYBQ_DEPR1 vuq __builtin_vec_vprtybq (vuq); @@ -5768,7 +5776,7 @@ unsigned __int128 __builtin_vec_vprtybq (unsigned __int128); VPRTYBQ VPRTYBQ_DEPR4 -[VEC_VPRTYBW, vec_vprtybw, __builtin_vec_vprtybw, _ARCH_PWR9] +[VEC_VPRTYBW, vec_vprtybw, __builtin_vec_vprtybw] vsi __builtin_vec_vprtybw (vsi); VPRTYBW VPRTYBW_DEPR1 vui __builtin_vec_vprtybw (vui); @@ -5780,7 +5788,7 @@ vuc __builtin_vec_vrlb (vuc, vuc); VRLB VRLB_DEPR2 -[VEC_VRLD, SKIP, __builtin_vec_vrld, 
_ARCH_PWR8] +[VEC_VRLD, SKIP, __builtin_vec_vrld] vsll __builtin_vec_vrld (vsll, vull); VRLD VRLD_DEPR1 vull __builtin_vec_vrld (vull, vull); @@ -5804,7 +5812,7 @@ vuc __builtin_vec_vslb (vuc, vuc); VSLB VSLB_DEPR2 -[VEC_VSLD, SKIP, __builtin_vec_vsld, _ARCH_PWR8] +[VEC_VSLD, SKIP, __builtin_vec_vsld] vsll __builtin_vec_vsld (vsll, vull); VSLD VSLD_DEPR1 vull __builtin_vec_vsld (vull, vull); @@ -5856,7 +5864,7 @@ vuc __builtin_vec_vsrab (vuc, vuc); VSRAB VSRAB_DEPR2 -[VEC_VSRAD, SKIP, __builtin_vec_vsrad, _ARCH_PWR8] +[VEC_VSRAD, SKIP, __builtin_vec_vsrad] vsll __builtin_vec_vsrad (vsll, vull); VSRAD VSRAD_DEPR1 vull __builtin_vec_vsrad (vull, vull); @@ -5880,7 +5888,7 @@ vuc __builtin_vec_vsrb (vuc, vuc); VSRB VSRB_DEPR2 -[VEC_VSRD, SKIP, __builtin_vec_vsrd, _ARCH_PWR8] +[VEC_VSRD, SKIP, __builtin_vec_vsrd] vsll __builtin_vec_vsrd (vsll, vull); VSRD VSRD_DEPR1 vull __builtin_vec_vsrd (vull, vull); @@ -5898,27 +5906,27 @@ vui __builtin_vec_vsrw (vui, vui); VSRW VSRW_DEPR2 -[VEC_VSTDCDP, scalar_test_data_class_dp, __builtin_vec_scalar_test_data_class_dp, _ARCH_PWR9] +[VEC_VSTDCDP, scalar_test_data_class_dp, __builtin_vec_scalar_test_data_class_dp] unsigned int __builtin_vec_scalar_test_data_class_dp (double, const int); VSTDCDP VSTDCDP_DEPR1 -[VEC_VSTDCNDP, scalar_test_neg_dp, __builtin_vec_scalar_test_neg_dp, _ARCH_PWR9] +[VEC_VSTDCNDP, scalar_test_neg_dp, __builtin_vec_scalar_test_neg_dp] unsigned int __builtin_vec_scalar_test_neg_dp (double); VSTDCNDP VSTDCNDP_DEPR1 -[VEC_VSTDCNQP, scalar_test_neg_qp, __builtin_vec_scalar_test_neg_qp, _ARCH_PWR9] +[VEC_VSTDCNQP, scalar_test_neg_qp, __builtin_vec_scalar_test_neg_qp] unsigned int __builtin_vec_scalar_test_neg_qp (_Float128); VSTDCNQP VSTDCNQP_DEPR1 -[VEC_VSTDCNSP, scalar_test_neg_sp, __builtin_vec_scalar_test_neg_sp, _ARCH_PWR9] +[VEC_VSTDCNSP, scalar_test_neg_sp, __builtin_vec_scalar_test_neg_sp] unsigned int __builtin_vec_scalar_test_neg_sp (float); VSTDCNSP VSTDCNSP_DEPR1 -[VEC_VSTDCQP, scalar_test_data_class_qp, __builtin_vec_scalar_test_data_class_qp, _ARCH_PWR9] +[VEC_VSTDCQP, scalar_test_data_class_qp, __builtin_vec_scalar_test_data_class_qp] unsigned int __builtin_vec_scalar_test_data_class_qp (_Float128, const int); VSTDCQP VSTDCQP_DEPR1 -[VEC_VSTDCSP, scalar_test_data_class_sp, __builtin_vec_scalar_test_data_class_sp, _ARCH_PWR9] +[VEC_VSTDCSP, scalar_test_data_class_sp, __builtin_vec_scalar_test_data_class_sp] unsigned int __builtin_vec_scalar_test_data_class_sp (float, const int); VSTDCSP VSTDCSP_DEPR1 @@ -5928,13 +5936,13 @@ vuq __builtin_vec_vsubcuq (vuq, vuq); VSUBCUQ VSUBCUQ_DEPR2 -[VEC_VSUBECUQ, vec_vsubecuq, __builtin_vec_vsubecuq, ARCH_PWR8] +[VEC_VSUBECUQ, vec_vsubecuq, __builtin_vec_vsubecuq] vsq __builtin_vec_vsubecuq (vsq, vsq, vsq); VSUBECUQ VSUBECUQ_DEPR1 vuq __builtin_vec_vsubecuq (vuq, vuq, vuq); VSUBECUQ VSUBECUQ_DEPR2 -[VEC_VSUBEUQM, vec_vsubeuqm, __builtin_vec_vsubeuqm, _ARCH_PWR8] +[VEC_VSUBEUQM, vec_vsubeuqm, __builtin_vec_vsubeuqm] vsq __builtin_vec_vsubeuqm (vsq, vsq, vsq); VSUBEUQM VSUBEUQM_DEPR1 vuq __builtin_vec_vsubeuqm (vuq, vuq, vuq); @@ -6004,7 +6012,7 @@ vuc __builtin_vec_vsububs (vuc, vbc); VSUBUBS VSUBUBS_DEPR8 -[VEC_VSUBUDM, vec_vsubudm, __builtin_vec_vsubudm, _ARCH_PWR8] +[VEC_VSUBUDM, vec_vsubudm, __builtin_vec_vsubudm] vsll __builtin_vec_vsubudm (vbll, vsll); VSUBUDM VSUBUDM_DEPR1 vsll __builtin_vec_vsubudm (vsll, vbll); @@ -6048,7 +6056,7 @@ vus __builtin_vec_vsubuhs (vus, vbs); VSUBUHS VSUBUHS_DEPR5 -[VEC_VSUBUQM, vec_vsubuqm, __builtin_vec_vsubuqm, _ARCH_PWR8] +[VEC_VSUBUQM, vec_vsubuqm, 
__builtin_vec_vsubuqm] vsq __builtin_vec_vsubuqm (vsq, vsq); VSUBUQM VSUBUQM_DEPR1 vuq __builtin_vec_vsubuqm (vuq, vuq); @@ -6096,11 +6104,11 @@ vui __builtin_vec_vsum4ubs (vuc, vui); VSUM4UBS VSUM4UBS_DEPR1 -[VEC_VTDCDP, vec_test_data_class_dp, __builtin_vec_test_data_class_dp, _ARCH_PWR9] +[VEC_VTDCDP, vec_test_data_class_dp, __builtin_vec_test_data_class_dp] vbll __builtin_vec_test_data_class_dp (vd, const int); VTDCDP VTDCDP_DEPR1 -[VEC_VTDCSP, vec_test_data_class_sp, __builtin_vec_test_data_class_sp, _ARCH_PWR9] +[VEC_VTDCSP, vec_test_data_class_sp, __builtin_vec_test_data_class_sp] vbi __builtin_vec_test_data_class_sp (vf, const int); VTDCSP VTDCSP_DEPR1 @@ -6138,7 +6146,7 @@ vbi __builtin_vec_vupkhsh (vbs); VUPKHSH VUPKHSH_DEPR2 -[VEC_VUPKHSW, vec_vupkhsw, __builtin_vec_vupkhsw, _ARCH_PWR8] +[VEC_VUPKHSW, vec_vupkhsw, __builtin_vec_vupkhsw] vsll __builtin_vec_vupkhsw (vsi); VUPKHSW VUPKHSW_DEPR1 vbll __builtin_vec_vupkhsw (vbi); @@ -6162,7 +6170,7 @@ vbi __builtin_vec_vupklsh (vbs); VUPKLSH VUPKLSH_DEPR2 -[VEC_VUPKLSW, vec_vupklsw, __builtin_vec_vupklsw, _ARCH_PWR8] +[VEC_VUPKLSW, vec_vupklsw, __builtin_vec_vupklsw] vsll __builtin_vec_vupklsw (vsi); VUPKLSW VUPKLSW_DEPR1 vbll __builtin_vec_vupklsw (vbi); diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index a5fd36b..7afbc29 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -86,6 +86,10 @@ /* This file should be included last. */ #include "target-def.h" +extern tree rs6000_builtin_mask_for_load (void); +extern tree rs6000_builtin_md_vectorized_function (tree, tree, tree); +extern tree rs6000_builtin_reciprocal (tree); + /* Set -mabi=ieeelongdouble on some old targets. In the future, power server systems will also set long double to be IEEE 128-bit. AIX and Darwin explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so @@ -105,9 +109,6 @@ #define PCREL_SUPPORTED_BY_OS 0 #endif -/* Support targetm.vectorize.builtin_mask_for_load. */ -tree altivec_builtin_mask_for_load; - #ifdef USING_ELFOS_H /* Counter for labels which are to be placed in .fixup. */ int fixuplabelno = 0; @@ -159,9 +160,6 @@ enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER]; static int dbg_cost_ctrl; -/* Built in types. */ -tree rs6000_builtin_types[RS6000_BTI_MAX]; - /* Flag to say the TOC is initialized */ int toc_initialized, need_toc_init; char toc_label_name[10]; @@ -190,9 +188,6 @@ enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX]; /* Describe the alignment of a vector. */ int rs6000_vector_align[NUM_MACHINE_MODES]; -/* Map selected modes to types for builtins. */ -tree builtin_mode_to_type[MAX_MACHINE_MODE][2]; - /* What modes to automatically generate reciprocal divide estimate (fre) and reciprocal sqrt (frsqrte) for. */ unsigned char rs6000_recip_bits[MAX_MACHINE_MODE]; @@ -3838,10 +3833,6 @@ rs6000_option_override_internal (bool global_init_p) & OPTION_MASK_DIRECT_MOVE)) rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN; - if (!rs6000_fold_gimple) - fprintf (stderr, - "gimple folding of rs6000 builtins has been disabled.\n"); - /* Add some warnings for VSX. */ if (TARGET_VSX) { @@ -3943,6 +3934,15 @@ rs6000_option_override_internal (bool global_init_p) else if (TARGET_ALTIVEC) rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~ignore_masks); + /* Disable VSX and Altivec silently if the user switched cpus to power7 in a + target attribute or pragma which automatically enables both options, + unless the altivec ABI was set. 
This is set by default for 64-bit, but + not for 32-bit. Don't move this before the above code using ignore_masks, + since it can reset the cleared VSX/ALTIVEC flag again. */ + if (main_target_opt && !main_target_opt->x_rs6000_altivec_abi) + rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC) + & ~rs6000_isa_flags_explicit); + if (TARGET_CRYPTO && !TARGET_ALTIVEC) { if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO) @@ -4178,13 +4178,6 @@ rs6000_option_override_internal (bool global_init_p) ; /* The option value can be seen when cl_target_option_restore is called. */ else if (rs6000_long_double_type_size == 128) rs6000_long_double_type_size = FLOAT_PRECISION_TFmode; - else if (OPTION_SET_P (rs6000_ieeequad)) - { - if (global_options.x_rs6000_ieeequad) - error ("%qs requires %qs", "-mabi=ieeelongdouble", "-mlong-double-128"); - else - error ("%qs requires %qs", "-mabi=ibmlongdouble", "-mlong-double-128"); - } /* Set -mabi=ieeelongdouble on some old targets. In the future, power server systems will also set long double to be IEEE 128-bit. AIX and Darwin @@ -4194,13 +4187,13 @@ rs6000_option_override_internal (bool global_init_p) if (!OPTION_SET_P (rs6000_ieeequad)) rs6000_ieeequad = TARGET_IEEEQUAD_DEFAULT; - else + else if (TARGET_LONG_DOUBLE_128) { if (global_options.x_rs6000_ieeequad && (!TARGET_POPCNTD || !TARGET_VSX)) error ("%qs requires full ISA 2.06 support", "-mabi=ieeelongdouble"); - if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT && TARGET_LONG_DOUBLE_128) + if (rs6000_ieeequad != TARGET_IEEEQUAD_DEFAULT) { /* Determine if the user can change the default long double type at compilation time. You need GLIBC 2.32 or newer to be able to @@ -4359,18 +4352,6 @@ rs6000_option_override_internal (bool global_init_p) } } - /* Disable VSX and Altivec silently if the user switched cpus to power7 in a - target attribute or pragma which automatically enables both options, - unless the altivec ABI was set. This is set by default for 64-bit, but - not for 32-bit. */ - if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi) - { - TARGET_FLOAT128_TYPE = 0; - rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC - | OPTION_MASK_FLOAT128_KEYWORD) - & ~rs6000_isa_flags_explicit); - } - /* Enable Altivec ABI for AIX -maltivec. */ if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX) @@ -4458,30 +4439,6 @@ rs6000_option_override_internal (bool global_init_p) && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) == 0) rs6000_isa_flags |= OPTION_MASK_P10_FUSION; - if (TARGET_POWER10 && - (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_LD_CMPI) == 0) - rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LD_CMPI; - - if (TARGET_POWER10 - && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2LOGICAL) == 0) - rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2LOGICAL; - - if (TARGET_POWER10 - && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_LOGADD) == 0) - rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LOGADD; - - if (TARGET_POWER10 - && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_ADDLOG) == 0) - rs6000_isa_flags |= OPTION_MASK_P10_FUSION_ADDLOG; - - if (TARGET_POWER10 - && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2ADD) == 0) - rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2ADD; - - if (TARGET_POWER10 - && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2STORE) == 0) - rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2STORE; - /* Turn off vector pair/mma options on non-power10 systems. 
*/ else if (!TARGET_POWER10 && TARGET_MMA) { @@ -4491,6 +4448,16 @@ rs6000_option_override_internal (bool global_init_p) rs6000_isa_flags &= ~OPTION_MASK_MMA; } + /* MMA requires SIMD support as ISA 3.1 claims and our implementation + such as "*movoo" uses vector pair access which use VSX registers. + So make MMA require VSX support here. */ + if (TARGET_MMA && !TARGET_VSX) + { + if ((rs6000_isa_flags_explicit & OPTION_MASK_MMA) != 0) + error ("%qs requires %qs", "-mmma", "-mvsx"); + rs6000_isa_flags &= ~OPTION_MASK_MMA; + } + if (!TARGET_PCREL && TARGET_PCREL_OPT) rs6000_isa_flags &= ~OPTION_MASK_PCREL_OPT; @@ -4969,18 +4936,6 @@ rs6000_option_override (void) } -/* Implement targetm.vectorize.builtin_mask_for_load. */ -static tree -rs6000_builtin_mask_for_load (void) -{ - /* Don't use lvsl/vperm for P8 and similarly efficient machines. */ - if ((TARGET_ALTIVEC && !TARGET_VSX) - || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX)) - return altivec_builtin_mask_for_load; - else - return 0; -} - /* Implement LOOP_ALIGN. */ align_flags rs6000_loop_align (rtx label) @@ -5250,7 +5205,7 @@ public: using vector_costs::vector_costs; unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind, - stmt_vec_info stmt_info, tree vectype, + stmt_vec_info stmt_info, slp_tree, tree vectype, int misalign, vect_cost_model_location where) override; void finish_cost (const vector_costs *) override; @@ -5466,8 +5421,9 @@ rs6000_cost_data::update_target_cost_per_stmt (vect_cost_for_stmt kind, unsigned rs6000_cost_data::add_stmt_cost (int count, vect_cost_for_stmt kind, - stmt_vec_info stmt_info, tree vectype, - int misalign, vect_cost_model_location where) + stmt_vec_info stmt_info, slp_tree, + tree vectype, int misalign, + vect_cost_model_location where) { unsigned retval = 0; @@ -5508,7 +5464,8 @@ rs6000_cost_data::adjust_vect_cost_per_loop (loop_vec_info loop_vinfo) /* Each length needs one shift to fill into bits 0-7. */ shift_cnt += num_vectors_m1 + 1; - add_stmt_cost (shift_cnt, scalar_stmt, NULL, NULL_TREE, 0, vect_body); + add_stmt_cost (shift_cnt, scalar_stmt, NULL, NULL, + NULL_TREE, 0, vect_body); } } @@ -5689,119 +5646,6 @@ rs6000_builtin_vectorized_function (unsigned int fn, tree type_out, return NULL_TREE; } -/* Implement targetm.vectorize.builtin_md_vectorized_function. */ - -static tree -rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out, - tree type_in) -{ - machine_mode in_mode, out_mode; - int in_n, out_n; - - if (TARGET_DEBUG_BUILTIN) - fprintf (stderr, - "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n", - IDENTIFIER_POINTER (DECL_NAME (fndecl)), - GET_MODE_NAME (TYPE_MODE (type_out)), - GET_MODE_NAME (TYPE_MODE (type_in))); - - /* TODO: Should this be gcc_assert? 
*/ - if (TREE_CODE (type_out) != VECTOR_TYPE - || TREE_CODE (type_in) != VECTOR_TYPE) - return NULL_TREE; - - out_mode = TYPE_MODE (TREE_TYPE (type_out)); - out_n = TYPE_VECTOR_SUBPARTS (type_out); - in_mode = TYPE_MODE (TREE_TYPE (type_in)); - in_n = TYPE_VECTOR_SUBPARTS (type_in); - - enum rs6000_gen_builtins fn - = (enum rs6000_gen_builtins) DECL_MD_FUNCTION_CODE (fndecl); - switch (fn) - { - case RS6000_BIF_RSQRTF: - if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode) - && out_mode == SFmode && out_n == 4 - && in_mode == SFmode && in_n == 4) - return rs6000_builtin_decls[RS6000_BIF_VRSQRTFP]; - break; - case RS6000_BIF_RSQRT: - if (VECTOR_UNIT_VSX_P (V2DFmode) - && out_mode == DFmode && out_n == 2 - && in_mode == DFmode && in_n == 2) - return rs6000_builtin_decls[RS6000_BIF_RSQRT_2DF]; - break; - case RS6000_BIF_RECIPF: - if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode) - && out_mode == SFmode && out_n == 4 - && in_mode == SFmode && in_n == 4) - return rs6000_builtin_decls[RS6000_BIF_VRECIPFP]; - break; - case RS6000_BIF_RECIP: - if (VECTOR_UNIT_VSX_P (V2DFmode) - && out_mode == DFmode && out_n == 2 - && in_mode == DFmode && in_n == 2) - return rs6000_builtin_decls[RS6000_BIF_RECIP_V2DF]; - break; - default: - break; - } - - machine_mode in_vmode = TYPE_MODE (type_in); - machine_mode out_vmode = TYPE_MODE (type_out); - - /* Power10 supported vectorized built-in functions. */ - if (TARGET_POWER10 - && in_vmode == out_vmode - && VECTOR_UNIT_ALTIVEC_OR_VSX_P (in_vmode)) - { - machine_mode exp_mode = DImode; - machine_mode exp_vmode = V2DImode; - enum rs6000_gen_builtins bif; - switch (fn) - { - case RS6000_BIF_DIVWE: - case RS6000_BIF_DIVWEU: - exp_mode = SImode; - exp_vmode = V4SImode; - if (fn == RS6000_BIF_DIVWE) - bif = RS6000_BIF_VDIVESW; - else - bif = RS6000_BIF_VDIVEUW; - break; - case RS6000_BIF_DIVDE: - case RS6000_BIF_DIVDEU: - if (fn == RS6000_BIF_DIVDE) - bif = RS6000_BIF_VDIVESD; - else - bif = RS6000_BIF_VDIVEUD; - break; - case RS6000_BIF_CFUGED: - bif = RS6000_BIF_VCFUGED; - break; - case RS6000_BIF_CNTLZDM: - bif = RS6000_BIF_VCLZDM; - break; - case RS6000_BIF_CNTTZDM: - bif = RS6000_BIF_VCTZDM; - break; - case RS6000_BIF_PDEPD: - bif = RS6000_BIF_VPDEPD; - break; - case RS6000_BIF_PEXTD: - bif = RS6000_BIF_VPEXTD; - break; - default: - return NULL_TREE; - } - - if (in_mode == exp_mode && in_vmode == exp_vmode) - return rs6000_builtin_decls[bif]; - } - - return NULL_TREE; -} - /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a library with vectorized intrinsics. */ @@ -5920,33 +5764,60 @@ const char *rs6000_machine; const char * rs6000_machine_from_flags (void) { - /* For some CPUs, the machine cannot be determined by ISA flags. We have to - check them first. 
*/ - switch (rs6000_cpu) - { - case PROCESSOR_PPC8540: - case PROCESSOR_PPC8548: - return "e500"; - - case PROCESSOR_PPCE300C2: - case PROCESSOR_PPCE300C3: - return "e300"; - - case PROCESSOR_PPCE500MC: - return "e500mc"; - - case PROCESSOR_PPCE500MC64: - return "e500mc64"; - - case PROCESSOR_PPCE5500: - return "e5500"; - - case PROCESSOR_PPCE6500: - return "e6500"; - - default: - break; - } + /* e300 and e500 */ + if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3) + return "e300"; + if (rs6000_cpu == PROCESSOR_PPC8540 || rs6000_cpu == PROCESSOR_PPC8548) + return "e500"; + if (rs6000_cpu == PROCESSOR_PPCE500MC) + return "e500mc"; + if (rs6000_cpu == PROCESSOR_PPCE500MC64) + return "e500mc64"; + if (rs6000_cpu == PROCESSOR_PPCE5500) + return "e5500"; + if (rs6000_cpu == PROCESSOR_PPCE6500) + return "e6500"; + + /* 400 series */ + if (rs6000_cpu == PROCESSOR_PPC403) + return "\"403\""; + if (rs6000_cpu == PROCESSOR_PPC405) + return "\"405\""; + if (rs6000_cpu == PROCESSOR_PPC440) + return "\"440\""; + if (rs6000_cpu == PROCESSOR_PPC476) + return "\"476\""; + + /* A2 */ + if (rs6000_cpu == PROCESSOR_PPCA2) + return "a2"; + + /* Cell BE */ + if (rs6000_cpu == PROCESSOR_CELL) + return "cell"; + + /* Titan */ + if (rs6000_cpu == PROCESSOR_TITAN) + return "titan"; + + /* 500 series and 800 series */ + if (rs6000_cpu == PROCESSOR_MPCCORE) + return "\"821\""; + + /* 600 series and 700 series, "classic" */ + if (rs6000_cpu == PROCESSOR_PPC601 || rs6000_cpu == PROCESSOR_PPC603 + || rs6000_cpu == PROCESSOR_PPC604 || rs6000_cpu == PROCESSOR_PPC604e + || rs6000_cpu == PROCESSOR_PPC750 || rs6000_cpu == PROCESSOR_POWERPC) + return "ppc"; + + /* Classic with AltiVec, "G4" */ + if (rs6000_cpu == PROCESSOR_PPC7400 || rs6000_cpu == PROCESSOR_PPC7450) + return "\"7450\""; + + /* The older 64-bit CPUs */ + if (rs6000_cpu == PROCESSOR_PPC620 || rs6000_cpu == PROCESSOR_PPC630 + || rs6000_cpu == PROCESSOR_RS64A || rs6000_cpu == PROCESSOR_POWERPC64) + return "ppc64"; HOST_WIDE_INT flags = rs6000_isa_flags; @@ -6384,8 +6255,11 @@ vspltis_shifted (rtx op) return false; /* We need to create pseudo registers to do the shift, so don't recognize - shift vector constants after reload. */ - if (!can_create_pseudo_p ()) + shift vector constants after reload. Don't match it even before RA + after split1 is done, because there won't be further splitting pass + before RA to do the splitting. */ + if (!can_create_pseudo_p () + || (cfun->curr_properties & PROP_rtl_split_insns)) return false; nunits = GET_MODE_NUNITS (mode); @@ -8441,8 +8315,14 @@ darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode) if (GET_CODE (x) == CONST) x = XEXP (x, 0); + /* If we are building PIC code, then any symbol must be wrapped in an + UNSPEC_MACHOPIC_OFFSET so that it will get the picbase subtracted. */ + bool machopic_offs_p = false; if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET) - x = XVECEXP (x, 0, 0); + { + x = XVECEXP (x, 0, 0); + machopic_offs_p = true; + } rtx sym = NULL_RTX; unsigned HOST_WIDE_INT offset = 0; @@ -8473,6 +8353,9 @@ darwin_rs6000_legitimate_lo_sum_const_p (rtx x, machine_mode mode) if (sym) { tree decl = SYMBOL_REF_DECL (sym); + /* As noted above, PIC code cannot use a bare SYMBOL_REF. 
*/ + if (TARGET_MACHO && flag_pic && !machopic_offs_p) + return false; #if TARGET_MACHO if (MACHO_SYMBOL_INDIRECTION_P (sym)) /* The decl in an indirection symbol is the original one, which might @@ -9060,7 +8943,7 @@ legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict) return false; x = XEXP (x, 1); - if (TARGET_ELF || TARGET_MACHO) + if (TARGET_ELF) { bool large_toc_ok; @@ -9086,7 +8969,32 @@ legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict) return CONSTANT_P (x) || large_toc_ok; } + else if (TARGET_MACHO) + { + if (GET_MODE_NUNITS (mode) != 1) + return false; + if (GET_MODE_SIZE (mode) > UNITS_PER_WORD + && !(/* see above */ + TARGET_HARD_FLOAT && (mode == DFmode || mode == DDmode))) + return false; +#if TARGET_MACHO + if (MACHO_DYNAMIC_NO_PIC_P || !flag_pic) + return CONSTANT_P (x); +#endif + /* Macho-O PIC code from here. */ + if (GET_CODE (x) == CONST) + x = XEXP (x, 0); + /* SYMBOL_REFs need to be wrapped in an UNSPEC_MACHOPIC_OFFSET. */ + if (SYMBOL_REF_P (x)) + return false; + + /* So this is OK if the wrapped object is const. */ + if (GET_CODE (x) == UNSPEC + && XINT (x, 1) == UNSPEC_MACHOPIC_OFFSET) + return CONSTANT_P (XVECEXP (x, 0, 0)); + return CONSTANT_P (x); + } return false; } @@ -9140,7 +9048,7 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, else return force_reg (Pmode, x); } - if (SYMBOL_REF_P (x)) + if (SYMBOL_REF_P (x) && !TARGET_MACHO) { enum tls_model model = SYMBOL_REF_TLS_MODEL (x); if (model != 0) @@ -11108,6 +11016,12 @@ init_float128_ibm (machine_mode mode) set_conv_libfunc (trunc_optab, DDmode, mode, "__dpd_trunctfdd"); set_conv_libfunc (sext_optab, TDmode, mode, "__dpd_extendtftd"); + set_conv_libfunc (sfix_optab, DImode, mode, "__fixtfdi"); + set_conv_libfunc (ufix_optab, DImode, mode, "__fixunstfdi"); + + set_conv_libfunc (sfloat_optab, mode, DImode, "__floatditf"); + set_conv_libfunc (ufloat_optab, mode, DImode, "__floatunditf"); + if (TARGET_POWERPC64) { set_conv_libfunc (sfix_optab, TImode, mode, "__fixtfti"); @@ -16299,6 +16213,12 @@ rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond) if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode)) return false; + /* PR104335: We now need to expect CC-mode "comparisons" + coming from ifcvt. The following code expects proper + comparisons so better abort here. */ + if (GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_CC) + return false; + /* We still have to do the compare, because isel doesn't do a compare, it just looks at the CRx bits set by a previous compare instruction. */ @@ -19110,8 +19030,7 @@ power10_sched_reorder (rtx_insn **ready, int lastpos) /* Try to pair certain store insns to adjacent memory locations so that the hardware will fuse them to a single operation. */ - if (TARGET_P10_FUSION && TARGET_P10_FUSION_2STORE - && is_fusable_store (last_scheduled_insn, &mem1)) + if (TARGET_P10_FUSION && is_fusable_store (last_scheduled_insn, &mem1)) { /* A fusable store was just scheduled. Scan the ready list for another @@ -22543,31 +22462,6 @@ rs6000_ira_change_pseudo_allocno_class (int regno ATTRIBUTE_UNUSED, return allocno_class; } -/* Returns a code for a target-specific builtin that implements - reciprocal of the function, or NULL_TREE if not available. 
*/ - -static tree -rs6000_builtin_reciprocal (tree fndecl) -{ - switch (DECL_MD_FUNCTION_CODE (fndecl)) - { - case RS6000_BIF_XVSQRTDP: - if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode)) - return NULL_TREE; - - return rs6000_builtin_decls[RS6000_BIF_RSQRT_2DF]; - - case RS6000_BIF_XVSQRTSP: - if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode)) - return NULL_TREE; - - return rs6000_builtin_decls[RS6000_BIF_RSQRT_4SF]; - - default: - return NULL_TREE; - } -} - /* Load up a constant. If the mode is a vector mode, splat the value across all of the vector elements. */ @@ -27825,14 +27719,13 @@ emit_fusion_gpr_load (rtx target, rtx mem) return ""; } - -#ifdef RS6000_GLIBC_ATOMIC_FENV -/* Function declarations for rs6000_atomic_assign_expand_fenv. */ -static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl; -#endif +/* This is not inside an #ifdef RS6000_GLIBC_ATOMIC_FENV because gengtype + ignores it then. */ +static GTY(()) tree atomic_hold_decl; +static GTY(()) tree atomic_clear_decl; +static GTY(()) tree atomic_update_decl; /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */ - static void rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) { @@ -28228,6 +28121,7 @@ rs6000_mangle_decl_assembler_name (tree decl, tree id) { size_t printf_len = strlen ("printf"); size_t scanf_len = strlen ("scanf"); + size_t printf_chk_len = strlen ("printf_chk"); if (len >= printf_len && strcmp (name + len - printf_len, "printf") == 0) @@ -28237,6 +28131,10 @@ rs6000_mangle_decl_assembler_name (tree decl, tree id) && strcmp (name + len - scanf_len, "scanf") == 0) newname = xasprintf ("__isoc99_%sieee128", name); + else if (len >= printf_chk_len + && strcmp (name + len - printf_chk_len, "printf_chk") == 0) + newname = xasprintf ("%sieee128", name); + else if (name[len - 1] == 'l') { bool uses_ieee128_p = false; diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index 5fdb8f2..17af314 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -2551,7 +2551,6 @@ enum rs6000_builtin_type_index extern GTY(()) tree rs6000_builtin_types[RS6000_BTI_MAX]; #ifndef USED_FOR_TARGET -extern GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2]; extern GTY(()) tree altivec_builtin_mask_for_load; extern GTY(()) section *toc_section; diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index c2a7718..4931d78 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -155,10 +155,6 @@ maltivec Target Mask(ALTIVEC) Var(rs6000_isa_flags) Use AltiVec instructions. -mfold-gimple -Target Var(rs6000_fold_gimple) Init(1) -Enable early gimple folding of builtins. - mhard-dfp Target Mask(DFP) Var(rs6000_isa_flags) Use decimal floating point instructions. @@ -491,33 +487,9 @@ Target Mask(P8_VECTOR) Var(rs6000_isa_flags) Use vector and scalar instructions added in ISA 2.07. mpower10-fusion -Target Mask(P10_FUSION) Var(rs6000_isa_flags) -Fuse certain integer operations together for better performance on power10. - -mpower10-fusion-ld-cmpi -Target Undocumented Mask(P10_FUSION_LD_CMPI) Var(rs6000_isa_flags) +Target Undocumented Mask(P10_FUSION) Var(rs6000_isa_flags) Fuse certain integer operations together for better performance on power10. -mpower10-fusion-2logical -Target Undocumented Mask(P10_FUSION_2LOGICAL) Var(rs6000_isa_flags) -Fuse pairs of scalar or vector logical operations together for better performance on power10. 
- -mpower10-fusion-logical-add -Target Undocumented Mask(P10_FUSION_LOGADD) Var(rs6000_isa_flags) -Fuse scalar logical op with add/subf for better performance on power10. - -mpower10-fusion-add-logical -Target Undocumented Mask(P10_FUSION_ADDLOG) Var(rs6000_isa_flags) -Fuse scalar add/subf with logical op for better performance on power10. - -mpower10-fusion-2add -Target Undocumented Mask(P10_FUSION_2ADD) Var(rs6000_isa_flags) -Fuse dependent pairs of add or vaddudm instructions for better performance on power10. - -mpower10-fusion-2store -Target Undocumented Mask(P10_FUSION_2STORE) Var(rs6000_isa_flags) -Fuse certain store operations together for better performance on power10. - mcrypto Target Mask(CRYPTO) Var(rs6000_isa_flags) Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions. diff --git a/gcc/config/rs6000/smmintrin.h b/gcc/config/rs6000/smmintrin.h index cca2f7d..3628c88 100644 --- a/gcc/config/rs6000/smmintrin.h +++ b/gcc/config/rs6000/smmintrin.h @@ -273,31 +273,31 @@ _mm_round_ss (__m128 __A, __m128 __B, int __rounding) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_insert_epi8 (__m128i const __A, int const __D, int const __N) { - __v16qi result = (__v16qi)__A; + __v16qi __result = (__v16qi)__A; - result [__N & 0xf] = __D; + __result [__N & 0xf] = __D; - return (__m128i) result; + return (__m128i) __result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_insert_epi32 (__m128i const __A, int const __D, int const __N) { - __v4si result = (__v4si)__A; + __v4si __result = (__v4si)__A; - result [__N & 3] = __D; + __result [__N & 3] = __D; - return (__m128i) result; + return (__m128i) __result; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_insert_epi64 (__m128i const __A, long long const __D, int const __N) { - __v2di result = (__v2di)__A; + __v2di __result = (__v2di)__A; - result [__N & 1] = __D; + __result [__N & 1] = __D; - return (__m128i) result; + return (__m128i) __result; } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000 index 90079ce..597cea4 100644 --- a/gcc/config/rs6000/t-rs6000 +++ b/gcc/config/rs6000/t-rs6000 @@ -21,7 +21,7 @@ TM_H += $(srcdir)/config/rs6000/rs6000-cpus.def TM_H += $(srcdir)/config/rs6000/rs6000-modes.h PASSES_EXTRA += $(srcdir)/config/rs6000/rs6000-passes.def -EXTRA_GTYPE_DEPS += $(srcdir)/config/rs6000/rs6000-builtins.def +EXTRA_GTYPE_DEPS += rs6000-builtins.h rs6000-pcrel-opt.o: $(srcdir)/config/rs6000/rs6000-pcrel-opt.cc $(COMPILE) $< @@ -43,6 +43,10 @@ rs6000-logue.o: $(srcdir)/config/rs6000/rs6000-logue.cc $(COMPILE) $< $(POSTCOMPILE) +rs6000-builtin.o: $(srcdir)/config/rs6000/rs6000-builtin.cc + $(COMPILE) $< + $(POSTCOMPILE) + build/rs6000-gen-builtins.o: $(srcdir)/config/rs6000/rs6000-gen-builtins.cc build/rbtree.o: $(srcdir)/config/rs6000/rbtree.cc diff --git a/gcc/config/rs6000/tmmintrin.h b/gcc/config/rs6000/tmmintrin.h index c06a643..05b985b 100644 --- a/gcc/config/rs6000/tmmintrin.h +++ b/gcc/config/rs6000/tmmintrin.h @@ -112,8 +112,8 @@ _mm_alignr_epi8 (__m128i __A, __m128i __B, const unsigned int __count) { if (__count >= 32) { - const __v16qu zero = { 0 }; - return (__m128i) zero; + const __v16qu __zero = { 0 }; + return (__m128i) __zero; } else { diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md index b87a742..4d0797c 100644 --- 
a/gcc/config/rs6000/vector.md +++ b/gcc/config/rs6000/vector.md @@ -1519,7 +1519,10 @@ [(set (match_operand:VEC_N 0 "nonimmediate_operand") (match_operand:VEC_N 1 "any_operand"))] "VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_ALLOW_MOVMISALIGN" - "") +{ + rs6000_emit_move (operands[0], operands[1], <MODE>mode); + DONE; +}) ;; Vector shift right in bits. Currently supported ony for shift ;; amounts that can be expressed as byte shifts (divisible by 8). diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index c8c891e..d0fb92f 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -360,7 +360,6 @@ UNSPEC_XXGENPCV UNSPEC_MTVSBM UNSPEC_EXTENDDITI2 - UNSPEC_MTVSRD_DITI_W1 UNSPEC_VCNTMB UNSPEC_VEXPAND UNSPEC_VEXTRACT @@ -372,6 +371,7 @@ UNSPEC_REPLACE_UN UNSPEC_VDIVES UNSPEC_VDIVEU + UNSPEC_VMSUMCUD UNSPEC_XXEVAL UNSPEC_XXSPLTIW UNSPEC_XXSPLTIDP @@ -4196,21 +4196,6 @@ } [(set_attr "type" "vecsimple")]) -(define_expand "vreplace_un_<mode>" - [(set (match_operand:REPLACE_ELT 0 "register_operand") - (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand") - (match_operand:<VS_scalar> 2 "register_operand") - (match_operand:QI 3 "const_0_to_12_operand")] - UNSPEC_REPLACE_UN))] - "TARGET_POWER10" -{ - /* Immediate value is the byte index Big Endian numbering. */ - emit_insn (gen_vreplace_elt_<mode>_inst (operands[0], operands[1], - operands[2], operands[3])); - DONE; - } -[(set_attr "type" "vecsimple")]) - (define_insn "vreplace_elt_<mode>_inst" [(set (match_operand:REPLACE_ELT 0 "register_operand" "=v") (unspec:REPLACE_ELT [(match_operand:REPLACE_ELT 1 "register_operand" "0") @@ -4221,6 +4206,16 @@ "vins<REPLACE_ELT_char> %0,%2,%3" [(set_attr "type" "vecsimple")]) +(define_insn "vreplace_un_<mode>" + [(set (match_operand:V16QI 0 "register_operand" "=v") + (unspec:V16QI [(match_operand:REPLACE_ELT 1 "register_operand" "0") + (match_operand:<VS_scalar> 2 "register_operand" "r") + (match_operand:QI 3 "const_0_to_12_operand" "n")] + UNSPEC_REPLACE_UN))] + "TARGET_POWER10" + "vins<REPLACE_ELT_char> %0,%2,%3" + [(set_attr "type" "vecsimple")]) + ;; VSX_EXTRACT optimizations ;; Optimize double d = (double) vec_extract (vi, <n>) ;; Get the element into the top position and use XVCVSWDP/XVCVUWDP @@ -5027,15 +5022,67 @@ DONE; }) -;; ISA 3.1 vector sign extend -;; Move DI value from GPR to TI mode in VSX register, word 1. -(define_insn "mtvsrdd_diti_w1" - [(set (match_operand:TI 0 "register_operand" "=wa") - (unspec:TI [(match_operand:DI 1 "register_operand" "r")] - UNSPEC_MTVSRD_DITI_W1))] - "TARGET_POWERPC64 && TARGET_DIRECT_MOVE" - "mtvsrdd %x0,0,%1" - [(set_attr "type" "vecmove")]) +;; Sign extend DI to TI. We provide both GPR targets and Altivec targets on +;; power10. On earlier systems, the machine independent code will generate a +;; shift left to sign extend the 64-bit value to 128-bit. +;; +;; If the register allocator prefers to use GPR registers, we will use a shift +;; left instruction to sign extend the 64-bit value to 128-bit. +;; +;; If the register allocator prefers to use Altivec registers on power10, +;; generate the vextsd2q instruction. +(define_insn_and_split "extendditi2" + [(set (match_operand:TI 0 "register_operand" "=r,r,v,v,v") + (sign_extend:TI (match_operand:DI 1 "input_operand" "r,m,r,wa,Z"))) + (clobber (reg:DI CA_REGNO))] + "TARGET_POWERPC64 && TARGET_POWER10" + "#" + "&& reload_completed" + [(pc)] +{ + rtx dest = operands[0]; + rtx src = operands[1]; + int dest_regno = reg_or_subregno (dest); + + /* Handle conversion to GPR registers. 
Load up the low part and then do + a sign extension to the upper part. */ + if (INT_REGNO_P (dest_regno)) + { + rtx dest_hi = gen_highpart (DImode, dest); + rtx dest_lo = gen_lowpart (DImode, dest); + + emit_move_insn (dest_lo, src); + /* In case src is a MEM, we have to use the destination, which is a + register, instead of re-using the source. */ + rtx src2 = (REG_P (src) || SUBREG_P (src)) ? src : dest_lo; + emit_insn (gen_ashrdi3 (dest_hi, src2, GEN_INT (63))); + DONE; + } + + /* For conversion to an Altivec register, generate either a splat operation + or a load rightmost double word instruction. Both instructions gets the + DImode value into the lower 64 bits, and then do the vextsd2q + instruction. */ + + else if (ALTIVEC_REGNO_P (dest_regno)) + { + if (MEM_P (src)) + emit_insn (gen_vsx_lxvrdx (dest, src)); + else + { + rtx dest_v2di = gen_rtx_REG (V2DImode, dest_regno); + emit_insn (gen_vsx_splat_v2di (dest_v2di, src)); + } + + emit_insn (gen_extendditi2_vector (dest, dest)); + DONE; + } + + else + gcc_unreachable (); +} + [(set_attr "length" "8") + (set_attr "type" "shift,load,vecmove,vecperm,load")]) ;; Sign extend 64-bit value in TI reg, word 1, to 128-bit value in TI reg (define_insn "extendditi2_vector" @@ -5046,18 +5093,6 @@ "vextsd2q %0,%1" [(set_attr "type" "vecexts")]) -(define_expand "extendditi2" - [(set (match_operand:TI 0 "gpc_reg_operand") - (sign_extend:DI (match_operand:DI 1 "gpc_reg_operand")))] - "TARGET_POWER10" - { - /* Move 64-bit src from GPR to vector reg and sign extend to 128-bits. */ - rtx temp = gen_reg_rtx (TImode); - emit_insn (gen_mtvsrdd_diti_w1 (temp, operands[1])); - emit_insn (gen_extendditi2_vector (operands[0], temp)); - DONE; - }) - ;; ISA 3.0 Binary Floating-Point Support @@ -6620,3 +6655,15 @@ emit_move_insn (operands[0], tmp4); DONE; }) + +;; vmsumcud +(define_insn "vmsumcud" +[(set (match_operand:V1TI 0 "register_operand" "+v") + (unspec:V1TI [(match_operand:V2DI 1 "register_operand" "v") + (match_operand:V2DI 2 "register_operand" "v") + (match_operand:V1TI 3 "register_operand" "v")] + UNSPEC_VMSUMCUD))] + "TARGET_POWER10" + "vmsumcud %0,%1,%2,%3" + [(set_attr "type" "veccomplex")] +) diff --git a/gcc/config/rs6000/xmmintrin.h b/gcc/config/rs6000/xmmintrin.h index 5867431..c602011 100644 --- a/gcc/config/rs6000/xmmintrin.h +++ b/gcc/config/rs6000/xmmintrin.h @@ -127,14 +127,14 @@ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artif _mm_loadr_ps (float const *__P) { __v4sf __tmp; - __m128 result; - static const __vector unsigned char permute_vector = + __m128 __result; + static const __vector unsigned char __permute_vector = { 0x1C, 0x1D, 0x1E, 0x1F, 0x18, 0x19, 0x1A, 0x1B, 0x14, 0x15, 0x16, 0x17, 0x10, 0x11, 0x12, 0x13 }; __tmp = vec_ld (0, (__v4sf *) __P); - result = (__m128) vec_perm (__tmp, __tmp, permute_vector); - return result; + __result = (__m128) vec_perm (__tmp, __tmp, __permute_vector); + return __result; } /* Create a vector with all four elements equal to F. 
*/ @@ -184,11 +184,11 @@ extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artific _mm_storer_ps (float *__P, __m128 __A) { __v4sf __tmp; - static const __vector unsigned char permute_vector = + static const __vector unsigned char __permute_vector = { 0x1C, 0x1D, 0x1E, 0x1F, 0x18, 0x19, 0x1A, 0x1B, 0x14, 0x15, 0x16, 0x17, 0x10, 0x11, 0x12, 0x13 }; - __tmp = (__m128) vec_perm (__A, __A, permute_vector); + __tmp = (__m128) vec_perm (__A, __A, __permute_vector); _mm_store_ps (__P, __tmp); } @@ -218,9 +218,9 @@ _mm_set_ss (float __F) extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_move_ss (__m128 __A, __m128 __B) { - static const __vector unsigned int mask = {0xffffffff, 0, 0, 0}; + static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; - return (vec_sel ((__v4sf)__A, (__v4sf)__B, mask)); + return (vec_sel ((__v4sf)__A, (__v4sf)__B, __mask)); } /* Create a vector with element 0 as *P and the rest zero. */ @@ -245,18 +245,18 @@ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artif _mm_add_ss (__m128 __A, __m128 __B) { #ifdef _ARCH_PWR7 - __m128 a, b, c; - static const __vector unsigned int mask = {0xffffffff, 0, 0, 0}; + __m128 __a, __b, __c; + static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; /* PowerISA VSX does not allow partial (for just lower double) results. So to insure we don't generate spurious exceptions (from the upper double values) we splat the lower double before we to the operation. */ - a = vec_splat (__A, 0); - b = vec_splat (__B, 0); - c = a + b; + __a = vec_splat (__A, 0); + __b = vec_splat (__B, 0); + __c = __a + __b; /* Then we merge the lower float result with the original upper float elements from __A. */ - return (vec_sel (__A, c, mask)); + return (vec_sel (__A, __c, __mask)); #else __A[0] = __A[0] + __B[0]; return (__A); @@ -267,18 +267,18 @@ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artif _mm_sub_ss (__m128 __A, __m128 __B) { #ifdef _ARCH_PWR7 - __m128 a, b, c; - static const __vector unsigned int mask = {0xffffffff, 0, 0, 0}; + __m128 __a, __b, __c; + static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; /* PowerISA VSX does not allow partial (for just lower double) results. So to insure we don't generate spurious exceptions (from the upper double values) we splat the lower double before we to the operation. */ - a = vec_splat (__A, 0); - b = vec_splat (__B, 0); - c = a - b; + __a = vec_splat (__A, 0); + __b = vec_splat (__B, 0); + __c = __a - __b; /* Then we merge the lower float result with the original upper float elements from __A. */ - return (vec_sel (__A, c, mask)); + return (vec_sel (__A, __c, __mask)); #else __A[0] = __A[0] - __B[0]; return (__A); @@ -289,18 +289,18 @@ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artif _mm_mul_ss (__m128 __A, __m128 __B) { #ifdef _ARCH_PWR7 - __m128 a, b, c; - static const __vector unsigned int mask = {0xffffffff, 0, 0, 0}; + __m128 __a, __b, __c; + static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; /* PowerISA VSX does not allow partial (for just lower double) results. So to insure we don't generate spurious exceptions (from the upper double values) we splat the lower double before we to the operation. 
*/ - a = vec_splat (__A, 0); - b = vec_splat (__B, 0); - c = a * b; + __a = vec_splat (__A, 0); + __b = vec_splat (__B, 0); + __c = __a * __b; /* Then we merge the lower float result with the original upper float elements from __A. */ - return (vec_sel (__A, c, mask)); + return (vec_sel (__A, __c, __mask)); #else __A[0] = __A[0] * __B[0]; return (__A); @@ -311,18 +311,18 @@ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artif _mm_div_ss (__m128 __A, __m128 __B) { #ifdef _ARCH_PWR7 - __m128 a, b, c; - static const __vector unsigned int mask = {0xffffffff, 0, 0, 0}; + __m128 __a, __b, __c; + static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; /* PowerISA VSX does not allow partial (for just lower double) results. So to insure we don't generate spurious exceptions (from the upper double values) we splat the lower double before we to the operation. */ - a = vec_splat (__A, 0); - b = vec_splat (__B, 0); - c = a / b; + __a = vec_splat (__A, 0); + __b = vec_splat (__B, 0); + __c = __a / __b; /* Then we merge the lower float result with the original upper float elements from __A. */ - return (vec_sel (__A, c, mask)); + return (vec_sel (__A, __c, __mask)); #else __A[0] = __A[0] / __B[0]; return (__A); @@ -332,17 +332,17 @@ _mm_div_ss (__m128 __A, __m128 __B) extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sqrt_ss (__m128 __A) { - __m128 a, c; - static const __vector unsigned int mask = {0xffffffff, 0, 0, 0}; + __m128 __a, __c; + static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; /* PowerISA VSX does not allow partial (for just lower double) * results. So to insure we don't generate spurious exceptions * (from the upper double values) we splat the lower double * before we to the operation. */ - a = vec_splat (__A, 0); - c = vec_sqrt (a); + __a = vec_splat (__A, 0); + __c = vec_sqrt (__a); /* Then we merge the lower float result with the original upper * float elements from __A. */ - return (vec_sel (__A, c, mask)); + return (vec_sel (__A, __c, __mask)); } /* Perform the respective operation on the four SPFP values in A and B. */ @@ -391,81 +391,81 @@ _mm_rsqrt_ps (__m128 __A) extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_rcp_ss (__m128 __A) { - __m128 a, c; - static const __vector unsigned int mask = {0xffffffff, 0, 0, 0}; + __m128 __a, __c; + static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; /* PowerISA VSX does not allow partial (for just lower double) * results. So to insure we don't generate spurious exceptions * (from the upper double values) we splat the lower double * before we to the operation. */ - a = vec_splat (__A, 0); - c = _mm_rcp_ps (a); + __a = vec_splat (__A, 0); + __c = _mm_rcp_ps (__a); /* Then we merge the lower float result with the original upper * float elements from __A. */ - return (vec_sel (__A, c, mask)); + return (vec_sel (__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_rsqrt_ss (__m128 __A) { - __m128 a, c; - static const __vector unsigned int mask = {0xffffffff, 0, 0, 0}; + __m128 __a, __c; + static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; /* PowerISA VSX does not allow partial (for just lower double) * results. So to insure we don't generate spurious exceptions * (from the upper double values) we splat the lower double * before we to the operation. 
*/ - a = vec_splat (__A, 0); - c = vec_rsqrte (a); + __a = vec_splat (__A, 0); + __c = vec_rsqrte (__a); /* Then we merge the lower float result with the original upper * float elements from __A. */ - return (vec_sel (__A, c, mask)); + return (vec_sel (__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_ss (__m128 __A, __m128 __B) { - __v4sf a, b, c; - static const __vector unsigned int mask = {0xffffffff, 0, 0, 0}; + __v4sf __a, __b, __c; + static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; /* PowerISA VSX does not allow partial (for just lower float) * results. So to insure we don't generate spurious exceptions * (from the upper float values) we splat the lower float * before we to the operation. */ - a = vec_splat ((__v4sf)__A, 0); - b = vec_splat ((__v4sf)__B, 0); - c = vec_min (a, b); + __a = vec_splat ((__v4sf)__A, 0); + __b = vec_splat ((__v4sf)__B, 0); + __c = vec_min (__a, __b); /* Then we merge the lower float result with the original upper * float elements from __A. */ - return (vec_sel ((__v4sf)__A, c, mask)); + return (vec_sel ((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_ss (__m128 __A, __m128 __B) { - __v4sf a, b, c; - static const __vector unsigned int mask = {0xffffffff, 0, 0, 0}; + __v4sf __a, __b, __c; + static const __vector unsigned int __mask = {0xffffffff, 0, 0, 0}; /* PowerISA VSX does not allow partial (for just lower float) * results. So to insure we don't generate spurious exceptions * (from the upper float values) we splat the lower float * before we to the operation. */ - a = vec_splat (__A, 0); - b = vec_splat (__B, 0); - c = vec_max (a, b); + __a = vec_splat (__A, 0); + __b = vec_splat (__B, 0); + __c = vec_max (__a, __b); /* Then we merge the lower float result with the original upper * float elements from __A. */ - return (vec_sel ((__v4sf)__A, c, mask)); + return (vec_sel ((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_min_ps (__m128 __A, __m128 __B) { - __vector __bool int m = vec_cmpgt ((__v4sf) __B, (__v4sf) __A); - return vec_sel (__B, __A, m); + __vector __bool int __m = vec_cmpgt ((__v4sf) __B, (__v4sf) __A); + return vec_sel (__B, __A, __m); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_max_ps (__m128 __A, __m128 __B) { - __vector __bool int m = vec_cmpgt ((__v4sf) __A, (__v4sf) __B); - return vec_sel (__B, __A, m); + __vector __bool int __m = vec_cmpgt ((__v4sf) __A, (__v4sf) __B); + return vec_sel (__B, __A, __m); } /* Perform logical bit-wise operations on 128-bit values. 
*/ @@ -530,8 +530,8 @@ _mm_cmpge_ps (__m128 __A, __m128 __B) extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpneq_ps (__m128 __A, __m128 __B) { - __v4sf temp = (__v4sf ) vec_cmpeq ((__v4sf) __A, (__v4sf)__B); - return ((__m128)vec_nor (temp, temp)); + __v4sf __temp = (__v4sf ) vec_cmpeq ((__v4sf) __A, (__v4sf)__B); + return ((__m128)vec_nor (__temp, __temp)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -561,31 +561,31 @@ _mm_cmpnge_ps (__m128 __A, __m128 __B) extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpord_ps (__m128 __A, __m128 __B) { - __vector unsigned int a, b; - __vector unsigned int c, d; - static const __vector unsigned int float_exp_mask = + __vector unsigned int __a, __b; + __vector unsigned int __c, __d; + static const __vector unsigned int __float_exp_mask = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; - a = (__vector unsigned int) vec_abs ((__v4sf)__A); - b = (__vector unsigned int) vec_abs ((__v4sf)__B); - c = (__vector unsigned int) vec_cmpgt (float_exp_mask, a); - d = (__vector unsigned int) vec_cmpgt (float_exp_mask, b); - return ((__m128 ) vec_and (c, d)); + __a = (__vector unsigned int) vec_abs ((__v4sf)__A); + __b = (__vector unsigned int) vec_abs ((__v4sf)__B); + __c = (__vector unsigned int) vec_cmpgt (__float_exp_mask, __a); + __d = (__vector unsigned int) vec_cmpgt (__float_exp_mask, __b); + return ((__m128 ) vec_and (__c, __d)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpunord_ps (__m128 __A, __m128 __B) { - __vector unsigned int a, b; - __vector unsigned int c, d; - static const __vector unsigned int float_exp_mask = + __vector unsigned int __a, __b; + __vector unsigned int __c, __d; + static const __vector unsigned int __float_exp_mask = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; - a = (__vector unsigned int) vec_abs ((__v4sf)__A); - b = (__vector unsigned int) vec_abs ((__v4sf)__B); - c = (__vector unsigned int) vec_cmpgt (a, float_exp_mask); - d = (__vector unsigned int) vec_cmpgt (b, float_exp_mask); - return ((__m128 ) vec_or (c, d)); + __a = (__vector unsigned int) vec_abs ((__v4sf)__A); + __b = (__vector unsigned int) vec_abs ((__v4sf)__B); + __c = (__vector unsigned int) vec_cmpgt (__a, __float_exp_mask); + __d = (__vector unsigned int) vec_cmpgt (__b, __float_exp_mask); + return ((__m128 ) vec_or (__c, __d)); } /* Perform a comparison on the lower SPFP values of A and B. If the @@ -594,222 +594,222 @@ _mm_cmpunord_ps (__m128 __A, __m128 __B) extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpeq_ss (__m128 __A, __m128 __B) { - static const __vector unsigned int mask = + static const __vector unsigned int __mask = { 0xffffffff, 0, 0, 0 }; - __v4sf a, b, c; + __v4sf __a, __b, __c; /* PowerISA VMX does not allow partial (for just element 0) * results. So to insure we don't generate spurious exceptions * (from the upper elements) we splat the lower float * before we to the operation. */ - a = vec_splat ((__v4sf) __A, 0); - b = vec_splat ((__v4sf) __B, 0); - c = (__v4sf) vec_cmpeq(a, b); + __a = vec_splat ((__v4sf) __A, 0); + __b = vec_splat ((__v4sf) __B, 0); + __c = (__v4sf) vec_cmpeq (__a, __b); /* Then we merge the lower float result with the original upper * float elements from __A. 
*/ - return ((__m128)vec_sel ((__v4sf)__A, c, mask)); + return ((__m128)vec_sel ((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmplt_ss (__m128 __A, __m128 __B) { - static const __vector unsigned int mask = + static const __vector unsigned int __mask = { 0xffffffff, 0, 0, 0 }; - __v4sf a, b, c; + __v4sf __a, __b, __c; /* PowerISA VMX does not allow partial (for just element 0) * results. So to insure we don't generate spurious exceptions * (from the upper elements) we splat the lower float * before we to the operation. */ - a = vec_splat ((__v4sf) __A, 0); - b = vec_splat ((__v4sf) __B, 0); - c = (__v4sf) vec_cmplt(a, b); + __a = vec_splat ((__v4sf) __A, 0); + __b = vec_splat ((__v4sf) __B, 0); + __c = (__v4sf) vec_cmplt(__a, __b); /* Then we merge the lower float result with the original upper * float elements from __A. */ - return ((__m128)vec_sel ((__v4sf)__A, c, mask)); + return ((__m128)vec_sel ((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmple_ss (__m128 __A, __m128 __B) { - static const __vector unsigned int mask = + static const __vector unsigned int __mask = { 0xffffffff, 0, 0, 0 }; - __v4sf a, b, c; + __v4sf __a, __b, __c; /* PowerISA VMX does not allow partial (for just element 0) * results. So to insure we don't generate spurious exceptions * (from the upper elements) we splat the lower float * before we to the operation. */ - a = vec_splat ((__v4sf) __A, 0); - b = vec_splat ((__v4sf) __B, 0); - c = (__v4sf) vec_cmple(a, b); + __a = vec_splat ((__v4sf) __A, 0); + __b = vec_splat ((__v4sf) __B, 0); + __c = (__v4sf) vec_cmple(__a, __b); /* Then we merge the lower float result with the original upper * float elements from __A. */ - return ((__m128)vec_sel ((__v4sf)__A, c, mask)); + return ((__m128)vec_sel ((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpgt_ss (__m128 __A, __m128 __B) { - static const __vector unsigned int mask = + static const __vector unsigned int __mask = { 0xffffffff, 0, 0, 0 }; - __v4sf a, b, c; + __v4sf __a, __b, __c; /* PowerISA VMX does not allow partial (for just element 0) * results. So to insure we don't generate spurious exceptions * (from the upper elements) we splat the lower float * before we to the operation. */ - a = vec_splat ((__v4sf) __A, 0); - b = vec_splat ((__v4sf) __B, 0); - c = (__v4sf) vec_cmpgt(a, b); + __a = vec_splat ((__v4sf) __A, 0); + __b = vec_splat ((__v4sf) __B, 0); + __c = (__v4sf) vec_cmpgt(__a, __b); /* Then we merge the lower float result with the original upper * float elements from __A. */ - return ((__m128)vec_sel ((__v4sf)__A, c, mask)); + return ((__m128)vec_sel ((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpge_ss (__m128 __A, __m128 __B) { - static const __vector unsigned int mask = + static const __vector unsigned int __mask = { 0xffffffff, 0, 0, 0 }; - __v4sf a, b, c; + __v4sf __a, __b, __c; /* PowerISA VMX does not allow partial (for just element 0) * results. So to insure we don't generate spurious exceptions * (from the upper elements) we splat the lower float * before we to the operation. 
*/ - a = vec_splat ((__v4sf) __A, 0); - b = vec_splat ((__v4sf) __B, 0); - c = (__v4sf) vec_cmpge(a, b); + __a = vec_splat ((__v4sf) __A, 0); + __b = vec_splat ((__v4sf) __B, 0); + __c = (__v4sf) vec_cmpge(__a, __b); /* Then we merge the lower float result with the original upper * float elements from __A. */ - return ((__m128)vec_sel ((__v4sf)__A, c, mask)); + return ((__m128)vec_sel ((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpneq_ss (__m128 __A, __m128 __B) { - static const __vector unsigned int mask = + static const __vector unsigned int __mask = { 0xffffffff, 0, 0, 0 }; - __v4sf a, b, c; + __v4sf __a, __b, __c; /* PowerISA VMX does not allow partial (for just element 0) * results. So to insure we don't generate spurious exceptions * (from the upper elements) we splat the lower float * before we to the operation. */ - a = vec_splat ((__v4sf) __A, 0); - b = vec_splat ((__v4sf) __B, 0); - c = (__v4sf) vec_cmpeq(a, b); - c = vec_nor (c, c); + __a = vec_splat ((__v4sf) __A, 0); + __b = vec_splat ((__v4sf) __B, 0); + __c = (__v4sf) vec_cmpeq(__a, __b); + __c = vec_nor (__c, __c); /* Then we merge the lower float result with the original upper * float elements from __A. */ - return ((__m128)vec_sel ((__v4sf)__A, c, mask)); + return ((__m128)vec_sel ((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnlt_ss (__m128 __A, __m128 __B) { - static const __vector unsigned int mask = + static const __vector unsigned int __mask = { 0xffffffff, 0, 0, 0 }; - __v4sf a, b, c; + __v4sf __a, __b, __c; /* PowerISA VMX does not allow partial (for just element 0) * results. So to insure we don't generate spurious exceptions * (from the upper elements) we splat the lower float * before we to the operation. */ - a = vec_splat ((__v4sf) __A, 0); - b = vec_splat ((__v4sf) __B, 0); - c = (__v4sf) vec_cmpge(a, b); + __a = vec_splat ((__v4sf) __A, 0); + __b = vec_splat ((__v4sf) __B, 0); + __c = (__v4sf) vec_cmpge(__a, __b); /* Then we merge the lower float result with the original upper * float elements from __A. */ - return ((__m128)vec_sel ((__v4sf)__A, c, mask)); + return ((__m128)vec_sel ((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnle_ss (__m128 __A, __m128 __B) { - static const __vector unsigned int mask = + static const __vector unsigned int __mask = { 0xffffffff, 0, 0, 0 }; - __v4sf a, b, c; + __v4sf __a, __b, __c; /* PowerISA VMX does not allow partial (for just element 0) * results. So to insure we don't generate spurious exceptions * (from the upper elements) we splat the lower float * before we to the operation. */ - a = vec_splat ((__v4sf) __A, 0); - b = vec_splat ((__v4sf) __B, 0); - c = (__v4sf) vec_cmpgt(a, b); + __a = vec_splat ((__v4sf) __A, 0); + __b = vec_splat ((__v4sf) __B, 0); + __c = (__v4sf) vec_cmpgt(__a, __b); /* Then we merge the lower float result with the original upper * float elements from __A. 
*/ - return ((__m128)vec_sel ((__v4sf)__A, c, mask)); + return ((__m128)vec_sel ((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpngt_ss (__m128 __A, __m128 __B) { - static const __vector unsigned int mask = + static const __vector unsigned int __mask = { 0xffffffff, 0, 0, 0 }; - __v4sf a, b, c; + __v4sf __a, __b, __c; /* PowerISA VMX does not allow partial (for just element 0) * results. So to insure we don't generate spurious exceptions * (from the upper elements) we splat the lower float * before we to the operation. */ - a = vec_splat ((__v4sf) __A, 0); - b = vec_splat ((__v4sf) __B, 0); - c = (__v4sf) vec_cmple(a, b); + __a = vec_splat ((__v4sf) __A, 0); + __b = vec_splat ((__v4sf) __B, 0); + __c = (__v4sf) vec_cmple(__a, __b); /* Then we merge the lower float result with the original upper * float elements from __A. */ - return ((__m128)vec_sel ((__v4sf)__A, c, mask)); + return ((__m128)vec_sel ((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpnge_ss (__m128 __A, __m128 __B) { - static const __vector unsigned int mask = + static const __vector unsigned int __mask = { 0xffffffff, 0, 0, 0 }; - __v4sf a, b, c; + __v4sf __a, __b, __c; /* PowerISA VMX does not allow partial (for just element 0) * results. So to insure we don't generate spurious exceptions * (from the upper elements) we splat the lower float * before we do the operation. */ - a = vec_splat ((__v4sf) __A, 0); - b = vec_splat ((__v4sf) __B, 0); - c = (__v4sf) vec_cmplt(a, b); + __a = vec_splat ((__v4sf) __A, 0); + __b = vec_splat ((__v4sf) __B, 0); + __c = (__v4sf) vec_cmplt(__a, __b); /* Then we merge the lower float result with the original upper * float elements from __A. */ - return ((__m128)vec_sel ((__v4sf)__A, c, mask)); + return ((__m128)vec_sel ((__v4sf)__A, __c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpord_ss (__m128 __A, __m128 __B) { - __vector unsigned int a, b; - __vector unsigned int c, d; - static const __vector unsigned int float_exp_mask = + __vector unsigned int __a, __b; + __vector unsigned int __c, __d; + static const __vector unsigned int __float_exp_mask = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; - static const __vector unsigned int mask = + static const __vector unsigned int __mask = { 0xffffffff, 0, 0, 0 }; - a = (__vector unsigned int) vec_abs ((__v4sf)__A); - b = (__vector unsigned int) vec_abs ((__v4sf)__B); - c = (__vector unsigned int) vec_cmpgt (float_exp_mask, a); - d = (__vector unsigned int) vec_cmpgt (float_exp_mask, b); - c = vec_and (c, d); + __a = (__vector unsigned int) vec_abs ((__v4sf)__A); + __b = (__vector unsigned int) vec_abs ((__v4sf)__B); + __c = (__vector unsigned int) vec_cmpgt (__float_exp_mask, __a); + __d = (__vector unsigned int) vec_cmpgt (__float_exp_mask, __b); + __c = vec_and (__c, __d); /* Then we merge the lower float result with the original upper * float elements from __A. 
*/ - return ((__m128)vec_sel ((__v4sf)__A, (__v4sf)c, mask)); + return ((__m128)vec_sel ((__v4sf)__A, (__v4sf)__c, __mask)); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cmpunord_ss (__m128 __A, __m128 __B) { - __vector unsigned int a, b; - __vector unsigned int c, d; - static const __vector unsigned int float_exp_mask = + __vector unsigned int __a, __b; + __vector unsigned int __c, __d; + static const __vector unsigned int __float_exp_mask = { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; - static const __vector unsigned int mask = + static const __vector unsigned int __mask = { 0xffffffff, 0, 0, 0 }; - a = (__vector unsigned int) vec_abs ((__v4sf)__A); - b = (__vector unsigned int) vec_abs ((__v4sf)__B); - c = (__vector unsigned int) vec_cmpgt (a, float_exp_mask); - d = (__vector unsigned int) vec_cmpgt (b, float_exp_mask); - c = vec_or (c, d); + __a = (__vector unsigned int) vec_abs ((__v4sf)__A); + __b = (__vector unsigned int) vec_abs ((__v4sf)__B); + __c = (__vector unsigned int) vec_cmpgt (__a, __float_exp_mask); + __d = (__vector unsigned int) vec_cmpgt (__b, __float_exp_mask); + __c = vec_or (__c, __d); /* Then we merge the lower float result with the original upper * float elements from __A. */ - return ((__m128)vec_sel ((__v4sf)__A, (__v4sf)c, mask)); + return ((__m128)vec_sel ((__v4sf)__A, (__v4sf)__c, __mask)); } /* Compare the lower SPFP values of A and B and return 1 if true @@ -905,9 +905,9 @@ _mm_cvtss_f32 (__m128 __A) extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtss_si32 (__m128 __A) { - int res; + int __res; #ifdef _ARCH_PWR8 - double dtmp; + double __dtmp; __asm__( #ifdef __LITTLE_ENDIAN__ "xxsldwi %x0,%x0,%x0,3;\n" @@ -916,13 +916,13 @@ _mm_cvtss_si32 (__m128 __A) "fctiw %2,%2;\n" "mfvsrd %1,%x2;\n" : "+wa" (__A), - "=r" (res), - "=f" (dtmp) + "=r" (__res), + "=f" (__dtmp) : ); #else - res = __builtin_rint(__A[0]); + __res = __builtin_rint(__A[0]); #endif - return (res); + return __res; } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -938,9 +938,9 @@ _mm_cvt_ss2si (__m128 __A) extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtss_si64 (__m128 __A) { - long long res; + long long __res; #if defined (_ARCH_PWR8) && defined (__powerpc64__) - double dtmp; + double __dtmp; __asm__( #ifdef __LITTLE_ENDIAN__ "xxsldwi %x0,%x0,%x0,3;\n" @@ -949,13 +949,13 @@ _mm_cvtss_si64 (__m128 __A) "fctid %2,%2;\n" "mfvsrd %1,%x2;\n" : "+wa" (__A), - "=r" (res), - "=f" (dtmp) + "=r" (__res), + "=f" (__dtmp) : ); #else - res = __builtin_llrint(__A[0]); + __res = __builtin_llrint(__A[0]); #endif - return (res); + return __res; } /* Microsoft intrinsic. */ @@ -992,15 +992,15 @@ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi _mm_cvtps_pi32 (__m128 __A) { /* Splat two lower SPFP values to both halves. */ - __v4sf temp, rounded; - __vector unsigned long long result; + __v4sf __temp, __rounded; + __vector unsigned long long __result; /* Splat two lower SPFP values to both halves. 
*/ - temp = (__v4sf) vec_splat ((__vector long long)__A, 0); - rounded = vec_rint(temp); - result = (__vector unsigned long long) vec_cts (rounded, 0); + __temp = (__v4sf) vec_splat ((__vector long long)__A, 0); + __rounded = vec_rint (__temp); + __result = (__vector unsigned long long) vec_cts (__rounded, 0); - return (__m64) ((__vector long long) result)[0]; + return (__m64) ((__vector long long) __result)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1014,9 +1014,9 @@ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artifici _mm_cvttss_si32 (__m128 __A) { /* Extract the lower float element. */ - float temp = __A[0]; + float __temp = __A[0]; /* truncate to 32-bit integer and return. */ - return temp; + return __temp; } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1030,9 +1030,9 @@ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __ar _mm_cvttss_si64 (__m128 __A) { /* Extract the lower float element. */ - float temp = __A[0]; + float __temp = __A[0]; /* truncate to 32-bit integer and return. */ - return temp; + return __temp; } /* Microsoft intrinsic. */ @@ -1040,9 +1040,9 @@ extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __ar _mm_cvttss_si64x (__m128 __A) { /* Extract the lower float element. */ - float temp = __A[0]; + float __temp = __A[0]; /* truncate to 32-bit integer and return. */ - return temp; + return __temp; } /* Truncate the two lower SPFP values to 32-bit integers. Return the @@ -1050,14 +1050,14 @@ _mm_cvttss_si64x (__m128 __A) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvttps_pi32 (__m128 __A) { - __v4sf temp; - __vector unsigned long long result; + __v4sf __temp; + __vector unsigned long long __result; /* Splat two lower SPFP values to both halves. 
*/ - temp = (__v4sf) vec_splat ((__vector long long)__A, 0); - result = (__vector unsigned long long) vec_cts (temp, 0); + __temp = (__v4sf) vec_splat ((__vector long long)__A, 0); + __result = (__vector unsigned long long) vec_cts (__temp, 0); - return (__m64) ((__vector long long) result)[0]; + return (__m64) ((__vector long long) __result)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1070,8 +1070,8 @@ _mm_cvtt_ps2pi (__m128 __A) extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi32_ss (__m128 __A, int __B) { - float temp = __B; - __A[0] = temp; + float __temp = __B; + __A[0] = __temp; return __A; } @@ -1087,8 +1087,8 @@ _mm_cvt_si2ss (__m128 __A, int __B) extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtsi64_ss (__m128 __A, long long __B) { - float temp = __B; - __A[0] = temp; + float __temp = __B; + __A[0] = __temp; return __A; } @@ -1105,14 +1105,14 @@ _mm_cvtsi64x_ss (__m128 __A, long long __B) extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtpi32_ps (__m128 __A, __m64 __B) { - __vector signed int vm1; - __vector float vf1; + __vector signed int __vm1; + __vector float __vf1; - vm1 = (__vector signed int) (__vector unsigned long long) {__B, __B}; - vf1 = (__vector float) vec_ctf (vm1, 0); + __vm1 = (__vector signed int) (__vector unsigned long long) {__B, __B}; + __vf1 = (__vector float) vec_ctf (__vm1, 0); return ((__m128) (__vector unsigned long long) - { ((__vector unsigned long long)vf1) [0], + { ((__vector unsigned long long)__vf1) [0], ((__vector unsigned long long)__A) [1]}); } @@ -1126,54 +1126,54 @@ _mm_cvt_pi2ps (__m128 __A, __m64 __B) extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtpi16_ps (__m64 __A) { - __vector signed short vs8; - __vector signed int vi4; - __vector float vf1; + __vector signed short __vs8; + __vector signed int __vi4; + __vector float __vf1; - vs8 = (__vector signed short) (__vector unsigned long long) { __A, __A }; - vi4 = vec_vupklsh (vs8); - vf1 = (__vector float) vec_ctf (vi4, 0); + __vs8 = (__vector signed short) (__vector unsigned long long) { __A, __A }; + __vi4 = vec_vupklsh (__vs8); + __vf1 = (__vector float) vec_ctf (__vi4, 0); - return (__m128) vf1; + return (__m128) __vf1; } /* Convert the four unsigned 16-bit values in A to SPFP form. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtpu16_ps (__m64 __A) { - const __vector unsigned short zero = + const __vector unsigned short __zero = { 0, 0, 0, 0, 0, 0, 0, 0 }; - __vector unsigned short vs8; - __vector unsigned int vi4; - __vector float vf1; + __vector unsigned short __vs8; + __vector unsigned int __vi4; + __vector float __vf1; - vs8 = (__vector unsigned short) (__vector unsigned long long) { __A, __A }; - vi4 = (__vector unsigned int) vec_mergel + __vs8 = (__vector unsigned short) (__vector unsigned long long) { __A, __A }; + __vi4 = (__vector unsigned int) vec_mergel #ifdef __LITTLE_ENDIAN__ - (vs8, zero); + (__vs8, __zero); #else - (zero, vs8); + (__zero, __vs8); #endif - vf1 = (__vector float) vec_ctf (vi4, 0); + __vf1 = (__vector float) vec_ctf (__vi4, 0); - return (__m128) vf1; + return (__m128) __vf1; } /* Convert the low four signed 8-bit values in A to SPFP form. 
*/ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtpi8_ps (__m64 __A) { - __vector signed char vc16; - __vector signed short vs8; - __vector signed int vi4; - __vector float vf1; + __vector signed char __vc16; + __vector signed short __vs8; + __vector signed int __vi4; + __vector float __vf1; - vc16 = (__vector signed char) (__vector unsigned long long) { __A, __A }; - vs8 = vec_vupkhsb (vc16); - vi4 = vec_vupkhsh (vs8); - vf1 = (__vector float) vec_ctf (vi4, 0); + __vc16 = (__vector signed char) (__vector unsigned long long) { __A, __A }; + __vs8 = vec_vupkhsb (__vc16); + __vi4 = vec_vupkhsh (__vs8); + __vf1 = (__vector float) vec_ctf (__vi4, 0); - return (__m128) vf1; + return (__m128) __vf1; } /* Convert the low four unsigned 8-bit values in A to SPFP form. */ @@ -1181,70 +1181,70 @@ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __art _mm_cvtpu8_ps (__m64 __A) { - const __vector unsigned char zero = + const __vector unsigned char __zero = { 0, 0, 0, 0, 0, 0, 0, 0 }; - __vector unsigned char vc16; - __vector unsigned short vs8; - __vector unsigned int vi4; - __vector float vf1; + __vector unsigned char __vc16; + __vector unsigned short __vs8; + __vector unsigned int __vi4; + __vector float __vf1; - vc16 = (__vector unsigned char) (__vector unsigned long long) { __A, __A }; + __vc16 = (__vector unsigned char) (__vector unsigned long long) { __A, __A }; #ifdef __LITTLE_ENDIAN__ - vs8 = (__vector unsigned short) vec_mergel (vc16, zero); - vi4 = (__vector unsigned int) vec_mergeh (vs8, - (__vector unsigned short) zero); + __vs8 = (__vector unsigned short) vec_mergel (__vc16, __zero); + __vi4 = (__vector unsigned int) vec_mergeh (__vs8, + (__vector unsigned short) __zero); #else - vs8 = (__vector unsigned short) vec_mergel (zero, vc16); - vi4 = (__vector unsigned int) vec_mergeh ((__vector unsigned short) zero, - vs8); + __vs8 = (__vector unsigned short) vec_mergel (__zero, __vc16); + __vi4 = (__vector unsigned int) vec_mergeh ((__vector unsigned short) __zero, + __vs8); #endif - vf1 = (__vector float) vec_ctf (vi4, 0); + __vf1 = (__vector float) vec_ctf (__vi4, 0); - return (__m128) vf1; + return (__m128) __vf1; } /* Convert the four signed 32-bit values in A and B to SPFP form. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtpi32x2_ps (__m64 __A, __m64 __B) { - __vector signed int vi4; - __vector float vf4; + __vector signed int __vi4; + __vector float __vf4; - vi4 = (__vector signed int) (__vector unsigned long long) { __A, __B }; - vf4 = (__vector float) vec_ctf (vi4, 0); - return (__m128) vf4; + __vi4 = (__vector signed int) (__vector unsigned long long) { __A, __B }; + __vf4 = (__vector float) vec_ctf (__vi4, 0); + return (__m128) __vf4; } /* Convert the four SPFP values in A to four signed 16-bit integers. 
*/ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtps_pi16 (__m128 __A) { - __v4sf rounded; - __vector signed int temp; - __vector unsigned long long result; + __v4sf __rounded; + __vector signed int __temp; + __vector unsigned long long __result; - rounded = vec_rint(__A); - temp = vec_cts (rounded, 0); - result = (__vector unsigned long long) vec_pack (temp, temp); + __rounded = vec_rint(__A); + __temp = vec_cts (__rounded, 0); + __result = (__vector unsigned long long) vec_pack (__temp, __temp); - return (__m64) ((__vector long long) result)[0]; + return (__m64) ((__vector long long) __result)[0]; } /* Convert the four SPFP values in A to four signed 8-bit integers. */ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_cvtps_pi8 (__m128 __A) { - __v4sf rounded; - __vector signed int tmp_i; - static const __vector signed int zero = {0, 0, 0, 0}; - __vector signed short tmp_s; - __vector signed char res_v; + __v4sf __rounded; + __vector signed int __tmp_i; + static const __vector signed int __zero = {0, 0, 0, 0}; + __vector signed short __tmp_s; + __vector signed char __res_v; - rounded = vec_rint(__A); - tmp_i = vec_cts (rounded, 0); - tmp_s = vec_pack (tmp_i, zero); - res_v = vec_pack (tmp_s, tmp_s); - return (__m64) ((__vector long long) res_v)[0]; + __rounded = vec_rint(__A); + __tmp_i = vec_cts (__rounded, 0); + __tmp_s = vec_pack (__tmp_i, __zero); + __res_v = vec_pack (__tmp_s, __tmp_s); + return (__m64) ((__vector long long) __res_v)[0]; } /* Selects four specific SPFP values from A and B based on MASK. */ @@ -1252,11 +1252,11 @@ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __art _mm_shuffle_ps (__m128 __A, __m128 __B, int const __mask) { - unsigned long element_selector_10 = __mask & 0x03; - unsigned long element_selector_32 = (__mask >> 2) & 0x03; - unsigned long element_selector_54 = (__mask >> 4) & 0x03; - unsigned long element_selector_76 = (__mask >> 6) & 0x03; - static const unsigned int permute_selectors[4] = + unsigned long __element_selector_10 = __mask & 0x03; + unsigned long __element_selector_32 = (__mask >> 2) & 0x03; + unsigned long __element_selector_54 = (__mask >> 4) & 0x03; + unsigned long __element_selector_76 = (__mask >> 6) & 0x03; + static const unsigned int __permute_selectors[4] = { #ifdef __LITTLE_ENDIAN__ 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C @@ -1264,13 +1264,13 @@ _mm_shuffle_ps (__m128 __A, __m128 __B, int const __mask) 0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F #endif }; - __vector unsigned int t; + __vector unsigned int __t; - t[0] = permute_selectors[element_selector_10]; - t[1] = permute_selectors[element_selector_32]; - t[2] = permute_selectors[element_selector_54] + 0x10101010; - t[3] = permute_selectors[element_selector_76] + 0x10101010; - return vec_perm ((__v4sf) __A, (__v4sf)__B, (__vector unsigned char)t); + __t[0] = __permute_selectors[__element_selector_10]; + __t[1] = __permute_selectors[__element_selector_32]; + __t[2] = __permute_selectors[__element_selector_54] + 0x10101010; + __t[3] = __permute_selectors[__element_selector_76] + 0x10101010; + return vec_perm ((__v4sf) __A, (__v4sf)__B, (__vector unsigned char)__t); } /* Selects and interleaves the upper two SPFP values from A and B. 
*/ @@ -1355,8 +1355,8 @@ _mm_movemask_ps (__m128 __A) #ifdef _ARCH_PWR10 return vec_extractm ((__vector unsigned int) __A); #else - __vector unsigned long long result; - static const __vector unsigned int perm_mask = + __vector unsigned long long __result; + static const __vector unsigned int __perm_mask = { #ifdef __LITTLE_ENDIAN__ 0x00204060, 0x80808080, 0x80808080, 0x80808080 @@ -1365,14 +1365,14 @@ _mm_movemask_ps (__m128 __A) #endif }; - result = ((__vector unsigned long long) + __result = ((__vector unsigned long long) vec_vbpermq ((__vector unsigned char) __A, - (__vector unsigned char) perm_mask)); + (__vector unsigned char) __perm_mask)); #ifdef __LITTLE_ENDIAN__ - return result[1]; + return __result[1]; #else - return result[0]; + return __result[0]; #endif #endif /* !_ARCH_PWR10 */ } @@ -1395,12 +1395,12 @@ _mm_load_ps1 (float const *__P) extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_extract_pi16 (__m64 const __A, int const __N) { - unsigned int shiftr = __N & 3; + unsigned int __shiftr = __N & 3; #ifdef __BIG_ENDIAN__ - shiftr = 3 - shiftr; + __shiftr = 3 - __shiftr; #endif - return ((__A >> (shiftr * 16)) & 0xffff); + return ((__A >> (__shiftr * 16)) & 0xffff); } extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1414,12 +1414,12 @@ _m_pextrw (__m64 const __A, int const __N) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_insert_pi16 (__m64 const __A, int const __D, int const __N) { - const int shiftl = (__N & 3) * 16; - const __m64 shiftD = (const __m64) __D << shiftl; - const __m64 mask = 0xffffUL << shiftl; - __m64 result = (__A & (~mask)) | (shiftD & mask); + const int __shiftl = (__N & 3) * 16; + const __m64 __shiftD = (const __m64) __D << __shiftl; + const __m64 __mask = 0xffffUL << __shiftl; + __m64 __result = (__A & (~__mask)) | (__shiftD & __mask); - return (result); + return __result; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1434,30 +1434,30 @@ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi _mm_max_pi16 (__m64 __A, __m64 __B) { #if _ARCH_PWR8 - __vector signed short a, b, r; - __vector __bool short c; - - a = (__vector signed short)vec_splats (__A); - b = (__vector signed short)vec_splats (__B); - c = (__vector __bool short)vec_cmpgt (a, b); - r = vec_sel (b, a, c); - return (__m64) ((__vector long long) r)[0]; + __vector signed short __a, __b, __r; + __vector __bool short __c; + + __a = (__vector signed short)vec_splats (__A); + __b = (__vector signed short)vec_splats (__B); + __c = (__vector __bool short)vec_cmpgt (__a, __b); + __r = vec_sel (__b, __a, __c); + return (__m64) ((__vector long long) __r)[0]; #else - __m64_union m1, m2, res; + __m64_union __m1, __m2, __res; - m1.as_m64 = __A; - m2.as_m64 = __B; + __m1.as_m64 = __A; + __m2.as_m64 = __B; - res.as_short[0] = - (m1.as_short[0] > m2.as_short[0]) ? m1.as_short[0] : m2.as_short[0]; - res.as_short[1] = - (m1.as_short[1] > m2.as_short[1]) ? m1.as_short[1] : m2.as_short[1]; - res.as_short[2] = - (m1.as_short[2] > m2.as_short[2]) ? m1.as_short[2] : m2.as_short[2]; - res.as_short[3] = - (m1.as_short[3] > m2.as_short[3]) ? m1.as_short[3] : m2.as_short[3]; + __res.as_short[0] = + (__m1.as_short[0] > __m2.as_short[0]) ? __m1.as_short[0] : __m2.as_short[0]; + __res.as_short[1] = + (__m1.as_short[1] > __m2.as_short[1]) ? 
__m1.as_short[1] : __m2.as_short[1]; + __res.as_short[2] = + (__m1.as_short[2] > __m2.as_short[2]) ? __m1.as_short[2] : __m2.as_short[2]; + __res.as_short[3] = + (__m1.as_short[3] > __m2.as_short[3]) ? __m1.as_short[3] : __m2.as_short[3]; - return (__m64) res.as_m64; + return (__m64) __res.as_m64; #endif } @@ -1472,28 +1472,27 @@ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi _mm_max_pu8 (__m64 __A, __m64 __B) { #if _ARCH_PWR8 - __vector unsigned char a, b, r; - __vector __bool char c; - - a = (__vector unsigned char)vec_splats (__A); - b = (__vector unsigned char)vec_splats (__B); - c = (__vector __bool char)vec_cmpgt (a, b); - r = vec_sel (b, a, c); - return (__m64) ((__vector long long) r)[0]; + __vector unsigned char __a, __b, __r; + __vector __bool char __c; + + __a = (__vector unsigned char)vec_splats (__A); + __b = (__vector unsigned char)vec_splats (__B); + __c = (__vector __bool char)vec_cmpgt (__a, __b); + __r = vec_sel (__b, __a, __c); + return (__m64) ((__vector long long) __r)[0]; #else - __m64_union m1, m2, res; - long i; + __m64_union __m1, __m2, __res; + long __i; - m1.as_m64 = __A; - m2.as_m64 = __B; + __m1.as_m64 = __A; + __m2.as_m64 = __B; + for (__i = 0; __i < 8; __i++) + __res.as_char[__i] = + ((unsigned char) __m1.as_char[__i] > (unsigned char) __m2.as_char[__i]) ? + __m1.as_char[__i] : __m2.as_char[__i]; - for (i = 0; i < 8; i++) - res.as_char[i] = - ((unsigned char) m1.as_char[i] > (unsigned char) m2.as_char[i]) ? - m1.as_char[i] : m2.as_char[i]; - - return (__m64) res.as_m64; + return (__m64) __res.as_m64; #endif } @@ -1508,30 +1507,30 @@ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi _mm_min_pi16 (__m64 __A, __m64 __B) { #if _ARCH_PWR8 - __vector signed short a, b, r; - __vector __bool short c; - - a = (__vector signed short)vec_splats (__A); - b = (__vector signed short)vec_splats (__B); - c = (__vector __bool short)vec_cmplt (a, b); - r = vec_sel (b, a, c); - return (__m64) ((__vector long long) r)[0]; + __vector signed short __a, __b, __r; + __vector __bool short __c; + + __a = (__vector signed short)vec_splats (__A); + __b = (__vector signed short)vec_splats (__B); + __c = (__vector __bool short)vec_cmplt (__a, __b); + __r = vec_sel (__b, __a, __c); + return (__m64) ((__vector long long) __r)[0]; #else - __m64_union m1, m2, res; + __m64_union __m1, __m2, __res; - m1.as_m64 = __A; - m2.as_m64 = __B; + __m1.as_m64 = __A; + __m2.as_m64 = __B; - res.as_short[0] = - (m1.as_short[0] < m2.as_short[0]) ? m1.as_short[0] : m2.as_short[0]; - res.as_short[1] = - (m1.as_short[1] < m2.as_short[1]) ? m1.as_short[1] : m2.as_short[1]; - res.as_short[2] = - (m1.as_short[2] < m2.as_short[2]) ? m1.as_short[2] : m2.as_short[2]; - res.as_short[3] = - (m1.as_short[3] < m2.as_short[3]) ? m1.as_short[3] : m2.as_short[3]; + __res.as_short[0] = + (__m1.as_short[0] < __m2.as_short[0]) ? __m1.as_short[0] : __m2.as_short[0]; + __res.as_short[1] = + (__m1.as_short[1] < __m2.as_short[1]) ? __m1.as_short[1] : __m2.as_short[1]; + __res.as_short[2] = + (__m1.as_short[2] < __m2.as_short[2]) ? __m1.as_short[2] : __m2.as_short[2]; + __res.as_short[3] = + (__m1.as_short[3] < __m2.as_short[3]) ? 
__m1.as_short[3] : __m2.as_short[3]; - return (__m64) res.as_m64; + return (__m64) __res.as_m64; #endif } @@ -1546,28 +1545,28 @@ extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artifi _mm_min_pu8 (__m64 __A, __m64 __B) { #if _ARCH_PWR8 - __vector unsigned char a, b, r; - __vector __bool char c; - - a = (__vector unsigned char)vec_splats (__A); - b = (__vector unsigned char)vec_splats (__B); - c = (__vector __bool char)vec_cmplt (a, b); - r = vec_sel (b, a, c); - return (__m64) ((__vector long long) r)[0]; + __vector unsigned char __a, __b, __r; + __vector __bool char __c; + + __a = (__vector unsigned char)vec_splats (__A); + __b = (__vector unsigned char)vec_splats (__B); + __c = (__vector __bool char)vec_cmplt (__a, __b); + __r = vec_sel (__b, __a, __c); + return (__m64) ((__vector long long) __r)[0]; #else - __m64_union m1, m2, res; - long i; + __m64_union __m1, __m2, __res; + long __i; - m1.as_m64 = __A; - m2.as_m64 = __B; + __m1.as_m64 = __A; + __m2.as_m64 = __B; - for (i = 0; i < 8; i++) - res.as_char[i] = - ((unsigned char) m1.as_char[i] < (unsigned char) m2.as_char[i]) ? - m1.as_char[i] : m2.as_char[i]; + for (__i = 0; __i < 8; __i++) + __res.as_char[__i] = + ((unsigned char) __m1.as_char[__i] < (unsigned char) __m2.as_char[__i]) ? + __m1.as_char[__i] : __m2.as_char[__i]; - return (__m64) res.as_m64; + return (__m64) __res.as_m64; #endif } @@ -1582,24 +1581,24 @@ extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artifici _mm_movemask_pi8 (__m64 __A) { #ifdef __powerpc64__ - unsigned long long p = + unsigned long long __p = #ifdef __LITTLE_ENDIAN__ 0x0008101820283038UL; // permute control for sign bits #else 0x3830282018100800UL; // permute control for sign bits #endif - return __builtin_bpermd (p, __A); + return __builtin_bpermd (__p, __A); #else #ifdef __LITTLE_ENDIAN__ - unsigned int mask = 0x20283038UL; - unsigned int r1 = __builtin_bpermd (mask, __A) & 0xf; - unsigned int r2 = __builtin_bpermd (mask, __A >> 32) & 0xf; + unsigned int __mask = 0x20283038UL; + unsigned int __r1 = __builtin_bpermd (__mask, __A) & 0xf; + unsigned int __r2 = __builtin_bpermd (__mask, __A >> 32) & 0xf; #else - unsigned int mask = 0x38302820UL; - unsigned int r1 = __builtin_bpermd (mask, __A >> 32) & 0xf; - unsigned int r2 = __builtin_bpermd (mask, __A) & 0xf; + unsigned int __mask = 0x38302820UL; + unsigned int __r1 = __builtin_bpermd (__mask, __A >> 32) & 0xf; + unsigned int __r2 = __builtin_bpermd (__mask, __A) & 0xf; #endif - return (r2 << 4) | r1; + return (__r2 << 4) | __r1; #endif } @@ -1614,10 +1613,10 @@ _m_pmovmskb (__m64 __A) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_mulhi_pu16 (__m64 __A, __m64 __B) { - __vector unsigned short a, b; - __vector unsigned short c; - __vector unsigned int w0, w1; - __vector unsigned char xform1 = { + __vector unsigned short __a, __b; + __vector unsigned short __c; + __vector unsigned int __w0, __w1; + __vector unsigned char __xform1 = { #ifdef __LITTLE_ENDIAN__ 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, 0x0A, 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F @@ -1627,14 +1626,14 @@ _mm_mulhi_pu16 (__m64 __A, __m64 __B) #endif }; - a = (__vector unsigned short)vec_splats (__A); - b = (__vector unsigned short)vec_splats (__B); + __a = (__vector unsigned short)vec_splats (__A); + __b = (__vector unsigned short)vec_splats (__B); - w0 = vec_vmuleuh (a, b); - w1 = vec_vmulouh (a, b); - c = (__vector unsigned short)vec_perm (w0, w1, xform1); + __w0 = vec_vmuleuh (__a, __b); 
+ __w1 = vec_vmulouh (__a, __b); + __c = (__vector unsigned short)vec_perm (__w0, __w1, __xform1); - return (__m64) ((__vector long long) c)[0]; + return (__m64) ((__vector long long) __c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1648,11 +1647,11 @@ _m_pmulhuw (__m64 __A, __m64 __B) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_shuffle_pi16 (__m64 __A, int const __N) { - unsigned long element_selector_10 = __N & 0x03; - unsigned long element_selector_32 = (__N >> 2) & 0x03; - unsigned long element_selector_54 = (__N >> 4) & 0x03; - unsigned long element_selector_76 = (__N >> 6) & 0x03; - static const unsigned short permute_selectors[4] = + unsigned long __element_selector_10 = __N & 0x03; + unsigned long __element_selector_32 = (__N >> 2) & 0x03; + unsigned long __element_selector_54 = (__N >> 4) & 0x03; + unsigned long __element_selector_76 = (__N >> 6) & 0x03; + static const unsigned short __permute_selectors[4] = { #ifdef __LITTLE_ENDIAN__ 0x0908, 0x0B0A, 0x0D0C, 0x0F0E @@ -1660,24 +1659,24 @@ _mm_shuffle_pi16 (__m64 __A, int const __N) 0x0607, 0x0405, 0x0203, 0x0001 #endif }; - __m64_union t; - __vector unsigned long long a, p, r; + __m64_union __t; + __vector unsigned long long __a, __p, __r; #ifdef __LITTLE_ENDIAN__ - t.as_short[0] = permute_selectors[element_selector_10]; - t.as_short[1] = permute_selectors[element_selector_32]; - t.as_short[2] = permute_selectors[element_selector_54]; - t.as_short[3] = permute_selectors[element_selector_76]; + __t.as_short[0] = __permute_selectors[__element_selector_10]; + __t.as_short[1] = __permute_selectors[__element_selector_32]; + __t.as_short[2] = __permute_selectors[__element_selector_54]; + __t.as_short[3] = __permute_selectors[__element_selector_76]; #else - t.as_short[3] = permute_selectors[element_selector_10]; - t.as_short[2] = permute_selectors[element_selector_32]; - t.as_short[1] = permute_selectors[element_selector_54]; - t.as_short[0] = permute_selectors[element_selector_76]; + __t.as_short[3] = __permute_selectors[__element_selector_10]; + __t.as_short[2] = __permute_selectors[__element_selector_32]; + __t.as_short[1] = __permute_selectors[__element_selector_54]; + __t.as_short[0] = __permute_selectors[__element_selector_76]; #endif - p = vec_splats (t.as_m64); - a = vec_splats (__A); - r = vec_perm (a, a, (__vector unsigned char)p); - return (__m64) ((__vector long long) r)[0]; + __p = vec_splats (__t.as_m64); + __a = vec_splats (__A); + __r = vec_perm (__a, __a, (__vector unsigned char)__p); + return (__m64) ((__vector long long) __r)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1692,14 +1691,14 @@ _m_pshufw (__m64 __A, int const __N) extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P) { - __m64 hibit = 0x8080808080808080UL; - __m64 mask, tmp; - __m64 *p = (__m64*)__P; + __m64 __hibit = 0x8080808080808080UL; + __m64 __mask, __tmp; + __m64 *__p = (__m64*)__P; - tmp = *p; - mask = _mm_cmpeq_pi8 ((__N & hibit), hibit); - tmp = (tmp & (~mask)) | (__A & mask); - *p = tmp; + __tmp = *__p; + __mask = _mm_cmpeq_pi8 ((__N & __hibit), __hibit); + __tmp = (__tmp & (~__mask)) | (__A & __mask); + *__p = __tmp; } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1712,12 +1711,12 @@ _m_maskmovq (__m64 __A, __m64 __N, char *__P) extern __inline __m64 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_avg_pu8 (__m64 __A, __m64 __B) { - __vector unsigned char a, b, c; + __vector unsigned char __a, __b, __c; - a = (__vector unsigned char)vec_splats (__A); - b = (__vector unsigned char)vec_splats (__B); - c = vec_avg (a, b); - return (__m64) ((__vector long long) c)[0]; + __a = (__vector unsigned char)vec_splats (__A); + __b = (__vector unsigned char)vec_splats (__B); + __c = vec_avg (__a, __b); + return (__m64) ((__vector long long) __c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1730,12 +1729,12 @@ _m_pavgb (__m64 __A, __m64 __B) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_avg_pu16 (__m64 __A, __m64 __B) { - __vector unsigned short a, b, c; + __vector unsigned short __a, __b, __c; - a = (__vector unsigned short)vec_splats (__A); - b = (__vector unsigned short)vec_splats (__B); - c = vec_avg (a, b); - return (__m64) ((__vector long long) c)[0]; + __a = (__vector unsigned short)vec_splats (__A); + __b = (__vector unsigned short)vec_splats (__B); + __c = vec_avg (__a, __b); + return (__m64) ((__vector long long) __c)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -1750,26 +1749,26 @@ _m_pavgw (__m64 __A, __m64 __B) extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_sad_pu8 (__m64 __A, __m64 __B) { - __vector unsigned char a, b; - __vector unsigned char vmin, vmax, vabsdiff; - __vector signed int vsum; - const __vector unsigned int zero = + __vector unsigned char __a, __b; + __vector unsigned char __vmin, __vmax, __vabsdiff; + __vector signed int __vsum; + const __vector unsigned int __zero = { 0, 0, 0, 0 }; - __m64_union result = {0}; + __m64_union __result = {0}; - a = (__vector unsigned char) (__vector unsigned long long) { 0UL, __A }; - b = (__vector unsigned char) (__vector unsigned long long) { 0UL, __B }; - vmin = vec_min (a, b); - vmax = vec_max (a, b); - vabsdiff = vec_sub (vmax, vmin); + __a = (__vector unsigned char) (__vector unsigned long long) { 0UL, __A }; + __b = (__vector unsigned char) (__vector unsigned long long) { 0UL, __B }; + __vmin = vec_min (__a, __b); + __vmax = vec_max (__a, __b); + __vabsdiff = vec_sub (__vmax, __vmin); /* Sum four groups of bytes into integers. */ - vsum = (__vector signed int) vec_sum4s (vabsdiff, zero); + __vsum = (__vector signed int) vec_sum4s (__vabsdiff, __zero); /* Sum across four integers with integer result. */ - vsum = vec_sums (vsum, (__vector signed int) zero); + __vsum = vec_sums (__vsum, (__vector signed int) __zero); /* The sum is in the right most 32-bits of the vector result. Transfer to a GPR and truncate to 16 bits. */ - result.as_short[0] = vsum[3]; - return result.as_m64; + __result.as_short[0] = __vsum[3]; + return __result.as_m64; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index 43c5c72..d2af6d8 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -3636,7 +3636,7 @@ s390_rtx_costs (rtx x, machine_mode mode, int outer_code, /* It is going to be a load/store on condition. Make it slightly more expensive than a normal load. 
*/ - *total = COSTS_N_INSNS (1) + 1; + *total = COSTS_N_INSNS (1) + 2; rtx dst = SET_DEST (x); rtx then = XEXP (SET_SRC (x), 1); @@ -15903,7 +15903,7 @@ s390_valid_target_attribute_inner_p (tree args, /* Process the option. */ if (!found) { - error ("attribute(target(\"%s\")) is unknown", orig_p); + error ("attribute %<target%> argument %qs is unknown", orig_p); return false; } else if (attrs[i].only_as_pragma && !force_pragma) @@ -15953,7 +15953,7 @@ s390_valid_target_attribute_inner_p (tree args, } else { - error ("attribute(target(\"%s\")) is unknown", orig_p); + error ("attribute %<target%> argument %qs is unknown", orig_p); ret = false; } } @@ -15970,7 +15970,7 @@ s390_valid_target_attribute_inner_p (tree args, global_dc); else { - error ("attribute(target(\"%s\")) is unknown", orig_p); + error ("attribute %<target%> argument %qs is unknown", orig_p); ret = false; } } @@ -16091,6 +16091,23 @@ s390_valid_target_attribute_p (tree fndecl, static bool s390_can_inline_p (tree caller, tree callee) { + /* Flags which if present in the callee are required in the caller as well. */ + const unsigned HOST_WIDE_INT caller_required_masks = MASK_OPT_HTM; + + /* Flags which affect the ABI and in general prevent inlining. */ + unsigned HOST_WIDE_INT must_match_masks + = (MASK_64BIT | MASK_ZARCH | MASK_HARD_DFP | MASK_SOFT_FLOAT + | MASK_LONG_DOUBLE_128 | MASK_OPT_VX); + + /* Flags which we in general want to prevent inlining but accept for + always_inline. */ + const unsigned HOST_WIDE_INT always_inline_safe_masks + = MASK_MVCLE | MASK_BACKCHAIN | MASK_SMALL_EXEC; + + const HOST_WIDE_INT all_masks + = (caller_required_masks | must_match_masks | always_inline_safe_masks + | MASK_DEBUG_ARG | MASK_PACKED_STACK | MASK_ZVECTOR); + tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); @@ -16103,16 +16120,18 @@ s390_can_inline_p (tree caller, tree callee) struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); - bool ret = true; - if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)) - != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))) - ret = false; + /* If one of these triggers make sure to add proper handling of your + new flag to this hook. */ + gcc_assert (!(caller_opts->x_target_flags & ~all_masks)); + gcc_assert (!(callee_opts->x_target_flags & ~all_masks)); - /* Don't inline functions to be compiled for a more recent arch into a - function for an older arch. */ - else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch) - ret = false; + bool always_inline + = (DECL_DISREGARD_INLINE_LIMITS (callee) + && lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee))); + + if (!always_inline) + must_match_masks |= always_inline_safe_masks; /* Inlining a hard float function into a soft float function is only allowed if the hard float function doesn't actually make use of @@ -16120,16 +16139,27 @@ s390_can_inline_p (tree caller, tree callee) We are called from FEs for multi-versioning call optimization, so beware of ipa_fn_summaries not available. */ - else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags) - && !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags)) - || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags) - && TARGET_HARD_DFP_P (callee_opts->x_target_flags))) - && (! 
ipa_fn_summaries - || ipa_fn_summaries->get - (cgraph_node::get (callee))->fp_expressions)) - ret = false; + if (always_inline && ipa_fn_summaries + && !ipa_fn_summaries->get(cgraph_node::get (callee))->fp_expressions) + must_match_masks &= ~(MASK_HARD_DFP | MASK_SOFT_FLOAT); - return ret; + if ((caller_opts->x_target_flags & must_match_masks) + != (callee_opts->x_target_flags & must_match_masks)) + return false; + + if (~(caller_opts->x_target_flags & caller_required_masks) + & (callee_opts->x_target_flags & caller_required_masks)) + return false; + + /* Don't inline functions to be compiled for a more recent arch into a + function for an older arch. */ + if (caller_opts->x_s390_arch < callee_opts->x_s390_arch) + return false; + + if (!always_inline && caller_opts->x_s390_tune != callee_opts->x_s390_tune) + return false; + + return true; } #endif @@ -16809,7 +16839,6 @@ s390_code_end (void) assemble_name_raw (asm_out_file, label_start); fputs ("-.\n", asm_out_file); } - switch_to_section (current_function_section ()); } } } diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index e3ccbac..5eee8e8 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -7003,9 +7003,9 @@ if (!CONSTANT_P (els)) els = simplify_gen_subreg (E_SImode, els, <MODE>mode, 0); - rtx tmp_target = gen_reg_rtx (E_SImode); + rtx tmp_target = simplify_gen_subreg (E_SImode, operands[0], <MODE>mode, 0); + emit_insn (gen_movsicc (tmp_target, operands[1], then, els)); - emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp_target)); DONE; }) diff --git a/gcc/config/sh/t-linux b/gcc/config/sh/t-linux index d33c638..4866dac 100644 --- a/gcc/config/sh/t-linux +++ b/gcc/config/sh/t-linux @@ -1,2 +1,3 @@ -MULTILIB_DIRNAMES= -MULTILIB_MATCHES = +MULTILIB_DIRNAMES= +MULTILIB_MATCHES= +MULTILIB_EXCEPTIONS=m1 mb/m1 m2a diff --git a/gcc/config/sparc/linux64.h b/gcc/config/sparc/linux64.h index 46823b6..d08a2ef 100644 --- a/gcc/config/sparc/linux64.h +++ b/gcc/config/sparc/linux64.h @@ -35,8 +35,8 @@ along with GCC; see the file COPYING3. If not see #if defined(TARGET_64BIT_DEFAULT) && TARGET_CPU_DEFAULT >= TARGET_CPU_v9 #undef TARGET_DEFAULT #define TARGET_DEFAULT \ - (MASK_V9 + MASK_PTR64 + MASK_64BIT + MASK_STACK_BIAS + \ - MASK_APP_REGS + MASK_FPU + MASK_LONG_DOUBLE_128) + (MASK_V9 + MASK_64BIT + MASK_PTR64 + MASK_STACK_BIAS + \ + MASK_V8PLUS + MASK_APP_REGS + MASK_FPU + MASK_LONG_DOUBLE_128) #endif /* This must be v9a not just v9 because by default we enable diff --git a/gcc/config/v850/v850.md b/gcc/config/v850/v850.md index ed51157..6ca31e3 100644 --- a/gcc/config/v850/v850.md +++ b/gcc/config/v850/v850.md @@ -2601,7 +2601,12 @@ (set_attr "type" "fpu")]) ;;; negative-multiply-add -(define_insn "fnmasf4" +;; Note the name on this and the following insn were previously fnmasf4 +;; and fnmssf4. Those names are known to the gimple->rtl expanders and +;; must implement specific semantics (negating one of the inputs to the +;; multiplication). The v850 instructions actually negate the entire +;; result. Thus the names have been changed and hidden. 
+(define_insn "*v850_fnmasf4" [(set (match_operand:SF 0 "register_operand" "=r") (neg:SF (fma:SF (match_operand:SF 1 "register_operand" "r") (match_operand:SF 2 "register_operand" "r") @@ -2612,7 +2617,7 @@ (set_attr "type" "fpu")]) ;; negative-multiply-subtract -(define_insn "fnmssf4" +(define_insn "*v850_fnmssf4" [(set (match_operand:SF 0 "register_operand" "=r") (neg:SF (fma:SF (match_operand:SF 1 "register_operand" "r") (match_operand:SF 2 "register_operand" "r") |
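
The x86-compatibility intrinsics hunks above consist almost entirely of renaming header-local variables (res, dtmp, temp, vs8, and so on) to their reserved __-prefixed spellings. The usual motivation for this kind of rename, and presumably the one here, is that an installed header has to survive arbitrary user macros, and only identifiers beginning with two underscores are reserved to the implementation. A minimal sketch of the failure mode the reserved spelling avoids; my_cvt and the macro are invented stand-ins, while __builtin_rint is the GCC builtin already used in the patch:

/* Not part of the patch: a user may legally define this macro before
   including the header; it would have expanded inside a body that used a
   plain local named "res", but it cannot touch the reserved "__res".  */
#define res do_not_use

static inline int
my_cvt (double __A)                    /* stand-in for e.g. _mm_cvtss_si32 */
{
  int __res = __builtin_rint (__A);    /* reserved name, macro-proof */
  return __res;
}

int
main (void)
{
  return my_cvt (1.4) - 1;             /* exit status 0 */
}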
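
The reworked s390_can_inline_p above sorts the target flags into three groups: flags the caller must also have if the callee uses them (caller_required_masks), flags that must match exactly (must_match_masks), and flags that only block inlining for non-always_inline callees (always_inline_safe_masks). A condensed sketch of that bit logic, with invented flag values and only the checks from the hunk, shows how the masks interact:

/* Not part of the patch; bit values are made up for illustration.  */
#include <stdio.h>

#define MASK_OPT_HTM   0x1
#define MASK_OPT_VX    0x2
#define MASK_BACKCHAIN 0x4

static int
can_inline (unsigned long caller_flags, unsigned long callee_flags,
            int always_inline)
{
  const unsigned long caller_required = MASK_OPT_HTM;
  const unsigned long always_inline_safe = MASK_BACKCHAIN;
  unsigned long must_match = MASK_OPT_VX;

  /* always_inline callees may differ in the "safe" flags; others may not.  */
  if (!always_inline)
    must_match |= always_inline_safe;

  /* ABI-affecting flags must agree exactly.  */
  if ((caller_flags & must_match) != (callee_flags & must_match))
    return 0;

  /* A flag the callee relies on must also be enabled in the caller.  */
  if (~(caller_flags & caller_required) & (callee_flags & caller_required))
    return 0;

  return 1;
}

int
main (void)
{
  printf ("%d\n", can_inline (0, MASK_OPT_HTM, 0));   /* 0: callee needs HTM */
  printf ("%d\n", can_inline (MASK_OPT_VX,
                              MASK_OPT_VX | MASK_BACKCHAIN, 1));   /* 1 */
  return 0;
}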
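
The v850.md note above explains why the insns lost their standard names: GCC's fnma (and fnms) pattern names are documented to negate one input of the multiplication, while the v850 instructions negate the whole fused result. A worked example in plain C arithmetic, not part of the patch, makes the gap concrete; the same mismatch applies to the fnms/FNMSF pair:

/* Not part of the patch: the two formulas the comment contrasts,
   evaluated for a = 2, b = 3, c = 1.  */
#include <stdio.h>

int
main (void)
{
  float a = 2.0f, b = 3.0f, c = 1.0f;

  float fnma_std   = -(a * b) + c;   /* documented fnmasf4 semantics: -6 + 1 = -5 */
  float v850_fnmaf = -(a * b + c);   /* the (neg (fma ...)) pattern above: -(6 + 1) = -7 */

  printf ("%g %g\n", fnma_std, v850_fnmaf);   /* prints -5 -7 */
  return 0;
}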