Diffstat (limited to 'gcc/config')
34 files changed, 1430 insertions, 465 deletions
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def index 0e22d72..7f204fd 100644 --- a/gcc/config/aarch64/aarch64-cores.def +++ b/gcc/config/aarch64/aarch64-cores.def @@ -173,6 +173,22 @@ AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, V8_2A, (F AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, (), cortexa53, 0x41, 0xd15, -1) AARCH64_CORE("cortex-r82ae", cortexr82ae, cortexa53, V8R, (), cortexa53, 0x41, 0xd14, -1) +/* Apple (A12 and M) cores. + Known part numbers as listed in other public sources. + Placeholders for schedulers, generic_armv8_a for costs. + A12 seems mostly 8.3, M1 is 8.5 without BTI, M2 and M3 are 8.6 + From measurements made so far the odd-number core IDs are performance. */ +AARCH64_CORE("apple-a12", applea12, cortexa53, V8_3A, (), generic_armv8_a, 0x61, 0x12, -1) +AARCH64_CORE("apple-m1", applem1_0, cortexa57, V8_5A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x21, 0x20), -1) +AARCH64_CORE("apple-m1", applem1_1, cortexa57, V8_5A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x23, 0x22), -1) +AARCH64_CORE("apple-m1", applem1_2, cortexa57, V8_5A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x25, 0x24), -1) +AARCH64_CORE("apple-m1", applem1_3, cortexa57, V8_5A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x29, 0x28), -1) +AARCH64_CORE("apple-m2", applem2_0, cortexa57, V8_6A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x31, 0x30), -1) +AARCH64_CORE("apple-m2", applem2_1, cortexa57, V8_6A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x33, 0x32), -1) +AARCH64_CORE("apple-m2", applem2_2, cortexa57, V8_6A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x35, 0x34), -1) +AARCH64_CORE("apple-m2", applem2_3, cortexa57, V8_6A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x39, 0x38), -1) +AARCH64_CORE("apple-m3", applem3_0, cortexa57, V8_6A, (), generic_armv8_a, 0x61, AARCH64_BIG_LITTLE (0x49, 0x48), -1) + /* Armv9.0-A Architecture Processors. */ /* Arm ('A') cores. 
*/ diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def index 79b7935..dbbb021 100644 --- a/gcc/config/aarch64/aarch64-option-extensions.def +++ b/gcc/config/aarch64/aarch64-option-extensions.def @@ -207,7 +207,7 @@ AARCH64_FMV_FEATURE("sve2-sm4", SVE_SM4, (SVE2_SM4)) AARCH64_OPT_EXTENSION("sve2p1", SVE2p1, (SVE2), (), (), "sve2p1") -AARCH64_OPT_FMV_EXTENSION("sme", SME, (BF16, SVE2), (), (), "sme") +AARCH64_OPT_FMV_EXTENSION("sme", SME, (BF16, FCMA, F16, F16FML), (), (), "sme") AARCH64_OPT_EXTENSION("memtag", MEMTAG, (), (), (), "") diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index 44e4807..3651926 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -5174,7 +5174,11 @@ bool verify_type_context (location_t loc, type_context_kind context, const_tree type, bool silent_p) { - if (!sizeless_type_p (type)) + const_tree tmp = type; + if (omp_type_context (context) && POINTER_TYPE_P (type)) + tmp = strip_pointer_types (tmp); + + if (!sizeless_type_p (tmp)) return true; switch (context) @@ -5234,6 +5238,37 @@ verify_type_context (location_t loc, type_context_kind context, if (!silent_p) error_at (loc, "capture by copy of SVE type %qT", type); return false; + + case TCTX_OMP_MAP: + if (!silent_p) + error_at (loc, "SVE type %qT not allowed in %<map%> clause", type); + return false; + + case TCTX_OMP_MAP_IMP_REF: + if (!silent_p) + error ("cannot reference %qT object types in %<target%> region", type); + return false; + + case TCTX_OMP_PRIVATE: + if (!silent_p) + error_at (loc, "SVE type %qT not allowed in" + " %<target%> %<private%> clause", type); + return false; + + case TCTX_OMP_FIRSTPRIVATE: + if (!silent_p) + error_at (loc, "SVE type %qT not allowed in" + " %<target%> %<firstprivate%> clause", type); + return false; + + case TCTX_OMP_DEVICE_ADDR: + if (!silent_p) + error_at (loc, "SVE type %qT not allowed in" + " %<target%> device clauses", type); + return false; + + default: + break; } gcc_unreachable (); } diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md index 56a914f..982074c 100644 --- a/gcc/config/aarch64/aarch64-tune.md +++ b/gcc/config/aarch64/aarch64-tune.md @@ -1,5 +1,5 @@ ;; -*- buffer-read-only: t -*- ;; Generated automatically by gentune.sh from aarch64-cores.def (define_attr "tune" - "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88,thunderxt88p1,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,fujitsu_monaka,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexr82ae,cortexa510,cortexa520,cortexa520ae,cortexa710,cortexa715,cortexa720,cortexa720ae,cortexa725,cortexx2,cortexx3,cortexx4,cortexx925,neoversen2,cobalt100,neoversen3,neoversev2,grace,neoversev3,neoversev3ae,demeter,olympus,generic,generic_armv8_a,generic_armv9_a" + 
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88,thunderxt88p1,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,fujitsu_monaka,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexr82ae,applea12,applem1_0,applem1_1,applem1_2,applem1_3,applem2_0,applem2_1,applem2_2,applem2_3,applem3_0,cortexa510,cortexa520,cortexa520ae,cortexa710,cortexa715,cortexa720,cortexa720ae,cortexa725,cortexx2,cortexx3,cortexx4,cortexx925,neoversen2,cobalt100,neoversen3,neoversev2,grace,neoversev3,neoversev3ae,demeter,olympus,generic,generic_armv8_a,generic_armv9_a" (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 25963c9..4e80114 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -18765,7 +18765,10 @@ aarch64_override_options_internal (struct gcc_options *opts) " option %<-march%>, or by using the %<target%>" " attribute or pragma", "sme"); opts->x_target_flags &= ~MASK_GENERAL_REGS_ONLY; - auto new_flags = isa_flags | feature_deps::SME ().enable; + auto new_flags = (isa_flags + | feature_deps::SME ().enable + /* TODO: Remove once we support SME without SVE2. */ + | feature_deps::SVE2 ().enable); aarch64_set_asm_isa_flags (opts, new_flags); } @@ -18892,6 +18895,12 @@ aarch64_override_options_internal (struct gcc_options *opts) SET_OPTION_IF_UNSET (opts, &global_options_set, param_fully_pipelined_fma, 1); + /* TODO: SME codegen without SVE2 is not supported, once this support is added + remove this 'sorry' and the implicit enablement of SVE2 in the checks for + streaming mode above in this function. */ + if (TARGET_SME && !TARGET_SVE2) + sorry ("no support for %qs without %qs", "sme", "sve2"); + aarch64_override_options_after_change_1 (opts); } diff --git a/gcc/config/alpha/alpha-modes.def b/gcc/config/alpha/alpha-modes.def index d2441ef..9392979 100644 --- a/gcc/config/alpha/alpha-modes.def +++ b/gcc/config/alpha/alpha-modes.def @@ -17,6 +17,10 @@ You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ +/* 256-bit integer mode used by "reload_out<mode>_safe_bwa" secondary + reload patterns to obtain 4 scratch registers. */ +INT_MODE (OI, 32); + /* 128-bit floating point. This gets reset in alpha_option_override if VAX float format is in use. 
*/ FLOAT_MODE (TF, 16, ieee_quad_format); diff --git a/gcc/config/alpha/alpha-protos.h b/gcc/config/alpha/alpha-protos.h index 1bc5520..b0c8936 100644 --- a/gcc/config/alpha/alpha-protos.h +++ b/gcc/config/alpha/alpha-protos.h @@ -43,6 +43,7 @@ extern enum reg_class alpha_preferred_reload_class (rtx, enum reg_class); extern void alpha_set_memflags (rtx, rtx); extern bool alpha_split_const_mov (machine_mode, rtx *); extern bool alpha_expand_mov (machine_mode, rtx *); +extern bool alpha_expand_mov_safe_bwa (machine_mode, rtx *); extern bool alpha_expand_mov_nobwx (machine_mode, rtx *); extern void alpha_expand_movmisalign (machine_mode, rtx *); extern void alpha_emit_floatuns (rtx[]); @@ -53,12 +54,16 @@ extern void alpha_expand_unaligned_load (rtx, rtx, HOST_WIDE_INT, HOST_WIDE_INT, int); extern void alpha_expand_unaligned_store (rtx, rtx, HOST_WIDE_INT, HOST_WIDE_INT); +extern void alpha_expand_unaligned_store_safe_partial (rtx, rtx, HOST_WIDE_INT, + HOST_WIDE_INT, + HOST_WIDE_INT); extern int alpha_expand_block_move (rtx []); extern int alpha_expand_block_clear (rtx []); extern rtx alpha_expand_zap_mask (HOST_WIDE_INT); extern void alpha_expand_builtin_vector_binop (rtx (*)(rtx, rtx, rtx), machine_mode, rtx, rtx, rtx); +extern rtx alpha_emit_unlikely_jump (rtx, rtx); extern void alpha_expand_builtin_establish_vms_condition_handler (rtx, rtx); extern void alpha_expand_builtin_revert_vms_condition_handler (rtx); diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc index 6965ece..ba470d9 100644 --- a/gcc/config/alpha/alpha.cc +++ b/gcc/config/alpha/alpha.cc @@ -1661,8 +1661,10 @@ alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, if (!aligned_memory_operand (x, mode)) sri->icode = direct_optab_handler (reload_in_optab, mode); } - else + else if (aligned_memory_operand (x, mode) || !TARGET_SAFE_BWA) sri->icode = direct_optab_handler (reload_out_optab, mode); + else + sri->icode = code_for_reload_out_safe_bwa (mode); return NO_REGS; } } @@ -2391,6 +2393,70 @@ alpha_expand_mov_nobwx (machine_mode mode, rtx *operands) return false; } +/* Expand a multi-thread and async-signal safe QImode or HImode + move instruction; return true if all work is done. */ + +bool +alpha_expand_mov_safe_bwa (machine_mode mode, rtx *operands) +{ + /* If the output is not a register, the input must be. */ + if (MEM_P (operands[0])) + operands[1] = force_reg (mode, operands[1]); + + /* If it's a memory load, the sequence is the usual non-BWX one. */ + if (any_memory_operand (operands[1], mode)) + return alpha_expand_mov_nobwx (mode, operands); + + /* Handle memory store cases, unaligned and aligned. The only case + where we can be called during reload is for aligned loads; all + other cases require temporaries. 
*/ + if (any_memory_operand (operands[0], mode)) + { + if (aligned_memory_operand (operands[0], mode)) + { + rtx label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label, 0)); + + rtx aligned_mem, bitnum; + rtx status = gen_reg_rtx (SImode); + rtx temp = gen_reg_rtx (SImode); + get_aligned_mem (operands[0], &aligned_mem, &bitnum); + emit_insn (gen_aligned_store_safe_bwa (aligned_mem, operands[1], + bitnum, status, temp)); + + rtx cond = gen_rtx_EQ (DImode, + gen_rtx_SUBREG (DImode, status, 0), + const0_rtx); + alpha_emit_unlikely_jump (cond, label); + } + else + { + rtx addr = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (addr, get_unaligned_address (operands[0]))); + + rtx aligned_addr = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (aligned_addr, + gen_rtx_AND (DImode, addr, GEN_INT (-8)))); + + rtx label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label, 0)); + + rtx status = gen_reg_rtx (DImode); + rtx temp = gen_reg_rtx (DImode); + rtx seq = gen_unaligned_store_safe_bwa (mode, addr, operands[1], + aligned_addr, status, temp); + alpha_set_memflags (seq, operands[0]); + emit_insn (seq); + + rtx cond = gen_rtx_EQ (DImode, status, const0_rtx); + alpha_emit_unlikely_jump (cond, label); + } + return true; + } + + return false; +} + /* Implement the movmisalign patterns. One of the operands is a memory that is not naturally aligned. Emit instructions to load it. */ @@ -2415,7 +2481,11 @@ alpha_expand_movmisalign (machine_mode mode, rtx *operands) { if (!reg_or_0_operand (operands[1], mode)) operands[1] = force_reg (mode, operands[1]); - alpha_expand_unaligned_store (operands[0], operands[1], 8, 0); + if (TARGET_SAFE_PARTIAL) + alpha_expand_unaligned_store_safe_partial (operands[0], operands[1], + 8, 0, BITS_PER_UNIT); + else + alpha_expand_unaligned_store (operands[0], operands[1], 8, 0); } else gcc_unreachable (); @@ -3607,6 +3677,310 @@ alpha_expand_unaligned_store (rtx dst, rtx src, emit_move_insn (meml, dstl); } +/* Store data SRC of size SIZE using unaligned methods to location + referred by base DST plus offset OFS and of alignment ALIGN. This is + a multi-thread and async-signal safe implementation for all sizes from + 8 down to 1. + + For BWX targets it is straightforward, we just write data piecemeal, + taking any advantage of the alignment known and observing that we + shouldn't have been called for alignments of 32 or above in the first + place (though adding support for that would be easy). + + For non-BWX targets we need to load data from memory, mask it such as + to keep any part outside the area written, insert data to be stored, + and write the result back atomically. For sizes that are not a power + of 2 there are no byte mask or insert machine instructions available + so the mask required has to be built by hand, however ZAP and ZAPNOT + instructions can then be used to apply the mask. Since LL/SC loops + are used, the high and low parts have to be disentangled from each + other and handled sequentially except for size 1 where there is only + the low part to be written. */ + +void +alpha_expand_unaligned_store_safe_partial (rtx dst, rtx src, + HOST_WIDE_INT size, + HOST_WIDE_INT ofs, + HOST_WIDE_INT align) +{ + if (TARGET_BWX) + { + machine_mode mode = align >= 2 * BITS_PER_UNIT ? HImode : QImode; + HOST_WIDE_INT step = mode == HImode ? 2 : 1; + + while (1) + { + rtx dstl = src == const0_rtx ? 
const0_rtx : gen_lowpart (mode, src); + rtx meml = adjust_address (dst, mode, ofs); + emit_move_insn (meml, dstl); + + ofs += step; + size -= step; + if (size == 0) + return; + + if (size < step) + { + mode = QImode; + step = 1; + } + + if (src != const0_rtx) + src = expand_simple_binop (DImode, LSHIFTRT, src, + GEN_INT (step * BITS_PER_UNIT), + NULL, 1, OPTAB_WIDEN); + } + } + + rtx dsta = XEXP (dst, 0); + if (GET_CODE (dsta) == LO_SUM) + dsta = force_reg (Pmode, dsta); + + rtx addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs)); + + rtx byte_mask = NULL_RTX; + switch (size) + { + case 3: + case 5: + case 6: + case 7: + /* If size is not a power of 2 we need to build the byte mask from + size by hand. This is SIZE consecutive bits starting from bit 0. */ + byte_mask = force_reg (DImode, GEN_INT (~(HOST_WIDE_INT_M1U << size))); + + /* Unlike with machine INSxx and MSKxx operations there is no + implicit mask applied to addr with corresponding operations + made by hand, so extract the byte index now. */ + emit_insn (gen_rtx_SET (addr, + gen_rtx_AND (DImode, addr, GEN_INT (~-8)))); + } + + /* Must handle high before low for degenerate case of aligned. */ + if (size != 1) + { + rtx addrh = gen_reg_rtx (DImode); + rtx aligned_addrh = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (addrh, + plus_constant (DImode, dsta, ofs + size - 1))); + emit_insn (gen_rtx_SET (aligned_addrh, + gen_rtx_AND (DImode, addrh, GEN_INT (-8)))); + + /* AND addresses cannot be in any alias set, since they may implicitly + alias surrounding code. Ideally we'd have some alias set that + covered all types except those with alignment 8 or higher. */ + rtx memh = change_address (dst, DImode, aligned_addrh); + set_mem_alias_set (memh, 0); + + rtx insh = gen_reg_rtx (DImode); + rtx maskh = NULL_RTX; + switch (size) + { + case 1: + case 2: + case 4: + case 8: + if (src != CONST0_RTX (GET_MODE (src))) + emit_insn (gen_insxh (insh, gen_lowpart (DImode, src), + GEN_INT (size * 8), addr)); + break; + case 3: + case 5: + case 6: + case 7: + { + /* For the high part we shift the byte mask right by 8 minus + the byte index in addr, so we need an extra calculation. */ + rtx shamt = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (shamt, + gen_rtx_MINUS (DImode, + force_reg (DImode, + GEN_INT (8)), + addr))); + + maskh = gen_reg_rtx (DImode); + rtx shift = gen_rtx_LSHIFTRT (DImode, byte_mask, shamt); + emit_insn (gen_rtx_SET (maskh, shift)); + + /* Insert any bytes required by hand, by doing a byte-wise + shift on SRC right by the same number and then zap the + bytes outside the byte mask. 
*/ + if (src != CONST0_RTX (GET_MODE (src))) + { + rtx byte_loc = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (byte_loc, + gen_rtx_ASHIFT (DImode, + shamt, GEN_INT (3)))); + rtx bytes = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (bytes, + gen_rtx_LSHIFTRT (DImode, + gen_lowpart (DImode, + src), + byte_loc))); + + rtx zapmask = gen_rtx_NOT (QImode, + gen_rtx_SUBREG (QImode, maskh, 0)); + rtx zap = gen_rtx_UNSPEC (DImode, gen_rtvec (1, zapmask), + UNSPEC_ZAP); + emit_insn (gen_rtx_SET (insh, + gen_rtx_AND (DImode, zap, bytes))); + } + } + break; + default: + gcc_unreachable (); + } + + rtx labelh = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (labelh, 0)); + + rtx dsth = gen_reg_rtx (DImode); + emit_insn (gen_load_locked (DImode, dsth, memh)); + + switch (size) + { + case 1: + case 2: + case 4: + case 8: + emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size * 8), addr)); + break; + case 3: + case 5: + case 6: + case 7: + { + rtx zapmask = gen_rtx_SUBREG (QImode, maskh, 0); + rtx zap = gen_rtx_UNSPEC (DImode, gen_rtvec (1, zapmask), + UNSPEC_ZAP); + emit_insn (gen_rtx_SET (dsth, gen_rtx_AND (DImode, zap, dsth))); + } + break; + default: + gcc_unreachable (); + } + + if (src != CONST0_RTX (GET_MODE (src))) + dsth = expand_simple_binop (DImode, IOR, insh, dsth, dsth, 0, + OPTAB_WIDEN); + + emit_insn (gen_store_conditional (DImode, dsth, memh, dsth)); + + alpha_emit_unlikely_jump (gen_rtx_EQ (DImode, dsth, const0_rtx), labelh); + } + + /* Now handle low. */ + rtx addrl = gen_reg_rtx (DImode); + rtx aligned_addrl = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (addrl, plus_constant (DImode, dsta, ofs))); + emit_insn (gen_rtx_SET (aligned_addrl, + gen_rtx_AND (DImode, addrl, GEN_INT (-8)))); + + /* AND addresses cannot be in any alias set, since they may implicitly + alias surrounding code. Ideally we'd have some alias set that + covered all types except those with alignment 8 or higher. */ + rtx meml = change_address (dst, DImode, aligned_addrl); + set_mem_alias_set (meml, 0); + + rtx insl = gen_reg_rtx (DImode); + rtx maskl; + switch (size) + { + case 1: + if (src != CONST0_RTX (GET_MODE (src))) + emit_insn (gen_insbl (insl, gen_lowpart (QImode, src), addr)); + break; + case 2: + if (src != CONST0_RTX (GET_MODE (src))) + emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr)); + break; + case 4: + if (src != CONST0_RTX (GET_MODE (src))) + emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr)); + break; + case 8: + if (src != CONST0_RTX (GET_MODE (src))) + emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr)); + break; + case 3: + case 5: + case 6: + case 7: + /* For the low part we shift the byte mask left by the byte index, + which is already in ADDR. */ + maskl = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (maskl, + gen_rtx_ASHIFT (DImode, byte_mask, addr))); + + /* Insert any bytes required by hand, by doing a byte-wise shift + on SRC left by the same number and then zap the bytes outside + the byte mask. 
*/ + if (src != CONST0_RTX (GET_MODE (src))) + { + rtx byte_loc = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (byte_loc, + gen_rtx_ASHIFT (DImode, + force_reg (DImode, addr), + GEN_INT (3)))); + rtx bytes = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (bytes, + gen_rtx_ASHIFT (DImode, + gen_lowpart (DImode, src), + byte_loc))); + + rtx zapmask = gen_rtx_NOT (QImode, + gen_rtx_SUBREG (QImode, maskl, 0)); + rtx zap = gen_rtx_UNSPEC (DImode, gen_rtvec (1, zapmask), + UNSPEC_ZAP); + emit_insn (gen_rtx_SET (insl, gen_rtx_AND (DImode, zap, bytes))); + } + break; + default: + gcc_unreachable (); + } + + rtx labell = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (labell, 0)); + + rtx dstl = gen_reg_rtx (DImode); + emit_insn (gen_load_locked (DImode, dstl, meml)); + + switch (size) + { + case 1: + emit_insn (gen_mskbl (dstl, dstl, addr)); + break; + case 2: + emit_insn (gen_mskwl (dstl, dstl, addr)); + break; + case 4: + emit_insn (gen_mskll (dstl, dstl, addr)); + break; + case 8: + emit_insn (gen_mskql (dstl, dstl, addr)); + break; + case 3: + case 5: + case 6: + case 7: + { + rtx zapmask = gen_rtx_SUBREG (QImode, maskl, 0); + rtx zap = gen_rtx_UNSPEC (DImode, gen_rtvec (1, zapmask), UNSPEC_ZAP); + emit_insn (gen_rtx_SET (dstl, gen_rtx_AND (DImode, zap, dstl))); + } + break; + default: + gcc_unreachable (); + } + + if (src != CONST0_RTX (GET_MODE (src))) + dstl = expand_simple_binop (DImode, IOR, insl, dstl, dstl, 0, OPTAB_WIDEN); + + emit_insn (gen_store_conditional (DImode, dstl, meml, dstl)); + + alpha_emit_unlikely_jump (gen_rtx_EQ (DImode, dstl, const0_rtx), labell); +} + /* The block move code tries to maximize speed by separating loads and stores at the expense of register pressure: we load all of the data before we store it back out. There are two secondary effects worth @@ -3772,6 +4146,117 @@ alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem, emit_move_insn (st_addr_1, st_tmp_1); } +/* Store an integral number of consecutive unaligned quadwords. DATA_REGS + may be NULL to store zeros. This is a multi-thread and async-signal + safe implementation. */ + +static void +alpha_expand_unaligned_store_words_safe_partial (rtx *data_regs, rtx dmem, + HOST_WIDE_INT words, + HOST_WIDE_INT ofs, + HOST_WIDE_INT align) +{ + rtx const im8 = GEN_INT (-8); + rtx ins_tmps[MAX_MOVE_WORDS]; + HOST_WIDE_INT i; + + /* Generate all the tmp registers we need. */ + for (i = 0; i < words; i++) + ins_tmps[i] = data_regs != NULL ? gen_reg_rtx (DImode) : const0_rtx; + + if (ofs != 0) + dmem = adjust_address (dmem, GET_MODE (dmem), ofs); + + /* For BWX store the ends before we start fiddling with data registers + to fill the middle. Also if we have no more than two quadwords, + then obviously we're done. */ + if (TARGET_BWX) + { + rtx datan = data_regs ? data_regs[words - 1] : const0_rtx; + rtx data0 = data_regs ? data_regs[0] : const0_rtx; + HOST_WIDE_INT e = (words - 1) * 8; + + alpha_expand_unaligned_store_safe_partial (dmem, data0, 8, 0, align); + alpha_expand_unaligned_store_safe_partial (dmem, datan, 8, e, align); + if (words <= 2) + return; + } + + rtx dmema = XEXP (dmem, 0); + if (GET_CODE (dmema) == LO_SUM) + dmema = force_reg (Pmode, dmema); + + /* Shift the input data into place. 
*/ + rtx dreg = copy_addr_to_reg (dmema); + if (data_regs != NULL) + { + for (i = words - 1; i >= 0; i--) + { + emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg)); + emit_insn (gen_insql (data_regs[i], data_regs[i], dreg)); + } + for (i = words - 1; i > 0; i--) + ins_tmps[i - 1] = expand_simple_binop (DImode, IOR, data_regs[i], + ins_tmps[i - 1], + ins_tmps[i - 1], + 1, OPTAB_DIRECT); + } + + if (!TARGET_BWX) + { + rtx temp = gen_reg_rtx (DImode); + rtx mem = gen_rtx_MEM (DImode, + expand_simple_binop (Pmode, AND, dreg, im8, + NULL_RTX, 1, OPTAB_DIRECT)); + + rtx label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ()); + emit_label (XEXP (label, 0)); + + emit_insn (gen_load_locked (DImode, temp, mem)); + emit_insn (gen_mskql (temp, temp, dreg)); + if (data_regs != NULL) + temp = expand_simple_binop (DImode, IOR, temp, data_regs[0], + temp, 1, OPTAB_DIRECT); + emit_insn (gen_store_conditional (DImode, temp, mem, temp)); + + alpha_emit_unlikely_jump (gen_rtx_EQ (DImode, temp, const0_rtx), label); + } + + for (i = words - 1; i > 0; --i) + { + rtx temp = change_address (dmem, Pmode, + gen_rtx_AND (Pmode, + plus_constant (Pmode, + dmema, i * 8), + im8)); + set_mem_alias_set (temp, 0); + emit_move_insn (temp, ins_tmps[i - 1]); + } + + if (!TARGET_BWX) + { + rtx temp = gen_reg_rtx (DImode); + rtx addr = expand_simple_binop (Pmode, PLUS, dreg, + GEN_INT (words * 8 - 1), + NULL_RTX, 1, OPTAB_DIRECT); + rtx mem = gen_rtx_MEM (DImode, + expand_simple_binop (Pmode, AND, addr, im8, + NULL_RTX, 1, OPTAB_DIRECT)); + + rtx label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ()); + emit_label (XEXP (label, 0)); + + emit_insn (gen_load_locked (DImode, temp, mem)); + emit_insn (gen_mskqh (temp, temp, dreg)); + if (data_regs != NULL) + temp = expand_simple_binop (DImode, IOR, temp, ins_tmps[words - 1], + temp, 1, OPTAB_DIRECT); + emit_insn (gen_store_conditional (DImode, temp, mem, temp)); + + alpha_emit_unlikely_jump (gen_rtx_EQ (DImode, temp, const0_rtx), label); + } +} + /* Get the base alignment and offset of EXPR in A and O respectively. Check for any pseudo register pointer alignment and for any tree node information and return the largest alignment determined and @@ -4081,26 +4566,74 @@ alpha_expand_block_move (rtx operands[]) if (GET_MODE (data_regs[i + words]) != DImode) break; - if (words == 1) - alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs); + if (TARGET_SAFE_PARTIAL) + { + if (words == 1) + alpha_expand_unaligned_store_safe_partial (orig_dst, data_regs[i], + 8, ofs, dst_align); + else + alpha_expand_unaligned_store_words_safe_partial (data_regs + i, + orig_dst, words, + ofs, dst_align); + } else - alpha_expand_unaligned_store_words (data_regs + i, orig_dst, - words, ofs); - + { + if (words == 1) + alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs); + else + alpha_expand_unaligned_store_words (data_regs + i, orig_dst, + words, ofs); + } i += words; ofs += words * 8; } - /* Due to the above, this won't be aligned. */ + /* If we are in the partial memory access safety mode with a non-BWX + target, then coalesce data loaded of different widths so as to + minimize the number of safe partial stores as they are expensive. */ + if (!TARGET_BWX && TARGET_SAFE_PARTIAL) + { + HOST_WIDE_INT size = 0; + unsigned int n; + + for (n = i; i < nregs; i++) + { + if (i != n) + { + /* Don't widen SImode data where obtained by extraction. 
*/ + rtx data = data_regs[n]; + if (GET_MODE (data) == SImode && src_align < 32) + data = gen_rtx_SUBREG (DImode, data, 0); + rtx field = expand_simple_binop (DImode, ASHIFT, data_regs[i], + GEN_INT (size * BITS_PER_UNIT), + NULL_RTX, 1, OPTAB_DIRECT); + data_regs[n] = expand_simple_binop (DImode, IOR, data, field, + data, 1, OPTAB_WIDEN); + } + size += GET_MODE_SIZE (GET_MODE (data_regs[i])); + gcc_assert (size < 8); + } + if (size > 0) + alpha_expand_unaligned_store_safe_partial (orig_dst, data_regs[n], + size, ofs, dst_align); + ofs += size; + } + + /* We've done aligned stores above, this won't be aligned. */ while (i < nregs && GET_MODE (data_regs[i]) == SImode) { - alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs); + gcc_assert (TARGET_BWX || !TARGET_SAFE_PARTIAL); + if (TARGET_SAFE_PARTIAL) + alpha_expand_unaligned_store_safe_partial (orig_dst, data_regs[i], + 4, ofs, dst_align); + else + alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs); ofs += 4; i++; gcc_assert (i == nregs || GET_MODE (data_regs[i]) != SImode); } - if (dst_align >= 16) + if (TARGET_BWX && dst_align >= 16) while (i < nregs && GET_MODE (data_regs[i]) == HImode) { emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]); @@ -4110,7 +4643,12 @@ alpha_expand_block_move (rtx operands[]) else while (i < nregs && GET_MODE (data_regs[i]) == HImode) { - alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs); + gcc_assert (TARGET_BWX || !TARGET_SAFE_PARTIAL); + if (TARGET_SAFE_PARTIAL) + alpha_expand_unaligned_store_safe_partial (orig_dst, data_regs[i], + 2, ofs, dst_align); + else + alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs); i++; ofs += 2; } @@ -4119,6 +4657,7 @@ alpha_expand_block_move (rtx operands[]) while (i < nregs) { gcc_assert (GET_MODE (data_regs[i]) == QImode); + gcc_assert (TARGET_BWX || !TARGET_SAFE_PARTIAL); emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]); i++; ofs += 1; @@ -4127,6 +4666,27 @@ alpha_expand_block_move (rtx operands[]) return 1; } +/* Expand a multi-thread and async-signal safe partial clear of a longword + or a quadword quantity indicated by MODE at aligned memory location MEM + according to MASK. */ + +static void +alpha_expand_clear_safe_partial_nobwx (rtx mem, machine_mode mode, + HOST_WIDE_INT mask) +{ + rtx label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label, 0)); + + rtx temp = gen_reg_rtx (mode); + rtx status = mode == DImode ? temp : gen_rtx_SUBREG (DImode, temp, 0); + + emit_insn (gen_load_locked (mode, temp, mem)); + emit_insn (gen_rtx_SET (temp, gen_rtx_AND (mode, temp, GEN_INT (mask)))); + emit_insn (gen_store_conditional (mode, status, mem, temp)); + + alpha_emit_unlikely_jump (gen_rtx_EQ (DImode, status, const0_rtx), label); +} + int alpha_expand_block_clear (rtx operands[]) { @@ -4171,8 +4731,9 @@ alpha_expand_block_clear (rtx operands[]) { /* Given that alignofs is bounded by align, the only time BWX could generate three stores is for a 7 byte fill. Prefer two individual - stores over a load/mask/store sequence. */ - if ((!TARGET_BWX || alignofs == 7) + stores over a load/mask/store sequence. In the partial safety + mode always do individual stores regardless of their count. 
*/ + if ((!TARGET_BWX || (!TARGET_SAFE_PARTIAL && alignofs == 7)) && align >= 32 && !(alignofs == 4 && bytes >= 4)) { @@ -4198,10 +4759,15 @@ alpha_expand_block_clear (rtx operands[]) } alignofs = 0; - tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask), - NULL_RTX, 1, OPTAB_WIDEN); + if (TARGET_SAFE_PARTIAL) + alpha_expand_clear_safe_partial_nobwx (mem, mode, mask); + else + { + tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask), + NULL_RTX, 1, OPTAB_WIDEN); - emit_move_insn (mem, tmp); + emit_move_insn (mem, tmp); + } } if (TARGET_BWX && (alignofs & 1) && bytes >= 1) @@ -4306,7 +4872,11 @@ alpha_expand_block_clear (rtx operands[]) { words = bytes / 8; - alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs); + if (TARGET_SAFE_PARTIAL) + alpha_expand_unaligned_store_words_safe_partial (NULL, orig_dst, + words, ofs, align); + else + alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs); bytes -= words * 8; ofs += words * 8; @@ -4323,7 +4893,7 @@ alpha_expand_block_clear (rtx operands[]) /* If we have appropriate alignment (and it wouldn't take too many instructions otherwise), mask out the bytes we need. */ - if ((TARGET_BWX ? words > 2 : bytes > 0) + if ((TARGET_BWX ? !TARGET_SAFE_PARTIAL && words > 2 : bytes > 0) && (align >= 64 || (align >= 32 && bytes < 4))) { machine_mode mode = (align >= 64 ? DImode : SImode); @@ -4335,18 +4905,46 @@ alpha_expand_block_clear (rtx operands[]) mask = HOST_WIDE_INT_M1U << (bytes * 8); - tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask), - NULL_RTX, 1, OPTAB_WIDEN); + if (TARGET_SAFE_PARTIAL) + alpha_expand_clear_safe_partial_nobwx (mem, mode, mask); + else + { + tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask), + NULL_RTX, 1, OPTAB_WIDEN); - emit_move_insn (mem, tmp); + emit_move_insn (mem, tmp); + } return 1; } - if (!TARGET_BWX && bytes >= 4) + if (bytes >= 4) { - alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs); - bytes -= 4; - ofs += 4; + if (align >= 32) + do + { + emit_move_insn (adjust_address (orig_dst, SImode, ofs), + const0_rtx); + bytes -= 4; + ofs += 4; + } + while (bytes >= 4); + else if (!TARGET_BWX) + { + gcc_assert (bytes < 8); + if (TARGET_SAFE_PARTIAL) + { + alpha_expand_unaligned_store_safe_partial (orig_dst, const0_rtx, + bytes, ofs, align); + ofs += bytes; + bytes = 0; + } + else + { + alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs); + bytes -= 4; + ofs += 4; + } + } } if (bytes >= 2) @@ -4362,18 +4960,38 @@ alpha_expand_block_clear (rtx operands[]) } else if (! TARGET_BWX) { - alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs); - bytes -= 2; - ofs += 2; + gcc_assert (bytes < 4); + if (TARGET_SAFE_PARTIAL) + { + alpha_expand_unaligned_store_safe_partial (orig_dst, const0_rtx, + bytes, ofs, align); + ofs += bytes; + bytes = 0; + } + else + { + alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs); + bytes -= 2; + ofs += 2; + } } } while (bytes > 0) - { - emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx); - bytes -= 1; - ofs += 1; - } + if (TARGET_BWX || !TARGET_SAFE_PARTIAL) + { + emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx); + bytes -= 1; + ofs += 1; + } + else + { + gcc_assert (bytes < 2); + alpha_expand_unaligned_store_safe_partial (orig_dst, const0_rtx, + bytes, ofs, align); + ofs += bytes; + bytes = 0; + } return 1; } @@ -4421,12 +5039,13 @@ alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx), /* A subroutine of the atomic operation splitters. Jump to LABEL if COND is true. 
Mark the jump as unlikely to be taken. */ -static void -emit_unlikely_jump (rtx cond, rtx label) +rtx +alpha_emit_unlikely_jump (rtx cond, rtx label) { rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx); rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x)); add_reg_br_prob_note (insn, profile_probability::very_unlikely ()); + return insn; } /* Subroutines of the atomic operation splitters. Emit barriers @@ -4518,7 +5137,7 @@ alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before, emit_insn (gen_store_conditional (mode, cond, mem, scratch)); x = gen_rtx_EQ (DImode, cond, const0_rtx); - emit_unlikely_jump (x, label); + alpha_emit_unlikely_jump (x, label); alpha_post_atomic_barrier (model); } @@ -4568,7 +5187,7 @@ alpha_split_compare_and_swap (rtx operands[]) emit_insn (gen_rtx_SET (cond, x)); x = gen_rtx_EQ (DImode, cond, const0_rtx); } - emit_unlikely_jump (x, label2); + alpha_emit_unlikely_jump (x, label2); emit_move_insn (cond, newval); emit_insn (gen_store_conditional @@ -4577,7 +5196,7 @@ alpha_split_compare_and_swap (rtx operands[]) if (!is_weak) { x = gen_rtx_EQ (DImode, cond, const0_rtx); - emit_unlikely_jump (x, label1); + alpha_emit_unlikely_jump (x, label1); } if (!is_mm_relaxed (mod_f)) @@ -4680,7 +5299,7 @@ alpha_split_compare_and_swap_12 (rtx operands[]) emit_insn (gen_rtx_SET (cond, x)); x = gen_rtx_EQ (DImode, cond, const0_rtx); } - emit_unlikely_jump (x, label2); + alpha_emit_unlikely_jump (x, label2); emit_insn (gen_mskxl (cond, scratch, mask, addr)); @@ -4692,7 +5311,7 @@ alpha_split_compare_and_swap_12 (rtx operands[]) if (!is_weak) { x = gen_rtx_EQ (DImode, cond, const0_rtx); - emit_unlikely_jump (x, label1); + alpha_emit_unlikely_jump (x, label1); } if (!is_mm_relaxed (mod_f)) @@ -4732,7 +5351,7 @@ alpha_split_atomic_exchange (rtx operands[]) emit_insn (gen_store_conditional (mode, cond, mem, scratch)); x = gen_rtx_EQ (DImode, cond, const0_rtx); - emit_unlikely_jump (x, label); + alpha_emit_unlikely_jump (x, label); alpha_post_atomic_barrier (model); } @@ -4806,7 +5425,7 @@ alpha_split_atomic_exchange_12 (rtx operands[]) emit_insn (gen_store_conditional (DImode, scratch, mem, scratch)); x = gen_rtx_EQ (DImode, scratch, const0_rtx); - emit_unlikely_jump (x, label); + alpha_emit_unlikely_jump (x, label); alpha_post_atomic_barrier (model); } diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md index 178ce99..a44178d 100644 --- a/gcc/config/alpha/alpha.md +++ b/gcc/config/alpha/alpha.md @@ -4200,6 +4200,31 @@ << INTVAL (operands[2]))); }) +;; Multi-thread and async-signal safe variant. Operand 0 is the aligned +;; SImode MEM. Operand 1 is the data to store. Operand 2 is the number +;; of bits within the word that the value should be placed. Operand 3 is +;; the SImode status. Operand 4 is a SImode temporary. 
+ +(define_expand "aligned_store_safe_bwa" + [(set (match_operand:SI 3 "register_operand") + (unspec_volatile:SI + [(match_operand:SI 0 "memory_operand")] UNSPECV_LL)) + (set (subreg:DI (match_dup 3) 0) + (and:DI (subreg:DI (match_dup 3) 0) (match_dup 5))) + (set (subreg:DI (match_operand:SI 4 "register_operand") 0) + (ashift:DI (zero_extend:DI (match_operand 1 "register_operand")) + (match_operand:DI 2 "const_int_operand"))) + (set (subreg:DI (match_dup 3) 0) + (ior:DI (subreg:DI (match_dup 4) 0) (subreg:DI (match_dup 3) 0))) + (parallel [(set (subreg:DI (match_dup 3) 0) + (unspec_volatile:DI [(const_int 0)] UNSPECV_SC)) + (set (match_dup 0) (match_dup 3))])] + "" +{ + operands[5] = GEN_INT (~(GET_MODE_MASK (GET_MODE (operands[1])) + << INTVAL (operands[2]))); +}) + ;; For the unaligned byte and halfword cases, we use code similar to that ;; in the Architecture book, but reordered to lower the number of registers ;; required. Operand 0 is the address. Operand 1 is the data to store. @@ -4227,6 +4252,31 @@ "" "operands[5] = GEN_INT (GET_MODE_MASK (<MODE>mode));") +;; Multi-thread and async-signal safe variant. Operand 0 is the address. +;; Operand 1 is the data to store. Operand 2 is the aligned address. +;; Operand 3 is the DImode status. Operand 4 is a DImode temporary. + +(define_expand "@unaligned_store<mode>_safe_bwa" + [(set (match_operand:DI 3 "register_operand") + (unspec_volatile:DI + [(mem:DI (match_operand:DI 2 "register_operand"))] UNSPECV_LL)) + (set (match_dup 3) + (and:DI (not:DI + (ashift:DI (match_dup 5) + (ashift:DI (match_operand:DI 0 "register_operand") + (const_int 3)))) + (match_dup 3))) + (set (match_operand:DI 4 "register_operand") + (ashift:DI (zero_extend:DI + (match_operand:I12MODE 1 "register_operand")) + (ashift:DI (match_dup 0) (const_int 3)))) + (set (match_dup 3) (ior:DI (match_dup 4) (match_dup 3))) + (parallel [(set (match_dup 3) + (unspec_volatile:DI [(const_int 0)] UNSPECV_SC)) + (set (mem:DI (match_dup 2)) (match_dup 3))])] + "" + "operands[5] = GEN_INT (GET_MODE_MASK (<MODE>mode));") + ;; Here are the define_expand's for QI and HI moves that use the above ;; patterns. We have the normal sets, plus the ones that need scratch ;; registers for reload. @@ -4236,8 +4286,8 @@ (match_operand:I12MODE 1 "general_operand"))] "" { - if (TARGET_BWX - ? alpha_expand_mov (<MODE>mode, operands) + if (TARGET_BWX ? alpha_expand_mov (<MODE>mode, operands) + : TARGET_SAFE_BWA ? alpha_expand_mov_safe_bwa (<MODE>mode, operands) : alpha_expand_mov_nobwx (<MODE>mode, operands)) DONE; }) @@ -4292,7 +4342,9 @@ operands[1] = gen_lowpart (HImode, operands[1]); do_aligned2: operands[0] = gen_lowpart (HImode, operands[0]); - done = alpha_expand_mov_nobwx (HImode, operands); + done = (TARGET_SAFE_BWA + ? 
alpha_expand_mov_safe_bwa (HImode, operands) + : alpha_expand_mov_nobwx (HImode, operands)); gcc_assert (done); DONE; } @@ -4371,6 +4423,8 @@ } else { + gcc_assert (!TARGET_SAFE_BWA); + rtx addr = get_unaligned_address (operands[0]); rtx scratch1 = gen_rtx_REG (DImode, regno); rtx scratch2 = gen_rtx_REG (DImode, regno + 1); @@ -4388,6 +4442,52 @@ DONE; }) +(define_expand "@reload_out<mode>_safe_bwa" + [(parallel [(match_operand:RELOAD12 0 "any_memory_operand" "=m") + (match_operand:RELOAD12 1 "register_operand" "r") + (match_operand:OI 2 "register_operand" "=&r")])] + "!TARGET_BWX && TARGET_SAFE_BWA" +{ + unsigned regno = REGNO (operands[2]); + + if (<MODE>mode == CQImode) + { + operands[0] = gen_lowpart (HImode, operands[0]); + operands[1] = gen_lowpart (HImode, operands[1]); + } + + rtx addr = get_unaligned_address (operands[0]); + rtx status = gen_rtx_REG (DImode, regno); + rtx areg = gen_rtx_REG (DImode, regno + 1); + rtx aligned_addr = gen_rtx_REG (DImode, regno + 2); + rtx scratch = gen_rtx_REG (DImode, regno + 3); + + if (REG_P (addr)) + areg = addr; + else + emit_move_insn (areg, addr); + emit_move_insn (aligned_addr, gen_rtx_AND (DImode, areg, GEN_INT (-8))); + + rtx label = gen_label_rtx (); + emit_label (label); + LABEL_NUSES (label) = 1; + + rtx seq = gen_reload_out<reloadmode>_unaligned_safe_bwa (areg, operands[1], + aligned_addr, + status, scratch); + alpha_set_memflags (seq, operands[0]); + emit_insn (seq); + + rtx label_ref = gen_rtx_LABEL_REF (DImode, label); + rtx cond = gen_rtx_EQ (DImode, status, const0_rtx); + rtx jump = alpha_emit_unlikely_jump (cond, label_ref); + JUMP_LABEL (jump) = label; + + cfun->split_basic_blocks_after_reload = 1; + + DONE; +}) + ;; Helpers for the above. The way reload is structured, we can't ;; always get a proper address for a stack slot during reload_foo ;; expansion, so we must delay our address manipulations until after. @@ -4420,10 +4520,55 @@ { rtx aligned_mem, bitnum; get_aligned_mem (operands[0], &aligned_mem, &bitnum); - emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum, - operands[2], operands[3])); + if (TARGET_SAFE_BWA) + { + rtx label = gen_label_rtx (); + emit_label (label); + LABEL_NUSES (label) = 1; + + rtx status = operands[2]; + rtx temp = operands[3]; + emit_insn (gen_aligned_store_safe_bwa (aligned_mem, operands[1], bitnum, + status, temp)); + + rtx label_ref = gen_rtx_LABEL_REF (DImode, label); + rtx cond = gen_rtx_EQ (DImode, gen_rtx_SUBREG (DImode, status, 0), + const0_rtx); + rtx jump = alpha_emit_unlikely_jump (cond, label_ref); + JUMP_LABEL (jump) = label; + + cfun->split_basic_blocks_after_reload = 1; + } + else + emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum, + operands[2], operands[3])); DONE; }) + +;; Operand 0 is the address. Operand 1 is the data to store. Operand 2 +;; is the aligned address. Operand 3 is the DImode status. Operand 4 is +;; a DImode scratch. 
+ +(define_expand "reload_out<mode>_unaligned_safe_bwa" + [(set (match_operand:DI 3 "register_operand") + (unspec_volatile:DI [(mem:DI (match_operand:DI 2 "register_operand"))] + UNSPECV_LL)) + (set (match_dup 3) + (and:DI (not:DI + (ashift:DI (match_dup 5) + (ashift:DI (match_operand:DI 0 "register_operand") + (const_int 3)))) + (match_dup 3))) + (set (match_operand:DI 4 "register_operand") + (ashift:DI (zero_extend:DI + (match_operand:I12MODE 1 "register_operand")) + (ashift:DI (match_dup 0) (const_int 3)))) + (set (match_dup 3) (ior:DI (match_dup 4) (match_dup 3))) + (parallel [(set (match_dup 3) + (unspec_volatile:DI [(const_int 0)] UNSPECV_SC)) + (set (mem:DI (match_dup 2)) (match_dup 3))])] + "" + "operands[5] = GEN_INT (GET_MODE_MASK (<MODE>mode));") ;; Vector operations @@ -4636,9 +4781,15 @@ && INTVAL (operands[1]) != 64)) FAIL; - alpha_expand_unaligned_store (operands[0], operands[3], - INTVAL (operands[1]) / 8, - INTVAL (operands[2]) / 8); + if (TARGET_SAFE_PARTIAL) + alpha_expand_unaligned_store_safe_partial (operands[0], operands[3], + INTVAL (operands[1]) / 8, + INTVAL (operands[2]) / 8, + BITS_PER_UNIT); + else + alpha_expand_unaligned_store (operands[0], operands[3], + INTVAL (operands[1]) / 8, + INTVAL (operands[2]) / 8); DONE; }) diff --git a/gcc/config/alpha/alpha.opt b/gcc/config/alpha/alpha.opt index f5ecc7b..3c1320b 100644 --- a/gcc/config/alpha/alpha.opt +++ b/gcc/config/alpha/alpha.opt @@ -69,6 +69,14 @@ mcix Target Mask(CIX) Emit code for the counting ISA extension. +msafe-bwa +Target Mask(SAFE_BWA) +Emit multi-thread and async-signal safe code for byte and word memory accesses. + +msafe-partial +Target Mask(SAFE_PARTIAL) +Emit multi-thread and async-signal safe code for partial memory accesses. + mexplicit-relocs Target Mask(EXPLICIT_RELOCS) Emit code using explicit relocation directives. diff --git a/gcc/config/alpha/alpha.opt.urls b/gcc/config/alpha/alpha.opt.urls index a55c083..9361587 100644 --- a/gcc/config/alpha/alpha.opt.urls +++ b/gcc/config/alpha/alpha.opt.urls @@ -35,6 +35,12 @@ UrlSuffix(gcc/DEC-Alpha-Options.html#index-mfix) mcix UrlSuffix(gcc/DEC-Alpha-Options.html#index-mcix) +msafe-bwa +UrlSuffix(gcc/DEC-Alpha-Options.html#index-msafe-bwa) + +msafe-partial +UrlSuffix(gcc/DEC-Alpha-Options.html#index-msafe-partial) + mexplicit-relocs UrlSuffix(gcc/DEC-Alpha-Options.html#index-mexplicit-relocs) diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h index 658deac..6904aaa 100644 --- a/gcc/config/gcn/gcn-hsa.h +++ b/gcc/config/gcn/gcn-hsa.h @@ -46,6 +46,10 @@ #define ASM_OUTPUT_LABEL(FILE,NAME) \ do { assemble_name (FILE, NAME); fputs (":\n", FILE); } while (0) +/* Used in lieu of '../elfos.h:ASM_WEAKEN_LABEL'. 
*/ +#define ASM_WEAKEN_DECL(STREAM, DECL, NAME, VALUE) \ + gcn_asm_weaken_decl ((STREAM), (DECL), (NAME), (VALUE)) + #define ASM_OUTPUT_LABELREF(FILE, NAME) \ asm_fprintf (FILE, "%U%s", default_strip_name_encoding (NAME)) diff --git a/gcc/config/gcn/gcn-protos.h b/gcc/config/gcn/gcn-protos.h index 87d9092..1e513ba 100644 --- a/gcc/config/gcn/gcn-protos.h +++ b/gcc/config/gcn/gcn-protos.h @@ -18,6 +18,8 @@ #define _GCN_PROTOS_ extern void gcn_asm_output_symbol_ref (FILE *file, rtx x); +extern void gcn_asm_weaken_decl (FILE *stream, tree decl, const char *name, + const char *value); extern tree gcn_builtin_decl (unsigned code, bool initialize_p); extern bool gcn_can_split_p (machine_mode, rtx); extern bool gcn_constant64_p (rtx); diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index 48691c3..d59e87b 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -6927,6 +6927,20 @@ gcn_asm_output_symbol_ref (FILE *file, rtx x) } } +void +gcn_asm_weaken_decl (FILE *stream, tree decl, const char *name, + const char *value) +{ + if (!value + && DECL_EXTERNAL (decl)) + /* Don't emit weak undefined symbols; see PR119369. */ + return; + if (value) + ASM_OUTPUT_WEAK_ALIAS (stream, name, value); + else + ASM_WEAKEN_LABEL (stream, name); +} + /* Implement TARGET_CONSTANT_ALIGNMENT. Returns the alignment in bits of a constant that is being placed in memory. diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index 91f59e1..a9fac01 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -1271,6 +1271,13 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], } } + /* Fixup -msse4 which is RejectNegative to -mno-sse4 when negated. */ + if (opt == OPT_msse4 && !opt_set_p) + { + opt = OPT_mno_sse4; + opt_set_p = true; + } + /* Process the option. */ if (opt == N_OPTS) { diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index f38e3db..4f8380c4 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -21883,7 +21883,11 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno, case SYMBOL_REF: if (x86_64_immediate_operand (x, VOIDmode)) *total = 0; - else + else if (TARGET_64BIT && x86_64_zext_immediate_operand (x, VOIDmode)) + /* Consider the zext constants slightly more expensive, as they + can't appear in most instructions. */ + *total = 1; + else /* movabsq is slightly more expensive than a simple instruction. 
*/ *total = COSTS_N_INSNS (1) + 1; return true; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 13da3d8..8507243 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -2449,11 +2449,11 @@ constexpr wide_int_bitmask PTA_DIAMONDRAPIDS = PTA_SKYLAKE | PTA_PKU | PTA_SHA | PTA_WBNOINVD | PTA_CLWB | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_ENQCMD | PTA_CLDEMOTE | PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE | PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR | PTA_AVXVNNI - | PTA_AMX_FP16 | PTA_PREFETCHI | PTA_AMX_COMPLEX | PTA_AVX10_1 - | PTA_AVXIFMA | PTA_AVXNECONVERT | PTA_AVXVNNIINT16 | PTA_AVXVNNIINT8 - | PTA_CMPCCXADD | PTA_SHA512 | PTA_SM3 | PTA_SM4 | PTA_AVX10_2 - | PTA_APX_F | PTA_AMX_AVX512 | PTA_AMX_FP8 | PTA_AMX_TF32 | PTA_AMX_TRANSPOSE - | PTA_MOVRS | PTA_AMX_MOVRS | PTA_USER_MSR; + | PTA_AMX_FP16 | PTA_PREFETCHI | PTA_AMX_COMPLEX | PTA_AVX10_1_256 + | PTA_AVX10_1 | PTA_AVXIFMA | PTA_AVXNECONVERT | PTA_AVXVNNIINT16 + | PTA_AVXVNNIINT8 | PTA_CMPCCXADD | PTA_SHA512 | PTA_SM3 | PTA_SM4 + | PTA_AVX10_2 | PTA_APX_F | PTA_AMX_AVX512 | PTA_AMX_FP8 | PTA_AMX_TF32 + | PTA_AMX_TRANSPOSE | PTA_MOVRS | PTA_AMX_MOVRS | PTA_USER_MSR; constexpr wide_int_bitmask PTA_BDVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index f7f790d..d6b2f29 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -18153,8 +18153,15 @@ (match_dup 2))) (clobber (reg:CC FLAGS_REG))])] { - operands[2] = force_reg (GET_MODE (operands[2]), operands[2]); - operands[2] = gen_lowpart (QImode, operands[2]); + operands[2] = force_lowpart_subreg (QImode, operands[2], + GET_MODE (operands[2])); + if (TARGET_APX_NF) + { + emit_move_insn (operands[0], + gen_rtx_<CODE> (<MODE>mode, operands[1], + operands[2])); + DONE; + } }) (define_split @@ -18192,7 +18199,16 @@ [(set (match_dup 0) (any_rotate:SWI (match_dup 1) (match_dup 2))) - (clobber (reg:CC FLAGS_REG))])]) + (clobber (reg:CC FLAGS_REG))])] +{ + if (TARGET_APX_NF) + { + emit_move_insn (operands[0], + gen_rtx_<CODE> (<MODE>mode, operands[1], + operands[2])); + DONE; + } +}) (define_split [(set (match_operand:SWI 0 "register_operand") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ed5ac1a..b280676 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1571,7 +1571,11 @@ "TARGET_AVX512VL || <MODE_SIZE> == 64" "vpternlogd\t{$0xFF, %0, %0, %0|%0, %0, %0, 0xFF}" [(set_attr "type" "sselog1") - (set_attr "prefix" "evex")]) + (set_attr "prefix" "evex") + (set (attr "mode") + (if_then_else (match_test "TARGET_AVX512VL") + (const_string "<sseinsnmode>") + (const_string "XI")))]) ;; If mem_addr points to a memory region with less than whole vector size bytes ;; of accessible memory and k is a mask that would prevent reading the inaccessible @@ -30845,7 +30849,10 @@ else return "vaesdec\t{%2, %1, %0|%0, %1, %2}"; } -[(set_attr "addr" "gpr16,*")]) +[(set_attr "isa" "avx,vaes_avx512vl") + (set_attr "type" "sselog1") + (set_attr "addr" "gpr16,*") + (set_attr "mode" "TI")]) (define_insn "vaesdeclast_<mode>" [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v") @@ -30860,7 +30867,10 @@ else return "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"; } -[(set_attr "addr" "gpr16,*")]) +[(set_attr "isa" "avx,vaes_avx512vl") + (set_attr "type" "sselog1") + (set_attr "addr" "gpr16,*") + (set_attr "mode" "TI")]) (define_insn "vaesenc_<mode>" [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" 
"=x,v") @@ -30875,7 +30885,10 @@ else return "vaesenc\t{%2, %1, %0|%0, %1, %2}"; } -[(set_attr "addr" "gpr16,*")]) +[(set_attr "isa" "avx,vaes_avx512vl") + (set_attr "type" "sselog1") + (set_attr "addr" "gpr16,*") + (set_attr "mode" "TI")]) (define_insn "vaesenclast_<mode>" [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v") @@ -30890,7 +30903,10 @@ else return "vaesenclast\t{%2, %1, %0|%0, %1, %2}"; } -[(set_attr "addr" "gpr16,*")]) +[(set_attr "isa" "avx,vaes_avx512vl") + (set_attr "type" "sselog1") + (set_attr "addr" "gpr16,*") + (set_attr "mode" "TI")]) (define_insn "vpclmulqdq_<mode>" [(set (match_operand:VI8_FVL 0 "register_operand" "=v") diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index a4a128c..7c8cb73 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -37,34 +37,37 @@ static stringop_algs ix86_size_memset[2] = { const struct processor_costs ix86_size_cost = {/* costs for tuning for size */ { - /* Start of register allocator costs. integer->integer move cost is 2. */ - 2, /* cost for loading QImode using movzbl */ - {2, 2, 2}, /* cost of loading integer registers + /* Start of register allocator costs. integer->integer move cost is 2 + and coststs are relative to it. movl %eax, %ebx is 2 bytes, so the + sizes coincides with average size of instruction encoding. */ + 3, /* cost for loading QImode using movzbl */ + /* Typical load/save from stack frame is 4 bytes with ebp and 5 with esp. */ + {5, 6, 5}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). */ - {2, 2, 2}, /* cost of storing integer registers */ + {5, 6, 5}, /* cost of storing integer registers */ 2, /* cost of reg,reg fld/fst */ - {2, 2, 2}, /* cost of loading fp registers + {5, 6, 5}, /* cost of loading fp registers in SFmode, DFmode and XFmode */ - {2, 2, 2}, /* cost of storing fp registers + {5, 6, 5}, /* cost of storing fp registers in SFmode, DFmode and XFmode */ 3, /* cost of moving MMX register */ - {3, 3}, /* cost of loading MMX registers + {6, 6}, /* cost of loading MMX registers in SImode and DImode */ - {3, 3}, /* cost of storing MMX registers + {6, 6}, /* cost of storing MMX registers in SImode and DImode */ - 3, 3, 3, /* cost of moving XMM,YMM,ZMM register */ - {3, 3, 3, 3, 3}, /* cost of loading SSE registers + 4, 4, 6, /* cost of moving XMM,YMM,ZMM register */ + {6, 6, 6, 6, 11}, /* cost of loading SSE registers in 32,64,128,256 and 512-bit */ - {3, 3, 3, 3, 3}, /* cost of storing SSE registers + {6, 6, 6, 6, 11}, /* cost of storing SSE registers in 32,64,128,256 and 512-bit */ - 3, 3, /* SSE->integer and integer->SSE moves */ - 3, 3, /* mask->integer and integer->mask moves */ - {2, 2, 2}, /* cost of loading mask register + 4, 4, /* SSE->integer and integer->SSE moves */ + 4, 4, /* mask->integer and integer->mask moves */ + {7, 7, 7}, /* cost of loading mask register in QImode, HImode, SImode. */ - {2, 2, 2}, /* cost if storing mask register + {7, 7, 7}, /* cost if storing mask register in QImode, HImode, SImode. */ - 2, /* cost of moving mask register. */ + 4, /* cost of moving mask register. */ /* End of register allocator costs. */ }, @@ -88,22 +91,24 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ 0, /* "large" insn */ 2, /* MOVE_RATIO */ 2, /* CLEAR_RATIO */ - {2, 2, 2}, /* cost of loading integer registers + /* These costs are relative to reg-reg move with cost of 2. Since it has + 2 bytes, this coincides with average instruction sizes. 
*/ + {5, 6, 5}, /* cost of loading integer registers in QImode, HImode and SImode. Relative to reg-reg move (2). */ - {2, 2, 2}, /* cost of storing integer registers */ - {3, 3, 3, 3, 3}, /* cost of loading SSE register + {5, 6, 5}, /* cost of storing integer registers */ + {6, 6, 6, 6, 11}, /* cost of loading SSE register in 32bit, 64bit, 128bit, 256bit and 512bit */ - {3, 3, 3, 3, 3}, /* cost of storing SSE register + {6, 6, 6, 6, 11}, /* cost of storing SSE register in 32bit, 64bit, 128bit, 256bit and 512bit */ - {3, 3, 3, 3, 3}, /* cost of unaligned SSE load + {6, 6, 6, 6, 11}, /* cost of unaligned SSE load in 128bit, 256bit and 512bit */ - {3, 3, 3, 3, 3}, /* cost of unaligned SSE store + {6, 6, 6, 6, 11}, /* cost of unaligned SSE store in 128bit, 256bit and 512bit */ - 3, 3, 3, /* cost of moving XMM,YMM,ZMM register */ - 3, /* cost of moving SSE register to integer. */ - 5, 0, /* Gather load static, per_elt. */ - 5, 0, /* Gather store static, per_elt. */ + 4, 4, 6, /* cost of moving XMM,YMM,ZMM register */ + 4, /* cost of moving SSE register to integer. */ + COSTS_N_BYTES (5), 0, /* Gather load static, per_elt. */ + COSTS_N_BYTES (5), 0, /* Gather store static, per_elt. */ 0, /* size of l1 cache */ 0, /* size of l2 cache */ 0, /* size of prefetch block */ diff --git a/gcc/config/i386/zn4zn5.md b/gcc/config/i386/zn4zn5.md index 75e3102..ecb1e3b 100644 --- a/gcc/config/i386/zn4zn5.md +++ b/gcc/config/i386/zn4zn5.md @@ -893,13 +893,20 @@ "znver4-direct,znver5-load,znver4-fpu") (define_insn_reservation "znver4_sse_log1" 1 + (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "sselog1") + (and (eq_attr "mode" "V4SF,V8SF,V2DF,V4DF,QI,HI,SI,DI,TI,OI") + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fpu1|znver4-fpu2") + +(define_insn_reservation "znver4_sse_log1_store" 1 (and (eq_attr "cpu" "znver4") (and (eq_attr "type" "sselog1") (and (eq_attr "mode" "V4SF,V8SF,V2DF,V4DF,QI,HI,SI,DI,TI,OI") (eq_attr "memory" "store")))) "znver4-direct,znver4-fpu1|znver4-fpu2,znver4-fp-store") -(define_insn_reservation "znver5_sse_log1" 1 +(define_insn_reservation "znver5_sse_log1_store" 1 (and (eq_attr "cpu" "znver5") (and (eq_attr "type" "sselog1") (and (eq_attr "mode" "V4SF,V8SF,V2DF,V4DF,QI,HI,SI,DI,TI,OI") @@ -946,9 +953,8 @@ (define_insn_reservation "znver4_sse_test" 1 (and (eq_attr "cpu" "znver4,znver5") - (and (eq_attr "prefix_extra" "1") - (and (eq_attr "type" "ssecomi") - (eq_attr "memory" "none")))) + (and (eq_attr "type" "ssecomi") + (eq_attr "memory" "none"))) "znver4-direct,znver4-fpu1|znver4-fpu2") (define_insn_reservation "znver4_sse_test_load" 6 @@ -1030,14 +1036,14 @@ (define_insn_reservation "znver4_sse_mov_fp_load" 6 (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "ssemov") + (and (eq_attr "type" "ssemov,ssemov2") (and (eq_attr "mode" "V16SF,V8DF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,DF,SF") (eq_attr "memory" "load")))) "znver4-direct,znver4-load,znver4-fpu") (define_insn_reservation "znver5_sse_mov_fp_load" 6 (and (eq_attr "cpu" "znver5") - (and (eq_attr "type" "ssemov") + (and (eq_attr "type" "ssemov,ssemov2") (and (eq_attr "mode" "V16SF,V8DF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,DF,SF") (eq_attr "memory" "load")))) "znver4-direct,znver5-load,znver4-fpu") @@ -1150,7 +1156,7 @@ (define_insn_reservation "znver4_sse_div_pd" 13 (and (eq_attr "cpu" "znver4,znver5") (and (eq_attr "type" "ssediv") - (and (eq_attr "mode" "V4DF,V2DF,V1DF") + (and (eq_attr "mode" "V4DF,V2DF,V1DF,DF") (eq_attr "memory" "none")))) "znver4-direct,znver4-fdiv*5") @@ -1164,14 +1170,14 @@ (define_insn_reservation 
"znver4_sse_div_pd_load" 18 (and (eq_attr "cpu" "znver4") (and (eq_attr "type" "ssediv") - (and (eq_attr "mode" "V4DF,V2DF,V1DF") + (and (eq_attr "mode" "V4DF,V2DF,V1DF,DF") (eq_attr "memory" "load")))) "znver4-direct,znver4-load,znver4-fdiv*5") (define_insn_reservation "znver5_sse_div_pd_load" 18 (and (eq_attr "cpu" "znver5") (and (eq_attr "type" "ssediv") - (and (eq_attr "mode" "V4DF,V2DF,V1DF") + (and (eq_attr "mode" "V4DF,V2DF,V1DF,DF") (eq_attr "memory" "load")))) "znver4-direct,znver5-load,znver4-fdiv*5") @@ -1257,21 +1263,28 @@ (define_insn_reservation "znver4_sse_icvt" 3 (and (eq_attr "cpu" "znver4,znver5") - (and (eq_attr "type" "ssecvt") + (and (eq_attr "type" "sseicvt") (and (eq_attr "mode" "SI") (eq_attr "memory" "none")))) "znver4-direct,znver4-fpu2|znver4-fpu3") +(define_insn_reservation "znver4_sse_icvt2" 3 + (and (eq_attr "cpu" "znver4,znver5") + (and (eq_attr "type" "sseicvt2") + (and (eq_attr "mode" "DF") + (eq_attr "memory" "none")))) + "znver4-direct,znver4-fpu2|znver4-fpu3") + (define_insn_reservation "znver4_sse_icvt_store" 4 (and (eq_attr "cpu" "znver4") - (and (eq_attr "type" "ssecvt") + (and (eq_attr "type" "sseicvt") (and (eq_attr "mode" "SI") (eq_attr "memory" "store")))) "znver4-double,znver4-fpu2|znver4-fpu3,znver4-fp-store") (define_insn_reservation "znver5_sse_icvt_store" 4 (and (eq_attr "cpu" "znver5") - (and (eq_attr "type" "ssecvt") + (and (eq_attr "type" "sseicvt") (and (eq_attr "mode" "SI") (eq_attr "memory" "store")))) "znver4-double,znver4-fpu2|znver4-fpu3,znver5-fp-store256") @@ -1354,6 +1367,20 @@ (eq_attr "memory" "load")))) "znver4-direct,znver5-load,znver4-fpu0|znver4-fpu1|znver4-fpu2|znver4-fpu3") +(define_insn_reservation "znver4_sse_log_evex_store" 1 + (and (eq_attr "cpu" "znver4") + (and (eq_attr "type" "sselog") + (and (eq_attr "mode" "V16SF,V8DF,XI") + (eq_attr "memory" "store")))) + "znver4-direct,znver4-store,znver4-fpu0*2|znver4-fpu1*2|znver4-fpu2*2|znver4-fpu3*2") + +(define_insn_reservation "znver5_sse_log_evex_store" 1 + (and (eq_attr "cpu" "znver5") + (and (eq_attr "type" "sselog") + (and (eq_attr "mode" "V16SF,V8DF,XI") + (eq_attr "memory" "store")))) + "znver4-direct,znver5-store,znver4-fpu0|znver4-fpu1|znver4-fpu2|znver4-fpu3") + (define_insn_reservation "znver4_sse_log1_evex" 1 (and (eq_attr "cpu" "znver4") (and (eq_attr "type" "sselog1") @@ -1595,7 +1622,7 @@ (define_insn_reservation "znver4_sse_cmp_avx128" 3 (and (eq_attr "cpu" "znver4") (and (eq_attr "type" "ssecmp") - (and (eq_attr "mode" "V4SF,V2DF,V2SF,V1DF,DF,SF") + (and (eq_attr "mode" "V4SF,V2DF,V2SF,V1DF,DF,SF,TI") (and (eq_attr "prefix" "evex") (eq_attr "memory" "none"))))) "znver4-direct,znver4-fpu0*2|znver4-fpu1*2") @@ -1603,7 +1630,7 @@ (define_insn_reservation "znver5_sse_cmp_avx128" 3 (and (eq_attr "cpu" "znver5") (and (eq_attr "type" "ssecmp") - (and (eq_attr "mode" "V4SF,V2DF,V2SF,V1DF,DF,SF") + (and (eq_attr "mode" "V4SF,V2DF,V2SF,V1DF,DF,SF,TI") (and (eq_attr "prefix" "evex") (eq_attr "memory" "none"))))) "znver4-direct,znver4-fpu1|znver4-fpu2") @@ -1627,7 +1654,7 @@ (define_insn_reservation "znver4_sse_cmp_avx256" 4 (and (eq_attr "cpu" "znver4") (and (eq_attr "type" "ssecmp") - (and (eq_attr "mode" "V8SF,V4DF") + (and (eq_attr "mode" "V8SF,V4DF,OI") (and (eq_attr "prefix" "evex") (eq_attr "memory" "none"))))) "znver4-direct,znver4-fpu0*2|znver4-fpu1*2") @@ -1635,7 +1662,7 @@ (define_insn_reservation "znver5_sse_cmp_avx256" 4 (and (eq_attr "cpu" "znver5") (and (eq_attr "type" "ssecmp") - (and (eq_attr "mode" "V8SF,V4DF") + (and (eq_attr "mode" "V8SF,V4DF,OI") 
(and (eq_attr "prefix" "evex") (eq_attr "memory" "none"))))) "znver4-direct,znver4-fpu1|znver4-fpu2") @@ -1659,7 +1686,7 @@ (define_insn_reservation "znver4_sse_cmp_avx512" 5 (and (eq_attr "cpu" "znver4") (and (eq_attr "type" "ssecmp") - (and (eq_attr "mode" "V16SF,V8DF") + (and (eq_attr "mode" "V16SF,V8DF,XI") (and (eq_attr "prefix" "evex") (eq_attr "memory" "none"))))) "znver4-direct,znver4-fpu0*2|znver4-fpu1*2") @@ -1667,7 +1694,7 @@ (define_insn_reservation "znver5_sse_cmp_avx512" 5 (and (eq_attr "cpu" "znver5") (and (eq_attr "type" "ssecmp") - (and (eq_attr "mode" "V16SF,V8DF") + (and (eq_attr "mode" "V16SF,V8DF,XI") (and (eq_attr "prefix" "evex") (eq_attr "memory" "none"))))) "znver4-direct,znver4-fpu1|znver4-fpu2") diff --git a/gcc/config/loongarch/genopts/gen-evolution.awk b/gcc/config/loongarch/genopts/gen-evolution.awk index bf16b26..142b658 100644 --- a/gcc/config/loongarch/genopts/gen-evolution.awk +++ b/gcc/config/loongarch/genopts/gen-evolution.awk @@ -33,10 +33,12 @@ BEGIN { { cpucfg_word[NR] = $1 cpucfg_bit_in_word[NR] = $2 - name[NR] = gensub(/-/, "_", "g", $3) + name[NR] = $3 + gsub("-", "_", name[NR]) name_capitalized[NR] = toupper(name[NR]) - isa_version_major[NR] = gensub(/^([1-9][0-9]*)\.([0-9]+)$/, "\\1", 1, $4) - isa_version_minor[NR] = gensub(/^([1-9][0-9]*)\.([0-9]+)$/, "\\2", 1, $4) + split($4, isa_ver, "\\.") + isa_version_major[NR] = isa_ver[1] + isa_version_minor[NR] = isa_ver[2] $1 = $2 = $3 = $4 = "" sub (/^\s*/, "") diff --git a/gcc/config/mingw/winnt.cc b/gcc/config/mingw/winnt.cc index adaa6df..08a761d 100644 --- a/gcc/config/mingw/winnt.cc +++ b/gcc/config/mingw/winnt.cc @@ -819,6 +819,7 @@ mingw_pe_file_end (void) } fprintf (asm_out_file, "\t.section\t.rdata$%s, \"dr\"\n" + "\t.p2align\t3, 0\n" "\t.globl\t%s\n" "\t.linkonce\tdiscard\n", oname, oname); fprintf (asm_out_file, "%s:\n\t.quad\t%s\n", oname, name); diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index 022037f..87364bf 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -470,9 +470,7 @@ nvptx_encode_section_info (tree decl, rtx rtl, int first) { nvptx_data_area area = DATA_AREA_GENERIC; - if (TREE_CONSTANT (decl)) - area = DATA_AREA_CONST; - else if (VAR_P (decl)) + if (VAR_P (decl)) { if (lookup_attribute ("shared", DECL_ATTRIBUTES (decl))) { @@ -482,7 +480,7 @@ nvptx_encode_section_info (tree decl, rtx rtl, int first) " memory is not supported", decl); } else - area = TREE_READONLY (decl) ? DATA_AREA_CONST : DATA_AREA_GLOBAL; + area = DATA_AREA_GLOBAL; } SET_SYMBOL_DATA_AREA (XEXP (rtl, 0), area); @@ -2597,7 +2595,7 @@ nvptx_asm_declare_constant_name (FILE *file, const char *name, fprintf (file, "\t"); tree type = TREE_TYPE (exp); - nvptx_assemble_decl_begin (file, name, ".const", type, obj_size, + nvptx_assemble_decl_begin (file, name, ".global", type, obj_size, TYPE_ALIGN (type)); } @@ -7768,7 +7766,9 @@ nvptx_asm_output_def_from_decls (FILE *stream, tree name, return; } +#ifdef ACCEL_COMPILER emit_ptx_alias: +#endif cgraph_node *cnode = cgraph_node::get (name); if (!cnode->referred_to_p ()) diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt index 02d36b3..9be81ae 100644 --- a/gcc/config/nvptx/nvptx.opt +++ b/gcc/config/nvptx/nvptx.opt @@ -173,12 +173,12 @@ Target Var(nvptx_fake_ptx_alloca) Init(0) Undocumented ; With '-mfake-ptx-alloca' enabled, the user-visible behavior changes only ; for configurations where PTX 'alloca' is not available. 
Rather than a ; compile-time 'sorry, unimplemented: dynamic stack allocation not supported' -; in presence of dynamic stack allocation, compilation and assembly then -; succeeds. However, attempting to link in such '*.o' files then fails due -; to unresolved symbol '__GCC_nvptx__PTX_alloca_not_supported'. +; in the presence of dynamic stack allocation, compilation, assembly, and +; linking then succeed, as does execution if 'alloca' is never actually +; reached (for example, when it is only used in error code paths); a +; run-time failure occurs only when 'alloca' is actually attempted. ; ; This is meant to be used in scenarios where large volumes of code are ; compiled, a small fraction of which runs into dynamic stack allocation, but ; these parts are not important for specific use cases, and we'd thus like the -; build to succeed, and error out just upon actual, very rare use of the -; offending '*.o' files. +; build to succeed, and error out just upon actual, very rare use of 'alloca'. diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md index f53ed3a..9e51e3c 100644 --- a/gcc/config/riscv/autovec.md +++ b/gcc/config/riscv/autovec.md @@ -330,7 +330,15 @@ { poly_int64 nunits = GET_MODE_NUNITS (<MODE>mode); machine_mode mode = riscv_vector::get_vector_mode (QImode, nunits).require (); - rtx dup = expand_vector_broadcast (mode, operands[1]); + + /* The 1-bit mask is in a QImode register, make sure we only use the last + bit. See also PR119114 and the respective vec_init expander. */ + rtx tmp = gen_reg_rtx (Xmode); + emit_insn + (gen_rtx_SET (tmp, gen_rtx_AND (Xmode, gen_lowpart (Xmode, operands[1]), + CONST1_RTX (Xmode)))); + + rtx dup = expand_vector_broadcast (mode, gen_lowpart (QImode, tmp)); riscv_vector::expand_vec_cmp (operands[0], NE, dup, CONST0_RTX (mode)); DONE; } diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md index b29c127..5ed5e18 100644 --- a/gcc/config/riscv/bitmanip.md +++ b/gcc/config/riscv/bitmanip.md @@ -80,7 +80,9 @@ (match_operand:DI 3 "consecutive_bits_operand")) 0) (subreg:SI (match_operand:DI 4 "register_operand") 0))))] "TARGET_64BIT && TARGET_ZBA - && riscv_shamt_matches_mask_p (INTVAL (operands[2]), INTVAL (operands[3]))" + && riscv_shamt_matches_mask_p (INTVAL (operands[2]), INTVAL (operands[3])) + /* Ensure the mask includes all the bits in SImode. */ + && ((INTVAL (operands[3]) & (HOST_WIDE_INT_1U << 31)) != 0)" [(set (match_dup 0) (plus:DI (ashift:DI (match_dup 1) (match_dup 2)) (match_dup 4))) (set (match_dup 0) (zero_extend:DI (subreg:SI (match_dup 0) 0)))]) diff --git a/gcc/config/riscv/freebsd.h b/gcc/config/riscv/freebsd.h index 2dc7055..217e0ac 100644 --- a/gcc/config/riscv/freebsd.h +++ b/gcc/config/riscv/freebsd.h @@ -42,7 +42,7 @@ along with GCC; see the file COPYING3. If not see #define LINK_SPEC " \ -melf" XLEN_SPEC DEFAULT_ENDIAN_SPEC "riscv \ %{p:%nconsider using `-pg' instead of `-p' with gprof (1)} \ - " FBSD_LINK_PG_NOTES " \ + " FBSD_LINK_PG_NOTE " \ %{v:-V} \ %{assert*} %{R*} %{rpath*} %{defsym*} \ -X \ diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 287eb3e..aae2d27 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -1782,13 +1782,15 @@ get_vlmul (machine_mode mode) int inner_size = GET_MODE_BITSIZE (GET_MODE_INNER (mode)); if (size < TARGET_MIN_VLEN) { + /* Follow rule LMUL >= SEW / ELEN. */ + int elen = TARGET_VECTOR_ELEN_64 ?
1 : 2; int factor = TARGET_MIN_VLEN / size; if (inner_size == 8) - factor = MIN (factor, 8); + factor = MIN (factor, 8 / elen); else if (inner_size == 16) - factor = MIN (factor, 4); + factor = MIN (factor, 4 / elen); else if (inner_size == 32) - factor = MIN (factor, 2); + factor = MIN (factor, 2 / elen); else if (inner_size == 64) factor = MIN (factor, 1); else diff --git a/gcc/config/riscv/riscv-vector-builtins-types.def b/gcc/config/riscv/riscv-vector-builtins-types.def index 6b98b93..857b637 100644 --- a/gcc/config/riscv/riscv-vector-builtins-types.def +++ b/gcc/config/riscv/riscv-vector-builtins-types.def @@ -369,20 +369,20 @@ along with GCC; see the file COPYING3. If not see #define DEF_RVV_XFQF_OPS(TYPE, REQUIRE) #endif -DEF_RVV_I_OPS (vint8mf8_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_I_OPS (vint8mf8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_I_OPS (vint8mf4_t, 0) DEF_RVV_I_OPS (vint8mf2_t, 0) DEF_RVV_I_OPS (vint8m1_t, 0) DEF_RVV_I_OPS (vint8m2_t, 0) DEF_RVV_I_OPS (vint8m4_t, 0) DEF_RVV_I_OPS (vint8m8_t, 0) -DEF_RVV_I_OPS (vint16mf4_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_I_OPS (vint16mf4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_I_OPS (vint16mf2_t, 0) DEF_RVV_I_OPS (vint16m1_t, 0) DEF_RVV_I_OPS (vint16m2_t, 0) DEF_RVV_I_OPS (vint16m4_t, 0) DEF_RVV_I_OPS (vint16m8_t, 0) -DEF_RVV_I_OPS (vint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_I_OPS (vint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_I_OPS (vint32m1_t, 0) DEF_RVV_I_OPS (vint32m2_t, 0) DEF_RVV_I_OPS (vint32m4_t, 0) @@ -392,20 +392,20 @@ DEF_RVV_I_OPS (vint64m2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_I_OPS (vint64m4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_I_OPS (vint64m8_t, RVV_REQUIRE_ELEN_64) -DEF_RVV_U_OPS (vuint8mf8_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_U_OPS (vuint8mf8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_U_OPS (vuint8mf4_t, 0) DEF_RVV_U_OPS (vuint8mf2_t, 0) DEF_RVV_U_OPS (vuint8m1_t, 0) DEF_RVV_U_OPS (vuint8m2_t, 0) DEF_RVV_U_OPS (vuint8m4_t, 0) DEF_RVV_U_OPS (vuint8m8_t, 0) -DEF_RVV_U_OPS (vuint16mf4_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_U_OPS (vuint16mf4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_U_OPS (vuint16mf2_t, 0) DEF_RVV_U_OPS (vuint16m1_t, 0) DEF_RVV_U_OPS (vuint16m2_t, 0) DEF_RVV_U_OPS (vuint16m4_t, 0) DEF_RVV_U_OPS (vuint16m8_t, 0) -DEF_RVV_U_OPS (vuint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_U_OPS (vuint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_U_OPS (vuint32m1_t, 0) DEF_RVV_U_OPS (vuint32m2_t, 0) DEF_RVV_U_OPS (vuint32m4_t, 0) @@ -415,21 +415,21 @@ DEF_RVV_U_OPS (vuint64m2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_U_OPS (vuint64m4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_U_OPS (vuint64m8_t, RVV_REQUIRE_ELEN_64) -DEF_RVV_F_OPS (vbfloat16mf4_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_F_OPS (vbfloat16mf4_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_ELEN_64) DEF_RVV_F_OPS (vbfloat16mf2_t, RVV_REQUIRE_ELEN_BF_16) DEF_RVV_F_OPS (vbfloat16m1_t, RVV_REQUIRE_ELEN_BF_16) DEF_RVV_F_OPS (vbfloat16m2_t, RVV_REQUIRE_ELEN_BF_16) DEF_RVV_F_OPS (vbfloat16m4_t, RVV_REQUIRE_ELEN_BF_16) DEF_RVV_F_OPS (vbfloat16m8_t, RVV_REQUIRE_ELEN_BF_16) -DEF_RVV_F_OPS (vfloat16mf4_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_F_OPS (vfloat16mf4_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_ELEN_64) DEF_RVV_F_OPS (vfloat16mf2_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_F_OPS (vfloat16m1_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_F_OPS (vfloat16m2_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_F_OPS (vfloat16m4_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_F_OPS (vfloat16m8_t, RVV_REQUIRE_ELEN_FP_16) -DEF_RVV_F_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_F_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | 
RVV_REQUIRE_ELEN_64) DEF_RVV_F_OPS (vfloat32m1_t, RVV_REQUIRE_ELEN_FP_32) DEF_RVV_F_OPS (vfloat32m2_t, RVV_REQUIRE_ELEN_FP_32) DEF_RVV_F_OPS (vfloat32m4_t, RVV_REQUIRE_ELEN_FP_32) @@ -439,7 +439,7 @@ DEF_RVV_F_OPS (vfloat64m2_t, RVV_REQUIRE_ELEN_FP_64) DEF_RVV_F_OPS (vfloat64m4_t, RVV_REQUIRE_ELEN_FP_64) DEF_RVV_F_OPS (vfloat64m8_t, RVV_REQUIRE_ELEN_FP_64) -DEF_RVV_B_OPS (vbool64_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_B_OPS (vbool64_t, RVV_REQUIRE_ELEN_64) DEF_RVV_B_OPS (vbool32_t, 0) DEF_RVV_B_OPS (vbool16_t, 0) DEF_RVV_B_OPS (vbool8_t, 0) @@ -447,13 +447,13 @@ DEF_RVV_B_OPS (vbool4_t, 0) DEF_RVV_B_OPS (vbool2_t, 0) DEF_RVV_B_OPS (vbool1_t, 0) -DEF_RVV_WEXTI_OPS (vint16mf4_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_WEXTI_OPS (vint16mf4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_WEXTI_OPS (vint16mf2_t, 0) DEF_RVV_WEXTI_OPS (vint16m1_t, 0) DEF_RVV_WEXTI_OPS (vint16m2_t, 0) DEF_RVV_WEXTI_OPS (vint16m4_t, 0) DEF_RVV_WEXTI_OPS (vint16m8_t, 0) -DEF_RVV_WEXTI_OPS (vint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_WEXTI_OPS (vint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_WEXTI_OPS (vint32m1_t, 0) DEF_RVV_WEXTI_OPS (vint32m2_t, 0) DEF_RVV_WEXTI_OPS (vint32m4_t, 0) @@ -463,7 +463,7 @@ DEF_RVV_WEXTI_OPS (vint64m2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_WEXTI_OPS (vint64m4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_WEXTI_OPS (vint64m8_t, RVV_REQUIRE_ELEN_64) -DEF_RVV_QEXTI_OPS (vint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_QEXTI_OPS (vint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_QEXTI_OPS (vint32m1_t, 0) DEF_RVV_QEXTI_OPS (vint32m2_t, 0) DEF_RVV_QEXTI_OPS (vint32m4_t, 0) @@ -478,13 +478,13 @@ DEF_RVV_OEXTI_OPS (vint64m2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_OEXTI_OPS (vint64m4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_OEXTI_OPS (vint64m8_t, RVV_REQUIRE_ELEN_64) -DEF_RVV_WEXTU_OPS (vuint16mf4_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_WEXTU_OPS (vuint16mf4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_WEXTU_OPS (vuint16mf2_t, 0) DEF_RVV_WEXTU_OPS (vuint16m1_t, 0) DEF_RVV_WEXTU_OPS (vuint16m2_t, 0) DEF_RVV_WEXTU_OPS (vuint16m4_t, 0) DEF_RVV_WEXTU_OPS (vuint16m8_t, 0) -DEF_RVV_WEXTU_OPS (vuint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_WEXTU_OPS (vuint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_WEXTU_OPS (vuint32m1_t, 0) DEF_RVV_WEXTU_OPS (vuint32m2_t, 0) DEF_RVV_WEXTU_OPS (vuint32m4_t, 0) @@ -494,7 +494,7 @@ DEF_RVV_WEXTU_OPS (vuint64m2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_WEXTU_OPS (vuint64m4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_WEXTU_OPS (vuint64m8_t, RVV_REQUIRE_ELEN_64) -DEF_RVV_QEXTU_OPS (vuint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_QEXTU_OPS (vuint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_QEXTU_OPS (vuint32m1_t, 0) DEF_RVV_QEXTU_OPS (vuint32m2_t, 0) DEF_RVV_QEXTU_OPS (vuint32m4_t, 0) @@ -509,20 +509,20 @@ DEF_RVV_OEXTU_OPS (vuint64m2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_OEXTU_OPS (vuint64m4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_OEXTU_OPS (vuint64m8_t, RVV_REQUIRE_ELEN_64) -DEF_RVV_FULL_V_I_OPS (vint8mf8_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_FULL_V_I_OPS (vint8mf8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_FULL_V_I_OPS (vint8mf4_t, 0) DEF_RVV_FULL_V_I_OPS (vint8mf2_t, 0) DEF_RVV_FULL_V_I_OPS (vint8m1_t, 0) DEF_RVV_FULL_V_I_OPS (vint8m2_t, 0) DEF_RVV_FULL_V_I_OPS (vint8m4_t, 0) DEF_RVV_FULL_V_I_OPS (vint8m8_t, 0) -DEF_RVV_FULL_V_I_OPS (vint16mf4_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_FULL_V_I_OPS (vint16mf4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_FULL_V_I_OPS (vint16mf2_t, 0) DEF_RVV_FULL_V_I_OPS (vint16m1_t, 0) DEF_RVV_FULL_V_I_OPS (vint16m2_t, 0) DEF_RVV_FULL_V_I_OPS (vint16m4_t, 0) DEF_RVV_FULL_V_I_OPS (vint16m8_t, 0) -DEF_RVV_FULL_V_I_OPS (vint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_FULL_V_I_OPS (vint32mf2_t, 
RVV_REQUIRE_ELEN_64) DEF_RVV_FULL_V_I_OPS (vint32m1_t, 0) DEF_RVV_FULL_V_I_OPS (vint32m2_t, 0) DEF_RVV_FULL_V_I_OPS (vint32m4_t, 0) @@ -532,20 +532,20 @@ DEF_RVV_FULL_V_I_OPS (vint64m2_t, RVV_REQUIRE_FULL_V) DEF_RVV_FULL_V_I_OPS (vint64m4_t, RVV_REQUIRE_FULL_V) DEF_RVV_FULL_V_I_OPS (vint64m8_t, RVV_REQUIRE_FULL_V) -DEF_RVV_FULL_V_U_OPS (vuint8mf8_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_FULL_V_U_OPS (vuint8mf8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_FULL_V_U_OPS (vuint8mf4_t, 0) DEF_RVV_FULL_V_U_OPS (vuint8mf2_t, 0) DEF_RVV_FULL_V_U_OPS (vuint8m1_t, 0) DEF_RVV_FULL_V_U_OPS (vuint8m2_t, 0) DEF_RVV_FULL_V_U_OPS (vuint8m4_t, 0) DEF_RVV_FULL_V_U_OPS (vuint8m8_t, 0) -DEF_RVV_FULL_V_U_OPS (vuint16mf4_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_FULL_V_U_OPS (vuint16mf4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_FULL_V_U_OPS (vuint16mf2_t, 0) DEF_RVV_FULL_V_U_OPS (vuint16m1_t, 0) DEF_RVV_FULL_V_U_OPS (vuint16m2_t, 0) DEF_RVV_FULL_V_U_OPS (vuint16m4_t, 0) DEF_RVV_FULL_V_U_OPS (vuint16m8_t, 0) -DEF_RVV_FULL_V_U_OPS (vuint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_FULL_V_U_OPS (vuint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_FULL_V_U_OPS (vuint32m1_t, 0) DEF_RVV_FULL_V_U_OPS (vuint32m2_t, 0) DEF_RVV_FULL_V_U_OPS (vuint32m4_t, 0) @@ -555,7 +555,7 @@ DEF_RVV_FULL_V_U_OPS (vuint64m2_t, RVV_REQUIRE_FULL_V) DEF_RVV_FULL_V_U_OPS (vuint64m4_t, RVV_REQUIRE_FULL_V) DEF_RVV_FULL_V_U_OPS (vuint64m8_t, RVV_REQUIRE_FULL_V) -DEF_RVV_WEXTF_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_WEXTF_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_ELEN_64) DEF_RVV_WEXTF_OPS (vfloat32m1_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_WEXTF_OPS (vfloat32m2_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_WEXTF_OPS (vfloat32m4_t, RVV_REQUIRE_ELEN_FP_16) @@ -566,14 +566,14 @@ DEF_RVV_WEXTF_OPS (vfloat64m2_t, RVV_REQUIRE_ELEN_FP_64) DEF_RVV_WEXTF_OPS (vfloat64m4_t, RVV_REQUIRE_ELEN_FP_64) DEF_RVV_WEXTF_OPS (vfloat64m8_t, RVV_REQUIRE_ELEN_FP_64) -DEF_RVV_CONVERT_I_OPS (vint16mf4_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_CONVERT_I_OPS (vint16mf4_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_ELEN_64) DEF_RVV_CONVERT_I_OPS (vint16mf2_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_CONVERT_I_OPS (vint16m1_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_CONVERT_I_OPS (vint16m2_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_CONVERT_I_OPS (vint16m4_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_CONVERT_I_OPS (vint16m8_t, RVV_REQUIRE_ELEN_FP_16) -DEF_RVV_CONVERT_I_OPS (vint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_CONVERT_I_OPS (vint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_CONVERT_I_OPS (vint32m1_t, 0) DEF_RVV_CONVERT_I_OPS (vint32m2_t, 0) DEF_RVV_CONVERT_I_OPS (vint32m4_t, 0) @@ -583,14 +583,14 @@ DEF_RVV_CONVERT_I_OPS (vint64m2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_CONVERT_I_OPS (vint64m4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_CONVERT_I_OPS (vint64m8_t, RVV_REQUIRE_ELEN_64) -DEF_RVV_CONVERT_U_OPS (vuint16mf4_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_CONVERT_U_OPS (vuint16mf4_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_ELEN_64) DEF_RVV_CONVERT_U_OPS (vuint16mf2_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_CONVERT_U_OPS (vuint16m1_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_CONVERT_U_OPS (vuint16m2_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_CONVERT_U_OPS (vuint16m4_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_CONVERT_U_OPS (vuint16m8_t, RVV_REQUIRE_ELEN_FP_16) -DEF_RVV_CONVERT_U_OPS (vuint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_CONVERT_U_OPS (vuint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_CONVERT_U_OPS (vuint32m1_t, 0) DEF_RVV_CONVERT_U_OPS (vuint32m2_t, 0) DEF_RVV_CONVERT_U_OPS (vuint32m4_t, 0) @@ -600,7 +600,7 @@ 
DEF_RVV_CONVERT_U_OPS (vuint64m2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_CONVERT_U_OPS (vuint64m4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_CONVERT_U_OPS (vuint64m8_t, RVV_REQUIRE_ELEN_64) -DEF_RVV_WCONVERT_I_OPS (vint32mf2_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_WCONVERT_I_OPS (vint32mf2_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_ELEN_64) DEF_RVV_WCONVERT_I_OPS (vint32m1_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_WCONVERT_I_OPS (vint32m2_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_WCONVERT_I_OPS (vint32m4_t, RVV_REQUIRE_ELEN_FP_16) @@ -611,7 +611,7 @@ DEF_RVV_WCONVERT_I_OPS (vint64m2_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_64 DEF_RVV_WCONVERT_I_OPS (vint64m4_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_64) DEF_RVV_WCONVERT_I_OPS (vint64m8_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_64) -DEF_RVV_WCONVERT_U_OPS (vuint32mf2_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_WCONVERT_U_OPS (vuint32mf2_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_ELEN_64) DEF_RVV_WCONVERT_U_OPS (vuint32m1_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_WCONVERT_U_OPS (vuint32m2_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_WCONVERT_U_OPS (vuint32m4_t, RVV_REQUIRE_ELEN_FP_16) @@ -622,7 +622,7 @@ DEF_RVV_WCONVERT_U_OPS (vuint64m2_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_6 DEF_RVV_WCONVERT_U_OPS (vuint64m4_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_64) DEF_RVV_WCONVERT_U_OPS (vuint64m8_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_64) -DEF_RVV_WCONVERT_F_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_WCONVERT_F_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_64) DEF_RVV_WCONVERT_F_OPS (vfloat32m1_t, RVV_REQUIRE_ELEN_FP_32) DEF_RVV_WCONVERT_F_OPS (vfloat32m2_t, RVV_REQUIRE_ELEN_FP_32) DEF_RVV_WCONVERT_F_OPS (vfloat32m4_t, RVV_REQUIRE_ELEN_FP_32) @@ -633,76 +633,76 @@ DEF_RVV_WCONVERT_F_OPS (vfloat64m2_t, RVV_REQUIRE_ELEN_FP_64) DEF_RVV_WCONVERT_F_OPS (vfloat64m4_t, RVV_REQUIRE_ELEN_FP_64) DEF_RVV_WCONVERT_F_OPS (vfloat64m8_t, RVV_REQUIRE_ELEN_FP_64) -DEF_RVV_F32_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_F32_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_64) DEF_RVV_F32_OPS (vfloat32m1_t, RVV_REQUIRE_ELEN_FP_32) DEF_RVV_F32_OPS (vfloat32m2_t, RVV_REQUIRE_ELEN_FP_32) DEF_RVV_F32_OPS (vfloat32m4_t, RVV_REQUIRE_ELEN_FP_32) DEF_RVV_F32_OPS (vfloat32m8_t, RVV_REQUIRE_ELEN_FP_32) -DEF_RVV_WI_OPS (vint8mf8_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_WI_OPS (vint8mf8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_WI_OPS (vint8mf4_t, 0) DEF_RVV_WI_OPS (vint8mf2_t, 0) DEF_RVV_WI_OPS (vint8m1_t, 0) DEF_RVV_WI_OPS (vint8m2_t, 0) DEF_RVV_WI_OPS (vint8m4_t, 0) DEF_RVV_WI_OPS (vint8m8_t, 0) -DEF_RVV_WI_OPS (vint16mf4_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_WI_OPS (vint16mf4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_WI_OPS (vint16mf2_t, 0) DEF_RVV_WI_OPS (vint16m1_t, 0) DEF_RVV_WI_OPS (vint16m2_t, 0) DEF_RVV_WI_OPS (vint16m4_t, 0) DEF_RVV_WI_OPS (vint16m8_t, 0) -DEF_RVV_WI_OPS (vint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_WI_OPS (vint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_WI_OPS (vint32m1_t, 0) DEF_RVV_WI_OPS (vint32m2_t, 0) DEF_RVV_WI_OPS (vint32m4_t, 0) DEF_RVV_WI_OPS (vint32m8_t, 0) -DEF_RVV_WU_OPS (vuint8mf8_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_WU_OPS (vuint8mf8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_WU_OPS (vuint8mf4_t, 0) DEF_RVV_WU_OPS (vuint8mf2_t, 0) DEF_RVV_WU_OPS (vuint8m1_t, 0) DEF_RVV_WU_OPS (vuint8m2_t, 0) DEF_RVV_WU_OPS (vuint8m4_t, 0) DEF_RVV_WU_OPS (vuint8m8_t, 0) -DEF_RVV_WU_OPS (vuint16mf4_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_WU_OPS (vuint16mf4_t, RVV_REQUIRE_ELEN_64) 
DEF_RVV_WU_OPS (vuint16mf2_t, 0) DEF_RVV_WU_OPS (vuint16m1_t, 0) DEF_RVV_WU_OPS (vuint16m2_t, 0) DEF_RVV_WU_OPS (vuint16m4_t, 0) DEF_RVV_WU_OPS (vuint16m8_t, 0) -DEF_RVV_WU_OPS (vuint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_WU_OPS (vuint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_WU_OPS (vuint32m1_t, 0) DEF_RVV_WU_OPS (vuint32m2_t, 0) DEF_RVV_WU_OPS (vuint32m4_t, 0) DEF_RVV_WU_OPS (vuint32m8_t, 0) -DEF_RVV_WF_OPS (vfloat16mf4_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_WF_OPS (vfloat16mf4_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_ELEN_64) DEF_RVV_WF_OPS (vfloat16mf2_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_WF_OPS (vfloat16m1_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_WF_OPS (vfloat16m2_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_WF_OPS (vfloat16m4_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_WF_OPS (vfloat16m8_t, RVV_REQUIRE_ELEN_FP_16) -DEF_RVV_WF_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_WF_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_64) DEF_RVV_WF_OPS (vfloat32m1_t, RVV_REQUIRE_ELEN_FP_32) DEF_RVV_WF_OPS (vfloat32m2_t, RVV_REQUIRE_ELEN_FP_32) DEF_RVV_WF_OPS (vfloat32m4_t, RVV_REQUIRE_ELEN_FP_32) DEF_RVV_WF_OPS (vfloat32m8_t, RVV_REQUIRE_ELEN_FP_32) -DEF_RVV_EI16_OPS (vint8mf8_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_EI16_OPS (vint8mf8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_EI16_OPS (vint8mf4_t, 0) DEF_RVV_EI16_OPS (vint8mf2_t, 0) DEF_RVV_EI16_OPS (vint8m1_t, 0) DEF_RVV_EI16_OPS (vint8m2_t, 0) DEF_RVV_EI16_OPS (vint8m4_t, 0) -DEF_RVV_EI16_OPS (vint16mf4_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_EI16_OPS (vint16mf4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_EI16_OPS (vint16mf2_t, 0) DEF_RVV_EI16_OPS (vint16m1_t, 0) DEF_RVV_EI16_OPS (vint16m2_t, 0) DEF_RVV_EI16_OPS (vint16m4_t, 0) DEF_RVV_EI16_OPS (vint16m8_t, 0) -DEF_RVV_EI16_OPS (vint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_EI16_OPS (vint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_EI16_OPS (vint32m1_t, 0) DEF_RVV_EI16_OPS (vint32m2_t, 0) DEF_RVV_EI16_OPS (vint32m4_t, 0) @@ -711,19 +711,19 @@ DEF_RVV_EI16_OPS (vint64m1_t, RVV_REQUIRE_ELEN_64) DEF_RVV_EI16_OPS (vint64m2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_EI16_OPS (vint64m4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_EI16_OPS (vint64m8_t, RVV_REQUIRE_ELEN_64) -DEF_RVV_EI16_OPS (vuint8mf8_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_EI16_OPS (vuint8mf8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_EI16_OPS (vuint8mf4_t, 0) DEF_RVV_EI16_OPS (vuint8mf2_t, 0) DEF_RVV_EI16_OPS (vuint8m1_t, 0) DEF_RVV_EI16_OPS (vuint8m2_t, 0) DEF_RVV_EI16_OPS (vuint8m4_t, 0) -DEF_RVV_EI16_OPS (vuint16mf4_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_EI16_OPS (vuint16mf4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_EI16_OPS (vuint16mf2_t, 0) DEF_RVV_EI16_OPS (vuint16m1_t, 0) DEF_RVV_EI16_OPS (vuint16m2_t, 0) DEF_RVV_EI16_OPS (vuint16m4_t, 0) DEF_RVV_EI16_OPS (vuint16m8_t, 0) -DEF_RVV_EI16_OPS (vuint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_EI16_OPS (vuint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_EI16_OPS (vuint32m1_t, 0) DEF_RVV_EI16_OPS (vuint32m2_t, 0) DEF_RVV_EI16_OPS (vuint32m4_t, 0) @@ -733,14 +733,14 @@ DEF_RVV_EI16_OPS (vuint64m2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_EI16_OPS (vuint64m4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_EI16_OPS (vuint64m8_t, RVV_REQUIRE_ELEN_64) -DEF_RVV_EI16_OPS (vfloat16mf4_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_EI16_OPS (vfloat16mf4_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_ELEN_64) DEF_RVV_EI16_OPS (vfloat16mf2_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_EI16_OPS (vfloat16m1_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_EI16_OPS (vfloat16m2_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_EI16_OPS (vfloat16m4_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_EI16_OPS 
(vfloat16m8_t, RVV_REQUIRE_ELEN_FP_16) -DEF_RVV_EI16_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_EI16_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_64) DEF_RVV_EI16_OPS (vfloat32m1_t, RVV_REQUIRE_ELEN_FP_32) DEF_RVV_EI16_OPS (vfloat32m2_t, RVV_REQUIRE_ELEN_FP_32) DEF_RVV_EI16_OPS (vfloat32m4_t, RVV_REQUIRE_ELEN_FP_32) @@ -751,13 +751,13 @@ DEF_RVV_EI16_OPS (vfloat64m2_t, RVV_REQUIRE_ELEN_FP_64) DEF_RVV_EI16_OPS (vfloat64m4_t, RVV_REQUIRE_ELEN_FP_64) DEF_RVV_EI16_OPS (vfloat64m8_t, RVV_REQUIRE_ELEN_FP_64) -DEF_RVV_EEW8_INTERPRET_OPS (vint16mf4_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_EEW8_INTERPRET_OPS (vint16mf4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_EEW8_INTERPRET_OPS (vint16mf2_t, 0) DEF_RVV_EEW8_INTERPRET_OPS (vint16m1_t, 0) DEF_RVV_EEW8_INTERPRET_OPS (vint16m2_t, 0) DEF_RVV_EEW8_INTERPRET_OPS (vint16m4_t, 0) DEF_RVV_EEW8_INTERPRET_OPS (vint16m8_t, 0) -DEF_RVV_EEW8_INTERPRET_OPS (vint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_EEW8_INTERPRET_OPS (vint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_EEW8_INTERPRET_OPS (vint32m1_t, 0) DEF_RVV_EEW8_INTERPRET_OPS (vint32m2_t, 0) DEF_RVV_EEW8_INTERPRET_OPS (vint32m4_t, 0) @@ -766,13 +766,13 @@ DEF_RVV_EEW8_INTERPRET_OPS (vint64m1_t, RVV_REQUIRE_ELEN_64) DEF_RVV_EEW8_INTERPRET_OPS (vint64m2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_EEW8_INTERPRET_OPS (vint64m4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_EEW8_INTERPRET_OPS (vint64m8_t, RVV_REQUIRE_ELEN_64) -DEF_RVV_EEW8_INTERPRET_OPS (vuint16mf4_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_EEW8_INTERPRET_OPS (vuint16mf4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_EEW8_INTERPRET_OPS (vuint16mf2_t, 0) DEF_RVV_EEW8_INTERPRET_OPS (vuint16m1_t, 0) DEF_RVV_EEW8_INTERPRET_OPS (vuint16m2_t, 0) DEF_RVV_EEW8_INTERPRET_OPS (vuint16m4_t, 0) DEF_RVV_EEW8_INTERPRET_OPS (vuint16m8_t, 0) -DEF_RVV_EEW8_INTERPRET_OPS (vuint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_EEW8_INTERPRET_OPS (vuint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_EEW8_INTERPRET_OPS (vuint32m1_t, 0) DEF_RVV_EEW8_INTERPRET_OPS (vuint32m2_t, 0) DEF_RVV_EEW8_INTERPRET_OPS (vuint32m4_t, 0) @@ -788,7 +788,7 @@ DEF_RVV_EEW16_INTERPRET_OPS (vint8m1_t, 0) DEF_RVV_EEW16_INTERPRET_OPS (vint8m2_t, 0) DEF_RVV_EEW16_INTERPRET_OPS (vint8m4_t, 0) DEF_RVV_EEW16_INTERPRET_OPS (vint8m8_t, 0) -DEF_RVV_EEW16_INTERPRET_OPS (vint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_EEW16_INTERPRET_OPS (vint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_EEW16_INTERPRET_OPS (vint32m1_t, 0) DEF_RVV_EEW16_INTERPRET_OPS (vint32m2_t, 0) DEF_RVV_EEW16_INTERPRET_OPS (vint32m4_t, 0) @@ -803,7 +803,7 @@ DEF_RVV_EEW16_INTERPRET_OPS (vuint8m1_t, 0) DEF_RVV_EEW16_INTERPRET_OPS (vuint8m2_t, 0) DEF_RVV_EEW16_INTERPRET_OPS (vuint8m4_t, 0) DEF_RVV_EEW16_INTERPRET_OPS (vuint8m8_t, 0) -DEF_RVV_EEW16_INTERPRET_OPS (vuint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_EEW16_INTERPRET_OPS (vuint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_EEW16_INTERPRET_OPS (vuint32m1_t, 0) DEF_RVV_EEW16_INTERPRET_OPS (vuint32m2_t, 0) DEF_RVV_EEW16_INTERPRET_OPS (vuint32m4_t, 0) @@ -994,53 +994,53 @@ DEF_RVV_UNSIGNED_EEW64_LMUL1_INTERPRET_OPS(vbool16_t, 0) DEF_RVV_UNSIGNED_EEW64_LMUL1_INTERPRET_OPS(vbool32_t, 0) DEF_RVV_UNSIGNED_EEW64_LMUL1_INTERPRET_OPS(vbool64_t, RVV_REQUIRE_ELEN_64) -DEF_RVV_X2_VLMUL_EXT_OPS (vint8mf8_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X2_VLMUL_EXT_OPS (vint8mf8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X2_VLMUL_EXT_OPS (vint8mf4_t, 0) DEF_RVV_X2_VLMUL_EXT_OPS (vint8mf2_t, 0) DEF_RVV_X2_VLMUL_EXT_OPS (vint8m1_t, 0) DEF_RVV_X2_VLMUL_EXT_OPS (vint8m2_t, 0) DEF_RVV_X2_VLMUL_EXT_OPS (vint8m4_t, 0) -DEF_RVV_X2_VLMUL_EXT_OPS (vint16mf4_t, 
RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X2_VLMUL_EXT_OPS (vint16mf4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X2_VLMUL_EXT_OPS (vint16mf2_t, 0) DEF_RVV_X2_VLMUL_EXT_OPS (vint16m1_t, 0) DEF_RVV_X2_VLMUL_EXT_OPS (vint16m2_t, 0) DEF_RVV_X2_VLMUL_EXT_OPS (vint16m4_t, 0) -DEF_RVV_X2_VLMUL_EXT_OPS (vint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X2_VLMUL_EXT_OPS (vint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X2_VLMUL_EXT_OPS (vint32m1_t, 0) DEF_RVV_X2_VLMUL_EXT_OPS (vint32m2_t, 0) DEF_RVV_X2_VLMUL_EXT_OPS (vint32m4_t, 0) DEF_RVV_X2_VLMUL_EXT_OPS (vint64m1_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X2_VLMUL_EXT_OPS (vint64m2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X2_VLMUL_EXT_OPS (vint64m4_t, RVV_REQUIRE_ELEN_64) -DEF_RVV_X2_VLMUL_EXT_OPS (vuint8mf8_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X2_VLMUL_EXT_OPS (vuint8mf8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X2_VLMUL_EXT_OPS (vuint8mf4_t, 0) DEF_RVV_X2_VLMUL_EXT_OPS (vuint8mf2_t, 0) DEF_RVV_X2_VLMUL_EXT_OPS (vuint8m1_t, 0) DEF_RVV_X2_VLMUL_EXT_OPS (vuint8m2_t, 0) DEF_RVV_X2_VLMUL_EXT_OPS (vuint8m4_t, 0) -DEF_RVV_X2_VLMUL_EXT_OPS (vuint16mf4_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X2_VLMUL_EXT_OPS (vuint16mf4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X2_VLMUL_EXT_OPS (vuint16mf2_t, 0) DEF_RVV_X2_VLMUL_EXT_OPS (vuint16m1_t, 0) DEF_RVV_X2_VLMUL_EXT_OPS (vuint16m2_t, 0) DEF_RVV_X2_VLMUL_EXT_OPS (vuint16m4_t, 0) -DEF_RVV_X2_VLMUL_EXT_OPS (vuint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X2_VLMUL_EXT_OPS (vuint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X2_VLMUL_EXT_OPS (vuint32m1_t, 0) DEF_RVV_X2_VLMUL_EXT_OPS (vuint32m2_t, 0) DEF_RVV_X2_VLMUL_EXT_OPS (vuint32m4_t, 0) DEF_RVV_X2_VLMUL_EXT_OPS (vuint64m1_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X2_VLMUL_EXT_OPS (vuint64m2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X2_VLMUL_EXT_OPS (vuint64m4_t, RVV_REQUIRE_ELEN_64) -DEF_RVV_X2_VLMUL_EXT_OPS (vbfloat16mf4_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X2_VLMUL_EXT_OPS (vbfloat16mf4_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_ELEN_64) DEF_RVV_X2_VLMUL_EXT_OPS (vbfloat16mf2_t, RVV_REQUIRE_ELEN_BF_16) DEF_RVV_X2_VLMUL_EXT_OPS (vbfloat16m1_t, RVV_REQUIRE_ELEN_BF_16) DEF_RVV_X2_VLMUL_EXT_OPS (vbfloat16m2_t, RVV_REQUIRE_ELEN_BF_16) DEF_RVV_X2_VLMUL_EXT_OPS (vbfloat16m4_t, RVV_REQUIRE_ELEN_BF_16) -DEF_RVV_X2_VLMUL_EXT_OPS (vfloat16mf4_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X2_VLMUL_EXT_OPS (vfloat16mf4_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_ELEN_64) DEF_RVV_X2_VLMUL_EXT_OPS (vfloat16mf2_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_X2_VLMUL_EXT_OPS (vfloat16m1_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_X2_VLMUL_EXT_OPS (vfloat16m2_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_X2_VLMUL_EXT_OPS (vfloat16m4_t, RVV_REQUIRE_ELEN_FP_16) -DEF_RVV_X2_VLMUL_EXT_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X2_VLMUL_EXT_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_64) DEF_RVV_X2_VLMUL_EXT_OPS (vfloat32m1_t, RVV_REQUIRE_ELEN_FP_32) DEF_RVV_X2_VLMUL_EXT_OPS (vfloat32m2_t, RVV_REQUIRE_ELEN_FP_32) DEF_RVV_X2_VLMUL_EXT_OPS (vfloat32m4_t, RVV_REQUIRE_ELEN_FP_32) @@ -1048,107 +1048,107 @@ DEF_RVV_X2_VLMUL_EXT_OPS (vfloat64m1_t, RVV_REQUIRE_ELEN_FP_64) DEF_RVV_X2_VLMUL_EXT_OPS (vfloat64m2_t, RVV_REQUIRE_ELEN_FP_64) DEF_RVV_X2_VLMUL_EXT_OPS (vfloat64m4_t, RVV_REQUIRE_ELEN_FP_64) -DEF_RVV_X4_VLMUL_EXT_OPS (vint8mf8_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X4_VLMUL_EXT_OPS (vint8mf8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X4_VLMUL_EXT_OPS (vint8mf4_t, 0) DEF_RVV_X4_VLMUL_EXT_OPS (vint8mf2_t, 0) DEF_RVV_X4_VLMUL_EXT_OPS (vint8m1_t, 0) DEF_RVV_X4_VLMUL_EXT_OPS (vint8m2_t, 0) -DEF_RVV_X4_VLMUL_EXT_OPS (vint16mf4_t, 
RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X4_VLMUL_EXT_OPS (vint16mf4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X4_VLMUL_EXT_OPS (vint16mf2_t, 0) DEF_RVV_X4_VLMUL_EXT_OPS (vint16m1_t, 0) DEF_RVV_X4_VLMUL_EXT_OPS (vint16m2_t, 0) -DEF_RVV_X4_VLMUL_EXT_OPS (vint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X4_VLMUL_EXT_OPS (vint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X4_VLMUL_EXT_OPS (vint32m1_t, 0) DEF_RVV_X4_VLMUL_EXT_OPS (vint32m2_t, 0) DEF_RVV_X4_VLMUL_EXT_OPS (vint64m1_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X4_VLMUL_EXT_OPS (vint64m2_t, RVV_REQUIRE_ELEN_64) -DEF_RVV_X4_VLMUL_EXT_OPS (vuint8mf8_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X4_VLMUL_EXT_OPS (vuint8mf8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X4_VLMUL_EXT_OPS (vuint8mf4_t, 0) DEF_RVV_X4_VLMUL_EXT_OPS (vuint8mf2_t, 0) DEF_RVV_X4_VLMUL_EXT_OPS (vuint8m1_t, 0) DEF_RVV_X4_VLMUL_EXT_OPS (vuint8m2_t, 0) -DEF_RVV_X4_VLMUL_EXT_OPS (vuint16mf4_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X4_VLMUL_EXT_OPS (vuint16mf4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X4_VLMUL_EXT_OPS (vuint16mf2_t, 0) DEF_RVV_X4_VLMUL_EXT_OPS (vuint16m1_t, 0) DEF_RVV_X4_VLMUL_EXT_OPS (vuint16m2_t, 0) -DEF_RVV_X4_VLMUL_EXT_OPS (vuint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X4_VLMUL_EXT_OPS (vuint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X4_VLMUL_EXT_OPS (vuint32m1_t, 0) DEF_RVV_X4_VLMUL_EXT_OPS (vuint32m2_t, 0) DEF_RVV_X4_VLMUL_EXT_OPS (vuint64m1_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X4_VLMUL_EXT_OPS (vuint64m2_t, RVV_REQUIRE_ELEN_64) -DEF_RVV_X4_VLMUL_EXT_OPS (vbfloat16mf4_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X4_VLMUL_EXT_OPS (vbfloat16mf4_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_ELEN_64) DEF_RVV_X4_VLMUL_EXT_OPS (vbfloat16mf2_t, RVV_REQUIRE_ELEN_BF_16) DEF_RVV_X4_VLMUL_EXT_OPS (vbfloat16m1_t, RVV_REQUIRE_ELEN_BF_16) DEF_RVV_X4_VLMUL_EXT_OPS (vbfloat16m2_t, RVV_REQUIRE_ELEN_BF_16) -DEF_RVV_X4_VLMUL_EXT_OPS (vfloat16mf4_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X4_VLMUL_EXT_OPS (vfloat16mf4_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_ELEN_64) DEF_RVV_X4_VLMUL_EXT_OPS (vfloat16mf2_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_X4_VLMUL_EXT_OPS (vfloat16m1_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_X4_VLMUL_EXT_OPS (vfloat16m2_t, RVV_REQUIRE_ELEN_FP_16) -DEF_RVV_X4_VLMUL_EXT_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X4_VLMUL_EXT_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_64) DEF_RVV_X4_VLMUL_EXT_OPS (vfloat32m1_t, RVV_REQUIRE_ELEN_FP_32) DEF_RVV_X4_VLMUL_EXT_OPS (vfloat32m2_t, RVV_REQUIRE_ELEN_FP_32) DEF_RVV_X4_VLMUL_EXT_OPS (vfloat64m1_t, RVV_REQUIRE_ELEN_FP_64) DEF_RVV_X4_VLMUL_EXT_OPS (vfloat64m2_t, RVV_REQUIRE_ELEN_FP_64) -DEF_RVV_X8_VLMUL_EXT_OPS (vint8mf8_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X8_VLMUL_EXT_OPS (vint8mf8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X8_VLMUL_EXT_OPS (vint8mf4_t, 0) DEF_RVV_X8_VLMUL_EXT_OPS (vint8mf2_t, 0) DEF_RVV_X8_VLMUL_EXT_OPS (vint8m1_t, 0) -DEF_RVV_X8_VLMUL_EXT_OPS (vint16mf4_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X8_VLMUL_EXT_OPS (vint16mf4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X8_VLMUL_EXT_OPS (vint16mf2_t, 0) DEF_RVV_X8_VLMUL_EXT_OPS (vint16m1_t, 0) -DEF_RVV_X8_VLMUL_EXT_OPS (vint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X8_VLMUL_EXT_OPS (vint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X8_VLMUL_EXT_OPS (vint32m1_t, 0) DEF_RVV_X8_VLMUL_EXT_OPS (vint64m1_t, RVV_REQUIRE_ELEN_64) -DEF_RVV_X8_VLMUL_EXT_OPS (vuint8mf8_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X8_VLMUL_EXT_OPS (vuint8mf8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X8_VLMUL_EXT_OPS (vuint8mf4_t, 0) DEF_RVV_X8_VLMUL_EXT_OPS (vuint8mf2_t, 0) DEF_RVV_X8_VLMUL_EXT_OPS (vuint8m1_t, 0) 
-DEF_RVV_X8_VLMUL_EXT_OPS (vuint16mf4_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X8_VLMUL_EXT_OPS (vuint16mf4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X8_VLMUL_EXT_OPS (vuint16mf2_t, 0) DEF_RVV_X8_VLMUL_EXT_OPS (vuint16m1_t, 0) -DEF_RVV_X8_VLMUL_EXT_OPS (vuint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X8_VLMUL_EXT_OPS (vuint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X8_VLMUL_EXT_OPS (vuint32m1_t, 0) DEF_RVV_X8_VLMUL_EXT_OPS (vuint64m1_t, RVV_REQUIRE_ELEN_64) -DEF_RVV_X8_VLMUL_EXT_OPS (vbfloat16mf4_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X8_VLMUL_EXT_OPS (vbfloat16mf4_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_ELEN_64) DEF_RVV_X8_VLMUL_EXT_OPS (vbfloat16mf2_t, RVV_REQUIRE_ELEN_BF_16) DEF_RVV_X8_VLMUL_EXT_OPS (vbfloat16m1_t, RVV_REQUIRE_ELEN_BF_16) -DEF_RVV_X8_VLMUL_EXT_OPS (vfloat16mf4_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X8_VLMUL_EXT_OPS (vfloat16mf4_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_ELEN_64) DEF_RVV_X8_VLMUL_EXT_OPS (vfloat16mf2_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_X8_VLMUL_EXT_OPS (vfloat16m1_t, RVV_REQUIRE_ELEN_FP_16) -DEF_RVV_X8_VLMUL_EXT_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X8_VLMUL_EXT_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_64) DEF_RVV_X8_VLMUL_EXT_OPS (vfloat32m1_t, RVV_REQUIRE_ELEN_FP_32) DEF_RVV_X8_VLMUL_EXT_OPS (vfloat64m1_t, RVV_REQUIRE_ELEN_FP_64) -DEF_RVV_X16_VLMUL_EXT_OPS (vint8mf8_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X16_VLMUL_EXT_OPS (vint8mf8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X16_VLMUL_EXT_OPS (vint8mf4_t, 0) DEF_RVV_X16_VLMUL_EXT_OPS (vint8mf2_t, 0) -DEF_RVV_X16_VLMUL_EXT_OPS (vint16mf4_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X16_VLMUL_EXT_OPS (vint16mf4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X16_VLMUL_EXT_OPS (vint16mf2_t, 0) -DEF_RVV_X16_VLMUL_EXT_OPS (vint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_X16_VLMUL_EXT_OPS (vuint8mf8_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X16_VLMUL_EXT_OPS (vint32mf2_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_X16_VLMUL_EXT_OPS (vuint8mf8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X16_VLMUL_EXT_OPS (vuint8mf4_t, 0) DEF_RVV_X16_VLMUL_EXT_OPS (vuint8mf2_t, 0) -DEF_RVV_X16_VLMUL_EXT_OPS (vuint16mf4_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X16_VLMUL_EXT_OPS (vuint16mf4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X16_VLMUL_EXT_OPS (vuint16mf2_t, 0) -DEF_RVV_X16_VLMUL_EXT_OPS (vuint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_X16_VLMUL_EXT_OPS (vbfloat16mf4_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X16_VLMUL_EXT_OPS (vuint32mf2_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_X16_VLMUL_EXT_OPS (vbfloat16mf4_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_ELEN_64) DEF_RVV_X16_VLMUL_EXT_OPS (vbfloat16mf2_t, RVV_REQUIRE_ELEN_BF_16) -DEF_RVV_X16_VLMUL_EXT_OPS (vfloat16mf4_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X16_VLMUL_EXT_OPS (vfloat16mf4_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_ELEN_64) DEF_RVV_X16_VLMUL_EXT_OPS (vfloat16mf2_t, RVV_REQUIRE_ELEN_FP_16) -DEF_RVV_X16_VLMUL_EXT_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X16_VLMUL_EXT_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_64) -DEF_RVV_X32_VLMUL_EXT_OPS (vint8mf8_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X32_VLMUL_EXT_OPS (vint8mf8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X32_VLMUL_EXT_OPS (vint8mf4_t, 0) -DEF_RVV_X32_VLMUL_EXT_OPS (vint16mf4_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_X32_VLMUL_EXT_OPS (vuint8mf8_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X32_VLMUL_EXT_OPS (vint16mf4_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_X32_VLMUL_EXT_OPS (vuint8mf8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_X32_VLMUL_EXT_OPS (vuint8mf4_t, 0) 
-DEF_RVV_X32_VLMUL_EXT_OPS (vuint16mf4_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_X32_VLMUL_EXT_OPS (vbfloat16mf4_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_X32_VLMUL_EXT_OPS (vfloat16mf4_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X32_VLMUL_EXT_OPS (vuint16mf4_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_X32_VLMUL_EXT_OPS (vbfloat16mf4_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_ELEN_64) +DEF_RVV_X32_VLMUL_EXT_OPS (vfloat16mf4_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_ELEN_64) -DEF_RVV_X64_VLMUL_EXT_OPS (vint8mf8_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_X64_VLMUL_EXT_OPS (vuint8mf8_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_X64_VLMUL_EXT_OPS (vint8mf8_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_X64_VLMUL_EXT_OPS (vuint8mf8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_LMUL1_OPS (vint8m1_t, 0) DEF_RVV_LMUL1_OPS (vint16m1_t, 0) @@ -1189,20 +1189,20 @@ DEF_RVV_LMUL4_OPS (vbfloat16m4_t, RVV_REQUIRE_ELEN_BF_16) DEF_RVV_LMUL4_OPS (vfloat32m4_t, RVV_REQUIRE_ELEN_FP_32) DEF_RVV_LMUL4_OPS (vfloat64m4_t, RVV_REQUIRE_ELEN_FP_64) -DEF_RVV_TUPLE_OPS (vint8mf8x2_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vuint8mf8x2_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vint8mf8x3_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vuint8mf8x3_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vint8mf8x4_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vuint8mf8x4_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vint8mf8x5_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vuint8mf8x5_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vint8mf8x6_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vuint8mf8x6_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vint8mf8x7_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vuint8mf8x7_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vint8mf8x8_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vuint8mf8x8_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_TUPLE_OPS (vint8mf8x2_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vuint8mf8x2_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vint8mf8x3_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vuint8mf8x3_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vint8mf8x4_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vuint8mf8x4_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vint8mf8x5_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vuint8mf8x5_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vint8mf8x6_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vuint8mf8x6_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vint8mf8x7_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vuint8mf8x7_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vint8mf8x8_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vuint8mf8x8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_TUPLE_OPS (vint8mf4x2_t, 0) DEF_RVV_TUPLE_OPS (vuint8mf4x2_t, 0) DEF_RVV_TUPLE_OPS (vint8mf4x3_t, 0) @@ -1253,20 +1253,20 @@ DEF_RVV_TUPLE_OPS (vint8m2x4_t, 0) DEF_RVV_TUPLE_OPS (vuint8m2x4_t, 0) DEF_RVV_TUPLE_OPS (vint8m4x2_t, 0) DEF_RVV_TUPLE_OPS (vuint8m4x2_t, 0) -DEF_RVV_TUPLE_OPS (vint16mf4x2_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vuint16mf4x2_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vint16mf4x3_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vuint16mf4x3_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vint16mf4x4_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vuint16mf4x4_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vint16mf4x5_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vuint16mf4x5_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vint16mf4x6_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vuint16mf4x6_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vint16mf4x7_t, 
RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vuint16mf4x7_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vint16mf4x8_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vuint16mf4x8_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_TUPLE_OPS (vint16mf4x2_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vuint16mf4x2_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vint16mf4x3_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vuint16mf4x3_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vint16mf4x4_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vuint16mf4x4_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vint16mf4x5_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vuint16mf4x5_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vint16mf4x6_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vuint16mf4x6_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vint16mf4x7_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vuint16mf4x7_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vint16mf4x8_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vuint16mf4x8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_TUPLE_OPS (vint16mf2x2_t, 0) DEF_RVV_TUPLE_OPS (vuint16mf2x2_t, 0) DEF_RVV_TUPLE_OPS (vint16mf2x3_t, 0) @@ -1303,20 +1303,20 @@ DEF_RVV_TUPLE_OPS (vint16m2x4_t, 0) DEF_RVV_TUPLE_OPS (vuint16m2x4_t, 0) DEF_RVV_TUPLE_OPS (vint16m4x2_t, 0) DEF_RVV_TUPLE_OPS (vuint16m4x2_t, 0) -DEF_RVV_TUPLE_OPS (vint32mf2x2_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vuint32mf2x2_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vint32mf2x3_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vuint32mf2x3_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vint32mf2x4_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vuint32mf2x4_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vint32mf2x5_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vuint32mf2x5_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vint32mf2x6_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vuint32mf2x6_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vint32mf2x7_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vuint32mf2x7_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vint32mf2x8_t, RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vuint32mf2x8_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_TUPLE_OPS (vint32mf2x2_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vuint32mf2x2_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vint32mf2x3_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vuint32mf2x3_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vint32mf2x4_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vuint32mf2x4_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vint32mf2x5_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vuint32mf2x5_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vint32mf2x6_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vuint32mf2x6_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vint32mf2x7_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vuint32mf2x7_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vint32mf2x8_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vuint32mf2x8_t, RVV_REQUIRE_ELEN_64) DEF_RVV_TUPLE_OPS (vint32m1x2_t, 0) DEF_RVV_TUPLE_OPS (vuint32m1x2_t, 0) DEF_RVV_TUPLE_OPS (vint32m1x3_t, 0) @@ -1361,13 +1361,13 @@ DEF_RVV_TUPLE_OPS (vint64m2x4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_TUPLE_OPS (vuint64m2x4_t, RVV_REQUIRE_ELEN_64) DEF_RVV_TUPLE_OPS (vint64m4x2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_TUPLE_OPS (vuint64m4x2_t, RVV_REQUIRE_ELEN_64) -DEF_RVV_TUPLE_OPS (vbfloat16mf4x2_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vbfloat16mf4x3_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vbfloat16mf4x4_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS 
(vbfloat16mf4x5_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vbfloat16mf4x6_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vbfloat16mf4x7_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vbfloat16mf4x8_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_TUPLE_OPS (vbfloat16mf4x2_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vbfloat16mf4x3_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vbfloat16mf4x4_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vbfloat16mf4x5_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vbfloat16mf4x6_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vbfloat16mf4x7_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vbfloat16mf4x8_t, RVV_REQUIRE_ELEN_BF_16 | RVV_REQUIRE_ELEN_64) DEF_RVV_TUPLE_OPS (vbfloat16mf2x2_t, RVV_REQUIRE_ELEN_BF_16) DEF_RVV_TUPLE_OPS (vbfloat16mf2x3_t, RVV_REQUIRE_ELEN_BF_16) DEF_RVV_TUPLE_OPS (vbfloat16mf2x4_t, RVV_REQUIRE_ELEN_BF_16) @@ -1386,13 +1386,13 @@ DEF_RVV_TUPLE_OPS (vbfloat16m2x2_t, RVV_REQUIRE_ELEN_BF_16) DEF_RVV_TUPLE_OPS (vbfloat16m2x3_t, RVV_REQUIRE_ELEN_BF_16) DEF_RVV_TUPLE_OPS (vbfloat16m2x4_t, RVV_REQUIRE_ELEN_BF_16) DEF_RVV_TUPLE_OPS (vbfloat16m4x2_t, RVV_REQUIRE_ELEN_BF_16) -DEF_RVV_TUPLE_OPS (vfloat16mf4x2_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vfloat16mf4x3_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vfloat16mf4x4_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vfloat16mf4x5_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vfloat16mf4x6_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vfloat16mf4x7_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vfloat16mf4x8_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_TUPLE_OPS (vfloat16mf4x2_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vfloat16mf4x3_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vfloat16mf4x4_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vfloat16mf4x5_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vfloat16mf4x6_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vfloat16mf4x7_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vfloat16mf4x8_t, RVV_REQUIRE_ELEN_FP_16 | RVV_REQUIRE_ELEN_64) DEF_RVV_TUPLE_OPS (vfloat16mf2x2_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_TUPLE_OPS (vfloat16mf2x3_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_TUPLE_OPS (vfloat16mf2x4_t, RVV_REQUIRE_ELEN_FP_16) @@ -1411,13 +1411,13 @@ DEF_RVV_TUPLE_OPS (vfloat16m2x2_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_TUPLE_OPS (vfloat16m2x3_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_TUPLE_OPS (vfloat16m2x4_t, RVV_REQUIRE_ELEN_FP_16) DEF_RVV_TUPLE_OPS (vfloat16m4x2_t, RVV_REQUIRE_ELEN_FP_16) -DEF_RVV_TUPLE_OPS (vfloat32mf2x2_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vfloat32mf2x3_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vfloat32mf2x4_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vfloat32mf2x5_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vfloat32mf2x6_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS (vfloat32mf2x7_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_MIN_VLEN_64) -DEF_RVV_TUPLE_OPS 
(vfloat32mf2x8_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_TUPLE_OPS (vfloat32mf2x2_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vfloat32mf2x3_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vfloat32mf2x4_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vfloat32mf2x5_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vfloat32mf2x6_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vfloat32mf2x7_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_64) +DEF_RVV_TUPLE_OPS (vfloat32mf2x8_t, RVV_REQUIRE_ELEN_FP_32 | RVV_REQUIRE_ELEN_64) DEF_RVV_TUPLE_OPS (vfloat32m1x2_t, RVV_REQUIRE_ELEN_FP_32) DEF_RVV_TUPLE_OPS (vfloat32m1x3_t, RVV_REQUIRE_ELEN_FP_32) DEF_RVV_TUPLE_OPS (vfloat32m1x4_t, RVV_REQUIRE_ELEN_FP_32) @@ -1441,7 +1441,7 @@ DEF_RVV_TUPLE_OPS (vfloat64m2x3_t, RVV_REQUIRE_ELEN_FP_64) DEF_RVV_TUPLE_OPS (vfloat64m2x4_t, RVV_REQUIRE_ELEN_FP_64) DEF_RVV_TUPLE_OPS (vfloat64m4x2_t, RVV_REQUIRE_ELEN_FP_64) -DEF_RVV_CRYPTO_SEW32_OPS (vuint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_CRYPTO_SEW32_OPS (vuint32mf2_t, RVV_REQUIRE_ELEN_64) DEF_RVV_CRYPTO_SEW32_OPS (vuint32m1_t, 0) DEF_RVV_CRYPTO_SEW32_OPS (vuint32m2_t, 0) DEF_RVV_CRYPTO_SEW32_OPS (vuint32m4_t, 0) diff --git a/gcc/config/riscv/riscv-vector-switch.def b/gcc/config/riscv/riscv-vector-switch.def index 23744d0..1b0d619 100644 --- a/gcc/config/riscv/riscv-vector-switch.def +++ b/gcc/config/riscv/riscv-vector-switch.def @@ -64,13 +64,13 @@ Encode the ratio of SEW/LMUL into the mask types. |BI |RVVM1BI|RVVMF2BI|RVVMF4BI|RVVMF8BI|RVVMF16BI|RVVMF32BI|RVVMF64BI| */ /* Return 'REQUIREMENT' for machine_mode 'MODE'. - For example: 'MODE' = RVVMF64BImode needs TARGET_MIN_VLEN > 32. */ + For example: 'MODE' = RVVMF64BImode needs TARGET_VECTOR_ELEN_64. */ #ifndef ENTRY #define ENTRY(MODE, REQUIREMENT, VLMUL, RATIO) #endif /* Disable modes if TARGET_MIN_VLEN == 32. */ -ENTRY (RVVMF64BI, TARGET_MIN_VLEN > 32, TARGET_XTHEADVECTOR ? LMUL_1 :LMUL_F8, 64) +ENTRY (RVVMF64BI, TARGET_VECTOR_ELEN_64, TARGET_XTHEADVECTOR ? LMUL_1 :LMUL_F8, 64) ENTRY (RVVMF32BI, true, TARGET_XTHEADVECTOR ? LMUL_1 :LMUL_F4, 32) ENTRY (RVVMF16BI, true, TARGET_XTHEADVECTOR ? LMUL_1 : LMUL_F2 , 16) ENTRY (RVVMF8BI, true, LMUL_1, 8) @@ -85,7 +85,7 @@ ENTRY (RVVM2QI, true, LMUL_2, 4) ENTRY (RVVM1QI, true, LMUL_1, 8) ENTRY (RVVMF2QI, !TARGET_XTHEADVECTOR, LMUL_F2, 16) ENTRY (RVVMF4QI, !TARGET_XTHEADVECTOR, LMUL_F4, 32) -ENTRY (RVVMF8QI, TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, LMUL_F8, 64) +ENTRY (RVVMF8QI, TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, LMUL_F8, 64) /* Disable modes if TARGET_MIN_VLEN == 32. */ ENTRY (RVVM8HI, true, LMUL_8, 2) @@ -93,7 +93,7 @@ ENTRY (RVVM4HI, true, LMUL_4, 4) ENTRY (RVVM2HI, true, LMUL_2, 8) ENTRY (RVVM1HI, true, LMUL_1, 16) ENTRY (RVVMF2HI, !TARGET_XTHEADVECTOR, LMUL_F2, 32) -ENTRY (RVVMF4HI, TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, LMUL_F4, 64) +ENTRY (RVVMF4HI, TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, LMUL_F4, 64) /* Disable modes if TARGET_MIN_VLEN == 32 or !TARGET_VECTOR_ELEN_BF_16. 
*/ ENTRY (RVVM8BF, TARGET_VECTOR_ELEN_BF_16, LMUL_8, 2) @@ -109,21 +109,21 @@ ENTRY (RVVM4HF, TARGET_VECTOR_ELEN_FP_16, LMUL_4, 4) ENTRY (RVVM2HF, TARGET_VECTOR_ELEN_FP_16, LMUL_2, 8) ENTRY (RVVM1HF, TARGET_VECTOR_ELEN_FP_16, LMUL_1, 16) ENTRY (RVVMF2HF, TARGET_VECTOR_ELEN_FP_16 && !TARGET_XTHEADVECTOR, LMUL_F2, 32) -ENTRY (RVVMF4HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, LMUL_F4, 64) +ENTRY (RVVMF4HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, LMUL_F4, 64) /* Disable modes if TARGET_MIN_VLEN == 32. */ ENTRY (RVVM8SI, true, LMUL_8, 4) ENTRY (RVVM4SI, true, LMUL_4, 8) ENTRY (RVVM2SI, true, LMUL_2, 16) ENTRY (RVVM1SI, true, LMUL_1, 32) -ENTRY (RVVMF2SI, TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, LMUL_F2, 64) +ENTRY (RVVMF2SI, TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, LMUL_F2, 64) /* Disable modes if TARGET_MIN_VLEN == 32 or !TARGET_VECTOR_ELEN_FP_32. */ ENTRY (RVVM8SF, TARGET_VECTOR_ELEN_FP_32, LMUL_8, 4) ENTRY (RVVM4SF, TARGET_VECTOR_ELEN_FP_32, LMUL_4, 8) ENTRY (RVVM2SF, TARGET_VECTOR_ELEN_FP_32, LMUL_2, 16) ENTRY (RVVM1SF, TARGET_VECTOR_ELEN_FP_32, LMUL_1, 32) -ENTRY (RVVMF2SF, TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, LMUL_F2, 64) +ENTRY (RVVMF2SF, TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, LMUL_F2, 64) /* Disable modes if !TARGET_VECTOR_ELEN_64. */ ENTRY (RVVM8DI, TARGET_VECTOR_ELEN_64, LMUL_8, 8) @@ -152,61 +152,61 @@ ENTRY (RVVM1DF, TARGET_VECTOR_ELEN_FP_64, LMUL_1, 64) TUPLE_ENTRY (RVVM1x8QI, true, RVVM1QI, 8, LMUL_1, 8) TUPLE_ENTRY (RVVMF2x8QI, !TARGET_XTHEADVECTOR, RVVMF2QI, 8, LMUL_F2, 16) TUPLE_ENTRY (RVVMF4x8QI, !TARGET_XTHEADVECTOR, RVVMF4QI, 8, LMUL_F4, 32) -TUPLE_ENTRY (RVVMF8x8QI, TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF8QI, 8, LMUL_F8, 64) +TUPLE_ENTRY (RVVMF8x8QI, TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF8QI, 8, LMUL_F8, 64) TUPLE_ENTRY (RVVM1x7QI, true, RVVM1QI, 7, LMUL_1, 8) TUPLE_ENTRY (RVVMF2x7QI, !TARGET_XTHEADVECTOR, RVVMF2QI, 7, LMUL_F2, 16) TUPLE_ENTRY (RVVMF4x7QI, !TARGET_XTHEADVECTOR, RVVMF4QI, 7, LMUL_F4, 32) -TUPLE_ENTRY (RVVMF8x7QI, TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF8QI, 7, LMUL_F8, 64) +TUPLE_ENTRY (RVVMF8x7QI, TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF8QI, 7, LMUL_F8, 64) TUPLE_ENTRY (RVVM1x6QI, true, RVVM1QI, 6, LMUL_1, 8) TUPLE_ENTRY (RVVMF2x6QI, !TARGET_XTHEADVECTOR, RVVMF2QI, 6, LMUL_F2, 16) TUPLE_ENTRY (RVVMF4x6QI, !TARGET_XTHEADVECTOR, RVVMF4QI, 6, LMUL_F4, 32) -TUPLE_ENTRY (RVVMF8x6QI, TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF8QI, 6, LMUL_F8, 64) +TUPLE_ENTRY (RVVMF8x6QI, TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF8QI, 6, LMUL_F8, 64) TUPLE_ENTRY (RVVM1x5QI, true, RVVM1QI, 5, LMUL_1, 8) TUPLE_ENTRY (RVVMF2x5QI, !TARGET_XTHEADVECTOR, RVVMF2QI, 5, LMUL_F2, 16) TUPLE_ENTRY (RVVMF4x5QI, !TARGET_XTHEADVECTOR, RVVMF4QI, 5, LMUL_F4, 32) -TUPLE_ENTRY (RVVMF8x5QI, TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF8QI, 5, LMUL_F8, 64) +TUPLE_ENTRY (RVVMF8x5QI, TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF8QI, 5, LMUL_F8, 64) TUPLE_ENTRY (RVVM2x4QI, true, RVVM2QI, 4, LMUL_2, 4) TUPLE_ENTRY (RVVM1x4QI, true, RVVM1QI, 4, LMUL_1, 8) TUPLE_ENTRY (RVVMF2x4QI, !TARGET_XTHEADVECTOR, RVVMF2QI, 4, LMUL_F2, 16) TUPLE_ENTRY (RVVMF4x4QI, !TARGET_XTHEADVECTOR, RVVMF4QI, 4, LMUL_F4, 32) -TUPLE_ENTRY (RVVMF8x4QI, TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF8QI, 4, LMUL_F8, 64) +TUPLE_ENTRY (RVVMF8x4QI, TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF8QI, 
4, LMUL_F8, 64) TUPLE_ENTRY (RVVM2x3QI, true, RVVM2QI, 3, LMUL_2, 4) TUPLE_ENTRY (RVVM1x3QI, true, RVVM1QI, 3, LMUL_1, 8) TUPLE_ENTRY (RVVMF2x3QI, !TARGET_XTHEADVECTOR, RVVMF2QI, 3, LMUL_F2, 16) TUPLE_ENTRY (RVVMF4x3QI, !TARGET_XTHEADVECTOR, RVVMF4QI, 3, LMUL_F4, 32) -TUPLE_ENTRY (RVVMF8x3QI, TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF8QI, 3, LMUL_F8, 64) +TUPLE_ENTRY (RVVMF8x3QI, TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF8QI, 3, LMUL_F8, 64) TUPLE_ENTRY (RVVM4x2QI, true, RVVM4QI, 2, LMUL_4, 2) TUPLE_ENTRY (RVVM2x2QI, true, RVVM2QI, 2, LMUL_2, 4) TUPLE_ENTRY (RVVM1x2QI, true, RVVM1QI, 2, LMUL_1, 8) TUPLE_ENTRY (RVVMF2x2QI, !TARGET_XTHEADVECTOR, RVVMF2QI, 2, LMUL_F2, 16) TUPLE_ENTRY (RVVMF4x2QI, !TARGET_XTHEADVECTOR, RVVMF4QI, 2, LMUL_F4, 32) -TUPLE_ENTRY (RVVMF8x2QI, TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF8QI, 2, LMUL_F8, 64) +TUPLE_ENTRY (RVVMF8x2QI, TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF8QI, 2, LMUL_F8, 64) TUPLE_ENTRY (RVVM1x8HI, true, RVVM1HI, 8, LMUL_1, 16) TUPLE_ENTRY (RVVMF2x8HI, !TARGET_XTHEADVECTOR, RVVMF2HI, 8, LMUL_F2, 32) -TUPLE_ENTRY (RVVMF4x8HI, TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF4HI, 8, LMUL_F4, 64) +TUPLE_ENTRY (RVVMF4x8HI, TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF4HI, 8, LMUL_F4, 64) TUPLE_ENTRY (RVVM1x7HI, true, RVVM1HI, 7, LMUL_1, 16) TUPLE_ENTRY (RVVMF2x7HI, !TARGET_XTHEADVECTOR, RVVMF2HI, 7, LMUL_F2, 32) -TUPLE_ENTRY (RVVMF4x7HI, TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF4HI, 7, LMUL_F4, 64) +TUPLE_ENTRY (RVVMF4x7HI, TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF4HI, 7, LMUL_F4, 64) TUPLE_ENTRY (RVVM1x6HI, true, RVVM1HI, 6, LMUL_1, 16) TUPLE_ENTRY (RVVMF2x6HI, !TARGET_XTHEADVECTOR, RVVMF2HI, 6, LMUL_F2, 32) -TUPLE_ENTRY (RVVMF4x6HI, TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF4HI, 6, LMUL_F4, 64) +TUPLE_ENTRY (RVVMF4x6HI, TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF4HI, 6, LMUL_F4, 64) TUPLE_ENTRY (RVVM1x5HI, true, RVVM1HI, 5, LMUL_1, 16) TUPLE_ENTRY (RVVMF2x5HI, !TARGET_XTHEADVECTOR, RVVMF2HI, 5, LMUL_F2, 32) -TUPLE_ENTRY (RVVMF4x5HI, TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF4HI, 5, LMUL_F4, 64) +TUPLE_ENTRY (RVVMF4x5HI, TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF4HI, 5, LMUL_F4, 64) TUPLE_ENTRY (RVVM2x4HI, true, RVVM2HI, 4, LMUL_2, 8) TUPLE_ENTRY (RVVM1x4HI, true, RVVM1HI, 4, LMUL_1, 16) TUPLE_ENTRY (RVVMF2x4HI, !TARGET_XTHEADVECTOR, RVVMF2HI, 4, LMUL_F2, 32) -TUPLE_ENTRY (RVVMF4x4HI, TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF4HI, 4, LMUL_F4, 64) +TUPLE_ENTRY (RVVMF4x4HI, TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF4HI, 4, LMUL_F4, 64) TUPLE_ENTRY (RVVM2x3HI, true, RVVM2HI, 3, LMUL_2, 8) TUPLE_ENTRY (RVVM1x3HI, true, RVVM1HI, 3, LMUL_1, 16) TUPLE_ENTRY (RVVMF2x3HI, !TARGET_XTHEADVECTOR, RVVMF2HI, 3, LMUL_F2, 32) -TUPLE_ENTRY (RVVMF4x3HI, TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF4HI, 3, LMUL_F4, 64) +TUPLE_ENTRY (RVVMF4x3HI, TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF4HI, 3, LMUL_F4, 64) TUPLE_ENTRY (RVVM4x2HI, true, RVVM4HI, 2, LMUL_4, 4) TUPLE_ENTRY (RVVM2x2HI, true, RVVM2HI, 2, LMUL_2, 8) TUPLE_ENTRY (RVVM1x2HI, true, RVVM1HI, 2, LMUL_1, 16) TUPLE_ENTRY (RVVMF2x2HI, !TARGET_XTHEADVECTOR, RVVMF2HI, 2, LMUL_F2, 32) -TUPLE_ENTRY (RVVMF4x2HI, TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF4HI, 2, LMUL_F4, 64) +TUPLE_ENTRY (RVVMF4x2HI, TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF4HI, 2, LMUL_F4, 64) TUPLE_ENTRY (RVVM1x8BF, TARGET_VECTOR_ELEN_BF_16, RVVM1BF, 8, LMUL_1, 16) TUPLE_ENTRY (RVVMF2x8BF, 
TARGET_VECTOR_ELEN_BF_16, RVVMF2BF, 8, LMUL_F2, 32) @@ -236,67 +236,67 @@ TUPLE_ENTRY (RVVMF4x2BF, TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN > 32, RVVMF TUPLE_ENTRY (RVVM1x8HF, TARGET_VECTOR_ELEN_FP_16, RVVM1HF, 8, LMUL_1, 16) TUPLE_ENTRY (RVVMF2x8HF, TARGET_VECTOR_ELEN_FP_16 && !TARGET_XTHEADVECTOR, RVVMF2HF, 8, LMUL_F2, 32) -TUPLE_ENTRY (RVVMF4x8HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF4HF, 8, LMUL_F4, 64) +TUPLE_ENTRY (RVVMF4x8HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF4HF, 8, LMUL_F4, 64) TUPLE_ENTRY (RVVM1x7HF, TARGET_VECTOR_ELEN_FP_16, RVVM1HF, 7, LMUL_1, 16) TUPLE_ENTRY (RVVMF2x7HF, TARGET_VECTOR_ELEN_FP_16 && !TARGET_XTHEADVECTOR, RVVMF2HF, 7, LMUL_F2, 32) -TUPLE_ENTRY (RVVMF4x7HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF4HF, 7, LMUL_F4, 64) +TUPLE_ENTRY (RVVMF4x7HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF4HF, 7, LMUL_F4, 64) TUPLE_ENTRY (RVVM1x6HF, TARGET_VECTOR_ELEN_FP_16, RVVM1HF, 6, LMUL_1, 16) TUPLE_ENTRY (RVVMF2x6HF, TARGET_VECTOR_ELEN_FP_16 && !TARGET_XTHEADVECTOR, RVVMF2HF, 6, LMUL_F2, 32) -TUPLE_ENTRY (RVVMF4x6HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF4HF, 6, LMUL_F4, 64) +TUPLE_ENTRY (RVVMF4x6HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF4HF, 6, LMUL_F4, 64) TUPLE_ENTRY (RVVM1x5HF, TARGET_VECTOR_ELEN_FP_16, RVVM1HF, 5, LMUL_1, 16) TUPLE_ENTRY (RVVMF2x5HF, TARGET_VECTOR_ELEN_FP_16 && !TARGET_XTHEADVECTOR, RVVMF2HF, 5, LMUL_F2, 32) -TUPLE_ENTRY (RVVMF4x5HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF4HF, 5, LMUL_F4, 64) +TUPLE_ENTRY (RVVMF4x5HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF4HF, 5, LMUL_F4, 64) TUPLE_ENTRY (RVVM2x4HF, TARGET_VECTOR_ELEN_FP_16, RVVM2HF, 4, LMUL_2, 8) TUPLE_ENTRY (RVVM1x4HF, TARGET_VECTOR_ELEN_FP_16, RVVM1HF, 4, LMUL_1, 16) TUPLE_ENTRY (RVVMF2x4HF, TARGET_VECTOR_ELEN_FP_16 && !TARGET_XTHEADVECTOR, RVVMF2HF, 4, LMUL_F2, 32) -TUPLE_ENTRY (RVVMF4x4HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF4HF, 4, LMUL_F4, 64) +TUPLE_ENTRY (RVVMF4x4HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF4HF, 4, LMUL_F4, 64) TUPLE_ENTRY (RVVM2x3HF, TARGET_VECTOR_ELEN_FP_16, RVVM2HF, 3, LMUL_2, 8) TUPLE_ENTRY (RVVM1x3HF, TARGET_VECTOR_ELEN_FP_16, RVVM1HF, 3, LMUL_1, 16) TUPLE_ENTRY (RVVMF2x3HF, TARGET_VECTOR_ELEN_FP_16 && !TARGET_XTHEADVECTOR, RVVMF2HF, 3, LMUL_F2, 32) -TUPLE_ENTRY (RVVMF4x3HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF4HF, 3, LMUL_F4, 64) +TUPLE_ENTRY (RVVMF4x3HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF4HF, 3, LMUL_F4, 64) TUPLE_ENTRY (RVVM4x2HF, TARGET_VECTOR_ELEN_FP_16, RVVM4HF, 2, LMUL_4, 4) TUPLE_ENTRY (RVVM2x2HF, TARGET_VECTOR_ELEN_FP_16, RVVM2HF, 2, LMUL_2, 8) TUPLE_ENTRY (RVVM1x2HF, TARGET_VECTOR_ELEN_FP_16, RVVM1HF, 2, LMUL_1, 16) TUPLE_ENTRY (RVVMF2x2HF, TARGET_VECTOR_ELEN_FP_16 && !TARGET_XTHEADVECTOR, RVVMF2HF, 2, LMUL_F2, 32) -TUPLE_ENTRY (RVVMF4x2HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF4HF, 2, LMUL_F4, 64) +TUPLE_ENTRY (RVVMF4x2HF, TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF4HF, 2, LMUL_F4, 64) TUPLE_ENTRY (RVVM1x8SI, true, RVVM1SI, 8, LMUL_1, 16) -TUPLE_ENTRY (RVVMF2x8SI, (TARGET_MIN_VLEN > 32) && 
!TARGET_XTHEADVECTOR, RVVMF2SI, 8, LMUL_F2, 32) +TUPLE_ENTRY (RVVMF2x8SI, (TARGET_VECTOR_ELEN_64) && !TARGET_XTHEADVECTOR, RVVMF2SI, 8, LMUL_F2, 32) TUPLE_ENTRY (RVVM1x7SI, true, RVVM1SI, 7, LMUL_1, 16) -TUPLE_ENTRY (RVVMF2x7SI, (TARGET_MIN_VLEN > 32) && !TARGET_XTHEADVECTOR, RVVMF2SI, 7, LMUL_F2, 32) +TUPLE_ENTRY (RVVMF2x7SI, (TARGET_VECTOR_ELEN_64) && !TARGET_XTHEADVECTOR, RVVMF2SI, 7, LMUL_F2, 32) TUPLE_ENTRY (RVVM1x6SI, true, RVVM1SI, 6, LMUL_1, 16) -TUPLE_ENTRY (RVVMF2x6SI, TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF2SI, 6, LMUL_F2, 32) +TUPLE_ENTRY (RVVMF2x6SI, TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF2SI, 6, LMUL_F2, 32) TUPLE_ENTRY (RVVM1x5SI, true, RVVM1SI, 5, LMUL_1, 16) -TUPLE_ENTRY (RVVMF2x5SI, TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF2SI, 5, LMUL_F2, 32) +TUPLE_ENTRY (RVVMF2x5SI, TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF2SI, 5, LMUL_F2, 32) TUPLE_ENTRY (RVVM2x4SI, true, RVVM2SI, 4, LMUL_2, 8) TUPLE_ENTRY (RVVM1x4SI, true, RVVM1SI, 4, LMUL_1, 16) -TUPLE_ENTRY (RVVMF2x4SI, TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF2SI, 4, LMUL_F2, 32) +TUPLE_ENTRY (RVVMF2x4SI, TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF2SI, 4, LMUL_F2, 32) TUPLE_ENTRY (RVVM2x3SI, true, RVVM2SI, 3, LMUL_2, 8) TUPLE_ENTRY (RVVM1x3SI, true, RVVM1SI, 3, LMUL_1, 16) -TUPLE_ENTRY (RVVMF2x3SI, TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF2SI, 3, LMUL_F2, 32) +TUPLE_ENTRY (RVVMF2x3SI, TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF2SI, 3, LMUL_F2, 32) TUPLE_ENTRY (RVVM4x2SI, true, RVVM4SI, 2, LMUL_4, 4) TUPLE_ENTRY (RVVM2x2SI, true, RVVM2SI, 2, LMUL_2, 8) TUPLE_ENTRY (RVVM1x2SI, true, RVVM1SI, 2, LMUL_1, 16) -TUPLE_ENTRY (RVVMF2x2SI, TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF2SI, 2, LMUL_F2, 32) +TUPLE_ENTRY (RVVMF2x2SI, TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF2SI, 2, LMUL_F2, 32) TUPLE_ENTRY (RVVM1x8SF, TARGET_VECTOR_ELEN_FP_32, RVVM1SF, 8, LMUL_1, 16) -TUPLE_ENTRY (RVVMF2x8SF, TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF2SF, 8, LMUL_F2, 32) +TUPLE_ENTRY (RVVMF2x8SF, TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF2SF, 8, LMUL_F2, 32) TUPLE_ENTRY (RVVM1x7SF, TARGET_VECTOR_ELEN_FP_32, RVVM1SF, 7, LMUL_1, 16) -TUPLE_ENTRY (RVVMF2x7SF, TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF2SF, 7, LMUL_F2, 32) +TUPLE_ENTRY (RVVMF2x7SF, TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF2SF, 7, LMUL_F2, 32) TUPLE_ENTRY (RVVM1x6SF, TARGET_VECTOR_ELEN_FP_32, RVVM1SF, 6, LMUL_1, 16) -TUPLE_ENTRY (RVVMF2x6SF, TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF2SF, 6, LMUL_F2, 32) +TUPLE_ENTRY (RVVMF2x6SF, TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF2SF, 6, LMUL_F2, 32) TUPLE_ENTRY (RVVM1x5SF, TARGET_VECTOR_ELEN_FP_32, RVVM1SF, 5, LMUL_1, 16) -TUPLE_ENTRY (RVVMF2x5SF, TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF2SF, 5, LMUL_F2, 32) +TUPLE_ENTRY (RVVMF2x5SF, TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF2SF, 5, LMUL_F2, 32) TUPLE_ENTRY (RVVM2x4SF, TARGET_VECTOR_ELEN_FP_32, RVVM2SF, 4, LMUL_2, 8) TUPLE_ENTRY (RVVM1x4SF, TARGET_VECTOR_ELEN_FP_32, RVVM1SF, 4, LMUL_1, 16) -TUPLE_ENTRY (RVVMF2x4SF, TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF2SF, 4, LMUL_F2, 32) +TUPLE_ENTRY (RVVMF2x4SF, TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, 
RVVMF2SF, 4, LMUL_F2, 32) TUPLE_ENTRY (RVVM2x3SF, TARGET_VECTOR_ELEN_FP_32, RVVM2SF, 3, LMUL_2, 8) TUPLE_ENTRY (RVVM1x3SF, TARGET_VECTOR_ELEN_FP_32, RVVM1SF, 3, LMUL_1, 16) -TUPLE_ENTRY (RVVMF2x3SF, TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF2SF, 3, LMUL_F2, 32) +TUPLE_ENTRY (RVVMF2x3SF, TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF2SF, 3, LMUL_F2, 32) TUPLE_ENTRY (RVVM4x2SF, TARGET_VECTOR_ELEN_FP_32, RVVM4SF, 2, LMUL_4, 4) TUPLE_ENTRY (RVVM2x2SF, TARGET_VECTOR_ELEN_FP_32, RVVM2SF, 2, LMUL_2, 8) TUPLE_ENTRY (RVVM1x2SF, TARGET_VECTOR_ELEN_FP_32, RVVM1SF, 2, LMUL_1, 16) -TUPLE_ENTRY (RVVMF2x2SF, TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32 && !TARGET_XTHEADVECTOR, RVVMF2SF, 2, LMUL_F2, 32) +TUPLE_ENTRY (RVVMF2x2SF, TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64 && !TARGET_XTHEADVECTOR, RVVMF2SF, 2, LMUL_F2, 32) TUPLE_ENTRY (RVVM1x8DI, TARGET_VECTOR_ELEN_64, RVVM1DI, 8, LMUL_1, 16) TUPLE_ENTRY (RVVM1x7DI, TARGET_VECTOR_ELEN_64, RVVM1DI, 7, LMUL_1, 16) diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md index c1bd739..f8da71b 100644 --- a/gcc/config/riscv/vector-iterators.md +++ b/gcc/config/riscv/vector-iterators.md @@ -128,9 +128,9 @@ ;; Subset of VI with fractional LMUL types (define_mode_iterator VI_FRAC [ - RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32") - RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32") - (RVVMF2SI "TARGET_MIN_VLEN > 32") + RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_VECTOR_ELEN_64") + RVVMF2HI (RVVMF4HI "TARGET_VECTOR_ELEN_64") + (RVVMF2SI "TARGET_VECTOR_ELEN_64") ]) ;; Subset of VI with non-fractional LMUL types @@ -154,10 +154,10 @@ (define_mode_iterator VF [ (RVVM8HF "TARGET_ZVFH") (RVVM4HF "TARGET_ZVFH") (RVVM2HF "TARGET_ZVFH") (RVVM1HF "TARGET_ZVFH") (RVVMF2HF "TARGET_ZVFH") - (RVVMF4HF "TARGET_ZVFH && TARGET_MIN_VLEN > 32") + (RVVMF4HF "TARGET_ZVFH && TARGET_VECTOR_ELEN_64") (RVVM8SF "TARGET_VECTOR_ELEN_FP_32") (RVVM4SF "TARGET_VECTOR_ELEN_FP_32") (RVVM2SF "TARGET_VECTOR_ELEN_FP_32") - (RVVM1SF "TARGET_VECTOR_ELEN_FP_32") (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") + (RVVM1SF "TARGET_VECTOR_ELEN_FP_32") (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64") (RVVM8DF "TARGET_VECTOR_ELEN_FP_64") (RVVM4DF "TARGET_VECTOR_ELEN_FP_64") (RVVM2DF "TARGET_VECTOR_ELEN_FP_64") (RVVM1DF "TARGET_VECTOR_ELEN_FP_64") @@ -169,16 +169,16 @@ (RVVM2BF "TARGET_VECTOR_ELEN_BF_16") (RVVM1BF "TARGET_VECTOR_ELEN_BF_16") (RVVMF2BF "TARGET_VECTOR_ELEN_BF_16") - (RVVMF4BF "TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN > 32") + (RVVMF4BF "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_64") ]) (define_mode_iterator VF_ZVFHMIN [ (RVVM8HF "TARGET_VECTOR_ELEN_FP_16") (RVVM4HF "TARGET_VECTOR_ELEN_FP_16") (RVVM2HF "TARGET_VECTOR_ELEN_FP_16") (RVVM1HF "TARGET_VECTOR_ELEN_FP_16") (RVVMF2HF "TARGET_VECTOR_ELEN_FP_16") - (RVVMF4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32") + (RVVMF4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_64") (RVVM8SF "TARGET_VECTOR_ELEN_FP_32") (RVVM4SF "TARGET_VECTOR_ELEN_FP_32") (RVVM2SF "TARGET_VECTOR_ELEN_FP_32") - (RVVM1SF "TARGET_VECTOR_ELEN_FP_32") (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") + (RVVM1SF "TARGET_VECTOR_ELEN_FP_32") (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64") (RVVM8DF "TARGET_VECTOR_ELEN_FP_64") (RVVM4DF "TARGET_VECTOR_ELEN_FP_64") (RVVM2DF "TARGET_VECTOR_ELEN_FP_64") (RVVM1DF "TARGET_VECTOR_ELEN_FP_64") @@ -305,20 +305,20 @@ ]) (define_mode_iterator VEEWEXT2 [ - RVVM8HI RVVM4HI 
RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32") + RVVM8HI RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_VECTOR_ELEN_64") (RVVM8BF "TARGET_VECTOR_ELEN_BF_16") (RVVM4BF "TARGET_VECTOR_ELEN_BF_16") (RVVM2BF "TARGET_VECTOR_ELEN_BF_16") (RVVM1BF "TARGET_VECTOR_ELEN_BF_16") (RVVMF2BF "TARGET_VECTOR_ELEN_BF_16") - (RVVMF4BF "TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN > 32") + (RVVMF4BF "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_64") (RVVM8HF "TARGET_VECTOR_ELEN_FP_16") (RVVM4HF "TARGET_VECTOR_ELEN_FP_16") (RVVM2HF "TARGET_VECTOR_ELEN_FP_16") (RVVM1HF "TARGET_VECTOR_ELEN_FP_16") (RVVMF2HF "TARGET_VECTOR_ELEN_FP_16") - (RVVMF4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32") + (RVVMF4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_64") - RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") + RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_VECTOR_ELEN_64") (RVVM8SF "TARGET_VECTOR_ELEN_FP_32") (RVVM4SF "TARGET_VECTOR_ELEN_FP_32") (RVVM2SF "TARGET_VECTOR_ELEN_FP_32") - (RVVM1SF "TARGET_VECTOR_ELEN_FP_32") (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") + (RVVM1SF "TARGET_VECTOR_ELEN_FP_32") (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64") (RVVM8DI "TARGET_VECTOR_ELEN_64") (RVVM4DI "TARGET_VECTOR_ELEN_64") (RVVM2DI "TARGET_VECTOR_ELEN_64") (RVVM1DI "TARGET_VECTOR_ELEN_64") @@ -328,10 +328,10 @@ ]) (define_mode_iterator VEEWEXT4 [ - RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") + RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_VECTOR_ELEN_64") (RVVM8SF "TARGET_VECTOR_ELEN_FP_32") (RVVM4SF "TARGET_VECTOR_ELEN_FP_32") (RVVM2SF "TARGET_VECTOR_ELEN_FP_32") - (RVVM1SF "TARGET_VECTOR_ELEN_FP_32") (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") + (RVVM1SF "TARGET_VECTOR_ELEN_FP_32") (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64") (RVVM8DI "TARGET_VECTOR_ELEN_64") (RVVM4DI "TARGET_VECTOR_ELEN_64") (RVVM2DI "TARGET_VECTOR_ELEN_64") (RVVM1DI "TARGET_VECTOR_ELEN_64") @@ -349,68 +349,68 @@ ]) (define_mode_iterator VEEWTRUNC2 [ - RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32") + RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_VECTOR_ELEN_64") - RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32") + RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_VECTOR_ELEN_64") (RVVM4BF "TARGET_VECTOR_ELEN_BF_16") (RVVM2BF "TARGET_VECTOR_ELEN_BF_16") (RVVM1BF "TARGET_VECTOR_ELEN_BF_16") (RVVMF2BF "TARGET_VECTOR_ELEN_BF_16") - (RVVMF4BF "TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN > 32") + (RVVMF4BF "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_64") (RVVM4HF "TARGET_VECTOR_ELEN_FP_16") (RVVM2HF "TARGET_VECTOR_ELEN_FP_16") (RVVM1HF "TARGET_VECTOR_ELEN_FP_16") (RVVMF2HF "TARGET_VECTOR_ELEN_FP_16") - (RVVMF4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32") + (RVVMF4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_64") (RVVM4SI "TARGET_64BIT") (RVVM2SI "TARGET_64BIT") (RVVM1SI "TARGET_64BIT") - (RVVMF2SI "TARGET_MIN_VLEN > 32 && TARGET_64BIT") + (RVVMF2SI "TARGET_VECTOR_ELEN_64 && TARGET_64BIT") (RVVM4SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_64BIT") (RVVM2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_64BIT") (RVVM1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_64BIT") - (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32 && TARGET_64BIT") + (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64 && TARGET_64BIT") ]) (define_mode_iterator VEEWTRUNC4 [ - RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32") + RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI 
(RVVMF8QI "TARGET_VECTOR_ELEN_64") (RVVM2HI "TARGET_64BIT") (RVVM1HI "TARGET_64BIT") (RVVMF2HI "TARGET_64BIT") - (RVVMF4HI "TARGET_MIN_VLEN > 32 && TARGET_64BIT") + (RVVMF4HI "TARGET_VECTOR_ELEN_64 && TARGET_64BIT") (RVVM2BF "TARGET_VECTOR_ELEN_BF_16") (RVVM1BF "TARGET_VECTOR_ELEN_BF_16") (RVVMF2BF "TARGET_VECTOR_ELEN_BF_16") - (RVVMF4BF "TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN > 32 && TARGET_64BIT") + (RVVMF4BF "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_64 && TARGET_64BIT") (RVVM2HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_64BIT") (RVVM1HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_64BIT") (RVVMF2HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_64BIT") - (RVVMF4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32 && TARGET_64BIT") + (RVVMF4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_64 && TARGET_64BIT") ]) (define_mode_iterator VEEWTRUNC8 [ (RVVM1QI "TARGET_64BIT") (RVVMF2QI "TARGET_64BIT") (RVVMF4QI "TARGET_64BIT") - (RVVMF8QI "TARGET_MIN_VLEN > 32 && TARGET_64BIT") + (RVVMF8QI "TARGET_VECTOR_ELEN_64 && TARGET_64BIT") ]) (define_mode_iterator VEI16 [ - RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32") + RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_VECTOR_ELEN_64") - RVVM8HI RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32") + RVVM8HI RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_VECTOR_ELEN_64") (RVVM8HF "TARGET_VECTOR_ELEN_FP_16") (RVVM4HF "TARGET_VECTOR_ELEN_FP_16") (RVVM2HF "TARGET_VECTOR_ELEN_FP_16") (RVVM1HF "TARGET_VECTOR_ELEN_FP_16") (RVVMF2HF "TARGET_VECTOR_ELEN_FP_16") - (RVVMF4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32") + (RVVMF4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_64") - RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") + RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_VECTOR_ELEN_64") (RVVM8SF "TARGET_VECTOR_ELEN_FP_32") (RVVM4SF "TARGET_VECTOR_ELEN_FP_32") (RVVM2SF "TARGET_VECTOR_ELEN_FP_32") - (RVVM1SF "TARGET_VECTOR_ELEN_FP_32") (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") + (RVVM1SF "TARGET_VECTOR_ELEN_FP_32") (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64") (RVVM8DI "TARGET_VECTOR_ELEN_64") (RVVM4DI "TARGET_VECTOR_ELEN_64") (RVVM2DI "TARGET_VECTOR_ELEN_64") (RVVM1DI "TARGET_VECTOR_ELEN_64") @@ -499,11 +499,11 @@ ]) (define_mode_iterator VFULLI [ - RVVM8QI RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32") + RVVM8QI RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_VECTOR_ELEN_64") - RVVM8HI RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32") + RVVM8HI RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_VECTOR_ELEN_64") - RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") + RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_VECTOR_ELEN_64") (RVVM8DI "TARGET_FULL_V") (RVVM4DI "TARGET_FULL_V") (RVVM2DI "TARGET_FULL_V") (RVVM1DI "TARGET_FULL_V") @@ -556,17 +556,17 @@ ]) (define_mode_iterator VI_QH [ - RVVM8QI RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32") + RVVM8QI RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_VECTOR_ELEN_64") - RVVM8HI RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32") + RVVM8HI RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_VECTOR_ELEN_64") ]) (define_mode_iterator VI_QHS [ - RVVM8QI RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32") + RVVM8QI RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_VECTOR_ELEN_64") - RVVM8HI RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI 
"TARGET_MIN_VLEN > 32") + RVVM8HI RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_VECTOR_ELEN_64") - RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") + RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_VECTOR_ELEN_64") (V1QI "riscv_vector::vls_mode_valid_p (V1QImode)") (V2QI "riscv_vector::vls_mode_valid_p (V2QImode)") @@ -607,11 +607,11 @@ ]) (define_mode_iterator VI_QHS_NO_M8 [ - RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32") + RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_VECTOR_ELEN_64") - RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32") + RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_VECTOR_ELEN_64") - RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") + RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_VECTOR_ELEN_64") (V1QI "riscv_vector::vls_mode_valid_p (V1QImode)") (V2QI "riscv_vector::vls_mode_valid_p (V2QImode)") @@ -651,10 +651,10 @@ (define_mode_iterator VF_HS [ (RVVM8HF "TARGET_ZVFH") (RVVM4HF "TARGET_ZVFH") (RVVM2HF "TARGET_ZVFH") (RVVM1HF "TARGET_ZVFH") (RVVMF2HF "TARGET_ZVFH") - (RVVMF4HF "TARGET_ZVFH && TARGET_MIN_VLEN > 32") + (RVVMF4HF "TARGET_ZVFH && TARGET_VECTOR_ELEN_64") (RVVM8SF "TARGET_VECTOR_ELEN_FP_32") (RVVM4SF "TARGET_VECTOR_ELEN_FP_32") (RVVM2SF "TARGET_VECTOR_ELEN_FP_32") - (RVVM1SF "TARGET_VECTOR_ELEN_FP_32") (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") + (RVVM1SF "TARGET_VECTOR_ELEN_FP_32") (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64") (V1HF "riscv_vector::vls_mode_valid_p (V1HFmode) && TARGET_ZVFH") (V2HF "riscv_vector::vls_mode_valid_p (V2HFmode) && TARGET_ZVFH") @@ -686,11 +686,11 @@ (RVVM2HF "TARGET_ZVFH") (RVVM1HF "TARGET_ZVFH") (RVVMF2HF "TARGET_ZVFH") - (RVVMF4HF "TARGET_ZVFH && TARGET_MIN_VLEN > 32") + (RVVMF4HF "TARGET_ZVFH && TARGET_VECTOR_ELEN_64") (RVVM4SF "TARGET_VECTOR_ELEN_FP_32") (RVVM2SF "TARGET_VECTOR_ELEN_FP_32") (RVVM1SF "TARGET_VECTOR_ELEN_FP_32") - (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") + (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64") (V1HF "riscv_vector::vls_mode_valid_p (V1HFmode) && TARGET_ZVFH") (V2HF "riscv_vector::vls_mode_valid_p (V2HFmode) && TARGET_ZVFH") @@ -721,11 +721,11 @@ ]) (define_mode_iterator V_VLSI_QHS [ - RVVM8QI RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32") + RVVM8QI RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_VECTOR_ELEN_64") - RVVM8HI RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32") + RVVM8HI RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_VECTOR_ELEN_64") - RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") + RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_VECTOR_ELEN_64") (V1QI "riscv_vector::vls_mode_valid_p (V1QImode)") (V2QI "riscv_vector::vls_mode_valid_p (V2QImode)") @@ -803,13 +803,13 @@ ;; E.g. when index mode = RVVM8QImode and Pmode = SImode, if it is not zero_extend or ;; scalar != 1, such gather/scatter is not allowed since we don't have RVVM32SImode. 
(define_mode_iterator RATIO64 [ - (RVVMF8QI "TARGET_MIN_VLEN > 32") - (RVVMF4HI "TARGET_MIN_VLEN > 32") - (RVVMF2SI "TARGET_MIN_VLEN > 32") + (RVVMF8QI "TARGET_VECTOR_ELEN_64") + (RVVMF4HI "TARGET_VECTOR_ELEN_64") + (RVVMF2SI "TARGET_VECTOR_ELEN_64") (RVVM1DI "TARGET_VECTOR_ELEN_64") - (RVVMF4BF "TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN > 32") - (RVVMF4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32") - (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") + (RVVMF4BF "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_64") + (RVVMF4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_64") + (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64") (RVVM1DF "TARGET_VECTOR_ELEN_FP_64") ]) @@ -867,9 +867,9 @@ ]) (define_mode_iterator RATIO64I [ - (RVVMF8QI "TARGET_MIN_VLEN > 32") - (RVVMF4HI "TARGET_MIN_VLEN > 32") - (RVVMF2SI "TARGET_MIN_VLEN > 32") + (RVVMF8QI "TARGET_VECTOR_ELEN_64") + (RVVMF4HI "TARGET_VECTOR_ELEN_64") + (RVVMF2SI "TARGET_VECTOR_ELEN_64") (RVVM1DI "TARGET_VECTOR_ELEN_64 && TARGET_64BIT") ]) @@ -929,23 +929,23 @@ ]) (define_mode_iterator V_FRACT [ - RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32") + RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_VECTOR_ELEN_64") - RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32") + RVVMF2HI (RVVMF4HI "TARGET_VECTOR_ELEN_64") - (RVVMF2BF "TARGET_VECTOR_ELEN_BF_16") (RVVMF4BF "TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN > 32") + (RVVMF2BF "TARGET_VECTOR_ELEN_BF_16") (RVVMF4BF "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_64") - (RVVMF2HF "TARGET_VECTOR_ELEN_FP_16") (RVVMF4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32") + (RVVMF2HF "TARGET_VECTOR_ELEN_FP_16") (RVVMF4HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_64") - (RVVMF2SI "TARGET_MIN_VLEN > 32") + (RVVMF2SI "TARGET_VECTOR_ELEN_64") - (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") + (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64") ]) (define_mode_iterator VWEXTI [ - RVVM8HI RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32") + RVVM8HI RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_VECTOR_ELEN_64") - RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") + RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_VECTOR_ELEN_64") (RVVM8DI "TARGET_VECTOR_ELEN_64") (RVVM4DI "TARGET_VECTOR_ELEN_64") (RVVM2DI "TARGET_VECTOR_ELEN_64") (RVVM1DI "TARGET_VECTOR_ELEN_64") @@ -991,7 +991,7 @@ (RVVM4SF "TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_FP_32") (RVVM2SF "TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_FP_32") (RVVM1SF "TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_FP_32") - (RVVMF2SF "TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") + (RVVMF2SF "TARGET_VECTOR_ELEN_FP_16 && TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64") (RVVM8DF "TARGET_VECTOR_ELEN_FP_64") (RVVM4DF "TARGET_VECTOR_ELEN_FP_64") (RVVM2DF "TARGET_VECTOR_ELEN_FP_64") (RVVM1DF "TARGET_VECTOR_ELEN_FP_64") @@ -1024,7 +1024,7 @@ (RVVM4SF "TARGET_ZVFH && TARGET_VECTOR_ELEN_FP_32") (RVVM2SF "TARGET_ZVFH && TARGET_VECTOR_ELEN_FP_32") (RVVM1SF "TARGET_ZVFH && TARGET_VECTOR_ELEN_FP_32") - (RVVMF2SF "TARGET_ZVFH && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") + (RVVMF2SF "TARGET_ZVFH && TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64") (RVVM8DF "TARGET_VECTOR_ELEN_FP_64") (RVVM4DF "TARGET_VECTOR_ELEN_FP_64") (RVVM2DF "TARGET_VECTOR_ELEN_FP_64") (RVVM1DF "TARGET_VECTOR_ELEN_FP_64") @@ -1103,7 +1103,7 @@ ]) (define_mode_iterator VQEXTI [ - RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") + RVVM8SI RVVM4SI RVVM2SI 
RVVM1SI (RVVMF2SI "TARGET_VECTOR_ELEN_64") (RVVM8DI "TARGET_VECTOR_ELEN_64") (RVVM4DI "TARGET_VECTOR_ELEN_64") (RVVM2DI "TARGET_VECTOR_ELEN_64") (RVVM1DI "TARGET_VECTOR_ELEN_64") @@ -1164,27 +1164,27 @@ ]) (define_mode_iterator V1T [ - (RVVMF8x2QI "TARGET_MIN_VLEN > 32") - (RVVMF8x3QI "TARGET_MIN_VLEN > 32") - (RVVMF8x4QI "TARGET_MIN_VLEN > 32") - (RVVMF8x5QI "TARGET_MIN_VLEN > 32") - (RVVMF8x6QI "TARGET_MIN_VLEN > 32") - (RVVMF8x7QI "TARGET_MIN_VLEN > 32") - (RVVMF8x8QI "TARGET_MIN_VLEN > 32") - (RVVMF4x2HI "TARGET_MIN_VLEN > 32") - (RVVMF4x3HI "TARGET_MIN_VLEN > 32") - (RVVMF4x4HI "TARGET_MIN_VLEN > 32") - (RVVMF4x5HI "TARGET_MIN_VLEN > 32") - (RVVMF4x6HI "TARGET_MIN_VLEN > 32") - (RVVMF4x7HI "TARGET_MIN_VLEN > 32") - (RVVMF4x8HI "TARGET_MIN_VLEN > 32") - (RVVMF2x2SI "TARGET_MIN_VLEN > 32") - (RVVMF2x3SI "TARGET_MIN_VLEN > 32") - (RVVMF2x4SI "TARGET_MIN_VLEN > 32") - (RVVMF2x5SI "TARGET_MIN_VLEN > 32") - (RVVMF2x6SI "TARGET_MIN_VLEN > 32") - (RVVMF2x7SI "TARGET_MIN_VLEN > 32") - (RVVMF2x8SI "TARGET_MIN_VLEN > 32") + (RVVMF8x2QI "TARGET_VECTOR_ELEN_64") + (RVVMF8x3QI "TARGET_VECTOR_ELEN_64") + (RVVMF8x4QI "TARGET_VECTOR_ELEN_64") + (RVVMF8x5QI "TARGET_VECTOR_ELEN_64") + (RVVMF8x6QI "TARGET_VECTOR_ELEN_64") + (RVVMF8x7QI "TARGET_VECTOR_ELEN_64") + (RVVMF8x8QI "TARGET_VECTOR_ELEN_64") + (RVVMF4x2HI "TARGET_VECTOR_ELEN_64") + (RVVMF4x3HI "TARGET_VECTOR_ELEN_64") + (RVVMF4x4HI "TARGET_VECTOR_ELEN_64") + (RVVMF4x5HI "TARGET_VECTOR_ELEN_64") + (RVVMF4x6HI "TARGET_VECTOR_ELEN_64") + (RVVMF4x7HI "TARGET_VECTOR_ELEN_64") + (RVVMF4x8HI "TARGET_VECTOR_ELEN_64") + (RVVMF2x2SI "TARGET_VECTOR_ELEN_64") + (RVVMF2x3SI "TARGET_VECTOR_ELEN_64") + (RVVMF2x4SI "TARGET_VECTOR_ELEN_64") + (RVVMF2x5SI "TARGET_VECTOR_ELEN_64") + (RVVMF2x6SI "TARGET_VECTOR_ELEN_64") + (RVVMF2x7SI "TARGET_VECTOR_ELEN_64") + (RVVMF2x8SI "TARGET_VECTOR_ELEN_64") (RVVM1x2DI "TARGET_VECTOR_ELEN_64") (RVVM1x3DI "TARGET_VECTOR_ELEN_64") (RVVM1x4DI "TARGET_VECTOR_ELEN_64") @@ -1192,27 +1192,27 @@ (RVVM1x6DI "TARGET_VECTOR_ELEN_64") (RVVM1x7DI "TARGET_VECTOR_ELEN_64") (RVVM1x8DI "TARGET_VECTOR_ELEN_64") - (RVVMF4x2BF "TARGET_MIN_VLEN > 32 && TARGET_VECTOR_ELEN_BF_16") - (RVVMF4x3BF "TARGET_MIN_VLEN > 32 && TARGET_VECTOR_ELEN_BF_16") - (RVVMF4x4BF "TARGET_MIN_VLEN > 32 && TARGET_VECTOR_ELEN_BF_16") - (RVVMF4x5BF "TARGET_MIN_VLEN > 32 && TARGET_VECTOR_ELEN_BF_16") - (RVVMF4x6BF "TARGET_MIN_VLEN > 32 && TARGET_VECTOR_ELEN_BF_16") - (RVVMF4x7BF "TARGET_MIN_VLEN > 32 && TARGET_VECTOR_ELEN_BF_16") - (RVVMF4x8BF "TARGET_MIN_VLEN > 32 && TARGET_VECTOR_ELEN_BF_16") - (RVVMF4x2HF "TARGET_MIN_VLEN > 32 && TARGET_VECTOR_ELEN_FP_16") - (RVVMF4x3HF "TARGET_MIN_VLEN > 32 && TARGET_VECTOR_ELEN_FP_16") - (RVVMF4x4HF "TARGET_MIN_VLEN > 32 && TARGET_VECTOR_ELEN_FP_16") - (RVVMF4x5HF "TARGET_MIN_VLEN > 32 && TARGET_VECTOR_ELEN_FP_16") - (RVVMF4x6HF "TARGET_MIN_VLEN > 32 && TARGET_VECTOR_ELEN_FP_16") - (RVVMF4x7HF "TARGET_MIN_VLEN > 32 && TARGET_VECTOR_ELEN_FP_16") - (RVVMF4x8HF "TARGET_MIN_VLEN > 32 && TARGET_VECTOR_ELEN_FP_16") - (RVVMF2x2SF "TARGET_MIN_VLEN > 32 && TARGET_VECTOR_ELEN_FP_32") - (RVVMF2x3SF "TARGET_MIN_VLEN > 32 && TARGET_VECTOR_ELEN_FP_32") - (RVVMF2x4SF "TARGET_MIN_VLEN > 32 && TARGET_VECTOR_ELEN_FP_32") - (RVVMF2x5SF "TARGET_MIN_VLEN > 32 && TARGET_VECTOR_ELEN_FP_32") - (RVVMF2x6SF "TARGET_MIN_VLEN > 32 && TARGET_VECTOR_ELEN_FP_32") - (RVVMF2x7SF "TARGET_MIN_VLEN > 32 && TARGET_VECTOR_ELEN_FP_32") - (RVVMF2x8SF "TARGET_MIN_VLEN > 32 && TARGET_VECTOR_ELEN_FP_32") + (RVVMF4x2BF "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_BF_16") + 
(RVVMF4x3BF "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_BF_16") + (RVVMF4x4BF "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_BF_16") + (RVVMF4x5BF "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_BF_16") + (RVVMF4x6BF "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_BF_16") + (RVVMF4x7BF "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_BF_16") + (RVVMF4x8BF "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_BF_16") + (RVVMF4x2HF "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_16") + (RVVMF4x3HF "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_16") + (RVVMF4x4HF "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_16") + (RVVMF4x5HF "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_16") + (RVVMF4x6HF "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_16") + (RVVMF4x7HF "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_16") + (RVVMF4x8HF "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_16") + (RVVMF2x2SF "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32") + (RVVMF2x3SF "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32") + (RVVMF2x4SF "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32") + (RVVMF2x5SF "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32") + (RVVMF2x6SF "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32") + (RVVMF2x7SF "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32") + (RVVMF2x8SF "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32") (RVVM1x2DF "TARGET_VECTOR_ELEN_FP_64") (RVVM1x3DF "TARGET_VECTOR_ELEN_FP_64") (RVVM1x4DF "TARGET_VECTOR_ELEN_FP_64") @@ -1530,7 +1530,7 @@ (V4096BI "riscv_vector::vls_mode_valid_p (V4096BImode) && TARGET_MIN_VLEN >= 4096")]) (define_mode_iterator VB [ - (RVVMF64BI "TARGET_MIN_VLEN > 32") RVVMF32BI RVVMF16BI RVVMF8BI RVVMF4BI RVVMF2BI RVVM1BI + (RVVMF64BI "TARGET_VECTOR_ELEN_64") RVVMF32BI RVVMF16BI RVVMF8BI RVVMF4BI RVVMF2BI RVVM1BI ]) ;; Iterator for indexed loads and stores. We must disallow 64-bit indices on @@ -1539,11 +1539,11 @@ ;; VINDEXED [VI8 VI16 VI32 (VI64 "TARGET_64BIT")]. 
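A note on the guard substitution that runs through the hunks above: every mode whose condition changes from TARGET_MIN_VLEN > 32 to TARGET_VECTOR_ELEN_64 holds VLEN/64 elements — the fractional modes RVVMF8QI, RVVMF4HI, RVVMF2SI, their BF16/FP16/FP32 counterparts, their tuple variants, and the RVVMF64BI mask mode — which is presumably why their availability is now keyed to a 64-bit ELEN rather than to a minimum VLEN above 32; the DI/DF modes were already guarded by TARGET_VECTOR_ELEN_64 / TARGET_VECTOR_ELEN_FP_64 before this change. The C program below is a standalone sketch, not GCC source: it only tabulates the SEW/LMUL ratio for the three integer modes, assuming the usual RVV mode-naming convention (QI/HI/SI = 8/16/32-bit elements, MF2/MF4/MF8 = LMUL of 1/2, 1/4, 1/8).

/* Standalone sketch (not GCC code): the fractional modes whose guards
   change in this patch all have SEW / LMUL == 64, i.e. one element per
   64 bits of VLEN, so they line up with a 64-bit ELEN requirement.  */
#include <stdio.h>

struct rvv_mode { const char *name; int sew_bits; int lmul_denom; };

int main (void)
{
  /* Mode names are taken from the hunks above; SEW and LMUL values are
     assumed from the naming convention.  */
  const struct rvv_mode modes[] = {
    { "RVVMF8QI", 8, 8 },   /*  8-bit elements, LMUL = 1/8 */
    { "RVVMF4HI", 16, 4 },  /* 16-bit elements, LMUL = 1/4 */
    { "RVVMF2SI", 32, 2 },  /* 32-bit elements, LMUL = 1/2 */
  };
  for (unsigned i = 0; i < sizeof modes / sizeof modes[0]; i++)
    printf ("%s: SEW/LMUL = %d\n", modes[i].name,
            modes[i].sew_bits * modes[i].lmul_denom);
  return 0;
}

The same ratio-64 set reappears in the RATIO64 and RATIO64I iterators above, whose entries receive exactly this substitution.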
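The comment above also explains the shape of the VINDEXED iterator that follows: 64-bit index elements are only usable on 64-bit targets, while the ratio-64 fractional modes again depend on ELEN. As a reading aid, here is a plain-C sketch of the per-mode guard for the integer index modes only; the two boolean parameters stand in for GCC's TARGET_VECTOR_ELEN_64 and TARGET_64BIT macros, the function itself is illustrative and not part of GCC, and the floating-point entries carry extra TARGET_ZVFH / TARGET_VECTOR_ELEN_FP_* conditions that are not modelled here.

/* Illustrative only: mirrors the guards on the integer modes of the
   VINDEXED iterator below.  */
#include <stdbool.h>

struct vindexed_mode
{
  int sew_bits;              /* element width: 8, 16, 32 or 64 */
  bool ratio_64_fractional;  /* SEW/LMUL == 64, e.g. RVVMF8QI */
};

static bool
vindexed_integer_mode_ok (struct vindexed_mode m,
                          bool target_vector_elen_64, bool target_64bit)
{
  if (m.sew_bits == 64)
    /* RVVM1DI..RVVM8DI: need ELEN 64, and 64-bit indices are disallowed
       on 32-bit targets (see the comment above).  */
    return target_vector_elen_64 && target_64bit;
  if (m.ratio_64_fractional)
    /* RVVMF8QI, RVVMF4HI, RVVMF2SI: need ELEN 64.  */
    return target_vector_elen_64;
  /* The remaining integer index modes are unconditionally available.  */
  return true;
}

int main (void)
{
  /* Example: on a 32-bit target, RVVM1DI indices are rejected even
     with ELEN 64.  */
  struct vindexed_mode rvvm1di = { 64, false };
  return vindexed_integer_mode_ok (rvvm1di, true, false) ? 1 : 0;
}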
(define_mode_iterator VINDEXED [ - RVVM8QI RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32") + RVVM8QI RVVM4QI RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_VECTOR_ELEN_64") - RVVM8HI RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_MIN_VLEN > 32") + RVVM8HI RVVM4HI RVVM2HI RVVM1HI RVVMF2HI (RVVMF4HI "TARGET_VECTOR_ELEN_64") - RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") + RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_VECTOR_ELEN_64") (RVVM8DI "TARGET_VECTOR_ELEN_64 && TARGET_64BIT") (RVVM4DI "TARGET_VECTOR_ELEN_64 && TARGET_64BIT") @@ -1555,15 +1555,15 @@ (RVVM2BF "TARGET_VECTOR_ELEN_BF_16") (RVVM1BF "TARGET_VECTOR_ELEN_BF_16") (RVVMF2BF "TARGET_VECTOR_ELEN_BF_16") - (RVVMF4BF "TARGET_VECTOR_ELEN_BF_16 && TARGET_MIN_VLEN > 32") + (RVVMF4BF "TARGET_VECTOR_ELEN_BF_16 && TARGET_VECTOR_ELEN_64") (RVVM8HF "TARGET_ZVFH") (RVVM4HF "TARGET_ZVFH") (RVVM2HF "TARGET_ZVFH") (RVVM1HF "TARGET_ZVFH") (RVVMF2HF "TARGET_ZVFH") - (RVVMF4HF "TARGET_ZVFH && TARGET_MIN_VLEN > 32") + (RVVMF4HF "TARGET_ZVFH && TARGET_VECTOR_ELEN_64") (RVVM8SF "TARGET_VECTOR_ELEN_FP_32") (RVVM4SF "TARGET_VECTOR_ELEN_FP_32") (RVVM2SF "TARGET_VECTOR_ELEN_FP_32") (RVVM1SF "TARGET_VECTOR_ELEN_FP_32") - (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") + (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64") (RVVM8DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_64BIT") (RVVM4DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_64BIT") @@ -3392,11 +3392,11 @@ (define_mode_iterator V_VLS_F_CONVERT_SI [ (RVVM4HF "TARGET_ZVFH") (RVVM2HF "TARGET_ZVFH") (RVVM1HF "TARGET_ZVFH") - (RVVMF2HF "TARGET_ZVFH") (RVVMF4HF "TARGET_ZVFH && TARGET_MIN_VLEN > 32") + (RVVMF2HF "TARGET_ZVFH") (RVVMF4HF "TARGET_ZVFH && TARGET_VECTOR_ELEN_64") (RVVM8SF "TARGET_VECTOR_ELEN_FP_32") (RVVM4SF "TARGET_VECTOR_ELEN_FP_32") (RVVM2SF "TARGET_VECTOR_ELEN_FP_32") (RVVM1SF "TARGET_VECTOR_ELEN_FP_32") - (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") + (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64") (RVVM8DF "TARGET_VECTOR_ELEN_FP_64") (RVVM4DF "TARGET_VECTOR_ELEN_FP_64") @@ -3510,11 +3510,11 @@ (define_mode_iterator V_VLS_F_CONVERT_DI [ (RVVM2HF "TARGET_ZVFH") (RVVM1HF "TARGET_ZVFH") (RVVMF2HF "TARGET_ZVFH") - (RVVMF4HF "TARGET_ZVFH && TARGET_MIN_VLEN > 32") + (RVVMF4HF "TARGET_ZVFH && TARGET_VECTOR_ELEN_64") (RVVM4SF "TARGET_VECTOR_ELEN_FP_32") (RVVM2SF "TARGET_VECTOR_ELEN_FP_32") (RVVM1SF "TARGET_VECTOR_ELEN_FP_32") - (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32") + (RVVMF2SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_VECTOR_ELEN_64") (RVVM8DF "TARGET_VECTOR_ELEN_FP_64") (RVVM4DF "TARGET_VECTOR_ELEN_FP_64") (RVVM2DF "TARGET_VECTOR_ELEN_FP_64") (RVVM1DF "TARGET_VECTOR_ELEN_FP_64") @@ -4400,23 +4400,23 @@ (V4096BI "riscv_vector::vls_mode_valid_p (V4096BImode) && TARGET_MIN_VLEN >= 4096")]) (define_mode_iterator VSI [ - RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") + RVVM8SI RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_VECTOR_ELEN_64") ]) (define_mode_iterator VLMULX2_SI [ - RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") + RVVM4SI RVVM2SI RVVM1SI (RVVMF2SI "TARGET_VECTOR_ELEN_64") ]) (define_mode_iterator VLMULX4_SI [ - RVVM2SI RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") + RVVM2SI RVVM1SI (RVVMF2SI "TARGET_VECTOR_ELEN_64") ]) (define_mode_iterator VLMULX8_SI [ - RVVM1SI (RVVMF2SI "TARGET_MIN_VLEN > 32") + RVVM1SI (RVVMF2SI "TARGET_VECTOR_ELEN_64") ]) (define_mode_iterator VLMULX16_SI [ - (RVVMF2SI "TARGET_MIN_VLEN > 32") + (RVVMF2SI 
"TARGET_VECTOR_ELEN_64") ]) (define_mode_attr VSIX2 [ @@ -4854,7 +4854,7 @@ ]) (define_mode_iterator SF_XF [ - RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_MIN_VLEN > 32") + RVVM2QI RVVM1QI RVVMF2QI RVVMF4QI (RVVMF8QI "TARGET_VECTOR_ELEN_64") ]) diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 8ee43cf..51eb64f 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -3939,7 +3939,7 @@ (any_extend:VWEXTI (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand" " vr, vr")) (match_operand:VWEXTI 2 "vector_merge_operand" " vu, 0")))] - "TARGET_VECTOR" + "TARGET_VECTOR && !TARGET_XTHEADVECTOR" "v<sz>ext.vf2\t%0,%3%p1" [(set_attr "type" "vext") (set_attr "mode" "<MODE>")]) @@ -3959,7 +3959,7 @@ (any_extend:VQEXTI (match_operand:<V_QUAD_TRUNC> 3 "register_operand" " vr, vr")) (match_operand:VQEXTI 2 "vector_merge_operand" " vu, 0")))] - "TARGET_VECTOR" + "TARGET_VECTOR && !TARGET_XTHEADVECTOR" "v<sz>ext.vf4\t%0,%3%p1" [(set_attr "type" "vext") (set_attr "mode" "<MODE>")]) @@ -3979,7 +3979,7 @@ (any_extend:VOEXTI (match_operand:<V_OCT_TRUNC> 3 "register_operand" " vr, vr")) (match_operand:VOEXTI 2 "vector_merge_operand" " vu, 0")))] - "TARGET_VECTOR" + "TARGET_VECTOR && !TARGET_XTHEADVECTOR" "v<sz>ext.vf8\t%0,%3%p1" [(set_attr "type" "vext") (set_attr "mode" "<MODE>")]) diff --git a/gcc/config/rs6000/rs6000-logue.cc b/gcc/config/rs6000/rs6000-logue.cc index 52f44b1..5377ad6c 100644 --- a/gcc/config/rs6000/rs6000-logue.cc +++ b/gcc/config/rs6000/rs6000-logue.cc @@ -5351,6 +5351,8 @@ rs6000_output_function_epilogue (FILE *file) i = 1; else if (! strcmp (language_string, "GNU Ada")) i = 3; + else if (! strcmp (language_string, "GCC COBOL")) + i = 7; else if (! strcmp (language_string, "GNU Modula-2")) i = 8; else if (lang_GNU_CXX () |