Diffstat (limited to 'gcc')
-rw-r--r--  gcc/config/riscv/riscv-v.cc  590
1 file changed, 299 insertions(+), 291 deletions(-)
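
The patch below splits expand_const_vector into dedicated helpers. For the single-step NPATTERNS case the new helper materializes the constant with a vid sequence followed by AND, shift, multiply and add. As a purely illustrative scalar sketch of that per-element arithmetic (not part of the patch; npatterns, step and init are example values chosen to match the { 0, 0, 8, 8, 16, 16, ... } comment in the code), the same sequence can be computed as:

/* Scalar model of Steps 1-5 in expand_const_vector_single_step_npatterns
   for the "all patterns equal" case.  Illustrative only, not part of the
   patch; npatterns, step and init are example values.  */
#include <cstdio>
#include <cstdint>

int main ()
{
  const int npatterns = 2;              /* always a power of two */
  const int64_t init = 0;               /* builder->elt (0) */
  const int64_t step = 8;               /* elt (npatterns) - elt (0) */
  const int log2_np = __builtin_ctz (npatterns);

  for (int64_t vid = 0; vid < 8; vid++) /* Step 1: vid = 0, 1, 2, ...  */
    {
      int64_t v = vid & -npatterns;     /* Step 2: 0, 0, 2, 2, 4, 4, ...  */
      v >>= log2_np;                    /* Step 3: 0, 0, 1, 1, 2, 2, ...  */
      v *= step;                        /* Step 4: 0, 0, 8, 8, 16, 16, ...  */
      v += init;                        /* Step 5: add the starting value  */
      printf ("%lld ", (long long) v);
    }
  printf ("\n");                        /* prints: 0 0 8 8 16 16 24 24  */
  return 0;
}

Each value repeats NPATTERNS times because idx & -NPATTERNS rounds the element index down to a multiple of NPATTERNS before it is rescaled; the vector code performs the same steps with vid, vand, vsra, vmul/vsll and vadd.
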
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 96f0b94..66c8b29 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1340,334 +1340,342 @@ expand_const_vector_duplicate (rtx target, rvv_builder *builder)
}
static void
-expand_const_vector (rtx target, rtx src)
+expand_const_vector_single_step_npatterns (rtx target, rvv_builder *builder)
{
machine_mode mode = GET_MODE (target);
rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
- rtx elt;
- if (const_vec_duplicate_p (src, &elt))
- return expand_const_vec_duplicate (target, src, elt);
+ /* Describe the case by choosing NPATTERNS = 4 as an example. */
+ insn_code icode;
- /* Support scalable const series vector. */
- rtx base, step;
- if (const_vec_series_p (src, &base, &step))
- return expand_const_vec_series (target, base, step);
+ /* Step 1: Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */
+ rtx vid = gen_reg_rtx (builder->mode ());
+ rtx vid_ops[] = {vid};
+ icode = code_for_pred_series (builder->mode ());
+ emit_vlmax_insn (icode, NULLARY_OP, vid_ops);
- /* Handle variable-length vector. */
- unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src);
- unsigned int npatterns = CONST_VECTOR_NPATTERNS (src);
- rvv_builder builder (mode, npatterns, nelts_per_pattern);
- for (unsigned int i = 0; i < nelts_per_pattern; i++)
+ if (builder->npatterns_all_equal_p ())
{
- for (unsigned int j = 0; j < npatterns; j++)
- builder.quick_push (CONST_VECTOR_ELT (src, i * npatterns + j));
- }
- builder.finalize ();
+ /* Generate the variable-length vector following this rule:
+ { a, a, a + step, a + step, a + step * 2, a + step * 2, ...}
+ E.g. { 0, 0, 8, 8, 16, 16, ... } */
+
+ /* We want to create a pattern where value[idx] = floor (idx /
+ NPATTERNS). As NPATTERNS is always a power of two we can
+ rewrite this as = idx & -NPATTERNS. */
+ /* Step 2: VID AND -NPATTERNS:
+ { 0&-4, 1&-4, 2&-4, 3 &-4, 4 &-4, 5 &-4, 6 &-4, 7 &-4, ... } */
+ rtx imm = gen_int_mode (-builder->npatterns (), builder->inner_mode ());
+ rtx tmp1 = gen_reg_rtx (builder->mode ());
+ rtx and_ops[] = {tmp1, vid, imm};
+ icode = code_for_pred_scalar (AND, builder->mode ());
+ emit_vlmax_insn (icode, BINARY_OP, and_ops);
+
+ /* Step 3: Convert to step size 1. */
+ rtx tmp2 = gen_reg_rtx (builder->mode ());
+ /* log2 (npatterns) to get the shift amount to convert
+ Eg. { 0, 0, 0, 0, 4, 4, ... }
+ into { 0, 0, 0, 0, 1, 1, ... }. */
+ HOST_WIDE_INT shift_amt = exact_log2 (builder->npatterns ());
+ rtx shift = gen_int_mode (shift_amt, builder->inner_mode ());
+ rtx shift_ops[] = {tmp2, tmp1, shift};
+ icode = code_for_pred_scalar (ASHIFTRT, builder->mode ());
+ emit_vlmax_insn (icode, BINARY_OP, shift_ops);
+
+ /* Step 4: Multiply to step size n. */
+ HOST_WIDE_INT step_size =
+ INTVAL (builder->elt (builder->npatterns ()))
+ - INTVAL (builder->elt (0));
+ rtx tmp3 = gen_reg_rtx (builder->mode ());
+ if (pow2p_hwi (step_size))
+ {
+ /* Power of 2 can be handled with a left shift. */
+ HOST_WIDE_INT shift = exact_log2 (step_size);
+ rtx shift_amount = gen_int_mode (shift, Pmode);
+ insn_code icode = code_for_pred_scalar (ASHIFT, mode);
+ rtx ops[] = {tmp3, tmp2, shift_amount};
+ emit_vlmax_insn (icode, BINARY_OP, ops);
+ }
+ else
+ {
+ rtx mult_amt = gen_int_mode (step_size, builder->inner_mode ());
+ insn_code icode = code_for_pred_scalar (MULT, builder->mode ());
+ rtx ops[] = {tmp3, tmp2, mult_amt};
+ emit_vlmax_insn (icode, BINARY_OP, ops);
+ }
- if (CONST_VECTOR_DUPLICATE_P (src))
- return expand_const_vector_duplicate (target, &builder);
- else if (CONST_VECTOR_STEPPED_P (src))
+ /* Step 5: Add starting value to all elements. */
+ HOST_WIDE_INT init_val = INTVAL (builder->elt (0));
+ if (init_val == 0)
+ emit_move_insn (result, tmp3);
+ else
+ {
+ rtx dup = gen_const_vector_dup (builder->mode (), init_val);
+ rtx add_ops[] = {result, tmp3, dup};
+ icode = code_for_pred (PLUS, builder->mode ());
+ emit_vlmax_insn (icode, BINARY_OP, add_ops);
+ }
+ }
+ else
{
- gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
- if (builder.single_step_npatterns_p ())
+ /* Generate the variable-length vector following this rule:
+ { a, b, a + step, b + step, a + step*2, b + step*2, ... } */
+ if (builder->npatterns_vid_diff_repeated_p ())
{
- /* Describe the case by choosing NPATTERNS = 4 as an example. */
- insn_code icode;
-
- /* Step 1: Generate vid = { 0, 1, 2, 3, 4, 5, 6, 7, ... }. */
- rtx vid = gen_reg_rtx (builder.mode ());
- rtx vid_ops[] = {vid};
- icode = code_for_pred_series (builder.mode ());
- emit_vlmax_insn (icode, NULLARY_OP, vid_ops);
-
- if (builder.npatterns_all_equal_p ())
+ /* Case 1: For example as below:
+ {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
+ We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is
+ repeated as below after minus vid.
+ {3, 1, -1, -3, 3, 1, -1, -3...}
+ Then we can simplify the diff code gen to at most
+ npatterns(). */
+ rvv_builder v (builder->mode (), builder->npatterns (), 1);
+
+ /* Step 1: Generate diff = TARGET - VID. */
+ for (unsigned int i = 0; i < v.npatterns (); ++i)
{
- /* Generate the variable-length vector following this rule:
- { a, a, a + step, a + step, a + step * 2, a + step * 2, ...}
- E.g. { 0, 0, 8, 8, 16, 16, ... } */
-
- /* We want to create a pattern where value[idx] = floor (idx /
- NPATTERNS). As NPATTERNS is always a power of two we can
- rewrite this as = idx & -NPATTERNS. */
- /* Step 2: VID AND -NPATTERNS:
- { 0&-4, 1&-4, 2&-4, 3 &-4, 4 &-4, 5 &-4, 6 &-4, 7 &-4, ... }
- */
- rtx imm
- = gen_int_mode (-builder.npatterns (), builder.inner_mode ());
- rtx tmp1 = gen_reg_rtx (builder.mode ());
- rtx and_ops[] = {tmp1, vid, imm};
- icode = code_for_pred_scalar (AND, builder.mode ());
- emit_vlmax_insn (icode, BINARY_OP, and_ops);
-
- /* Step 3: Convert to step size 1. */
- rtx tmp2 = gen_reg_rtx (builder.mode ());
- /* log2 (npatterns) to get the shift amount to convert
- Eg. { 0, 0, 0, 0, 4, 4, ... }
- into { 0, 0, 0, 0, 1, 1, ... }. */
- HOST_WIDE_INT shift_amt = exact_log2 (builder.npatterns ()) ;
- rtx shift = gen_int_mode (shift_amt, builder.inner_mode ());
- rtx shift_ops[] = {tmp2, tmp1, shift};
- icode = code_for_pred_scalar (ASHIFTRT, builder.mode ());
- emit_vlmax_insn (icode, BINARY_OP, shift_ops);
-
- /* Step 4: Multiply to step size n. */
- HOST_WIDE_INT step_size =
- INTVAL (builder.elt (builder.npatterns ()))
- - INTVAL (builder.elt (0));
- rtx tmp3 = gen_reg_rtx (builder.mode ());
- if (pow2p_hwi (step_size))
- {
- /* Power of 2 can be handled with a left shift. */
- HOST_WIDE_INT shift = exact_log2 (step_size);
- rtx shift_amount = gen_int_mode (shift, Pmode);
- insn_code icode = code_for_pred_scalar (ASHIFT, mode);
- rtx ops[] = {tmp3, tmp2, shift_amount};
- emit_vlmax_insn (icode, BINARY_OP, ops);
- }
- else
- {
- rtx mult_amt = gen_int_mode (step_size, builder.inner_mode ());
- insn_code icode = code_for_pred_scalar (MULT, builder.mode ());
- rtx ops[] = {tmp3, tmp2, mult_amt};
- emit_vlmax_insn (icode, BINARY_OP, ops);
- }
-
- /* Step 5: Add starting value to all elements. */
- HOST_WIDE_INT init_val = INTVAL (builder.elt (0));
- if (init_val == 0)
- emit_move_insn (result, tmp3);
- else
- {
- rtx dup = gen_const_vector_dup (builder.mode (), init_val);
- rtx add_ops[] = {result, tmp3, dup};
- icode = code_for_pred (PLUS, builder.mode ());
- emit_vlmax_insn (icode, BINARY_OP, add_ops);
- }
+ poly_int64 diff = rtx_to_poly_int64 (builder->elt (i)) - i;
+ v.quick_push (gen_int_mode (diff, v.inner_mode ()));
}
- else
- {
- /* Generate the variable-length vector following this rule:
- { a, b, a + step, b + step, a + step*2, b + step*2, ... } */
- if (builder.npatterns_vid_diff_repeated_p ())
- {
- /* Case 1: For example as below:
- {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8... }
- We have 3 - 0 = 3 equals 7 - 4 = 3, the sequence is
- repeated as below after minus vid.
- {3, 1, -1, -3, 3, 1, -1, -3...}
- Then we can simplify the diff code gen to at most
- npatterns(). */
- rvv_builder v (builder.mode (), builder.npatterns (), 1);
-
- /* Step 1: Generate diff = TARGET - VID. */
- for (unsigned int i = 0; i < v.npatterns (); ++i)
- {
- poly_int64 diff = rtx_to_poly_int64 (builder.elt (i)) - i;
- v.quick_push (gen_int_mode (diff, v.inner_mode ()));
- }
-
- /* Step 2: Generate result = VID + diff. */
- rtx vec = v.build ();
- rtx add_ops[] = {result, vid, vec};
- emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
- BINARY_OP, add_ops);
- }
- else
- {
- /* Case 2: For example as below:
- { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... }
- */
- rvv_builder v (builder.mode (), builder.npatterns (), 1);
-
- /* Step 1: Generate { a, b, a, b, ... } */
- for (unsigned int i = 0; i < v.npatterns (); ++i)
- v.quick_push (builder.elt (i));
- rtx new_base = v.build ();
-
- /* Step 2: Generate tmp1 = VID >> LOG2 (NPATTERNS).  */
- rtx shift_count
- = gen_int_mode (exact_log2 (builder.npatterns ()),
- builder.inner_mode ());
- rtx tmp1 = gen_reg_rtx (builder.mode ());
- rtx shift_ops[] = {tmp1, vid, shift_count};
- emit_vlmax_insn (code_for_pred_scalar
- (LSHIFTRT, builder.mode ()), BINARY_OP,
- shift_ops);
-
- /* Step 3: Generate tmp2 = tmp1 * step.  */
- rtx tmp2 = gen_reg_rtx (builder.mode ());
- rtx step
- = simplify_binary_operation (MINUS, builder.inner_mode (),
- builder.elt (v.npatterns()),
- builder.elt (0));
- expand_vec_series (tmp2, const0_rtx, step, tmp1);
-
- /* Step 4: Generate result = tmp2 + new_base.  */
- rtx add_ops[] = {result, tmp2, new_base};
- emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
- BINARY_OP, add_ops);
- }
- }
+ /* Step 2: Generate result = VID + diff. */
+ rtx vec = v.build ();
+ rtx add_ops[] = {result, vid, vec};
+ emit_vlmax_insn (code_for_pred (PLUS, builder->mode ()), BINARY_OP,
+ add_ops);
}
- else if (builder.interleaved_stepped_npatterns_p ())
+ else
{
- rtx base1 = builder.elt (0);
- rtx base2 = builder.elt (1);
- poly_int64 step1
- = rtx_to_poly_int64 (builder.elt (builder.npatterns ()))
- - rtx_to_poly_int64 (base1);
- poly_int64 step2
- = rtx_to_poly_int64 (builder.elt (builder.npatterns () + 1))
- - rtx_to_poly_int64 (base2);
+ /* Case 2: For example as below:
+ { -4, 4, -4 + 1, 4 + 1, -4 + 2, 4 + 2, -4 + 3, 4 + 3, ... }
+ */
+ rvv_builder v (builder->mode (), builder->npatterns (), 1);
+
+ /* Step 1: Generate { a, b, a, b, ... } */
+ for (unsigned int i = 0; i < v.npatterns (); ++i)
+ v.quick_push (builder->elt (i));
+ rtx new_base = v.build ();
+
+ /* Step 2: Generate tmp1 = VID >> LOG2 (NPATTERNS).  */
+ rtx shift_count = gen_int_mode (exact_log2 (builder->npatterns ()),
+ builder->inner_mode ());
+ rtx tmp1 = gen_reg_rtx (builder->mode ());
+ rtx shift_ops[] = {tmp1, vid, shift_count};
+ emit_vlmax_insn (code_for_pred_scalar (LSHIFTRT, builder->mode ()),
+ BINARY_OP, shift_ops);
+
+ /* Step 3: Generate tmp2 = tmp1 * step.  */
+ rtx tmp2 = gen_reg_rtx (builder->mode ());
+ rtx step
+ = simplify_binary_operation (MINUS, builder->inner_mode (),
+ builder->elt (v.npatterns()),
+ builder->elt (0));
+ expand_vec_series (tmp2, const0_rtx, step, tmp1);
+
+ /* Step 4: Generate result = tmp2 + new_base.  */
+ rtx add_ops[] = {result, tmp2, new_base};
+ emit_vlmax_insn (code_for_pred (PLUS, builder->mode ()), BINARY_OP,
+ add_ops);
+ }
+ }
+
+ if (result != target)
+ emit_move_insn (target, result);
+}
- /* For { 1, 0, 2, 0, ... , n - 1, 0 }, we can use larger EEW
- integer vector mode to generate such vector efficiently.
+static void
+expand_const_vector_interleaved_stepped_npatterns (rtx target, rtx src,
+ rvv_builder *builder)
+{
+ machine_mode mode = GET_MODE (target);
+ rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
+ rtx base1 = builder->elt (0);
+ rtx base2 = builder->elt (1);
- E.g. EEW = 16, { 2, 0, 4, 0, ... }
+ poly_int64 step1 = rtx_to_poly_int64 (builder->elt (builder->npatterns ()))
+ - rtx_to_poly_int64 (base1);
+ poly_int64 step2 =
+ rtx_to_poly_int64 (builder->elt (builder->npatterns () + 1))
+ - rtx_to_poly_int64 (base2);
- can be interpreted into:
+ /* For { 1, 0, 2, 0, ... , n - 1, 0 }, we can use larger EEW
+ integer vector mode to generate such vector efficiently.
- EEW = 32, { 2, 4, ... }.
+ E.g. EEW = 16, { 2, 0, 4, 0, ... }
- Both the series1 and series2 may overflow before taking the IOR
- to generate the final result. However, only series1 matters
- because the series2 will shift before IOR, thus the overflow
- bits will never pollute the final result.
+ can be interpreted into:
- For now we forbid the negative steps and overflow, and they
- will fall back to the default merge way to generate the
- const_vector. */
+ EEW = 32, { 2, 4, ... }.
- unsigned int new_smode_bitsize = builder.inner_bits_size () * 2;
- scalar_int_mode new_smode;
- machine_mode new_mode;
- poly_uint64 new_nunits
- = exact_div (GET_MODE_NUNITS (builder.mode ()), 2);
+ Both the series1 and series2 may overflow before taking the IOR
+ to generate the final result. However, only series1 matters
+ because the series2 will shift before IOR, thus the overflow
+ bits will never pollute the final result.
- poly_int64 base1_poly = rtx_to_poly_int64 (base1);
- bool overflow_smode_p = false;
+ For now we forbid the negative steps and overflow, and they
+ will fall back to the default merge way to generate the
+ const_vector. */
- if (!step1.is_constant ())
- overflow_smode_p = true;
- else
- {
- int elem_count = XVECLEN (src, 0);
- uint64_t step1_val = step1.to_constant ();
- uint64_t base1_val = base1_poly.to_constant ();
- uint64_t elem_val = base1_val + (elem_count - 1) * step1_val;
+ unsigned int new_smode_bitsize = builder->inner_bits_size () * 2;
+ scalar_int_mode new_smode;
+ machine_mode new_mode;
+ poly_uint64 new_nunits = exact_div (GET_MODE_NUNITS (builder->mode ()), 2);
- if ((elem_val >> builder.inner_bits_size ()) != 0)
- overflow_smode_p = true;
- }
+ poly_int64 base1_poly = rtx_to_poly_int64 (base1);
+ bool overflow_smode_p = false;
- if (known_ge (step1, 0) && known_ge (step2, 0)
- && int_mode_for_size (new_smode_bitsize, 0).exists (&new_smode)
- && get_vector_mode (new_smode, new_nunits).exists (&new_mode)
- && !overflow_smode_p)
+ if (!step1.is_constant ())
+ overflow_smode_p = true;
+ else
+ {
+ int elem_count = XVECLEN (src, 0);
+ uint64_t step1_val = step1.to_constant ();
+ uint64_t base1_val = base1_poly.to_constant ();
+ uint64_t elem_val = base1_val + (elem_count - 1) * step1_val;
+
+ if ((elem_val >> builder->inner_bits_size ()) != 0)
+ overflow_smode_p = true;
+ }
+
+ if (known_ge (step1, 0) && known_ge (step2, 0)
+ && int_mode_for_size (new_smode_bitsize, 0).exists (&new_smode)
+ && get_vector_mode (new_smode, new_nunits).exists (&new_mode)
+ && !overflow_smode_p)
+ {
+ rtx tmp1 = gen_reg_rtx (new_mode);
+ base1 = gen_int_mode (base1_poly, new_smode);
+ expand_vec_series (tmp1, base1, gen_int_mode (step1, new_smode));
+
+ if (rtx_equal_p (base2, const0_rtx) && known_eq (step2, 0))
+ /* { 1, 0, 2, 0, ... }. */
+ emit_move_insn (result, gen_lowpart (mode, tmp1));
+ else if (known_eq (step2, 0))
+ {
+ /* { 1, 1, 2, 1, ... }. */
+ rtx scalar = expand_simple_binop (
+ Xmode, ASHIFT, gen_int_mode (rtx_to_poly_int64 (base2), Xmode),
+ gen_int_mode (builder->inner_bits_size (), Xmode), NULL_RTX, false,
+ OPTAB_DIRECT);
+ scalar = simplify_gen_subreg (new_smode, scalar, Xmode, 0);
+ rtx tmp2 = gen_reg_rtx (new_mode);
+ rtx ior_ops[] = {tmp2, tmp1, scalar};
+ emit_vlmax_insn (code_for_pred_scalar (IOR, new_mode), BINARY_OP,
+ ior_ops);
+ emit_move_insn (result, gen_lowpart (mode, tmp2));
+ }
+ else
+ {
+ /* { 1, 3, 2, 6, ... }. */
+ rtx tmp2 = gen_reg_rtx (new_mode);
+ base2 = gen_int_mode (rtx_to_poly_int64 (base2), new_smode);
+ expand_vec_series (tmp2, base2, gen_int_mode (step2, new_smode));
+ rtx shifted_tmp2;
+ rtx shift = gen_int_mode (builder->inner_bits_size (), Xmode);
+ if (lra_in_progress)
{
- rtx tmp1 = gen_reg_rtx (new_mode);
- base1 = gen_int_mode (base1_poly, new_smode);
- expand_vec_series (tmp1, base1, gen_int_mode (step1, new_smode));
-
- if (rtx_equal_p (base2, const0_rtx) && known_eq (step2, 0))
- /* { 1, 0, 2, 0, ... }. */
- emit_move_insn (result, gen_lowpart (mode, tmp1));
- else if (known_eq (step2, 0))
- {
- /* { 1, 1, 2, 1, ... }. */
- rtx scalar = expand_simple_binop (
- Xmode, ASHIFT,
- gen_int_mode (rtx_to_poly_int64 (base2), Xmode),
- gen_int_mode (builder.inner_bits_size (), Xmode),
- NULL_RTX, false, OPTAB_DIRECT);
- scalar = simplify_gen_subreg (new_smode, scalar, Xmode, 0);
- rtx tmp2 = gen_reg_rtx (new_mode);
- rtx ior_ops[] = {tmp2, tmp1, scalar};
- emit_vlmax_insn (code_for_pred_scalar (IOR, new_mode),
- BINARY_OP, ior_ops);
- emit_move_insn (result, gen_lowpart (mode, tmp2));
- }
- else
- {
- /* { 1, 3, 2, 6, ... }. */
- rtx tmp2 = gen_reg_rtx (new_mode);
- base2 = gen_int_mode (rtx_to_poly_int64 (base2), new_smode);
- expand_vec_series (tmp2, base2,
- gen_int_mode (step2, new_smode));
- rtx shifted_tmp2;
- rtx shift = gen_int_mode (builder.inner_bits_size (), Xmode);
- if (lra_in_progress)
- {
- shifted_tmp2 = gen_reg_rtx (new_mode);
- rtx shift_ops[] = {shifted_tmp2, tmp2, shift};
- emit_vlmax_insn (code_for_pred_scalar
- (ASHIFT, new_mode), BINARY_OP,
- shift_ops);
- }
- else
- shifted_tmp2 = expand_simple_binop (new_mode, ASHIFT, tmp2,
- shift, NULL_RTX, false,
- OPTAB_DIRECT);
- rtx tmp3 = gen_reg_rtx (new_mode);
- rtx ior_ops[] = {tmp3, tmp1, shifted_tmp2};
- emit_vlmax_insn (code_for_pred (IOR, new_mode), BINARY_OP,
- ior_ops);
- emit_move_insn (result, gen_lowpart (mode, tmp3));
- }
+ shifted_tmp2 = gen_reg_rtx (new_mode);
+ rtx shift_ops[] = {shifted_tmp2, tmp2, shift};
+ emit_vlmax_insn (code_for_pred_scalar (ASHIFT, new_mode),
+ BINARY_OP, shift_ops);
}
else
- {
- rtx vid = gen_reg_rtx (mode);
- expand_vec_series (vid, const0_rtx, const1_rtx);
- /* Transform into { 0, 0, 1, 1, 2, 2, ... }. */
- rtx shifted_vid;
- if (lra_in_progress)
- {
- shifted_vid = gen_reg_rtx (mode);
- rtx shift = gen_int_mode (1, Xmode);
- rtx shift_ops[] = {shifted_vid, vid, shift};
- emit_vlmax_insn (code_for_pred_scalar
- (ASHIFT, mode), BINARY_OP,
- shift_ops);
- }
- else
- shifted_vid = expand_simple_binop (mode, LSHIFTRT, vid,
- const1_rtx, NULL_RTX,
- false, OPTAB_DIRECT);
- rtx tmp1 = gen_reg_rtx (mode);
- rtx tmp2 = gen_reg_rtx (mode);
- expand_vec_series (tmp1, base1,
- gen_int_mode (step1, builder.inner_mode ()),
- shifted_vid);
- expand_vec_series (tmp2, base2,
- gen_int_mode (step2, builder.inner_mode ()),
- shifted_vid);
-
- /* Transform into { 0, 1, 0, 1, 0, 1, ... }. */
- rtx and_vid = gen_reg_rtx (mode);
- rtx and_ops[] = {and_vid, vid, const1_rtx};
- emit_vlmax_insn (code_for_pred_scalar (AND, mode), BINARY_OP,
- and_ops);
- rtx mask = gen_reg_rtx (builder.mask_mode ());
- expand_vec_cmp (mask, EQ, and_vid, CONST1_RTX (mode));
-
- rtx ops[] = {result, tmp1, tmp2, mask};
- emit_vlmax_insn (code_for_pred_merge (mode), MERGE_OP, ops);
- }
+ shifted_tmp2 = expand_simple_binop (new_mode, ASHIFT, tmp2, shift,
+ NULL_RTX, false, OPTAB_DIRECT);
+ rtx tmp3 = gen_reg_rtx (new_mode);
+ rtx ior_ops[] = {tmp3, tmp1, shifted_tmp2};
+ emit_vlmax_insn (code_for_pred (IOR, new_mode), BINARY_OP, ior_ops);
+ emit_move_insn (result, gen_lowpart (mode, tmp3));
}
- else
- /* TODO: We will enable more variable-length vector in the future. */
- gcc_unreachable ();
}
else
- gcc_unreachable ();
+ {
+ rtx vid = gen_reg_rtx (mode);
+ expand_vec_series (vid, const0_rtx, const1_rtx);
+ /* Transform into { 0, 0, 1, 1, 2, 2, ... }. */
+ rtx shifted_vid;
+ if (lra_in_progress)
+ {
+ shifted_vid = gen_reg_rtx (mode);
+ rtx shift = gen_int_mode (1, Xmode);
+ rtx shift_ops[] = {shifted_vid, vid, shift};
+ emit_vlmax_insn (code_for_pred_scalar (ASHIFT, mode), BINARY_OP,
+ shift_ops);
+ }
+ else
+ shifted_vid = expand_simple_binop (mode, LSHIFTRT, vid, const1_rtx,
+ NULL_RTX, false, OPTAB_DIRECT);
+ rtx tmp1 = gen_reg_rtx (mode);
+ rtx tmp2 = gen_reg_rtx (mode);
+ expand_vec_series (tmp1, base1,
+ gen_int_mode (step1, builder->inner_mode ()),
+ shifted_vid);
+ expand_vec_series (tmp2, base2,
+ gen_int_mode (step2, builder->inner_mode ()),
+ shifted_vid);
+
+ /* Transform into { 0, 1, 0, 1, 0, 1, ... }. */
+ rtx and_vid = gen_reg_rtx (mode);
+ rtx and_ops[] = {and_vid, vid, const1_rtx};
+ emit_vlmax_insn (code_for_pred_scalar (AND, mode), BINARY_OP, and_ops);
+ rtx mask = gen_reg_rtx (builder->mask_mode ());
+ expand_vec_cmp (mask, EQ, and_vid, CONST1_RTX (mode));
+
+ rtx ops[] = {result, tmp1, tmp2, mask};
+ emit_vlmax_insn (code_for_pred_merge (mode), MERGE_OP, ops);
+ }
if (result != target)
emit_move_insn (target, result);
}
+static void
+expand_const_vector_stepped (rtx target, rtx src, rvv_builder *builder)
+{
+ gcc_assert (GET_MODE_CLASS (GET_MODE (target)) == MODE_VECTOR_INT);
+
+ if (builder->single_step_npatterns_p ())
+ return expand_const_vector_single_step_npatterns (target, builder);
+ else if (builder->interleaved_stepped_npatterns_p ())
+ return expand_const_vector_interleaved_stepped_npatterns (target, src,
+ builder);
+
+ /* TODO: We will enable more variable-length vector in the future. */
+ gcc_unreachable ();
+}
+
+static void
+expand_const_vector (rtx target, rtx src)
+{
+ rtx elt;
+ if (const_vec_duplicate_p (src, &elt))
+ return expand_const_vec_duplicate (target, src, elt);
+
+ /* Support scalable const series vector. */
+ rtx base, step;
+ if (const_vec_series_p (src, &base, &step))
+ return expand_const_vec_series (target, base, step);
+
+ /* Handle variable-length vector. */
+ unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src);
+ unsigned int npatterns = CONST_VECTOR_NPATTERNS (src);
+ rvv_builder builder (GET_MODE (target), npatterns, nelts_per_pattern);
+
+ for (unsigned int i = 0; i < nelts_per_pattern; i++)
+ {
+ for (unsigned int j = 0; j < npatterns; j++)
+ builder.quick_push (CONST_VECTOR_ELT (src, i * npatterns + j));
+ }
+
+ builder.finalize ();
+
+ if (CONST_VECTOR_DUPLICATE_P (src))
+ return expand_const_vector_duplicate (target, &builder);
+ else if (CONST_VECTOR_STEPPED_P (src))
+ return expand_const_vector_stepped (target, src, &builder);
+
+ gcc_unreachable ();
+}
+
/* Get the frm mode with given CONST_INT rtx, the default mode is
FRM_DYN. */
enum floating_point_rounding_mode