From 06039e71f02e478280ed63a63cbf5e76f3897513 Mon Sep 17 00:00:00 2001 From: Richard Ball Date: Mon, 18 Jul 2022 11:30:04 +0100 Subject: Replace manual swapping idiom with std::swap in aarch64.cc gcc/config/aarch64/aarch64.cc has a few manual swapping idioms of the form: x = in0, in0 = in1, in1 = x; The preferred way is using the standard: std::swap (in0, in1); We should just fix these to use std::swap. This will also allow us to eliminate the x temporary rtx. gcc/ChangeLog: * config/aarch64/aarch64.cc (aarch64_evpc_trn): Use std:swap. (aarch64_evpc_uzp): Likewise. (aarch64_evpc_zip): Likewise. --- gcc/config/aarch64/aarch64.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'gcc') diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 1a514c1..4b486ae 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -23498,7 +23498,7 @@ aarch64_evpc_trn (struct expand_vec_perm_d *d) { HOST_WIDE_INT odd; poly_uint64 nelt = d->perm.length (); - rtx out, in0, in1, x; + rtx out, in0, in1; machine_mode vmode = d->vmode; if (GET_MODE_UNIT_SIZE (vmode) > 8) @@ -23522,7 +23522,7 @@ aarch64_evpc_trn (struct expand_vec_perm_d *d) at the head of aarch64-sve.md for details. */ if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD) { - x = in0, in0 = in1, in1 = x; + std::swap (in0, in1); odd = !odd; } out = d->target; @@ -23592,7 +23592,7 @@ static bool aarch64_evpc_uzp (struct expand_vec_perm_d *d) { HOST_WIDE_INT odd; - rtx out, in0, in1, x; + rtx out, in0, in1; machine_mode vmode = d->vmode; if (GET_MODE_UNIT_SIZE (vmode) > 8) @@ -23615,7 +23615,7 @@ aarch64_evpc_uzp (struct expand_vec_perm_d *d) at the head of aarch64-sve.md for details. */ if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD) { - x = in0, in0 = in1, in1 = x; + std::swap (in0, in1); odd = !odd; } out = d->target; @@ -23631,7 +23631,7 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d) { unsigned int high; poly_uint64 nelt = d->perm.length (); - rtx out, in0, in1, x; + rtx out, in0, in1; machine_mode vmode = d->vmode; if (GET_MODE_UNIT_SIZE (vmode) > 8) @@ -23656,7 +23656,7 @@ aarch64_evpc_zip (struct expand_vec_perm_d *d) at the head of aarch64-sve.md for details. */ if (BYTES_BIG_ENDIAN && d->vec_flags == VEC_ADVSIMD) { - x = in0, in0 = in1, in1 = x; + std::swap (in0, in1); high = !high; } out = d->target; -- cgit v1.1 From ce92603fbe3b4870e0a38efee1ee766d62942065 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 18 Jul 2022 12:06:00 +0200 Subject: Improve common reduction vs builtin code generation in loop distribution loop distribution currently cannot handle the situation when the last partition is a builtin but there's a common reduction in all partitions (like the final IV value). The following lifts this restriction by making the last non-builtin partition provide the definitions for the loop-closed PHI nodes. Since we have heuristics in place to avoid code generating builtins last writing a testcase is difficult (but I ran into a case with other pending patches that made the heuristic ineffective). What's remaining is the inability to preserve common reductions when all partitions could be builtins (in some cases final value replacement could come to the rescue here). * tree-loop-distribution.cc (copy_loop_before): Add the ability to replace the original LC PHI defs. (generate_loops_for_partition): Pass through a flag whether to redirect original LC PHI defs. (generate_code_for_partition): Likewise. 
(loop_distribution::distribute_loop): Compute the partition that should provide the LC PHI defs for common reductions and pass that down. --- gcc/tree-loop-distribution.cc | 64 ++++++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 19 deletions(-) (limited to 'gcc') diff --git a/gcc/tree-loop-distribution.cc b/gcc/tree-loop-distribution.cc index ed7f432..0714bc4 100644 --- a/gcc/tree-loop-distribution.cc +++ b/gcc/tree-loop-distribution.cc @@ -942,7 +942,7 @@ stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt) /* Return a copy of LOOP placed before LOOP. */ static class loop * -copy_loop_before (class loop *loop) +copy_loop_before (class loop *loop, bool redirect_lc_phi_defs) { class loop *res; edge preheader = loop_preheader_edge (loop); @@ -950,6 +950,24 @@ copy_loop_before (class loop *loop) initialize_original_copy_tables (); res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader); gcc_assert (res != NULL); + + /* When a not last partition is supposed to keep the LC PHIs computed + adjust their definitions. */ + if (redirect_lc_phi_defs) + { + edge exit = single_exit (loop); + for (gphi_iterator si = gsi_start_phis (exit->dest); !gsi_end_p (si); + gsi_next (&si)) + { + gphi *phi = si.phi (); + if (virtual_operand_p (gimple_phi_result (phi))) + continue; + use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit); + tree new_def = get_current_def (USE_FROM_PTR (use_p)); + SET_USE (use_p, new_def); + } + } + free_original_copy_tables (); delete_update_ssa (); @@ -977,7 +995,7 @@ create_bb_after_loop (class loop *loop) static void generate_loops_for_partition (class loop *loop, partition *partition, - bool copy_p) + bool copy_p, bool keep_lc_phis_p) { unsigned i; basic_block *bbs; @@ -985,7 +1003,7 @@ generate_loops_for_partition (class loop *loop, partition *partition, if (copy_p) { int orig_loop_num = loop->orig_loop_num; - loop = copy_loop_before (loop); + loop = copy_loop_before (loop, keep_lc_phis_p); gcc_assert (loop != NULL); loop->orig_loop_num = orig_loop_num; create_preheader (loop, CP_SIMPLE_PREHEADERS); @@ -1336,7 +1354,8 @@ destroy_loop (class loop *loop) static bool generate_code_for_partition (class loop *loop, - partition *partition, bool copy_p) + partition *partition, bool copy_p, + bool keep_lc_phis_p) { switch (partition->kind) { @@ -1345,7 +1364,8 @@ generate_code_for_partition (class loop *loop, /* Reductions all have to be in the last partition. */ gcc_assert (!partition_reduction_p (partition) || !copy_p); - generate_loops_for_partition (loop, partition, copy_p); + generate_loops_for_partition (loop, partition, copy_p, + keep_lc_phis_p); return false; case PKIND_MEMSET: @@ -3013,6 +3033,7 @@ loop_distribution::distribute_loop (class loop *loop, bool any_builtin = false; bool reduction_in_all = false; + int reduction_partition_num = -1; FOR_EACH_VEC_ELT (partitions, i, partition) { reduction_in_all @@ -3092,10 +3113,13 @@ loop_distribution::distribute_loop (class loop *loop, } /* Put a non-builtin partition last if we need to preserve a reduction. - ??? This is a workaround that makes sort_partitions_by_post_order do - the correct thing while in reality it should sort each component - separately and then put the component with a reduction or a non-builtin - last. */ + In most cases this helps to keep a normal partition last avoiding to + spill a reduction result across builtin calls. + ??? 
The proper way would be to use dependences to see whether we + can move builtin partitions earlier during merge_dep_scc_partitions + and its sort_partitions_by_post_order. Especially when the + dependence graph is composed of multiple independent subgraphs the + heuristic does not work reliably. */ if (reduction_in_all && partition_builtin_p (partitions.last())) FOR_EACH_VEC_ELT (partitions, i, partition) @@ -3126,19 +3150,20 @@ loop_distribution::distribute_loop (class loop *loop, finalize_partitions (loop, &partitions, &alias_ddrs); - /* If there is a reduction in all partitions make sure the last one - is not classified for builtin code generation. */ + /* If there is a reduction in all partitions make sure the last + non-builtin partition provides the LC PHI defs. */ if (reduction_in_all) { - partition = partitions.last (); - if (only_patterns_p - && partition_builtin_p (partition) - && !partition_builtin_p (partitions[0])) + FOR_EACH_VEC_ELT (partitions, i, partition) + if (!partition_builtin_p (partition)) + reduction_partition_num = i; + if (reduction_partition_num == -1) { - nbp = 0; - goto ldist_done; + /* If all partitions are builtin, force the last one to + be code generated as normal partition. */ + partition = partitions.last (); + partition->kind = PKIND_NORMAL; } - partition->kind = PKIND_NORMAL; } nbp = partitions.length (); @@ -3164,7 +3189,8 @@ loop_distribution::distribute_loop (class loop *loop, { if (partition_builtin_p (partition)) (*nb_calls)++; - *destroy_p |= generate_code_for_partition (loop, partition, i < nbp - 1); + *destroy_p |= generate_code_for_partition (loop, partition, i < nbp - 1, + i == reduction_partition_num); } ldist_done: -- cgit v1.1 From 9c8349ee1a35dac61b84bbae115ee6a1eeb6ddbd Mon Sep 17 00:00:00 2001 From: Claudiu Zissulescu Date: Mon, 18 Jul 2022 14:32:41 +0300 Subject: arc: Fix interrupt's epilogue. The stack pointer adjustment in interrupt epilogue is happening after restoring the ZOL registers which is wrong. Fixing this. gcc/ * config/arc/arc.cc (arc_expand_epilogue): Adjust the frame pointer first when in interrupts. gcc/testsuite/ * gcc.target/arc/interrupt-13.c: New file. Signed-off-by: Claudiu Zissulescu --- gcc/config/arc/arc.cc | 2 +- gcc/testsuite/gcc.target/arc/interrupt-13.c | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.target/arc/interrupt-13.c (limited to 'gcc') diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc index fbc17e6..77730c8 100644 --- a/gcc/config/arc/arc.cc +++ b/gcc/config/arc/arc.cc @@ -3965,7 +3965,7 @@ arc_expand_epilogue (int sibcall_p) if (size) emit_insn (gen_blockage ()); - if (ARC_INTERRUPT_P (fn_type) && restore_fp) + if (ARC_INTERRUPT_P (fn_type)) { /* We need to restore FP before any SP operation in an interrupt. 
*/ diff --git a/gcc/testsuite/gcc.target/arc/interrupt-13.c b/gcc/testsuite/gcc.target/arc/interrupt-13.c new file mode 100644 index 0000000..0ed8451 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/interrupt-13.c @@ -0,0 +1,15 @@ +/* { dg-options "-O2" } */ + +extern int foo (int *); + +void __attribute__((interrupt("ilink"))) +irq (void) +{ + struct { + int x0; + int x1; + } a = {1 ,2}; + foo ((int *)&a); +} + +/* { dg-final { scan-assembler "add_s\\s+sp,sp,8.*pop_s\\s+r0" } } */ -- cgit v1.1 From 7313381d2ce44b72b4c9f70bd5670e5d78d1f631 Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Mon, 18 Jul 2022 12:57:10 +0100 Subject: arm: Replace arm_builtin_vectorized_function [PR106253] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch extends the fix for PR106253 to AArch32. As with AArch64, we were using ACLE intrinsics to vectorise scalar built-ins, even though the two sometimes have different ECF_* flags. (That in turn is because the ACLE intrinsics should follow the instruction semantics as closely as possible, whereas the scalar built-ins follow language specs.) The patch also removes the copysignf built-in, which only existed for this purpose and wasn't a “real” arm_neon.h built-in. Doing this also has the side-effect of enabling vectorisation of rint and roundeven. Logically that should be a separate patch, but making it one would have meant adding a new int iterator for the original set of instructions and then removing it again when including new functions. I've restricted the bswap tests to little-endian because we end up with excessive spilling on big-endian. E.g.: sub sp, sp, #8 vstr d1, [sp] vldr d16, [sp] vrev16.8 d16, d16 vstr d16, [sp] vldr d0, [sp] add sp, sp, #8 @ sp needed bx lr Similarly, the copysign tests require little-endian because on big-endian we unnecessarily load the constant from the constant pool: vldr.32 s15, .L3 vdup.32 d0, d7[1] vbsl d0, d2, d1 bx lr .L3: .word -2147483648 gcc/ PR target/106253 * config/arm/arm-builtins.cc (arm_builtin_vectorized_function): Delete. * config/arm/arm-protos.h (arm_builtin_vectorized_function): Delete. * config/arm/arm.cc (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Delete. * config/arm/arm_neon_builtins.def (copysignf): Delete. * config/arm/iterators.md (nvrint_pattern): New attribute. * config/arm/neon.md (2): New pattern. (l2): Likewise. (neon_copysignf): Rename to... (copysign3): ...this. gcc/testsuite/ PR target/106253 * gcc.target/arm/vect_unary_1.c: New test. * gcc.target/arm/vect_binary_1.c: Likewise. 
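As a rough illustration of what the switch to standard pattern names buys
(this sketch is not part of the patch; the committed tests cover the same
ground more thoroughly), scalar loops over __builtin_rintf and
__builtin_roundevenf along the lines below are the kind of code that can
now be vectorised to vrintx.f32 and vrintn.f32, given the options the new
tests use (-Ofast -mfloat-abi=hard plus the Armv8 NEON options):

/* Illustrative only: rint and roundeven were not handled by the old
   hook, so loops like these previously stayed scalar.  */
void
apply_rintf (float *x, int n)
{
  for (int i = 0; i < n; i++)
    x[i] = __builtin_rintf (x[i]);
}

void
apply_roundevenf (float *x, int n)
{
  for (int i = 0; i < n; i++)
    x[i] = __builtin_roundevenf (x[i]);
}

The new vect_unary_1.c test below checks the equivalent element-wise
operations against the expected vrintx.f32/vrintn.f32 sequences via
check-function-bodies.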
--- gcc/config/arm/arm-builtins.cc | 123 --------------- gcc/config/arm/arm-protos.h | 1 - gcc/config/arm/arm.cc | 4 - gcc/config/arm/arm_neon_builtins.def | 1 - gcc/config/arm/iterators.md | 7 + gcc/config/arm/neon.md | 17 +- gcc/testsuite/gcc.target/arm/vect_binary_1.c | 50 ++++++ gcc/testsuite/gcc.target/arm/vect_unary_1.c | 224 +++++++++++++++++++++++++++ 8 files changed, 297 insertions(+), 130 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/vect_binary_1.c create mode 100644 gcc/testsuite/gcc.target/arm/vect_unary_1.c (limited to 'gcc') diff --git a/gcc/config/arm/arm-builtins.cc b/gcc/config/arm/arm-builtins.cc index d917137..8f8155c 100644 --- a/gcc/config/arm/arm-builtins.cc +++ b/gcc/config/arm/arm-builtins.cc @@ -4026,129 +4026,6 @@ arm_expand_builtin (tree exp, return NULL_RTX; } -tree -arm_builtin_vectorized_function (unsigned int fn, tree type_out, tree type_in) -{ - machine_mode in_mode, out_mode; - int in_n, out_n; - bool out_unsigned_p = TYPE_UNSIGNED (type_out); - - /* Can't provide any vectorized builtins when we can't use NEON. */ - if (!TARGET_NEON) - return NULL_TREE; - - if (TREE_CODE (type_out) != VECTOR_TYPE - || TREE_CODE (type_in) != VECTOR_TYPE) - return NULL_TREE; - - out_mode = TYPE_MODE (TREE_TYPE (type_out)); - out_n = TYPE_VECTOR_SUBPARTS (type_out); - in_mode = TYPE_MODE (TREE_TYPE (type_in)); - in_n = TYPE_VECTOR_SUBPARTS (type_in); - -/* ARM_CHECK_BUILTIN_MODE and ARM_FIND_VRINT_VARIANT are used to find the - decl of the vectorized builtin for the appropriate vector mode. - NULL_TREE is returned if no such builtin is available. */ -#undef ARM_CHECK_BUILTIN_MODE -#define ARM_CHECK_BUILTIN_MODE(C) \ - (TARGET_VFP5 \ - && flag_unsafe_math_optimizations \ - && ARM_CHECK_BUILTIN_MODE_1 (C)) - -#undef ARM_CHECK_BUILTIN_MODE_1 -#define ARM_CHECK_BUILTIN_MODE_1(C) \ - (out_mode == SFmode && out_n == C \ - && in_mode == SFmode && in_n == C) - -#undef ARM_FIND_VRINT_VARIANT -#define ARM_FIND_VRINT_VARIANT(N) \ - (ARM_CHECK_BUILTIN_MODE (2) \ - ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sf, false) \ - : (ARM_CHECK_BUILTIN_MODE (4) \ - ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sf, false) \ - : NULL_TREE)) - - switch (fn) - { - CASE_CFN_FLOOR: - return ARM_FIND_VRINT_VARIANT (vrintm); - CASE_CFN_CEIL: - return ARM_FIND_VRINT_VARIANT (vrintp); - CASE_CFN_TRUNC: - return ARM_FIND_VRINT_VARIANT (vrintz); - CASE_CFN_ROUND: - return ARM_FIND_VRINT_VARIANT (vrinta); -#undef ARM_CHECK_BUILTIN_MODE_1 -#define ARM_CHECK_BUILTIN_MODE_1(C) \ - (out_mode == SImode && out_n == C \ - && in_mode == SFmode && in_n == C) - -#define ARM_FIND_VCVT_VARIANT(N) \ - (ARM_CHECK_BUILTIN_MODE (2) \ - ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v2sfv2si, false) \ - : (ARM_CHECK_BUILTIN_MODE (4) \ - ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##v4sfv4si, false) \ - : NULL_TREE)) - -#define ARM_FIND_VCVTU_VARIANT(N) \ - (ARM_CHECK_BUILTIN_MODE (2) \ - ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv2sfv2si, false) \ - : (ARM_CHECK_BUILTIN_MODE (4) \ - ? arm_builtin_decl(ARM_BUILTIN_NEON_##N##uv4sfv4si, false) \ - : NULL_TREE)) - CASE_CFN_LROUND: - return (out_unsigned_p - ? ARM_FIND_VCVTU_VARIANT (vcvta) - : ARM_FIND_VCVT_VARIANT (vcvta)); - CASE_CFN_LCEIL: - return (out_unsigned_p - ? ARM_FIND_VCVTU_VARIANT (vcvtp) - : ARM_FIND_VCVT_VARIANT (vcvtp)); - CASE_CFN_LFLOOR: - return (out_unsigned_p - ? 
ARM_FIND_VCVTU_VARIANT (vcvtm) - : ARM_FIND_VCVT_VARIANT (vcvtm)); -#undef ARM_CHECK_BUILTIN_MODE -#define ARM_CHECK_BUILTIN_MODE(C, N) \ - (out_mode == N##mode && out_n == C \ - && in_mode == N##mode && in_n == C) - case CFN_BUILT_IN_BSWAP16: - if (ARM_CHECK_BUILTIN_MODE (4, HI)) - return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4hi, false); - else if (ARM_CHECK_BUILTIN_MODE (8, HI)) - return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv8hi, false); - else - return NULL_TREE; - case CFN_BUILT_IN_BSWAP32: - if (ARM_CHECK_BUILTIN_MODE (2, SI)) - return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2si, false); - else if (ARM_CHECK_BUILTIN_MODE (4, SI)) - return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv4si, false); - else - return NULL_TREE; - case CFN_BUILT_IN_BSWAP64: - if (ARM_CHECK_BUILTIN_MODE (2, DI)) - return arm_builtin_decl (ARM_BUILTIN_NEON_bswapv2di, false); - else - return NULL_TREE; - CASE_CFN_COPYSIGN: - if (ARM_CHECK_BUILTIN_MODE (2, SF)) - return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv2sf, false); - else if (ARM_CHECK_BUILTIN_MODE (4, SF)) - return arm_builtin_decl (ARM_BUILTIN_NEON_copysignfv4sf, false); - else - return NULL_TREE; - - default: - return NULL_TREE; - } - return NULL_TREE; -} -#undef ARM_FIND_VCVT_VARIANT -#undef ARM_FIND_VCVTU_VARIANT -#undef ARM_CHECK_BUILTIN_MODE -#undef ARM_FIND_VRINT_VARIANT - void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) { diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 9d14209..f8aabbd 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -103,7 +103,6 @@ extern void neon_pairwise_reduce (rtx, rtx, machine_mode, rtx (*) (rtx, rtx, rtx)); extern rtx mve_bool_vec_to_const (rtx const_vec); extern rtx neon_make_constant (rtx, bool generate = true); -extern tree arm_builtin_vectorized_function (unsigned int, tree, tree); extern void neon_expand_vector_init (rtx, rtx); extern void neon_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT, const_tree); extern void arm_const_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT); diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index 33fb98d..eca99c9 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -739,10 +739,6 @@ static const struct attribute_spec arm_attribute_table[] = #undef TARGET_VECTORIZE_BUILTINS #define TARGET_VECTORIZE_BUILTINS -#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION -#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ - arm_builtin_vectorized_function - #undef TARGET_VECTOR_ALIGNMENT #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment diff --git a/gcc/config/arm/arm_neon_builtins.def b/gcc/config/arm/arm_neon_builtins.def index 445b2bf..2e642cc 100644 --- a/gcc/config/arm/arm_neon_builtins.def +++ b/gcc/config/arm/arm_neon_builtins.def @@ -264,7 +264,6 @@ VAR1 (UNOP, vcvtv4hf, v4sf) VAR10 (TERNOP, vbsl, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) VAR2 (TERNOP, vbsl, v8hf, v4hf) -VAR2 (UNOP, copysignf, v2sf, v4sf) VAR2 (UNOP, vrintn, v2sf, v4sf) VAR2 (UNOP, vrinta, v2sf, v4sf) VAR2 (UNOP, vrintp, v2sf, v4sf) diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 37cf797..29062cd 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -1150,6 +1150,13 @@ (UNSPEC_VRINTA "unconditional") (UNSPEC_VRINTM "unconditional") (UNSPEC_VRINTR "nocond") (UNSPEC_VRINTX "nocond")]) +(define_int_attr nvrint_pattern [(UNSPEC_NVRINTZ "btrunc") + (UNSPEC_NVRINTP "ceil") + (UNSPEC_NVRINTA "round") + (UNSPEC_NVRINTM "floor") + (UNSPEC_NVRINTX 
"rint") + (UNSPEC_NVRINTN "roundeven")]) + (define_int_attr nvrint_variant [(UNSPEC_NVRINTZ "z") (UNSPEC_NVRINTP "p") (UNSPEC_NVRINTA "a") (UNSPEC_NVRINTM "m") (UNSPEC_NVRINTX "x") (UNSPEC_NVRINTN "n")]) diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 275bcc1..e1dae28 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -635,6 +635,13 @@ [(set_attr "type" "neon_fp_mla_s")] ) +(define_expand "2" + [(set (match_operand:VCVTF 0 "s_register_operand") + (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand")] + NEON_VRINT))] + "TARGET_NEON && TARGET_VFP5 && flag_unsafe_math_optimizations" +) + (define_insn "neon_vrint" [(set (match_operand:VCVTF 0 "s_register_operand" "=w") (unspec:VCVTF [(match_operand:VCVTF 1 @@ -645,6 +652,14 @@ [(set_attr "type" "neon_fp_round_")] ) +(define_expand "l2" + [(set (match_operand: 0 "register_operand") + (FIXUORS: + (unspec:VCVTF [(match_operand:VCVTF 1 "register_operand")] + NEON_VCVT)))] + "TARGET_NEON && TARGET_VFP5 && flag_unsafe_math_optimizations" +) + (define_insn "neon_vcvt" [(set (match_operand: 0 "register_operand" "=w") (FIXUORS: (unspec:VCVTF @@ -3059,7 +3074,7 @@ "TARGET_I8MM" ) -(define_expand "neon_copysignf" +(define_expand "copysign3" [(match_operand:VCVTF 0 "register_operand") (match_operand:VCVTF 1 "register_operand") (match_operand:VCVTF 2 "register_operand")] diff --git a/gcc/testsuite/gcc.target/arm/vect_binary_1.c b/gcc/testsuite/gcc.target/arm/vect_binary_1.c new file mode 100644 index 0000000..c1fc905 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/vect_binary_1.c @@ -0,0 +1,50 @@ +/* { dg-do compile { target { arm*-*-* } } } */ +/* { dg-require-effective-target arm_hard_ok } */ +/* { dg-require-effective-target arm_v8_neon_ok } */ +/* { dg-add-options arm_v8_neon } */ +/* { dg-additional-options "-O3 -mfloat-abi=hard" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include + +#define TEST2(OUT, NAME, IN) \ +OUT __attribute__((vector_size(sizeof(OUT) * 2))) \ +test2_##OUT##_##NAME##_##IN (float dummy, \ + IN __attribute__((vector_size(sizeof(IN) * 2))) y, \ + IN __attribute__((vector_size(sizeof(IN) * 2))) z) \ +{ \ + OUT __attribute__((vector_size(sizeof(OUT) * 2))) x; \ + x[0] = __builtin_##NAME (y[0], z[0]); \ + x[1] = __builtin_##NAME (y[1], z[1]); \ + return x; \ +} + +#define TEST4(OUT, NAME, IN) \ +OUT __attribute__((vector_size(sizeof(OUT) * 4))) \ +test4_##OUT##_##NAME##_##IN (float dummy, \ + IN __attribute__((vector_size(sizeof(OUT) * 4))) y, \ + IN __attribute__((vector_size(sizeof(OUT) * 4))) z) \ +{ \ + OUT __attribute__((vector_size(sizeof(OUT) * 4))) x; \ + x[0] = __builtin_##NAME (y[0], z[0]); \ + x[1] = __builtin_##NAME (y[1], z[1]); \ + x[2] = __builtin_##NAME (y[2], z[2]); \ + x[3] = __builtin_##NAME (y[3], z[3]); \ + return x; \ +} + +/* +** test2_float_copysignf_float: { target arm_little_endian } +** vmov.i32 d0, #(0x80000000|2147483648)(\s+.*) +** vbsl d0, d2, d1 +** bx lr +*/ +TEST2 (float, copysignf, float) + +/* +** test4_float_copysignf_float: { target arm_little_endian } +** vmov.i32 q0, #(0x80000000|2147483648)(\s+.*) +** vbsl q0, q2, q1 +** bx lr +*/ +TEST4 (float, copysignf, float) diff --git a/gcc/testsuite/gcc.target/arm/vect_unary_1.c b/gcc/testsuite/gcc.target/arm/vect_unary_1.c new file mode 100644 index 0000000..4677180 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/vect_unary_1.c @@ -0,0 +1,224 @@ +/* { dg-do compile { target { arm*-*-* } } } */ +/* { dg-require-effective-target arm_hard_ok } */ +/* { dg-require-effective-target 
arm_v8_neon_ok } */ +/* { dg-add-options arm_v8_neon } */ +/* { dg-additional-options "-Ofast -mfloat-abi=hard" } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#include + +#define TEST2(OUT, NAME, IN) \ +OUT __attribute__((vector_size(sizeof(OUT) * 2))) \ +test2_##OUT##_##NAME##_##IN (float dummy, \ + IN __attribute__((vector_size(sizeof(IN) * 2))) y) \ +{ \ + OUT __attribute__((vector_size(sizeof(OUT) * 2))) x; \ + x[0] = __builtin_##NAME (y[0]); \ + x[1] = __builtin_##NAME (y[1]); \ + return x; \ +} + +#define TEST4(OUT, NAME, IN) \ +OUT __attribute__((vector_size(sizeof(OUT) * 4))) \ +test4_##OUT##_##NAME##_##IN (float dummy, \ + IN __attribute__((vector_size(sizeof(OUT) * 4))) y) \ +{ \ + OUT __attribute__((vector_size(sizeof(OUT) * 4))) x; \ + x[0] = __builtin_##NAME (y[0]); \ + x[1] = __builtin_##NAME (y[1]); \ + x[2] = __builtin_##NAME (y[2]); \ + x[3] = __builtin_##NAME (y[3]); \ + return x; \ +} + +#define TEST8(OUT, NAME, IN) \ +OUT __attribute__((vector_size(sizeof(OUT) * 8))) \ +test8_##OUT##_##NAME##_##IN (float dummy, \ + IN __attribute__((vector_size(sizeof(OUT) * 8))) y) \ +{ \ + OUT __attribute__((vector_size(sizeof(OUT) * 8))) x; \ + x[0] = __builtin_##NAME (y[0]); \ + x[1] = __builtin_##NAME (y[1]); \ + x[2] = __builtin_##NAME (y[2]); \ + x[3] = __builtin_##NAME (y[3]); \ + x[4] = __builtin_##NAME (y[4]); \ + x[5] = __builtin_##NAME (y[5]); \ + x[6] = __builtin_##NAME (y[6]); \ + x[7] = __builtin_##NAME (y[7]); \ + return x; \ +} + +/* +** test2_float_truncf_float: +** vrintz.f32 d0, d1 +** bx lr +*/ +TEST2 (float, truncf, float) + +/* +** test4_float_truncf_float: +** vrintz.f32 q0, q1 +** bx lr +*/ +TEST4 (float, truncf, float) + +/* +** test2_float_roundf_float: +** vrinta.f32 d0, d1 +** bx lr +*/ +TEST2 (float, roundf, float) + +/* +** test4_float_roundf_float: +** vrinta.f32 q0, q1 +** bx lr +*/ +TEST4 (float, roundf, float) + +/* +** test2_float_floorf_float: +** vrintm.f32 d0, d1 +** bx lr +*/ +TEST2 (float, floorf, float) + +/* +** test4_float_floorf_float: +** vrintm.f32 q0, q1 +** bx lr +*/ +TEST4 (float, floorf, float) + +/* +** test2_float_ceilf_float: +** vrintp.f32 d0, d1 +** bx lr +*/ +TEST2 (float, ceilf, float) + +/* +** test4_float_ceilf_float: +** vrintp.f32 q0, q1 +** bx lr +*/ +TEST4 (float, ceilf, float) + +/* +** test2_float_rintf_float: +** vrintx.f32 d0, d1 +** bx lr +*/ +TEST2 (float, rintf, float) + +/* +** test4_float_rintf_float: +** vrintx.f32 q0, q1 +** bx lr +*/ +TEST4 (float, rintf, float) + +/* +** test2_float_roundevenf_float: +** vrintn.f32 d0, d1 +** bx lr +*/ +TEST2 (float, roundevenf, float) + +/* +** test4_float_roundevenf_float: +** vrintn.f32 q0, q1 +** bx lr +*/ +TEST4 (float, roundevenf, float) + +/* +** test2_int_roundf_float: +** vcvta.s32.f32 d0, d1 +** bx lr +*/ +TEST2 (int, roundf, float) + +/* +** test4_int_roundf_float: +** vcvta.s32.f32 q0, q1 +** bx lr +*/ +TEST4 (int, roundf, float) + +/* +** test2_int_floorf_float: +** vcvtm.s32.f32 d0, d1 +** bx lr +*/ +TEST2 (int, floorf, float) + +/* +** test4_int_floorf_float: +** vcvtm.s32.f32 q0, q1 +** bx lr +*/ +TEST4 (int, floorf, float) + +/* +** test2_int_ceilf_float: +** vcvtp.s32.f32 d0, d1 +** bx lr +*/ +TEST2 (int, ceilf, float) + +/* +** test4_int_ceilf_float: +** vcvtp.s32.f32 q0, q1 +** bx lr +*/ +TEST4 (int, ceilf, float) + +/* +** test2_int_clz_int: +** vclz.i32 d0, d1 +** bx lr +*/ +TEST2 (int, clz, int) + +/* +** test4_int_clz_int: +** vclz.i32 q0, q1 +** bx lr +*/ +TEST4 (int, clz, int) + +/* +** test4_int16_t_bswap16_int16_t: { target 
arm_little_endian } +** vrev16.8 d0, d1 +** bx lr +*/ +TEST4 (int16_t, bswap16, int16_t) + +/* +** test8_int16_t_bswap16_int16_t: { target arm_little_endian } +** vrev16.8 q0, q1 +** bx lr +*/ +TEST8 (int16_t, bswap16, int16_t) + +/* +** test2_int_bswap32_int: { target arm_little_endian } +** vrev32.8 d0, d1 +** bx lr +*/ +TEST2 (int, bswap32, int) + +/* +** test4_int_bswap32_int: { target arm_little_endian } +** vrev32.8 q0, q1 +** bx lr +*/ +TEST4 (int, bswap32, int) + +/* +** test2_int64_t_bswap64_int64_t: { target arm_little_endian } +** vrev64.8 q0, q1 +** bx lr +*/ +TEST2 (int64_t, bswap64, int64_t) -- cgit v1.1 From 87f46a16ec05beb51439f55a4d3c36d64b95b00f Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 18 Jul 2022 13:09:57 +0200 Subject: Fix builtin vs non-builtin partition merge in loop distribution When r7-6373-g40b6bff965d004 fixed a costing issue it failed to make the logic symmetric which means that we now fuse normal vs. builtin when the cost model says so but we don't fuse builtin vs. normal. The following fixes that, also allowing the cost model to decide to fuse two builtin partitions as otherwise an intermediate non-builtin can result in a partial merge as well. * tree-loop-distribution.cc (loop_distribution::distribute_loop): When computing cost-based merging do not disregard builtin classified partitions in some cases. * gcc.dg/tree-ssa/ldist-24.c: XFAIL. * gcc.dg/tree-ssa/ldist-36.c: Adjust expected outcome. --- gcc/testsuite/gcc.dg/tree-ssa/ldist-24.c | 5 +++-- gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c | 3 ++- gcc/tree-loop-distribution.cc | 5 +---- 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'gcc') diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-24.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-24.c index 75f7b8f..2403a24 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-24.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-24.c @@ -20,5 +20,6 @@ void foo () } } -/* { dg-final { scan-tree-dump "generated memcpy" "ldist" } } */ -/* { dg-final { scan-tree-dump "generated memset zero" "ldist" } } */ +/* The cost modeling does not consider WAR as beneficial to split. */ +/* { dg-final { scan-tree-dump "generated memcpy" "ldist" { xfail *-*-* } } } */ +/* { dg-final { scan-tree-dump "generated memset zero" "ldist" { xfail *-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c index 07393f0..6d56006 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-36.c @@ -25,4 +25,5 @@ foo (struct st * restrict p) } } -/* { dg-final { scan-tree-dump-times "Loop nest . distributed: split to 0 loops and 3 library" 1 "ldist" } } */ +/* The cost modeling doesn't consider splitting a WAR re-use profitable. */ +/* { dg-final { scan-tree-dump-times "Loop nest . 
distributed: split to 1 loops and 1 library" 1 "ldist" } } */ diff --git a/gcc/tree-loop-distribution.cc b/gcc/tree-loop-distribution.cc index 0714bc4..0ee441c 100644 --- a/gcc/tree-loop-distribution.cc +++ b/gcc/tree-loop-distribution.cc @@ -3090,10 +3090,7 @@ loop_distribution::distribute_loop (class loop *loop, for (i = 0; partitions.iterate (i, &into); ++i) { bool changed = false; - if (partition_builtin_p (into) || into->kind == PKIND_PARTIAL_MEMSET) - continue; - for (int j = i + 1; - partitions.iterate (j, &partition); ++j) + for (int j = i + 1; partitions.iterate (j, &partition); ++j) { if (share_memory_accesses (rdg, into, partition)) { -- cgit v1.1 From 7501eec65c60701f72621d04eeb5342bad2fe4fb Mon Sep 17 00:00:00 2001 From: Claudiu Zissulescu Date: Mon, 18 Jul 2022 15:07:00 +0300 Subject: arc: Add ARCHS release 310a tune variant. Add mtune and mcpu options for ARCHS release 310a type CPU. The mtune=release31a is designed to be used as an alternative to the mcpu=hs4x_rel31 option. ARCHS4x release 31a uses DSP instructions which are implemented a bit different than mpy9. Hence, use safer mpy2 option. gcc/ * config/arc/arc-arch.h (arc_tune_attr): Add ARC_TUNE_ARCHS4X_REL31A variant. * config/arc/arc.cc (arc_override_options): Tune options for release 310a. (arc_sched_issue_rate): Use correct enum. (arc600_corereg_hazard): Textual change. (arc_hazard): Add release 310a tunning. * config/arc/arc.md (tune): Update and take into consideration new tune option. (tune_dspmpy): Likewise. (tune_store): New attribute. * config/arc/arc.opt (mtune): New tune option. * config/arc/arcHS4x.md (hs4x_brcc0, hs4x_brcc1): New cpu units. (hs4x_brcc_op): New instruction rezervation. (hs4x_data_store_1_op): Likewise. * config/arc/arc-cpus.def (hs4x_rel31): New cpu variant. * config/arc/arc-tables.opt: Regenerate. * config/arc/t-multilib: Likewise. * doc/invoke.texi (ARC): Update mcpu and tune sections. 
Signed-off-by: Claudiu Zissulescu --- gcc/config/arc/arc-arch.h | 3 +- gcc/config/arc/arc-cpus.def | 1 + gcc/config/arc/arc-tables.opt | 3 + gcc/config/arc/arc.cc | 192 ++++++++++++++++++++++++++---------------- gcc/config/arc/arc.md | 32 ++++--- gcc/config/arc/arc.opt | 3 + gcc/config/arc/arcHS4x.md | 17 +++- gcc/config/arc/t-multilib | 4 +- gcc/doc/invoke.texi | 16 ++++ 9 files changed, 181 insertions(+), 90 deletions(-) (limited to 'gcc') diff --git a/gcc/config/arc/arc-arch.h b/gcc/config/arc/arc-arch.h index 4c728a8..83b156e 100644 --- a/gcc/config/arc/arc-arch.h +++ b/gcc/config/arc/arc-arch.h @@ -77,7 +77,8 @@ enum arc_tune_attr ARC_TUNE_CORE_3, ARC_TUNE_ARCHS4X, ARC_TUNE_ARCHS4XD, - ARC_TUNE_ARCHS4XD_SLOW + ARC_TUNE_ARCHS4XD_SLOW, + ARC_TUNE_ARCHS4X_REL31A }; /* Extra options for a processor template to hold any CPU specific diff --git a/gcc/config/arc/arc-cpus.def b/gcc/config/arc/arc-cpus.def index baf61db..5668b0f 100644 --- a/gcc/config/arc/arc-cpus.def +++ b/gcc/config/arc/arc-cpus.def @@ -64,6 +64,7 @@ ARC_CPU (hs38, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, NONE, NONE) ARC_CPU (hs38_linux, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64|FL_FPU_FPUD_ALL, NONE, NONE) ARC_CPU (hs4x, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, NONE, ARCHS4X) ARC_CPU (hs4xd, hs, FL_MPYOPT_9|FL_DIVREM|FL_LL64, NONE, ARCHS4XD) +ARC_CPU (hs4x_rel31, hs, FL_MPYOPT_2|FL_DIVREM|FL_LL64, NONE, ARCHS4X_REL31A) ARC_CPU (arc600, 6xx, FL_BS, NONE, ARC600) ARC_CPU (arc600_norm, 6xx, FL_BS|FL_NORM, NONE, ARC600) diff --git a/gcc/config/arc/arc-tables.opt b/gcc/config/arc/arc-tables.opt index 8cc5135..0a0d354 100644 --- a/gcc/config/arc/arc-tables.opt +++ b/gcc/config/arc/arc-tables.opt @@ -70,6 +70,9 @@ EnumValue Enum(processor_type) String(hs4xd) Value(PROCESSOR_hs4xd) EnumValue +Enum(processor_type) String(hs4x_rel31) Value(PROCESSOR_hs4x_rel31) + +EnumValue Enum(processor_type) String(arc600) Value(PROCESSOR_arc600) EnumValue diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc index 77730c8..064790b 100644 --- a/gcc/config/arc/arc.cc +++ b/gcc/config/arc/arc.cc @@ -646,8 +646,8 @@ arc_sched_issue_rate (void) { switch (arc_tune) { - case TUNE_ARCHS4X: - case TUNE_ARCHS4XD: + case ARC_TUNE_ARCHS4X: + case ARC_TUNE_ARCHS4XD: return 3; default: break; @@ -1458,6 +1458,12 @@ arc_override_options (void) if (!OPTION_SET_P (unaligned_access) && TARGET_HS) unaligned_access = 1; + if (TARGET_HS && (arc_tune == ARC_TUNE_ARCHS4X_REL31A)) + { + TARGET_CODE_DENSITY_FRAME = 0; + flag_delayed_branch = 0; + } + /* These need to be done at start up. It's convenient to do them here. */ arc_init (); } @@ -7817,6 +7823,115 @@ arc_store_addr_hazard_p (rtx_insn* producer, rtx_insn* consumer) return arc_store_addr_hazard_internal_p (producer, consumer); } +/* Return length adjustment for INSN. + For ARC600: + A write to a core reg greater or equal to 32 must not be immediately + followed by a use. Anticipate the length requirement to insert a nop + between PRED and SUCC to prevent a hazard. 
*/ + +static int +arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ) +{ + if (!TARGET_ARC600) + return 0; + if (GET_CODE (PATTERN (pred)) == SEQUENCE) + pred = as_a (PATTERN (pred))->insn (1); + if (GET_CODE (PATTERN (succ)) == SEQUENCE) + succ = as_a (PATTERN (succ))->insn (0); + if (recog_memoized (pred) == CODE_FOR_mulsi_600 + || recog_memoized (pred) == CODE_FOR_umul_600 + || recog_memoized (pred) == CODE_FOR_mac_600 + || recog_memoized (pred) == CODE_FOR_mul64_600 + || recog_memoized (pred) == CODE_FOR_mac64_600 + || recog_memoized (pred) == CODE_FOR_umul64_600 + || recog_memoized (pred) == CODE_FOR_umac64_600) + return 0; + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, PATTERN (pred), NONCONST) + { + const_rtx x = *iter; + switch (GET_CODE (x)) + { + case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC: + break; + default: + /* This is also fine for PRE/POST_MODIFY, because they + contain a SET. */ + continue; + } + rtx dest = XEXP (x, 0); + /* Check if this sets a an extension register. N.B. we use 61 for the + condition codes, which is definitely not an extension register. */ + if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61 + /* Check if the same register is used by the PAT. */ + && (refers_to_regno_p + (REGNO (dest), + REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U, + PATTERN (succ), 0))) + return 4; + } + return 0; +} + +/* For ARC600: + A write to a core reg greater or equal to 32 must not be immediately + followed by a use. Anticipate the length requirement to insert a nop + between PRED and SUCC to prevent a hazard. */ + +int +arc_hazard (rtx_insn *pred, rtx_insn *succ) +{ + if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ)) + return 0; + + if (TARGET_ARC600) + return arc600_corereg_hazard (pred, succ); + + return 0; +} + +/* When compiling for release 310a, insert a nop before any + conditional jump. */ + +static int +arc_check_release31a (rtx_insn *pred, rtx_insn *succ) +{ + if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ)) + return 0; + + if (!JUMP_P (pred) && !single_set (pred)) + return 0; + + if (!JUMP_P (succ) && !single_set (succ)) + return 0; + + if (TARGET_HS && (arc_tune == ARC_TUNE_ARCHS4X_REL31A)) + switch (get_attr_type (pred)) + { + case TYPE_STORE: + switch (get_attr_type (succ)) + { + case TYPE_BRCC: + case TYPE_BRCC_NO_DELAY_SLOT: + case TYPE_LOOP_END: + return 1; + default: + break; + } + break; + case TYPE_BRCC: + case TYPE_BRCC_NO_DELAY_SLOT: + case TYPE_LOOP_END: + if (get_attr_type (succ) == TYPE_STORE) + return 1; + break; + default: + break; + } + + return 0; +} + /* The same functionality as arc_hazard. It is called in machine reorg before any other optimization. Hence, the NOP size is taken into account when doing branch shortening. */ @@ -7830,10 +7945,8 @@ workaround_arc_anomaly (void) for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) { succ0 = next_real_insn (insn); - if (arc_hazard (insn, succ0)) - { - emit_insn_before (gen_nopv (), succ0); - } + if (arc_hazard (insn, succ0) || arc_check_release31a (insn, succ0)) + emit_insn_before (gen_nopv (), succ0); } if (!TARGET_ARC700) @@ -9324,56 +9437,6 @@ disi_highpart (rtx in) return simplify_gen_subreg (SImode, in, DImode, TARGET_BIG_ENDIAN ? 0 : 4); } -/* Return length adjustment for INSN. - For ARC600: - A write to a core reg greater or equal to 32 must not be immediately - followed by a use. Anticipate the length requirement to insert a nop - between PRED and SUCC to prevent a hazard. 
*/ - -static int -arc600_corereg_hazard (rtx_insn *pred, rtx_insn *succ) -{ - if (!TARGET_ARC600) - return 0; - if (GET_CODE (PATTERN (pred)) == SEQUENCE) - pred = as_a (PATTERN (pred))->insn (1); - if (GET_CODE (PATTERN (succ)) == SEQUENCE) - succ = as_a (PATTERN (succ))->insn (0); - if (recog_memoized (pred) == CODE_FOR_mulsi_600 - || recog_memoized (pred) == CODE_FOR_umul_600 - || recog_memoized (pred) == CODE_FOR_mac_600 - || recog_memoized (pred) == CODE_FOR_mul64_600 - || recog_memoized (pred) == CODE_FOR_mac64_600 - || recog_memoized (pred) == CODE_FOR_umul64_600 - || recog_memoized (pred) == CODE_FOR_umac64_600) - return 0; - subrtx_iterator::array_type array; - FOR_EACH_SUBRTX (iter, array, PATTERN (pred), NONCONST) - { - const_rtx x = *iter; - switch (GET_CODE (x)) - { - case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC: - break; - default: - /* This is also fine for PRE/POST_MODIFY, because they - contain a SET. */ - continue; - } - rtx dest = XEXP (x, 0); - /* Check if this sets an extension register. N.B. we use 61 for the - condition codes, which is definitely not an extension register. */ - if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61 - /* Check if the same register is used by the PAT. */ - && (refers_to_regno_p - (REGNO (dest), - REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U, - PATTERN (succ), 0))) - return 4; - } - return 0; -} - /* Given a rtx, check if it is an assembly instruction or not. */ static int @@ -9408,23 +9471,6 @@ arc_asm_insn_p (rtx x) return 0; } -/* For ARC600: - A write to a core reg greater or equal to 32 must not be immediately - followed by a use. Anticipate the length requirement to insert a nop - between PRED and SUCC to prevent a hazard. */ - -int -arc_hazard (rtx_insn *pred, rtx_insn *succ) -{ - if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ)) - return 0; - - if (TARGET_ARC600) - return arc600_corereg_hazard (pred, succ); - - return 0; -} - /* Return length adjustment for INSN. */ int diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 39b3580..7170445 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -645,22 +645,21 @@ ;; is made that makes conditional execution required. 
(define_attr "tune" "none,arc600,arc7xx,arc700_4_2_std,arc700_4_2_xmac, \ -core_3, archs4x, archs4xd, archs4xd_slow" +archs4x, archs4xd" (const - (cond [(symbol_ref "arc_tune == TUNE_ARC600") + (cond [(symbol_ref "arc_tune == ARC_TUNE_ARC600") (const_string "arc600") (symbol_ref "arc_tune == ARC_TUNE_ARC7XX") (const_string "arc7xx") - (symbol_ref "arc_tune == TUNE_ARC700_4_2_STD") + (symbol_ref "arc_tune == ARC_TUNE_ARC700_4_2_STD") (const_string "arc700_4_2_std") - (symbol_ref "arc_tune == TUNE_ARC700_4_2_XMAC") + (symbol_ref "arc_tune == ARC_TUNE_ARC700_4_2_XMAC") (const_string "arc700_4_2_xmac") - (symbol_ref "arc_tune == ARC_TUNE_CORE_3") - (const_string "core_3") - (symbol_ref "arc_tune == TUNE_ARCHS4X") + (ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X") + (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X_REL31A")) (const_string "archs4x") - (ior (symbol_ref "arc_tune == TUNE_ARCHS4XD") - (symbol_ref "arc_tune == TUNE_ARCHS4XD_SLOW")) + (ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD") + (symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD_SLOW")) (const_string "archs4xd")] (const_string "none")))) @@ -671,13 +670,22 @@ core_3, archs4x, archs4xd, archs4xd_slow" (define_attr "tune_dspmpy" "none, slow, fast" (const - (cond [(ior (symbol_ref "arc_tune == TUNE_ARCHS4X") - (symbol_ref "arc_tune == TUNE_ARCHS4XD")) + (cond [(ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X") + (symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD")) (const_string "fast") - (symbol_ref "arc_tune == TUNE_ARCHS4XD_SLOW") + (symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD_SLOW") (const_string "slow")] (const_string "none")))) +(define_attr "tune_store" "none, normal, rel31a" + (const + (cond [(ior (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X") + (symbol_ref "arc_tune == ARC_TUNE_ARCHS4XD")) + (const_string "normal") + (symbol_ref "arc_tune == ARC_TUNE_ARCHS4X_REL31A") + (const_string "rel31a")] + (const_string "none")))) + ;; Move instructions. (define_expand "movqi" [(set (match_operand:QI 0 "move_dest_operand" "") diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt index eb85f49..0add5a2 100644 --- a/gcc/config/arc/arc.opt +++ b/gcc/config/arc/arc.opt @@ -276,6 +276,9 @@ Enum(arc_tune_attr) String(arc750d) Value(ARC_TUNE_ARC700_4_2_XMAC) EnumValue Enum(arc_tune_attr) String(core3) Value(ARC_TUNE_CORE_3) +EnumValue +Enum(arc_tune_attr) String(release31a) Value(ARC_TUNE_ARCHS4X_REL31A) + mindexed-loads Target Var(TARGET_INDEXED_LOADS) Init(TARGET_INDEXED_LOADS_DEFAULT) Enable the use of indexed loads. 
diff --git a/gcc/config/arc/arcHS4x.md b/gcc/config/arc/arcHS4x.md index 5136eba..1009833 100644 --- a/gcc/config/arc/arcHS4x.md +++ b/gcc/config/arc/arcHS4x.md @@ -27,14 +27,21 @@ (define_cpu_unit "hs4x_mult" "ARCHS4x") (define_cpu_unit "hs4x_x1, hs4x_x2" "ARCHS4x") (define_cpu_unit "hs4x_y1, hs4x_y2" "ARCHS4x") +(define_cpu_unit "hs4x_brcc0, hs4x_brcc1" "ARCHS4x") (define_insn_reservation "hs4x_brj_op" 1 (and (match_test "TARGET_HS") (eq_attr "tune" "archs4x, archs4xd") (eq_attr "type" "call, call_no_delay_slot, uncond_branch, jump, \ -branch, brcc,brcc_no_delay_slot, sfunc")) +branch, sfunc")) "hs4x_issue0") +(define_insn_reservation "hs4x_brcc_op" 1 + (and (match_test "TARGET_HS") + (eq_attr "tune" "archs4x, archs4xd") + (eq_attr "type" "brcc,brcc_no_delay_slot,loop_end")) + "hs4x_issue0 + hs4x_brcc0 + hs4x_brcc1") + (define_insn_reservation "hs4x_data_load_op" 4 (and (match_test "TARGET_HS") (eq_attr "tune" "archs4x, archs4xd") @@ -43,10 +50,16 @@ branch, brcc,brcc_no_delay_slot, sfunc")) (define_insn_reservation "hs4x_data_store_op" 1 (and (match_test "TARGET_HS") - (eq_attr "tune" "archs4x, archs4xd") + (eq_attr "tune_store" "normal") (eq_attr "type" "store")) "hs4x_issue1 + hs4x_ld_st") +(define_insn_reservation "hs4x_data_store_1_op" 2 + (and (match_test "TARGET_HS") + (eq_attr "tune_store" "rel31a") + (eq_attr "type" "store")) + "hs4x_issue1 + hs4x_ld_st + hs4x_brcc0, hs4x_brcc1") + ;; Advanced ALU (define_insn_reservation "hs4x_adv_alue_op" 4 (and (match_test "TARGET_HS") diff --git a/gcc/config/arc/t-multilib b/gcc/config/arc/t-multilib index 8d97ad1..921945e 100644 --- a/gcc/config/arc/t-multilib +++ b/gcc/config/arc/t-multilib @@ -21,9 +21,9 @@ # along with GCC; see the file COPYING3. If not see # . -MULTILIB_OPTIONS = mcpu=em/mcpu=em_mini/mcpu=arcem/mcpu=em4/mcpu=em4_dmips/mcpu=em4_fpus/mcpu=em4_fpuda/mcpu=quarkse_em/mcpu=hs/mcpu=archs/mcpu=hs34/mcpu=hs38/mcpu=hs38_linux/mcpu=hs4x/mcpu=hs4xd/mcpu=arc600/mcpu=arc600_norm/mcpu=arc600_mul64/mcpu=arc600_mul32x16/mcpu=arc601/mcpu=arc601_norm/mcpu=arc601_mul64/mcpu=arc601_mul32x16/mcpu=arc700/mcpu=nps400 +MULTILIB_OPTIONS = mcpu=em/mcpu=em_mini/mcpu=arcem/mcpu=em4/mcpu=em4_dmips/mcpu=em4_fpus/mcpu=em4_fpuda/mcpu=quarkse_em/mcpu=hs/mcpu=archs/mcpu=hs34/mcpu=hs38/mcpu=hs38_linux/mcpu=hs4x/mcpu=hs4xd/mcpu=hs4x_rel31/mcpu=arc600/mcpu=arc600_norm/mcpu=arc600_mul64/mcpu=arc600_mul32x16/mcpu=arc601/mcpu=arc601_norm/mcpu=arc601_mul64/mcpu=arc601_mul32x16/mcpu=arc700/mcpu=nps400 -MULTILIB_DIRNAMES = em em_mini arcem em4 em4_dmips em4_fpus em4_fpuda quarkse_em hs archs hs34 hs38 hs38_linux hs4x hs4xd arc600 arc600_norm arc600_mul64 arc600_mul32x16 arc601 arc601_norm arc601_mul64 arc601_mul32x16 arc700 nps400 +MULTILIB_DIRNAMES = em em_mini arcem em4 em4_dmips em4_fpus em4_fpuda quarkse_em hs archs hs34 hs38 hs38_linux hs4x hs4xd hs4x_rel31 arc600 arc600_norm arc600_mul64 arc600_mul32x16 arc601 arc601_norm arc601_mul64 arc601_mul32x16 arc700 nps400 # Aliases: MULTILIB_MATCHES = mcpu?arc600=mcpu?ARC600 diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 84d6f0f..94fe57a 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -20053,6 +20053,15 @@ Compile for ARC HS38 CPU. @item hs38_linux Compile for ARC HS38 CPU with all hardware extensions on. +@item hs4x +Compile for ARC HS4x CPU. + +@item hs4xd +Compile for ARC HS4xD CPU. + +@item hs4x_rel31 +Compile for ARC HS4x CPU release 3.10a. + @item arc600_norm Compile for ARC 600 CPU with @code{norm} instructions enabled. @@ -20662,6 +20671,13 @@ Tune for ARC725D CPU. 
@item ARC750D Tune for ARC750D CPU. +@item core3 +Tune for ARCv2 core3 type CPU. This option enable usage of +@code{dbnz} instruction. + +@item release31a +Tune for ARC4x release 3.10a. + @end table @item -mmultcost=@var{num} -- cgit v1.1 From 7df79970bfec96e186cd50ac951f7017c2109a13 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 18 Jul 2022 16:47:20 +0100 Subject: RISC-V/doc: Correct the name of `-mriscv-attribute' Correct the name of the `-mriscv-attribute' invocation option, including a typo in the negated form. gcc/ * doc/invoke.texi (Option Summary): Fix `-mno-riscv-attribute'. (RISC-V Options): Likewise, and `-mriscv-attribute'. --- gcc/doc/invoke.texi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 94fe57a..9f2f97c 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1219,7 +1219,7 @@ See RS/6000 and PowerPC Options. -mcmodel=medlow -mcmodel=medany @gol -mexplicit-relocs -mno-explicit-relocs @gol -mrelax -mno-relax @gol --mriscv-attribute -mmo-riscv-attribute @gol +-mriscv-attribute -mno-riscv-attribute @gol -malign-data=@var{type} @gol -mbig-endian -mlittle-endian @gol -mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{reg} @gol @@ -28377,8 +28377,8 @@ Take advantage of linker relaxations to reduce the number of instructions required to materialize symbol addresses. The default is to take advantage of linker relaxations. -@item -memit-attribute -@itemx -mno-emit-attribute +@item -mriscv-attribute +@itemx -mno-riscv-attribute Emit (do not emit) RISC-V attribute to record extra information into ELF objects. This feature requires at least binutils 2.32. -- cgit v1.1 From fa16bb8ac0aba681bc9242f9a9717824c4867f91 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 18 Jul 2022 16:47:20 +0100 Subject: RISC-V/doc: Correct the formatting of `-mstack-protector-guard-reg=' Add missing second space around the `-mstack-protector-guard-reg=' invocation option. gcc/ * doc/invoke.texi (Option Summary): Add missing second space around `-mstack-protector-guard-reg='. --- gcc/doc/invoke.texi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 9f2f97c..1f6c73ec 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1222,7 +1222,7 @@ See RS/6000 and PowerPC Options. -mriscv-attribute -mno-riscv-attribute @gol -malign-data=@var{type} @gol -mbig-endian -mlittle-endian @gol --mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{reg} @gol +-mstack-protector-guard=@var{guard} -mstack-protector-guard-reg=@var{reg} @gol -mstack-protector-guard-offset=@var{offset}} @emph{RL78 Options} -- cgit v1.1 From e9ee752bbe2cc5632b803b01dc7c98ff214aede9 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 18 Jul 2022 16:47:21 +0100 Subject: RISC-V/doc: Add index references for `mrelax' and `mriscv-attribute' Add missing index references for the `-mrelax' and `-mriscv-attribute' invocation options. gcc/ * doc/invoke.texi (RISC-V Options): Add index references for `mrelax' and `mriscv-attribute'. --- gcc/doc/invoke.texi | 2 ++ 1 file changed, 2 insertions(+) (limited to 'gcc') diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 1f6c73ec..94689be 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -28373,12 +28373,14 @@ limit optimization. 
@item -mrelax @itemx -mno-relax +@opindex mrelax Take advantage of linker relaxations to reduce the number of instructions required to materialize symbol addresses. The default is to take advantage of linker relaxations. @item -mriscv-attribute @itemx -mno-riscv-attribute +@opindex mriscv-attribute Emit (do not emit) RISC-V attribute to record extra information into ELF objects. This feature requires at least binutils 2.32. -- cgit v1.1 From 5e47c9333df6df1aa9da861f07e68f985d7d28fb Mon Sep 17 00:00:00 2001 From: Andrew MacLeod Date: Thu, 14 Jul 2022 12:35:55 -0400 Subject: Check if transitives need to be registered. Whenever a relation is added, register_transitive is always called. If neither operand was in a relation before, or this is not a new relation, then there is no need to register transitives. PR tree-optimization/106280 * value-relation.cc (dom_oracle::register_relation): Register transitives only when it is possible for there to be one. (dom_oracle::set_one_relation): Return NULL if this is an existing relation. --- gcc/value-relation.cc | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'gcc') diff --git a/gcc/value-relation.cc b/gcc/value-relation.cc index 13ce441..bd34425 100644 --- a/gcc/value-relation.cc +++ b/gcc/value-relation.cc @@ -967,8 +967,12 @@ dom_oracle::register_relation (basic_block bb, relation_kind k, tree op1, equiv_oracle::register_relation (bb, k, op1, op2); else { + // if neither op1 nor op2 are in a relation before this is registered, + // there will be no transitive. + bool check = bitmap_bit_p (m_relation_set, SSA_NAME_VERSION (op1)) + || bitmap_bit_p (m_relation_set, SSA_NAME_VERSION (op2)); relation_chain *ptr = set_one_relation (bb, k, op1, op2); - if (ptr) + if (ptr && check) register_transitives (bb, *ptr); } } @@ -1010,13 +1014,16 @@ dom_oracle::set_one_relation (basic_block bb, relation_kind k, tree op1, // Check into whether we can simply replace the relation rather than // intersecting it. THis may help with some optimistic iterative // updating algorithms. - ptr->intersect (vr); + bool new_rel = ptr->intersect (vr); if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, " to produce "); ptr->dump (dump_file); - fprintf (dump_file, "\n"); + fprintf (dump_file, " %s.\n", new_rel ? "Updated" : "No Change"); } + // If there was no change, return no record.. + if (!new_rel) + return NULL; } else { -- cgit v1.1 From 79fb1124c8c31da3ca70ee3a07bf15f3d2d87ab7 Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Tue, 19 Jul 2022 00:16:32 +0000 Subject: Daily bump. --- gcc/ChangeLog | 112 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/testsuite/ChangeLog | 21 +++++++++ 3 files changed, 134 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 994875f..43b70ba 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,115 @@ +2022-07-18 Andrew MacLeod + + PR tree-optimization/106280 + * value-relation.cc (dom_oracle::register_relation): Register + transitives only when it is possible for there to be one. + (dom_oracle::set_one_relation): Return NULL if this is an + existing relation. + +2022-07-18 Maciej W. Rozycki + + * doc/invoke.texi (RISC-V Options): Add index references for + `mrelax' and `mriscv-attribute'. + +2022-07-18 Maciej W. Rozycki + + * doc/invoke.texi (Option Summary): Add missing second space + around `-mstack-protector-guard-reg='. + +2022-07-18 Maciej W. 
Rozycki + + * doc/invoke.texi (Option Summary): Fix `-mno-riscv-attribute'. + (RISC-V Options): Likewise, and `-mriscv-attribute'. + +2022-07-18 Claudiu Zissulescu + + * config/arc/arc-arch.h (arc_tune_attr): Add + ARC_TUNE_ARCHS4X_REL31A variant. + * config/arc/arc.cc (arc_override_options): Tune options for + release 310a. + (arc_sched_issue_rate): Use correct enum. + (arc600_corereg_hazard): Textual change. + (arc_hazard): Add release 310a tunning. + * config/arc/arc.md (tune): Update and take into consideration new + tune option. + (tune_dspmpy): Likewise. + (tune_store): New attribute. + * config/arc/arc.opt (mtune): New tune option. + * config/arc/arcHS4x.md (hs4x_brcc0, hs4x_brcc1): New cpu units. + (hs4x_brcc_op): New instruction rezervation. + (hs4x_data_store_1_op): Likewise. + * config/arc/arc-cpus.def (hs4x_rel31): New cpu variant. + * config/arc/arc-tables.opt: Regenerate. + * config/arc/t-multilib: Likewise. + * doc/invoke.texi (ARC): Update mcpu and tune sections. + +2022-07-18 Richard Biener + + * tree-loop-distribution.cc (loop_distribution::distribute_loop): + When computing cost-based merging do not disregard builtin + classified partitions in some cases. + +2022-07-18 Richard Sandiford + + PR target/106253 + * config/arm/arm-builtins.cc (arm_builtin_vectorized_function): + Delete. + * config/arm/arm-protos.h (arm_builtin_vectorized_function): Delete. + * config/arm/arm.cc (TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): + Delete. + * config/arm/arm_neon_builtins.def (copysignf): Delete. + * config/arm/iterators.md (nvrint_pattern): New attribute. + * config/arm/neon.md (2): + New pattern. + (l2): + Likewise. + (neon_copysignf): Rename to... + (copysign3): ...this. + +2022-07-18 Claudiu Zissulescu + + * config/arc/arc.cc (arc_expand_epilogue): Adjust the frame + pointer first when in interrupts. + +2022-07-18 Richard Biener + + * tree-loop-distribution.cc (copy_loop_before): Add + the ability to replace the original LC PHI defs. + (generate_loops_for_partition): Pass through a flag + whether to redirect original LC PHI defs. + (generate_code_for_partition): Likewise. + (loop_distribution::distribute_loop): Compute the partition + that should provide the LC PHI defs for common reductions + and pass that down. + +2022-07-18 Richard Ball + + * config/aarch64/aarch64.cc (aarch64_evpc_trn): Use std:swap. + (aarch64_evpc_uzp): Likewise. + (aarch64_evpc_zip): Likewise. + +2022-07-18 Roger Sayle + + PR target/106231 + * config/i386/i386.md (*ctzsidi2_ext): New insn_and_split + to recognize any_extend:DI of ctz:SI which is implicitly extended. + (*ctzsidi2_ext_falsedep): New define_insn to model a DImode + extended ctz:SI that has preceding xor to break false dependency. + +2022-07-18 Roger Sayle + + * config/i386/predicates.md (x86_64_const_vector_operand): + Check the operand's mode matches the specified mode argument. + +2022-07-18 Roger Sayle + + * config/i386/sse.md (kunpckhi): Add UNSPEC_MASKOP unspec. + (kunpcksi): Likewise, add UNSPEC_MASKOP unspec. + (kunpckdi): Likewise, add UNSPEC_MASKOP unspec. + (vec_pack_trunc_qi): Update to specify the now required + UNSPEC_MASKOP unspec. + (vec_pack_trunc_): Likewise. 
+ 2022-07-16 Takayuki 'January June' Suwa * config/xtensa/xtensa.md diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 2ac5479..a394c7a 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20220718 +20220719 diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index cc8ea71..36913da 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,24 @@ +2022-07-18 Richard Biener + + * gcc.dg/tree-ssa/ldist-24.c: XFAIL. + * gcc.dg/tree-ssa/ldist-36.c: Adjust expected outcome. + +2022-07-18 Richard Sandiford + + PR target/106253 + * gcc.target/arm/vect_unary_1.c: New test. + * gcc.target/arm/vect_binary_1.c: Likewise. + +2022-07-18 Claudiu Zissulescu + + * gcc.target/arc/interrupt-13.c: New file. + +2022-07-18 Roger Sayle + + PR target/106231 + * gcc.target/i386/pr106231-1.c: New test case. + * gcc.target/i386/pr106231-2.c: New test case. + 2022-07-15 H.J. Lu PR target/85620 -- cgit v1.1 From 2180cdd8a0e65c2790a7732c82de87f83478487b Mon Sep 17 00:00:00 2001 From: Takayuki 'January June' Suwa Date: Mon, 18 Jul 2022 21:43:45 +0900 Subject: xtensa: Correct the relative RTX cost that corresponds to the Move Immediate "MOVI" instruction This patch corrects the overestimation of the relative cost of '(set (reg) (const_int N))' where N fits into the instruction itself. In fact, such overestimation confuses the RTL loop invariant motion pass. As a result, it brings almost no negative impact from the speed point of view, but addtiional reg-reg move instructions and register allocation pressure about the size. /* example, optimized for size */ extern int foo(void); extern int array[16]; void test_0(void) { unsigned int i; for (i = 0; i < sizeof(array)/sizeof(*array); ++i) array[i] = 1024; } void test_1(void) { unsigned int i; for (i = 0; i < sizeof(array)/sizeof(*array); ++i) array[i] = array[i] ? 1024 : 0; } void test_2(void) { unsigned int i; for (i = 0; i < sizeof(array)/sizeof(*array); ++i) array[i] = foo() ? 
0 : 1024; } ;; before .literal_position .literal .LC0, array test_0: l32r a3, .LC0 movi.n a2, 0 movi a4, 0x400 // OK .L2: s32i.n a4, a3, 0 addi.n a2, a2, 1 addi.n a3, a3, 4 bnei a2, 16, .L2 ret.n .literal_position .literal .LC1, array test_1: l32r a2, .LC1 movi.n a3, 0 movi a5, 0x400 // NG .L6: l32i.n a4, a2, 0 beqz.n a4, .L5 mov.n a4, a5 // should be "movi a4, 0x400" .L5: s32i.n a4, a2, 0 addi.n a3, a3, 1 addi.n a2, a2, 4 bnei a3, 16, .L6 ret.n .literal_position .literal .LC2, array test_2: addi sp, sp, -32 s32i.n a12, sp, 24 l32r a12, .LC2 s32i.n a13, sp, 20 s32i.n a14, sp, 16 s32i.n a15, sp, 12 s32i.n a0, sp, 28 addi a13, a12, 64 movi.n a15, 0 // NG movi a14, 0x400 // and wastes callee-saved registers (only 4) .L11: call0 foo mov.n a3, a14 // should be "movi a3, 0x400" movnez a3, a15, a2 s32i.n a3, a12, 0 addi.n a12, a12, 4 bne a12, a13, .L11 l32i.n a0, sp, 28 l32i.n a12, sp, 24 l32i.n a13, sp, 20 l32i.n a14, sp, 16 l32i.n a15, sp, 12 addi sp, sp, 32 ret.n ;; after .literal_position .literal .LC0, array test_0: l32r a3, .LC0 movi.n a2, 0 movi a4, 0x400 // OK .L2: s32i.n a4, a3, 0 addi.n a2, a2, 1 addi.n a3, a3, 4 bnei a2, 16, .L2 ret.n .literal_position .literal .LC1, array test_1: l32r a2, .LC1 movi.n a3, 0 .L6: l32i.n a4, a2, 0 beqz.n a4, .L5 movi a4, 0x400 // OK .L5: s32i.n a4, a2, 0 addi.n a3, a3, 1 addi.n a2, a2, 4 bnei a3, 16, .L6 ret.n .literal_position .literal .LC2, array test_2: addi sp, sp, -16 s32i.n a12, sp, 8 l32r a12, .LC2 s32i.n a13, sp, 4 s32i.n a0, sp, 12 addi a13, a12, 64 .L11: call0 foo movi.n a3, 0 // OK movi a4, 0x400 // and less register allocation pressure moveqz a3, a4, a2 s32i.n a3, a12, 0 addi.n a12, a12, 4 bne a12, a13, .L11 l32i.n a0, sp, 12 l32i.n a12, sp, 8 l32i.n a13, sp, 4 addi sp, sp, 16 ret.n gcc/ChangeLog: * config/xtensa/xtensa.cc (xtensa_rtx_costs): Change the relative cost of '(set (reg) (const_int N))' where N fits into signed 12-bit from 4 to 0 if optimizing for size. And use the appropriate macro instead of the bare number 4. --- gcc/config/xtensa/xtensa.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc index 9433745..a851a7a 100644 --- a/gcc/config/xtensa/xtensa.cc +++ b/gcc/config/xtensa/xtensa.cc @@ -4073,7 +4073,7 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code, case SET: if (xtensa_simm12b (INTVAL (x))) { - *total = 4; + *total = speed ? COSTS_N_INSNS (1) : 0; return true; } break; -- cgit v1.1 From 40f6e5912288256ee8ac41474f2dce7b6881c111 Mon Sep 17 00:00:00 2001 From: Roger Sayle Date: Tue, 19 Jul 2022 08:39:43 +0100 Subject: PR c/106264: Silence warnings from __builtin_modf et al. This middle-end patch resolves PR c/106264 which is a spurious warning regression caused by the tree-level expansion of modf, frexp and remquo producing "expression has no-effect" when the built-in function's result is ignored. When these built-ins were first expanded at tree-level, fold_builtin_n would blindly set TREE_NO_WARNING for all built-ins. Now that we're more discerning, we should precisely call suppress_warning selectively on those COMPOUND_EXPRs that need them. 2022-07-19 Roger Sayle Richard Biener gcc/ChangeLog PR c/106264 * builtins.cc (fold_builtin_frexp): Call suppress_warning on COMPOUND_EXPR to silence spurious warning if result isn't used. (fold_builtin_modf): Likewise. (do_mpfr_remquo): Likewise. gcc/testsuite/ChangeLog PR c/106264 * gcc.dg/pr106264.c: New test case. 
--- gcc/builtins.cc | 19 +++++++++++++------ gcc/testsuite/gcc.dg/pr106264.c | 27 +++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 6 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/pr106264.c (limited to 'gcc') diff --git a/gcc/builtins.cc b/gcc/builtins.cc index 35b9197..91b9c9f 100644 --- a/gcc/builtins.cc +++ b/gcc/builtins.cc @@ -8625,7 +8625,7 @@ fold_builtin_frexp (location_t loc, tree arg0, tree arg1, tree rettype) if (TYPE_MAIN_VARIANT (TREE_TYPE (arg1)) == integer_type_node) { const REAL_VALUE_TYPE *const value = TREE_REAL_CST_PTR (arg0); - tree frac, exp; + tree frac, exp, res; switch (value->cl) { @@ -8656,7 +8656,9 @@ fold_builtin_frexp (location_t loc, tree arg0, tree arg1, tree rettype) /* Create the COMPOUND_EXPR (*arg1 = trunc, frac). */ arg1 = fold_build2_loc (loc, MODIFY_EXPR, rettype, arg1, exp); TREE_SIDE_EFFECTS (arg1) = 1; - return fold_build2_loc (loc, COMPOUND_EXPR, rettype, arg1, frac); + res = fold_build2_loc (loc, COMPOUND_EXPR, rettype, arg1, frac); + suppress_warning (res, OPT_Wunused_value); + return res; } return NULL_TREE; @@ -8682,6 +8684,7 @@ fold_builtin_modf (location_t loc, tree arg0, tree arg1, tree rettype) { const REAL_VALUE_TYPE *const value = TREE_REAL_CST_PTR (arg0); REAL_VALUE_TYPE trunc, frac; + tree res; switch (value->cl) { @@ -8711,8 +8714,10 @@ fold_builtin_modf (location_t loc, tree arg0, tree arg1, tree rettype) arg1 = fold_build2_loc (loc, MODIFY_EXPR, rettype, arg1, build_real (rettype, trunc)); TREE_SIDE_EFFECTS (arg1) = 1; - return fold_build2_loc (loc, COMPOUND_EXPR, rettype, arg1, - build_real (rettype, frac)); + res = fold_build2_loc (loc, COMPOUND_EXPR, rettype, arg1, + build_real (rettype, frac)); + suppress_warning (res, OPT_Wunused_value); + return res; } return NULL_TREE; @@ -10673,8 +10678,10 @@ do_mpfr_remquo (tree arg0, tree arg1, tree arg_quo) integer_quo)); TREE_SIDE_EFFECTS (result_quo) = 1; /* Combine the quo assignment with the rem. */ - result = non_lvalue (fold_build2 (COMPOUND_EXPR, type, - result_quo, result_rem)); + result = fold_build2 (COMPOUND_EXPR, type, + result_quo, result_rem); + suppress_warning (result, OPT_Wunused_value); + result = non_lvalue (result); } } } diff --git a/gcc/testsuite/gcc.dg/pr106264.c b/gcc/testsuite/gcc.dg/pr106264.c new file mode 100644 index 0000000..6b4af49 --- /dev/null +++ b/gcc/testsuite/gcc.dg/pr106264.c @@ -0,0 +1,27 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 -Wall" } */ +double frexp (double, int*); +double modf (double, double*); +double remquo (double, double, int*); + +int f (void) +{ + int y; + frexp (1.0, &y); + return y; +} + +double g (void) +{ + double y; + modf (1.0, &y); + return y; +} + +int h (void) +{ + int y; + remquo (1.0, 1.0, &y); + return y; +} + -- cgit v1.1 From 0f129766fdb687394f0eea04f69268b5cc034cda Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 19 Jul 2022 10:02:40 +0200 Subject: lto/106334 - relax assert during WPA tree merging The dwarf2out map of tree to symbol + offset is populated too early when streaming in trees so that when WPA tree merging decides to recycle them the mapping prevails and if we are unlucky the same address is used for another tree with a symbol + offset DIE to record. The following mitigates the resulting ICE by relaxing the assert, allowing re-use of a slot during WPA. Delaying the register would be better but it's already somewhat hairy and uglifying this further doesn't look too important right now. 
PR lto/106334 * dwarf2out.cc (dwarf2out_register_external_die): Allow map entry re-use during WPA. --- gcc/dwarf2out.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'gcc') diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc index e3920c8..3ac39c1 100644 --- a/gcc/dwarf2out.cc +++ b/gcc/dwarf2out.cc @@ -6069,7 +6069,11 @@ dwarf2out_register_external_die (tree decl, const char *sym, if (!external_die_map) external_die_map = hash_map::create_ggc (1000); - gcc_checking_assert (!external_die_map->get (decl)); + /* When we do tree merging during WPA we can end up re-using GC memory + as there's currently no way to unregister external DIEs. Ideally + we'd register them only after merging finished but allowing override + here is easiest. See PR106334. */ + gcc_checking_assert (flag_wpa || !external_die_map->get (decl)); sym_off_pair p = { IDENTIFIER_POINTER (get_identifier (sym)), off }; external_die_map->put (decl, p); } -- cgit v1.1 From e4ff11a8f2e80adb8ada69bf35ee6a1ab18a9c85 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Tue, 19 Jul 2022 09:57:22 +0200 Subject: middle-end/106331 - fix mem attributes for string op arguments get_memory_rtx tries hard to come up with a MEM_EXPR to record in the memory attributes but in the last fallback fails to properly account for an unknown offset and thus, as visible in this testcase, incorrect alignment computed from set_mem_attributes. The following rectifies both parts. PR middle-end/106331 * builtins.cc (get_memory_rtx): Compute alignment from the original address and set MEM_OFFSET to unknown when we create a MEM_EXPR from the base object of the address. * gfortran.dg/pr106331.f90: New testcase. --- gcc/builtins.cc | 13 +++++++++---- gcc/testsuite/gfortran.dg/pr106331.f90 | 7 +++++++ 2 files changed, 16 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/pr106331.f90 (limited to 'gcc') diff --git a/gcc/builtins.cc b/gcc/builtins.cc index 91b9c9f..0d13197 100644 --- a/gcc/builtins.cc +++ b/gcc/builtins.cc @@ -1360,7 +1360,7 @@ expand_builtin_prefetch (tree exp) rtx get_memory_rtx (tree exp, tree len) { - tree orig_exp = exp; + tree orig_exp = exp, base; rtx addr, mem; /* When EXP is not resolved SAVE_EXPR, MEM_ATTRS can be still derived @@ -1391,10 +1391,11 @@ get_memory_rtx (tree exp, tree len) if (is_gimple_mem_ref_addr (TREE_OPERAND (exp, 0))) set_mem_attributes (mem, exp, 0); else if (TREE_CODE (TREE_OPERAND (exp, 0)) == ADDR_EXPR - && (exp = get_base_address (TREE_OPERAND (TREE_OPERAND (exp, 0), - 0)))) + && (base = get_base_address (TREE_OPERAND (TREE_OPERAND (exp, 0), + 0)))) { - exp = build_fold_addr_expr (exp); + unsigned int align = get_pointer_alignment (TREE_OPERAND (exp, 0)); + exp = build_fold_addr_expr (base); exp = fold_build2 (MEM_REF, build_array_type (char_type_node, build_range_type (sizetype, @@ -1402,6 +1403,10 @@ get_memory_rtx (tree exp, tree len) NULL)), exp, build_int_cst (ptr_type_node, 0)); set_mem_attributes (mem, exp, 0); + /* Since we stripped parts make sure the offset is unknown and the + alignment is computed from the original address. */ + clear_mem_offset (mem); + set_mem_align (mem, align); } set_mem_alias_set (mem, 0); return mem; diff --git a/gcc/testsuite/gfortran.dg/pr106331.f90 b/gcc/testsuite/gfortran.dg/pr106331.f90 new file mode 100644 index 0000000..3873863 --- /dev/null +++ b/gcc/testsuite/gfortran.dg/pr106331.f90 @@ -0,0 +1,7 @@ +! { dg-do run } +! 
{ dg-options "-Og" }
+
+PROGRAM main
+  CHARACTER(LEN=24) :: a(2)
+  a = ''
+END PROGRAM
--
cgit v1.1

From 4c3231302577445417715a7c22e879e4159376d3 Mon Sep 17 00:00:00 2001
From: Prathamesh Kulkarni
Date: Tue, 19 Jul 2022 17:43:26 +0530
Subject: forwprop: Use lhs type instead of arg0 in folding VEC_PERM_EXPR.

gcc/ChangeLog:

	* tree-ssa-forwprop.cc (simplify_permutation): Use lhs type
	instead of TREE_TYPE (arg0) as result type in folding VEC_PERM_EXPR.
---
 gcc/tree-ssa-forwprop.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'gcc')

diff --git a/gcc/tree-ssa-forwprop.cc b/gcc/tree-ssa-forwprop.cc
index fdc4bc8..d04cf4b 100644
--- a/gcc/tree-ssa-forwprop.cc
+++ b/gcc/tree-ssa-forwprop.cc
@@ -2661,7 +2661,7 @@ simplify_permutation (gimple_stmt_iterator *gsi)
 
       /* Shuffle of a constructor.  */
       bool ret = false;
-      tree res_type = TREE_TYPE (arg0);
+      tree res_type = TREE_TYPE (gimple_assign_lhs (stmt));
       tree opt = fold_ternary (VEC_PERM_EXPR, res_type, arg0, arg1, op2);
       if (!opt
	  || (TREE_CODE (opt) != CONSTRUCTOR && TREE_CODE (opt) != VECTOR_CST))
--
cgit v1.1

From edf0c132b19f73e5739715c2ac90c4ae1e96dc31 Mon Sep 17 00:00:00 2001
From: Martin Liska
Date: Tue, 19 Jul 2022 15:40:58 +0200
Subject: Remove trailing : for subheading.

gcc/ChangeLog:

	* doc/extend.texi: Remove trailing :.
---
 gcc/doc/extend.texi | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'gcc')

diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index dfbe33a..4222e76 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -11385,7 +11385,7 @@ keyword after the declarator.
 It is up to you to make sure that the assembler names you choose do not
 conflict with any other assembler symbols, or reference registers.
 
-@subsubheading Assembler names for data:
+@subsubheading Assembler names for data
 
 This sample shows how to specify the assembler name for data:
 
@@ -11407,7 +11407,7 @@ since such variables do not have assembler names.  If you are trying to
 put the variable in a particular register, see @ref{Explicit Register
 Variables}.
 
-@subsubheading Assembler names for functions:
+@subsubheading Assembler names for functions
 
 To specify the assembler name for functions, write a declaration for the
 function before its definition and put @code{asm} there, like this:
--
cgit v1.1
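As a quick illustration of the asm-label extension documented by the
extend.texi section touched in the last patch above, here is a minimal C
sketch; the assembler names "xyz_counter" and "xyz_bump" are invented for
this example and are not taken from any of the patches.

/* Minimal sketch of GCC's asm-label extension (illustrative names only).  */

/* Data: the object below is emitted under the assembler name
   "xyz_counter" instead of the default name derived from "counter".  */
int counter asm ("xyz_counter") = 0;

/* Functions: write a declaration carrying the asm label before the
   definition; the definition is then emitted as "xyz_bump".  */
extern int bump (int) asm ("xyz_bump");

int
bump (int x)
{
  counter += x;
  return counter;
}

Compiling this with "gcc -S" and inspecting the assembly should show the
symbols xyz_counter and xyz_bump rather than counter and bump.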