From 77e6870d7fc85a4139f7f27ee3287b103d374ab2 Mon Sep 17 00:00:00 2001
From: Bernd Edlinger <edlinger@gcc.gnu.org>
Date: Thu, 1 Dec 2016 06:06:04 +0000
Subject: crossconfig.m4 (*-linux*): Add link-check for memalign.

2016-12-01  Bernd Edlinger  <bernd.edlinger@hotmail.de>

        * crossconfig.m4 (*-linux*): Add link-check for memalign.
        * configure: Regenerated.

From-SVN: r243095
---
 libstdc++-v3/ChangeLog      |  7 ++++++-
 libstdc++-v3/configure      | 13 +++++++++++++
 libstdc++-v3/crossconfig.m4 |  1 +
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index cd4f5ae..b5f94c0 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,4 +1,9 @@
-2016-11-31  Tim Shen  <timshen@google.com>
+2016-12-01  Bernd Edlinger  <bernd.edlinger@hotmail.de>
+
+	* crossconfig.m4 (*-linux*): Add link-check for memalign.
+	* configure: Regenerated.
+
+2016-12-01  Tim Shen  <timshen@google.com>
 
 	PR libstdc++/71500
 	* include/bits/regex.h (basic_regex::basic_regex): Use ECMAScript
diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure
index f32197e..1f72e3f 100755
--- a/libstdc++-v3/configure
+++ b/libstdc++-v3/configure
@@ -59826,6 +59826,19 @@ _ACEOF
 fi
 done
 
+    for ac_func in aligned_alloc posix_memalign memalign _aligned_malloc
+do :
+  as_ac_var=`$as_echo "ac_cv_func_$ac_func" | $as_tr_sh`
+ac_fn_c_check_func "$LINENO" "$ac_func" "$as_ac_var"
+eval as_val=\$$as_ac_var
+   if test "x$as_val" = x""yes; then :
+  cat >>confdefs.h <<_ACEOF
+#define `$as_echo "HAVE_$ac_func" | $as_tr_cpp` 1
+_ACEOF
+
+fi
+done
+
 
 
diff --git a/libstdc++-v3/crossconfig.m4 b/libstdc++-v3/crossconfig.m4
index 6abc84f..4eaf208 100644
--- a/libstdc++-v3/crossconfig.m4
+++ b/libstdc++-v3/crossconfig.m4
@@ -157,6 +157,7 @@ case "${host}" in
     AC_DEFINE(_GLIBCXX_USE_RANDOM_TR1)
     GCC_CHECK_TLS
     AC_CHECK_FUNCS(__cxa_thread_atexit_impl)
+    AC_CHECK_FUNCS(aligned_alloc posix_memalign memalign _aligned_malloc)
     AM_ICONV
     ;;
   *-mingw32*)
-- 
cgit v1.1


From f73ee211c30c292316ffa6f55eca3531c67a4de7 Mon Sep 17 00:00:00 2001
From: Alan Modra <amodra@gmail.com>
Date: Thu, 1 Dec 2016 17:44:04 +1030
Subject: [RS6000] fix rtl-checking internal compiler error

	* gcc/config/rs6000/rs6000.c (insn_is_swappable_p): Properly
	look inside UNSPEC_VSX_XXSPLTW vec.

From-SVN: r243097
---
 gcc/ChangeLog              | 5 +++++
 gcc/config/rs6000/rs6000.c | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e2f7b25..e36f9d8 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-01  Alan Modra  <amodra@gmail.com>
+
+	* gcc/config/rs6000/rs6000.c (insn_is_swappable_p): Properly
+	look inside UNSPEC_VSX_XXSPLTW vec.
+
 2016-12-01  Segher Boessenkool  <segher@kernel.crashing.org>
 
 	PR rtl-optimization/78607
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 9fe98b7..7f307b1 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -40675,7 +40675,7 @@ insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
 	    if (GET_CODE (use_body) != SET
 		|| GET_CODE (SET_SRC (use_body)) != UNSPEC
 		|| XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
-		|| XEXP (XEXP (SET_SRC (use_body), 0), 1) != const0_rtx)
+		|| XVECEXP (SET_SRC (use_body), 0, 1) != const0_rtx)
 	      return 0;
 	  }
 	}
-- 
cgit v1.1


From d9b2d86c74a2153b07fb9d63baf220d385043a9d Mon Sep 17 00:00:00 2001
From: Ville Voutilainen <ville.voutilainen@gmail.com>
Date: Thu, 1 Dec 2016 09:14:19 +0200
Subject: The convertible_to traits need to use a variadic catch-all for the
 false-cases.

The convertible_to traits need to use a variadic catch-all for the
false-cases.
* include/std/istream (__is_convertible_to_basic_istream):
Change the parameter of the false-case of __check to a variadic.
* include/std/ostream (__is_convertible_to_basic_ostream):
Likewise.

From-SVN: r243098
---
 libstdc++-v3/ChangeLog           | 9 +++++++++
 libstdc++-v3/include/std/istream | 2 +-
 libstdc++-v3/include/std/ostream | 2 +-
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index b5f94c0..cc0b3ae 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,12 @@
+2016-12-01  Ville Voutilainen  <ville.voutilainen@gmail.com>
+
+	The convertible_to traits need to use a variadic catch-all for the
+	false-cases.
+	* include/std/istream (__is_convertible_to_basic_istream):
+	Change the parameter of the false-case of __check to a variadic.
+	* include/std/ostream (__is_convertible_to_basic_ostream):
+	Likewise.
+
 2016-12-01  Bernd Edlinger  <bernd.edlinger@hotmail.de>
 
 	* crossconfig.m4 (*-linux*): Add link-check for memalign.
diff --git a/libstdc++-v3/include/std/istream b/libstdc++-v3/include/std/istream
index 319e226..1d77d30 100644
--- a/libstdc++-v3/include/std/istream
+++ b/libstdc++-v3/include/std/istream
@@ -915,7 +915,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       template<typename _Ch, typename _Up>
       static basic_istream<_Ch, _Up>& __check(basic_istream<_Ch, _Up>*);
 
-      static void __check(void*);
+      static void __check(...);
     public:
       using istream_type =
 	decltype(__check(declval<typename remove_reference<_Tp>::type*>()));
diff --git a/libstdc++-v3/include/std/ostream b/libstdc++-v3/include/std/ostream
index 70fd10b..9dea778 100644
--- a/libstdc++-v3/include/std/ostream
+++ b/libstdc++-v3/include/std/ostream
@@ -619,7 +619,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     template<typename _Ch, typename _Up>
     static basic_ostream<_Ch, _Up>& __check(basic_ostream<_Ch, _Up>*);
 
-    static void __check(void*);
+    static void __check(...);
   public:
     using ostream_type =
       decltype(__check(declval<typename remove_reference<_Tp>::type*>()));
-- 
cgit v1.1


From 43d0b501eec49d6d4092fe0e5299aedf1d743124 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Thu, 1 Dec 2016 08:56:49 +0100
Subject: re PR target/78614 (ICE error: invalid rtl sharing found in the insn
 (verify_rtx_sharing) gcc/emit-rtl.c:2743)

	PR target/78614
	* config/rs6000/rs6000.c (rs6000_frame_related): Call
	set_used_flags (pat) before any simplifications.  Clear used flag on
	PARALLEL copy.  Don't guard add_reg_note call.  Call
	copy_rtx_if_shared on pat before storing it into
	REG_FRAME_RELATED_EXPR.

From-SVN: r243099
---
 gcc/ChangeLog              | 9 +++++++++
 gcc/config/rs6000/rs6000.c | 5 +++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e36f9d8..b3cc6305 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2016-12-01  Jakub Jelinek  <jakub@redhat.com>
+
+	PR target/78614
+	* config/rs6000/rs6000.c (rs6000_frame_related): Call
+	set_used_flags (pat) before any simplifications.  Clear used flag on
+	PARALLEL copy.  Don't guard add_reg_note call.  Call
+	copy_rtx_if_shared on pat before storing it into
+	REG_FRAME_RELATED_EXPR.
+
 2016-12-01  Alan Modra  <amodra@gmail.com>
 
 	* gcc/config/rs6000/rs6000.c (insn_is_swappable_p): Properly
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 7f307b1..e572620 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -27174,6 +27174,7 @@ rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
      Call simplify_replace_rtx on the SETs rather than the whole insn
      so as to leave the other stuff alone (for example USE of r12).  */
 
+  set_used_flags (pat);
   if (GET_CODE (pat) == SET)
     {
       if (repl)
@@ -27185,6 +27186,7 @@ rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
     {
       pat = shallow_copy_rtx (pat);
       XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
+      RTX_FLAG (pat, used) = 0;
 
       for (int i = 0; i < XVECLEN (pat, 0); i++)
 	if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
@@ -27207,8 +27209,7 @@ rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
     gcc_unreachable ();
 
   RTX_FRAME_RELATED_P (insn) = 1;
-  if (repl || reg2)
-    add_reg_note (insn, REG_FRAME_RELATED_EXPR, pat);
+  add_reg_note (insn, REG_FRAME_RELATED_EXPR, copy_rtx_if_shared (pat));
 
   return insn;
 }
-- 
cgit v1.1


From d26b3eb7658b48e6dadec752755f864652f19591 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Thu, 1 Dec 2016 10:24:55 +0100
Subject: re PR debug/78587 (dwarf2out.c:1517:45: runtime error: negation of
 -9223372036854775808 cannot be represented in type 'long int [4]'; cast to an
 unsigned type to negate this value to itself)

	PR debug/78587
	* dwarf2out.c (loc_descr_plus_const): For negative offset use
	uint_loc_descriptor instead of int_loc_descriptor and perform negation
	in unsigned HOST_WIDE_INT type.
	(scompare_loc_descriptor): Shift UINTVAL left instead of INTVAL.

	* gcc.dg/debug/pr78587.c: New test.

From-SVN: r243100
---
 gcc/ChangeLog                        |  6 ++++++
 gcc/dwarf2out.c                      |  7 ++++---
 gcc/testsuite/ChangeLog              |  5 +++++
 gcc/testsuite/gcc.dg/debug/pr78587.c | 23 +++++++++++++++++++++++
 4 files changed, 38 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/debug/pr78587.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b3cc6305..ef945b1 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,11 @@
 2016-12-01  Jakub Jelinek  <jakub@redhat.com>
 
+	PR debug/78587
+	* dwarf2out.c (loc_descr_plus_const): For negative offset use
+	uint_loc_descriptor instead of int_loc_descriptor and perform negation
+	in unsigned HOST_WIDE_INT type.
+	(scompare_loc_descriptor): Shift UINTVAL left instead of INTVAL.
+
 	PR target/78614
 	* config/rs6000/rs6000.c (rs6000_frame_related): Call
 	set_used_flags (pat) before any simplifications.  Clear used flag on
diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index 66a4919..bc328ab 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -1514,7 +1514,8 @@ loc_descr_plus_const (dw_loc_descr_ref *list_head, HOST_WIDE_INT offset)
 
   else
     {
-      loc->dw_loc_next = int_loc_descriptor (-offset);
+      loc->dw_loc_next
+	= uint_loc_descriptor (-(unsigned HOST_WIDE_INT) offset);
       add_loc_descr (&loc->dw_loc_next, new_loc_descr (DW_OP_minus, 0, 0));
     }
 }
@@ -13837,7 +13838,7 @@ scompare_loc_descriptor (enum dwarf_location_atom op, rtx rtl,
       if (CONST_INT_P (XEXP (rtl, 1))
 	  && GET_MODE_BITSIZE (op_mode) < HOST_BITS_PER_WIDE_INT
 	  && (size_of_int_loc_descriptor (shift) + 1
-	      + size_of_int_loc_descriptor (INTVAL (XEXP (rtl, 1)) << shift)
+	      + size_of_int_loc_descriptor (UINTVAL (XEXP (rtl, 1)) << shift)
 	      >= size_of_int_loc_descriptor (GET_MODE_MASK (op_mode)) + 1
 		 + size_of_int_loc_descriptor (INTVAL (XEXP (rtl, 1))
 					       & GET_MODE_MASK (op_mode))))
@@ -13852,7 +13853,7 @@ scompare_loc_descriptor (enum dwarf_location_atom op, rtx rtl,
   add_loc_descr (&op0, int_loc_descriptor (shift));
   add_loc_descr (&op0, new_loc_descr (DW_OP_shl, 0, 0));
   if (CONST_INT_P (XEXP (rtl, 1)))
-    op1 = int_loc_descriptor (INTVAL (XEXP (rtl, 1)) << shift);
+    op1 = int_loc_descriptor (UINTVAL (XEXP (rtl, 1)) << shift);
   else
     {
       add_loc_descr (&op1, int_loc_descriptor (shift));
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index bf4bd8a..2d1c182 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-01  Jakub Jelinek  <jakub@redhat.com>
+
+	PR debug/78587
+	* gcc.dg/debug/pr78587.c: New test.
+
 2016-12-01  Segher Boessenkool  <segher@kernel.crashing.org>
 
 	PR rtl-optimization/78607
diff --git a/gcc/testsuite/gcc.dg/debug/pr78587.c b/gcc/testsuite/gcc.dg/debug/pr78587.c
new file mode 100644
index 0000000..b368a2a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/debug/pr78587.c
@@ -0,0 +1,23 @@
+/* PR debug/78587 */
+/* { dg-do compile } */
+/* { dg-additional-options "-w" } */
+
+extern void bar (void);
+
+void
+foo (long long x)
+{
+  x ^= 9223372036854775808ULL;
+  bar ();
+}
+
+struct S { int w[4]; } a[1], b;
+
+void
+baz ()
+{
+  int e = (int) baz;
+  if (e <= -80)
+    e = 0;
+  b = a[e];
+}
-- 
cgit v1.1


From ccbf6355186e78ac85245962be0d33c0c4c28ac6 Mon Sep 17 00:00:00 2001
From: Andre Vehreschild <vehre@gcc.gnu.org>
Date: Thu, 1 Dec 2016 10:53:25 +0100
Subject: coarray_lib_alloc_4.f90: Fix for 32-bits.

gcc/testsuite/ChangeLog:

2016-12-01  Andre Vehreschild  <vehre@gcc.gnu.org>

	* gfortran.dg/coarray_lib_alloc_4.f90: Fix for 32-bits.

From-SVN: r243101
---
 gcc/testsuite/ChangeLog                           | 4 ++++
 gcc/testsuite/gfortran.dg/coarray_lib_alloc_4.f90 | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 2d1c182..98a14c6 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2016-12-01  Andre Vehreschild  <vehre@gcc.gnu.org>
+
+	* gfortran.dg/coarray_lib_alloc_4.f90: Fix for 32-bits.
+
 2016-12-01  Jakub Jelinek  <jakub@redhat.com>
 
 	PR debug/78587
diff --git a/gcc/testsuite/gfortran.dg/coarray_lib_alloc_4.f90 b/gcc/testsuite/gfortran.dg/coarray_lib_alloc_4.f90
index aea9fbf..7b72707 100644
--- a/gcc/testsuite/gfortran.dg/coarray_lib_alloc_4.f90
+++ b/gcc/testsuite/gfortran.dg/coarray_lib_alloc_4.f90
@@ -38,7 +38,7 @@ program test_caf_alloc
 end
 
 ! { dg-final { scan-tree-dump-times "_gfortran_caf_is_present \\(xx\\.token, 2 - \\(integer\\(kind=4\\)\\) xx\\.dim\\\[0\\\]\\.lbound, &caf_ref\\.\[0-9\]+\\)|_gfortran_caf_is_present \\(xx\\.token, 2 - xx\\.dim\\\[0\\\]\\.lbound, &caf_ref\\.\[0-9\]+\\)" 10 "original" } }
-! { dg-final { scan-tree-dump-times "_gfortran_caf_register \\(72, 1, &xx\\.token, \\(void \\*\\) &xx, 0B, 0B, 0\\)" 1 "original" } }
+! { dg-final { scan-tree-dump-times "_gfortran_caf_register \\(\[0-9\]+, 1, &xx\\.token, \\(void \\*\\) &xx, 0B, 0B, 0\\)" 1 "original" } }
 ! { dg-final { scan-tree-dump-times "_gfortran_caf_register \\(\[0-9\]+, 7" 2 "original" } }
 ! { dg-final { scan-tree-dump-times "_gfortran_caf_register \\(\[0-9\]+, 8" 2 "original" } }
 ! { dg-final { scan-tree-dump-times "_gfortran_caf_deregister \\(&xx\\.token, 0, 0B, 0B, 0\\)" 1 "original" } }
-- 
cgit v1.1


From 1636ebdc8a21300aa5aa6b8cfd423d397394c7a5 Mon Sep 17 00:00:00 2001
From: Georg-Johann Lay <avr@gjlay.de>
Date: Thu, 1 Dec 2016 10:09:56 +0000
Subject: avr.c (avr_print_operand): Use SYMBOL_REF_P if possible.

gcc/
	* config/avr/avr.c (avr_print_operand): Use SYMBOL_REF_P if possible.
	(avr_eval_addr_attrib, avr_asm_output_aligned_decl_common)
	(avr_asm_asm_output_aligned_bss, avr_addr_space_convert): Ditto.

From-SVN: r243104
---
 gcc/ChangeLog        |  6 ++++++
 gcc/config/avr/avr.c | 10 +++++-----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ef945b1..2683757 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-01  Georg-Johann Lay  <avr@gjlay.de>
+
+	* config/avr/avr.c (avr_print_operand): Use SYMBOL_REF_P if possible.
+	(avr_eval_addr_attrib, avr_asm_output_aligned_decl_common)
+	(avr_asm_asm_output_aligned_bss, avr_addr_space_convert): Ditto.
+
 2016-12-01  Jakub Jelinek  <jakub@redhat.com>
 
 	PR debug/78587
diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c
index d0c54c2..db3c55f 100644
--- a/gcc/config/avr/avr.c
+++ b/gcc/config/avr/avr.c
@@ -2726,7 +2726,7 @@ avr_print_operand (FILE *file, rtx x, int code)
     }
   else if (code == 'i')
     {
-      if (GET_CODE (x) == SYMBOL_REF && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_IO))
+      if (SYMBOL_REF_P (x) && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_IO))
 	avr_print_operand_address
 	  (file, VOIDmode, plus_constant (HImode, x, -avr_arch->sfr_offset));
       else
@@ -9585,7 +9585,7 @@ avr_handle_addr_attribute (tree *node, tree name, tree args,
 rtx
 avr_eval_addr_attrib (rtx x)
 {
-  if (GET_CODE (x) == SYMBOL_REF
+  if (SYMBOL_REF_P (x)
       && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_ADDRESS))
     {
       tree decl = SYMBOL_REF_DECL (x);
@@ -9896,7 +9896,7 @@ avr_asm_output_aligned_decl_common (FILE * stream,
   rtx symbol;
 
   if (mem != NULL_RTX && MEM_P (mem)
-      && GET_CODE ((symbol = XEXP (mem, 0))) == SYMBOL_REF
+      && SYMBOL_REF_P ((symbol = XEXP (mem, 0)))
       && (SYMBOL_REF_FLAGS (symbol) & (SYMBOL_FLAG_IO | SYMBOL_FLAG_ADDRESS)))
     {
 
@@ -9941,7 +9941,7 @@ avr_asm_asm_output_aligned_bss (FILE *file, tree decl, const char *name,
   rtx symbol;
 
   if (mem != NULL_RTX && MEM_P (mem)
-      && GET_CODE ((symbol = XEXP (mem, 0))) == SYMBOL_REF
+      && SYMBOL_REF_P ((symbol = XEXP (mem, 0)))
       && (SYMBOL_REF_FLAGS (symbol) & (SYMBOL_FLAG_IO | SYMBOL_FLAG_ADDRESS)))
     {
       if (!(SYMBOL_REF_FLAGS (symbol) & SYMBOL_FLAG_ADDRESS))
@@ -12715,7 +12715,7 @@ avr_addr_space_convert (rtx src, tree type_from, tree type_to)
          but are located in flash.  In that case we patch the incoming
          address space.  */
 
-      if (SYMBOL_REF == GET_CODE (sym)
+      if (SYMBOL_REF_P (sym)
           && ADDR_SPACE_FLASH == AVR_SYMBOL_GET_ADDR_SPACE (sym))
         {
           as_from = ADDR_SPACE_FLASH;
-- 
cgit v1.1


From b0da97091dbeca1e2653208febaa747f4e5a85bb Mon Sep 17 00:00:00 2001
From: Georg-Johann Lay <avr@gjlay.de>
Date: Thu, 1 Dec 2016 10:21:31 +0000
Subject: tiny-memx.c: Only perform if target avr_tiny.

gcc/testsuite/
	* gcc.target/avr/tiny-memx.c: Only perform if target avr_tiny.
	* gcc.target/avr/tiny-caller-save.c: Ditto.

From-SVN: r243105
---
 gcc/testsuite/ChangeLog                         | 5 +++++
 gcc/testsuite/gcc.target/avr/tiny-caller-save.c | 2 +-
 gcc/testsuite/gcc.target/avr/tiny-memx.c        | 2 +-
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 98a14c6..ab55b43 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-01  Georg-Johann Lay  <avr@gjlay.de>
+
+	* gcc.target/avr/tiny-memx.c: Only perform if target avr_tiny.
+	* gcc.target/avr/tiny-caller-save.c: Ditto.
+
 2016-12-01  Andre Vehreschild  <vehre@gcc.gnu.org>
 
 	* gfortran.dg/coarray_lib_alloc_4.f90: Fix for 32-bits.
diff --git a/gcc/testsuite/gcc.target/avr/tiny-caller-save.c b/gcc/testsuite/gcc.target/avr/tiny-caller-save.c
index 63fad3a..ff35161 100644
--- a/gcc/testsuite/gcc.target/avr/tiny-caller-save.c
+++ b/gcc/testsuite/gcc.target/avr/tiny-caller-save.c
@@ -1,4 +1,4 @@
-/* { dg-do compile } */
+/* { dg-do compile { target avr_tiny } } */
 /* { dg-options "-mmcu=avrtiny -gdwarf -Os" } */
 
 /* This is a stripped down piece of libgcc2.c that triggerd an ICE for avr with
diff --git a/gcc/testsuite/gcc.target/avr/tiny-memx.c b/gcc/testsuite/gcc.target/avr/tiny-memx.c
index cdda86b..f691dcf 100644
--- a/gcc/testsuite/gcc.target/avr/tiny-memx.c
+++ b/gcc/testsuite/gcc.target/avr/tiny-memx.c
@@ -1,4 +1,4 @@
-/* { dg-do compile } */
+/* { dg-do compile { target avr_tiny } } */
 /* { dg-options "-mmcu=avrtiny" } */
 
 const __memx char ascmonth[] = "Jan"; /* { dg-error "not supported" } */
-- 
cgit v1.1


From a1fc386ac283d309eb6af8cf7b009d528bc52a9a Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Thu, 1 Dec 2016 12:15:44 +0000
Subject: tree-ssa-alias.c (indirect_refs_may_alias_p): Do not treat arrays
 with same type as objects that cannot overlap.

2016-12-01  Richard Biener  <rguenther@suse.de>

	* tree-ssa-alias.c (indirect_refs_may_alias_p): Do not
	treat arrays with same type as objects that cannot overlap.

	* gcc.dg/torture/alias-2.c: New testcase.

From-SVN: r243106
---
 gcc/ChangeLog                          |  5 +++++
 gcc/testsuite/ChangeLog                |  4 ++++
 gcc/testsuite/gcc.dg/torture/alias-2.c | 17 +++++++++++++++++
 3 files changed, 26 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/torture/alias-2.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 2683757..19cb0ce 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-01  Richard Biener  <rguenther@suse.de>
+
+	* tree-ssa-alias.c (indirect_refs_may_alias_p): Do not
+	treat arrays with same type as objects that cannot overlap.
+
 2016-12-01  Georg-Johann Lay  <avr@gjlay.de>
 
 	* config/avr/avr.c (avr_print_operand): Use SYMBOL_REF_P if possible.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index ab55b43..447d9fb 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2016-12-01  Richard Biener  <rguenther@suse.de>
+
+	* gcc.dg/torture/alias-2.c: New testcase.
+
 2016-12-01  Georg-Johann Lay  <avr@gjlay.de>
 
 	* gcc.target/avr/tiny-memx.c: Only perform if target avr_tiny.
diff --git a/gcc/testsuite/gcc.dg/torture/alias-2.c b/gcc/testsuite/gcc.dg/torture/alias-2.c
new file mode 100644
index 0000000..329d46a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/alias-2.c
@@ -0,0 +1,17 @@
+/* { dg-do run } */
+
+/* We do not want to treat int[3] as an object that cannot overlap
+   itself but treat it as arbitrary sub-array of a larger array object.  */
+int ar1(int (*p)[3], int (*q)[3])
+{
+  (*p)[0] = 1;
+  (*q)[1] = 2;
+  return (*p)[0];
+}
+int main()
+{
+  int a[4];
+  if (ar1 ((int (*)[3])&a[1], (int (*)[3])&a[0]) != 2)
+    __builtin_abort ();
+  return 0;
+}
-- 
cgit v1.1


From a41e62e743af77da51413bd6f865f4c270f11674 Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenth@gcc.gnu.org>
Date: Thu, 1 Dec 2016 12:22:32 +0000
Subject: tree-ssa-alias.c (indirect_refs_may_alias_p): Do not treat arrays
 with same type as objects that cannot overlap.

2016-12-01  Richard Biener  <rguenther@suse.de>

	* tree-ssa-alias.c (indirect_refs_may_alias_p): Do not
	treat arrays with same type as objects that cannot overlap.

	* gcc.dg/torture/alias-2.c: New testcase.

From-SVN: r243107
---
 gcc/tree-ssa-alias.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c
index ebae6cf..10f1677 100644
--- a/gcc/tree-ssa-alias.c
+++ b/gcc/tree-ssa-alias.c
@@ -1355,7 +1355,10 @@ indirect_refs_may_alias_p (tree ref1 ATTRIBUTE_UNUSED, tree base1,
       && same_type_for_tbaa (TREE_TYPE (base1), TREE_TYPE (ptrtype1)) == 1
       && same_type_for_tbaa (TREE_TYPE (base2), TREE_TYPE (ptrtype2)) == 1
       && same_type_for_tbaa (TREE_TYPE (ptrtype1),
-			     TREE_TYPE (ptrtype2)) == 1)
+			     TREE_TYPE (ptrtype2)) == 1
+      /* But avoid treating arrays as "objects", instead assume they
+         can overlap by an exact multiple of their element size.  */
+      && TREE_CODE (TREE_TYPE (ptrtype1)) != ARRAY_TYPE)
     return ranges_overlap_p (offset1, max_size1, offset2, max_size2);
 
   /* Do type-based disambiguation.  */
-- 
cgit v1.1


From 825fba526ce816bd26887457e2f0f4ca05b1ccdd Mon Sep 17 00:00:00 2001
From: Matthias Klose <doko@gcc.gnu.org>
Date: Thu, 1 Dec 2016 12:31:49 +0000
Subject: configure.ac: Don't use pkg-config to check for bdw-gc.

<toplevel>

        * configure.ac: Don't use pkg-config to check for bdw-gc.
        * configure: Regenerate.

config/

        * pkg.m4: Remove.

libobjc/

        * configure.ac: Don't use pkg-config to check for bdw-gc.
        * configure: Regenerate.

gcc/

        * doc/install.texi: Don't use pkg-config to check for bdw-gc.

From-SVN: r243108
---
 ChangeLog            |   5 +
 config/ChangeLog     |   4 +
 config/pkg.m4        | 825 ---------------------------------------------------
 configure            | 144 +--------
 configure.ac         |   7 +-
 gcc/ChangeLog        |   5 +
 gcc/doc/install.texi |   3 +-
 libobjc/ChangeLog    |   5 +
 libobjc/configure    | 152 +---------
 libobjc/configure.ac |  11 +-
 10 files changed, 32 insertions(+), 1129 deletions(-)
 delete mode 100644 config/pkg.m4

diff --git a/ChangeLog b/ChangeLog
index 7876d60..a3320f1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-01  Matthias Klose  <doko@ubuntu.com>
+
+	* configure.ac: Don't use pkg-config to check for bdw-gc.
+	* configure: Regenerate.
+
 2016-11-30  Matthias Klose  <doko@ubuntu.com>
 
 	* Makefile.def: Remove reference to boehm-gc target module.
diff --git a/config/ChangeLog b/config/ChangeLog
index ed59787..8dcb483 100644
--- a/config/ChangeLog
+++ b/config/ChangeLog
@@ -1,3 +1,7 @@
+2016-12-01  Matthias Klose  <doko@ubuntu.com>
+
+	* pkg.m4: Remove.
+
 2016-11-30  Matthias Klose  <doko@ubuntu.com>
 
 	* pkg.m4: New file.
diff --git a/config/pkg.m4 b/config/pkg.m4
deleted file mode 100644
index 0301d27..0000000
--- a/config/pkg.m4
+++ /dev/null
@@ -1,825 +0,0 @@
-dnl pkg.m4 - Macros to locate and utilise pkg-config.   -*- Autoconf -*-
-dnl serial 11 (pkg-config-0.29)
-dnl
-dnl Copyright © 2004 Scott James Remnant <scott@netsplit.com>.
-dnl Copyright © 2012-2015 Dan Nicholson <dbn.lists@gmail.com>
-dnl
-dnl This program is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU General Public License as published by
-dnl the Free Software Foundation; either version 2 of the License, or
-dnl (at your option) any later version.
-dnl
-dnl This program is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl General Public License for more details.
-dnl
-dnl You should have received a copy of the GNU General Public License
-dnl along with this program; if not, write to the Free Software
-dnl Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-dnl 02111-1307, USA.
-dnl
-dnl As a special exception to the GNU General Public License, if you
-dnl distribute this file as part of a program that contains a
-dnl configuration script generated by Autoconf, you may include it under
-dnl the same distribution terms that you use for the rest of that
-dnl program.
-
-dnl PKG_PREREQ(MIN-VERSION)
-dnl -----------------------
-dnl Since: 0.29
-dnl
-dnl Verify that the version of the pkg-config macros are at least
-dnl MIN-VERSION. Unlike PKG_PROG_PKG_CONFIG, which checks the user's
-dnl installed version of pkg-config, this checks the developer's version
-dnl of pkg.m4 when generating configure.
-dnl
-dnl To ensure that this macro is defined, also add:
-dnl m4_ifndef([PKG_PREREQ],
-dnl     [m4_fatal([must install pkg-config 0.29 or later before running autoconf/autogen])])
-dnl
-dnl See the "Since" comment for each macro you use to see what version
-dnl of the macros you require.
-m4_defun([PKG_PREREQ],
-[m4_define([PKG_MACROS_VERSION], [0.29])
-m4_if(m4_version_compare(PKG_MACROS_VERSION, [$1]), -1,
-    [m4_fatal([pkg.m4 version $1 or higher is required but ]PKG_MACROS_VERSION[ found])])
-])dnl PKG_PREREQ
-
-dnl PKG_PROG_PKG_CONFIG([MIN-VERSION])
-dnl ----------------------------------
-dnl Since: 0.16
-dnl
-dnl Search for the pkg-config tool and set the PKG_CONFIG variable to
-dnl first found in the path. Checks that the version of pkg-config found
-dnl is at least MIN-VERSION. If MIN-VERSION is not specified, 0.9.0 is
-dnl used since that's the first version where most current features of
-dnl pkg-config existed.
-AC_DEFUN([PKG_PROG_PKG_CONFIG],
-[m4_pattern_forbid([^_?PKG_[A-Z_]+$])
-m4_pattern_allow([^PKG_CONFIG(_(PATH|LIBDIR|SYSROOT_DIR|ALLOW_SYSTEM_(CFLAGS|LIBS)))?$])
-m4_pattern_allow([^PKG_CONFIG_(DISABLE_UNINSTALLED|TOP_BUILD_DIR|DEBUG_SPEW)$])
-AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility])
-AC_ARG_VAR([PKG_CONFIG_PATH], [directories to add to pkg-config's search path])
-AC_ARG_VAR([PKG_CONFIG_LIBDIR], [path overriding pkg-config's built-in search path])
-
-if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
-	AC_PATH_TOOL([PKG_CONFIG], [pkg-config])
-fi
-if test -n "$PKG_CONFIG"; then
-	_pkg_min_version=m4_default([$1], [0.9.0])
-	AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version])
-	if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then
-		AC_MSG_RESULT([yes])
-	else
-		AC_MSG_RESULT([no])
-		PKG_CONFIG=""
-	fi
-fi[]dnl
-])dnl PKG_PROG_PKG_CONFIG
-
-dnl PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
-dnl -------------------------------------------------------------------
-dnl Since: 0.18
-dnl
-dnl Check to see whether a particular set of modules exists. Similar to
-dnl PKG_CHECK_MODULES(), but does not set variables or print errors.
-dnl
-dnl Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG])
-dnl only at the first occurence in configure.ac, so if the first place
-dnl it's called might be skipped (such as if it is within an "if", you
-dnl have to call PKG_CHECK_EXISTS manually
-AC_DEFUN([PKG_CHECK_EXISTS],
-[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
-if test -n "$PKG_CONFIG" && \
-    AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then
-  m4_default([$2], [:])
-m4_ifvaln([$3], [else
-  $3])dnl
-fi])
-
-dnl _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES])
-dnl ---------------------------------------------
-dnl Internal wrapper calling pkg-config via PKG_CONFIG and setting
-dnl pkg_failed based on the result.
-m4_define([_PKG_CONFIG],
-[if test -n "$$1"; then
-    pkg_cv_[]$1="$$1"
- elif test -n "$PKG_CONFIG"; then
-    PKG_CHECK_EXISTS([$3],
-                     [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null`
-		      test "x$?" != "x0" && pkg_failed=yes ],
-		     [pkg_failed=yes])
- else
-    pkg_failed=untried
-fi[]dnl
-])dnl _PKG_CONFIG
-
-dnl _PKG_SHORT_ERRORS_SUPPORTED
-dnl ---------------------------
-dnl Internal check to see if pkg-config supports short errors.
-AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED],
-[AC_REQUIRE([PKG_PROG_PKG_CONFIG])
-if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
-        _pkg_short_errors_supported=yes
-else
-        _pkg_short_errors_supported=no
-fi[]dnl
-])dnl _PKG_SHORT_ERRORS_SUPPORTED
-
-
-dnl PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
-dnl   [ACTION-IF-NOT-FOUND])
-dnl --------------------------------------------------------------
-dnl Since: 0.4.0
-dnl
-dnl Note that if there is a possibility the first call to
-dnl PKG_CHECK_MODULES might not happen, you should be sure to include an
-dnl explicit call to PKG_PROG_PKG_CONFIG in your configure.ac
-AC_DEFUN([PKG_CHECK_MODULES],
-[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
-AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl
-AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl
-
-pkg_failed=no
-AC_MSG_CHECKING([for $1])
-
-_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2])
-_PKG_CONFIG([$1][_LIBS], [libs], [$2])
-
-m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS
-and $1[]_LIBS to avoid the need to call pkg-config.
-See the pkg-config man page for more details.])
-
-if test $pkg_failed = yes; then
-   	AC_MSG_RESULT([no])
-        _PKG_SHORT_ERRORS_SUPPORTED
-        if test $_pkg_short_errors_supported = yes; then
-	        $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1`
-        else 
-	        $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1`
-        fi
-	# Put the nasty error message in config.log where it belongs
-	echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD
-
-	m4_default([$4], [AC_MSG_ERROR(
-[Package requirements ($2) were not met:
-
-$$1_PKG_ERRORS
-
-Consider adjusting the PKG_CONFIG_PATH environment variable if you
-installed software in a non-standard prefix.
-
-_PKG_TEXT])[]dnl
-        ])
-elif test $pkg_failed = untried; then
-     	AC_MSG_RESULT([no])
-	m4_default([$4], [AC_MSG_FAILURE(
-[The pkg-config script could not be found or is too old.  Make sure it
-is in your PATH or set the PKG_CONFIG environment variable to the full
-path to pkg-config.
-
-_PKG_TEXT
-
-To get pkg-config, see <http://pkg-config.freedesktop.org/>.])[]dnl
-        ])
-else
-	$1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS
-	$1[]_LIBS=$pkg_cv_[]$1[]_LIBS
-        AC_MSG_RESULT([yes])
-	$3
-fi[]dnl
-])dnl PKG_CHECK_MODULES
-
-
-dnl PKG_CHECK_MODULES_STATIC(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
-dnl   [ACTION-IF-NOT-FOUND])
-dnl ---------------------------------------------------------------------
-dnl Since: 0.29
-dnl
-dnl Checks for existence of MODULES and gathers its build flags with
-dnl static libraries enabled. Sets VARIABLE-PREFIX_CFLAGS from --cflags
-dnl and VARIABLE-PREFIX_LIBS from --libs.
-dnl
-dnl Note that if there is a possibility the first call to
-dnl PKG_CHECK_MODULES_STATIC might not happen, you should be sure to
-dnl include an explicit call to PKG_PROG_PKG_CONFIG in your
-dnl configure.ac.
-AC_DEFUN([PKG_CHECK_MODULES_STATIC],
-[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
-_save_PKG_CONFIG=$PKG_CONFIG
-PKG_CONFIG="$PKG_CONFIG --static"
-PKG_CHECK_MODULES($@)
-PKG_CONFIG=$_save_PKG_CONFIG[]dnl
-])dnl PKG_CHECK_MODULES_STATIC
-
-
-dnl PKG_INSTALLDIR([DIRECTORY])
-dnl -------------------------
-dnl Since: 0.27
-dnl
-dnl Substitutes the variable pkgconfigdir as the location where a module
-dnl should install pkg-config .pc files. By default the directory is
-dnl $libdir/pkgconfig, but the default can be changed by passing
-dnl DIRECTORY. The user can override through the --with-pkgconfigdir
-dnl parameter.
-AC_DEFUN([PKG_INSTALLDIR],
-[m4_pushdef([pkg_default], [m4_default([$1], ['${libdir}/pkgconfig'])])
-m4_pushdef([pkg_description],
-    [pkg-config installation directory @<:@]pkg_default[@:>@])
-AC_ARG_WITH([pkgconfigdir],
-    [AS_HELP_STRING([--with-pkgconfigdir], pkg_description)],,
-    [with_pkgconfigdir=]pkg_default)
-AC_SUBST([pkgconfigdir], [$with_pkgconfigdir])
-m4_popdef([pkg_default])
-m4_popdef([pkg_description])
-])dnl PKG_INSTALLDIR
-
-
-dnl PKG_NOARCH_INSTALLDIR([DIRECTORY])
-dnl --------------------------------
-dnl Since: 0.27
-dnl
-dnl Substitutes the variable noarch_pkgconfigdir as the location where a
-dnl module should install arch-independent pkg-config .pc files. By
-dnl default the directory is $datadir/pkgconfig, but the default can be
-dnl changed by passing DIRECTORY. The user can override through the
-dnl --with-noarch-pkgconfigdir parameter.
-AC_DEFUN([PKG_NOARCH_INSTALLDIR],
-[m4_pushdef([pkg_default], [m4_default([$1], ['${datadir}/pkgconfig'])])
-m4_pushdef([pkg_description],
-    [pkg-config arch-independent installation directory @<:@]pkg_default[@:>@])
-AC_ARG_WITH([noarch-pkgconfigdir],
-    [AS_HELP_STRING([--with-noarch-pkgconfigdir], pkg_description)],,
-    [with_noarch_pkgconfigdir=]pkg_default)
-AC_SUBST([noarch_pkgconfigdir], [$with_noarch_pkgconfigdir])
-m4_popdef([pkg_default])
-m4_popdef([pkg_description])
-])dnl PKG_NOARCH_INSTALLDIR
-
-
-dnl PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE,
-dnl [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
-dnl -------------------------------------------
-dnl Since: 0.28
-dnl
-dnl Retrieves the value of the pkg-config variable for the given module.
-AC_DEFUN([PKG_CHECK_VAR],
-[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
-AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl
-
-_PKG_CONFIG([$1], [variable="][$3]["], [$2])
-AS_VAR_COPY([$1], [pkg_cv_][$1])
-
-AS_VAR_IF([$1], [""], [$5], [$4])dnl
-])dnl PKG_CHECK_VAR
-dnl pkg.m4 - Macros to locate and utilise pkg-config.   -*- Autoconf -*-
-dnl serial 11 (pkg-config-0.29)
-dnl
-dnl Copyright © 2004 Scott James Remnant <scott@netsplit.com>.
-dnl Copyright © 2012-2015 Dan Nicholson <dbn.lists@gmail.com>
-dnl
-dnl This program is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU General Public License as published by
-dnl the Free Software Foundation; either version 2 of the License, or
-dnl (at your option) any later version.
-dnl
-dnl This program is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl General Public License for more details.
-dnl
-dnl You should have received a copy of the GNU General Public License
-dnl along with this program; if not, write to the Free Software
-dnl Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-dnl 02111-1307, USA.
-dnl
-dnl As a special exception to the GNU General Public License, if you
-dnl distribute this file as part of a program that contains a
-dnl configuration script generated by Autoconf, you may include it under
-dnl the same distribution terms that you use for the rest of that
-dnl program.
-
-dnl PKG_PREREQ(MIN-VERSION)
-dnl -----------------------
-dnl Since: 0.29
-dnl
-dnl Verify that the version of the pkg-config macros are at least
-dnl MIN-VERSION. Unlike PKG_PROG_PKG_CONFIG, which checks the user's
-dnl installed version of pkg-config, this checks the developer's version
-dnl of pkg.m4 when generating configure.
-dnl
-dnl To ensure that this macro is defined, also add:
-dnl m4_ifndef([PKG_PREREQ],
-dnl     [m4_fatal([must install pkg-config 0.29 or later before running autoconf/autogen])])
-dnl
-dnl See the "Since" comment for each macro you use to see what version
-dnl of the macros you require.
-m4_defun([PKG_PREREQ],
-[m4_define([PKG_MACROS_VERSION], [0.29])
-m4_if(m4_version_compare(PKG_MACROS_VERSION, [$1]), -1,
-    [m4_fatal([pkg.m4 version $1 or higher is required but ]PKG_MACROS_VERSION[ found])])
-])dnl PKG_PREREQ
-
-dnl PKG_PROG_PKG_CONFIG([MIN-VERSION])
-dnl ----------------------------------
-dnl Since: 0.16
-dnl
-dnl Search for the pkg-config tool and set the PKG_CONFIG variable to
-dnl first found in the path. Checks that the version of pkg-config found
-dnl is at least MIN-VERSION. If MIN-VERSION is not specified, 0.9.0 is
-dnl used since that's the first version where most current features of
-dnl pkg-config existed.
-AC_DEFUN([PKG_PROG_PKG_CONFIG],
-[m4_pattern_forbid([^_?PKG_[A-Z_]+$])
-m4_pattern_allow([^PKG_CONFIG(_(PATH|LIBDIR|SYSROOT_DIR|ALLOW_SYSTEM_(CFLAGS|LIBS)))?$])
-m4_pattern_allow([^PKG_CONFIG_(DISABLE_UNINSTALLED|TOP_BUILD_DIR|DEBUG_SPEW)$])
-AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility])
-AC_ARG_VAR([PKG_CONFIG_PATH], [directories to add to pkg-config's search path])
-AC_ARG_VAR([PKG_CONFIG_LIBDIR], [path overriding pkg-config's built-in search path])
-
-if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
-	AC_PATH_TOOL([PKG_CONFIG], [pkg-config])
-fi
-if test -n "$PKG_CONFIG"; then
-	_pkg_min_version=m4_default([$1], [0.9.0])
-	AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version])
-	if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then
-		AC_MSG_RESULT([yes])
-	else
-		AC_MSG_RESULT([no])
-		PKG_CONFIG=""
-	fi
-fi[]dnl
-])dnl PKG_PROG_PKG_CONFIG
-
-dnl PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
-dnl -------------------------------------------------------------------
-dnl Since: 0.18
-dnl
-dnl Check to see whether a particular set of modules exists. Similar to
-dnl PKG_CHECK_MODULES(), but does not set variables or print errors.
-dnl
-dnl Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG])
-dnl only at the first occurence in configure.ac, so if the first place
-dnl it's called might be skipped (such as if it is within an "if", you
-dnl have to call PKG_CHECK_EXISTS manually
-AC_DEFUN([PKG_CHECK_EXISTS],
-[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
-if test -n "$PKG_CONFIG" && \
-    AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then
-  m4_default([$2], [:])
-m4_ifvaln([$3], [else
-  $3])dnl
-fi])
-
-dnl _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES])
-dnl ---------------------------------------------
-dnl Internal wrapper calling pkg-config via PKG_CONFIG and setting
-dnl pkg_failed based on the result.
-m4_define([_PKG_CONFIG],
-[if test -n "$$1"; then
-    pkg_cv_[]$1="$$1"
- elif test -n "$PKG_CONFIG"; then
-    PKG_CHECK_EXISTS([$3],
-                     [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null`
-		      test "x$?" != "x0" && pkg_failed=yes ],
-		     [pkg_failed=yes])
- else
-    pkg_failed=untried
-fi[]dnl
-])dnl _PKG_CONFIG
-
-dnl _PKG_SHORT_ERRORS_SUPPORTED
-dnl ---------------------------
-dnl Internal check to see if pkg-config supports short errors.
-AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED],
-[AC_REQUIRE([PKG_PROG_PKG_CONFIG])
-if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
-        _pkg_short_errors_supported=yes
-else
-        _pkg_short_errors_supported=no
-fi[]dnl
-])dnl _PKG_SHORT_ERRORS_SUPPORTED
-
-
-dnl PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
-dnl   [ACTION-IF-NOT-FOUND])
-dnl --------------------------------------------------------------
-dnl Since: 0.4.0
-dnl
-dnl Note that if there is a possibility the first call to
-dnl PKG_CHECK_MODULES might not happen, you should be sure to include an
-dnl explicit call to PKG_PROG_PKG_CONFIG in your configure.ac
-AC_DEFUN([PKG_CHECK_MODULES],
-[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
-AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl
-AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl
-
-pkg_failed=no
-AC_MSG_CHECKING([for $1])
-
-_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2])
-_PKG_CONFIG([$1][_LIBS], [libs], [$2])
-
-m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS
-and $1[]_LIBS to avoid the need to call pkg-config.
-See the pkg-config man page for more details.])
-
-if test $pkg_failed = yes; then
-   	AC_MSG_RESULT([no])
-        _PKG_SHORT_ERRORS_SUPPORTED
-        if test $_pkg_short_errors_supported = yes; then
-	        $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1`
-        else 
-	        $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1`
-        fi
-	# Put the nasty error message in config.log where it belongs
-	echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD
-
-	m4_default([$4], [AC_MSG_ERROR(
-[Package requirements ($2) were not met:
-
-$$1_PKG_ERRORS
-
-Consider adjusting the PKG_CONFIG_PATH environment variable if you
-installed software in a non-standard prefix.
-
-_PKG_TEXT])[]dnl
-        ])
-elif test $pkg_failed = untried; then
-     	AC_MSG_RESULT([no])
-	m4_default([$4], [AC_MSG_FAILURE(
-[The pkg-config script could not be found or is too old.  Make sure it
-is in your PATH or set the PKG_CONFIG environment variable to the full
-path to pkg-config.
-
-_PKG_TEXT
-
-To get pkg-config, see <http://pkg-config.freedesktop.org/>.])[]dnl
-        ])
-else
-	$1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS
-	$1[]_LIBS=$pkg_cv_[]$1[]_LIBS
-        AC_MSG_RESULT([yes])
-	$3
-fi[]dnl
-])dnl PKG_CHECK_MODULES
-
-
-dnl PKG_CHECK_MODULES_STATIC(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
-dnl   [ACTION-IF-NOT-FOUND])
-dnl ---------------------------------------------------------------------
-dnl Since: 0.29
-dnl
-dnl Checks for existence of MODULES and gathers its build flags with
-dnl static libraries enabled. Sets VARIABLE-PREFIX_CFLAGS from --cflags
-dnl and VARIABLE-PREFIX_LIBS from --libs.
-dnl
-dnl Note that if there is a possibility the first call to
-dnl PKG_CHECK_MODULES_STATIC might not happen, you should be sure to
-dnl include an explicit call to PKG_PROG_PKG_CONFIG in your
-dnl configure.ac.
-AC_DEFUN([PKG_CHECK_MODULES_STATIC],
-[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
-_save_PKG_CONFIG=$PKG_CONFIG
-PKG_CONFIG="$PKG_CONFIG --static"
-PKG_CHECK_MODULES($@)
-PKG_CONFIG=$_save_PKG_CONFIG[]dnl
-])dnl PKG_CHECK_MODULES_STATIC
-
-
-dnl PKG_INSTALLDIR([DIRECTORY])
-dnl -------------------------
-dnl Since: 0.27
-dnl
-dnl Substitutes the variable pkgconfigdir as the location where a module
-dnl should install pkg-config .pc files. By default the directory is
-dnl $libdir/pkgconfig, but the default can be changed by passing
-dnl DIRECTORY. The user can override through the --with-pkgconfigdir
-dnl parameter.
-AC_DEFUN([PKG_INSTALLDIR],
-[m4_pushdef([pkg_default], [m4_default([$1], ['${libdir}/pkgconfig'])])
-m4_pushdef([pkg_description],
-    [pkg-config installation directory @<:@]pkg_default[@:>@])
-AC_ARG_WITH([pkgconfigdir],
-    [AS_HELP_STRING([--with-pkgconfigdir], pkg_description)],,
-    [with_pkgconfigdir=]pkg_default)
-AC_SUBST([pkgconfigdir], [$with_pkgconfigdir])
-m4_popdef([pkg_default])
-m4_popdef([pkg_description])
-])dnl PKG_INSTALLDIR
-
-
-dnl PKG_NOARCH_INSTALLDIR([DIRECTORY])
-dnl --------------------------------
-dnl Since: 0.27
-dnl
-dnl Substitutes the variable noarch_pkgconfigdir as the location where a
-dnl module should install arch-independent pkg-config .pc files. By
-dnl default the directory is $datadir/pkgconfig, but the default can be
-dnl changed by passing DIRECTORY. The user can override through the
-dnl --with-noarch-pkgconfigdir parameter.
-AC_DEFUN([PKG_NOARCH_INSTALLDIR],
-[m4_pushdef([pkg_default], [m4_default([$1], ['${datadir}/pkgconfig'])])
-m4_pushdef([pkg_description],
-    [pkg-config arch-independent installation directory @<:@]pkg_default[@:>@])
-AC_ARG_WITH([noarch-pkgconfigdir],
-    [AS_HELP_STRING([--with-noarch-pkgconfigdir], pkg_description)],,
-    [with_noarch_pkgconfigdir=]pkg_default)
-AC_SUBST([noarch_pkgconfigdir], [$with_noarch_pkgconfigdir])
-m4_popdef([pkg_default])
-m4_popdef([pkg_description])
-])dnl PKG_NOARCH_INSTALLDIR
-
-
-dnl PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE,
-dnl [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
-dnl -------------------------------------------
-dnl Since: 0.28
-dnl
-dnl Retrieves the value of the pkg-config variable for the given module.
-AC_DEFUN([PKG_CHECK_VAR],
-[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
-AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl
-
-_PKG_CONFIG([$1], [variable="][$3]["], [$2])
-AS_VAR_COPY([$1], [pkg_cv_][$1])
-
-AS_VAR_IF([$1], [""], [$5], [$4])dnl
-])dnl PKG_CHECK_VAR
-dnl pkg.m4 - Macros to locate and utilise pkg-config.   -*- Autoconf -*-
-dnl serial 11 (pkg-config-0.29)
-dnl
-dnl Copyright © 2004 Scott James Remnant <scott@netsplit.com>.
-dnl Copyright © 2012-2015 Dan Nicholson <dbn.lists@gmail.com>
-dnl
-dnl This program is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU General Public License as published by
-dnl the Free Software Foundation; either version 2 of the License, or
-dnl (at your option) any later version.
-dnl
-dnl This program is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-dnl General Public License for more details.
-dnl
-dnl You should have received a copy of the GNU General Public License
-dnl along with this program; if not, write to the Free Software
-dnl Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-dnl 02111-1307, USA.
-dnl
-dnl As a special exception to the GNU General Public License, if you
-dnl distribute this file as part of a program that contains a
-dnl configuration script generated by Autoconf, you may include it under
-dnl the same distribution terms that you use for the rest of that
-dnl program.
-
-dnl PKG_PREREQ(MIN-VERSION)
-dnl -----------------------
-dnl Since: 0.29
-dnl
-dnl Verify that the version of the pkg-config macros are at least
-dnl MIN-VERSION. Unlike PKG_PROG_PKG_CONFIG, which checks the user's
-dnl installed version of pkg-config, this checks the developer's version
-dnl of pkg.m4 when generating configure.
-dnl
-dnl To ensure that this macro is defined, also add:
-dnl m4_ifndef([PKG_PREREQ],
-dnl     [m4_fatal([must install pkg-config 0.29 or later before running autoconf/autogen])])
-dnl
-dnl See the "Since" comment for each macro you use to see what version
-dnl of the macros you require.
-m4_defun([PKG_PREREQ],
-[m4_define([PKG_MACROS_VERSION], [0.29])
-m4_if(m4_version_compare(PKG_MACROS_VERSION, [$1]), -1,
-    [m4_fatal([pkg.m4 version $1 or higher is required but ]PKG_MACROS_VERSION[ found])])
-])dnl PKG_PREREQ
-
-dnl PKG_PROG_PKG_CONFIG([MIN-VERSION])
-dnl ----------------------------------
-dnl Since: 0.16
-dnl
-dnl Search for the pkg-config tool and set the PKG_CONFIG variable to
-dnl first found in the path. Checks that the version of pkg-config found
-dnl is at least MIN-VERSION. If MIN-VERSION is not specified, 0.9.0 is
-dnl used since that's the first version where most current features of
-dnl pkg-config existed.
-AC_DEFUN([PKG_PROG_PKG_CONFIG],
-[m4_pattern_forbid([^_?PKG_[A-Z_]+$])
-m4_pattern_allow([^PKG_CONFIG(_(PATH|LIBDIR|SYSROOT_DIR|ALLOW_SYSTEM_(CFLAGS|LIBS)))?$])
-m4_pattern_allow([^PKG_CONFIG_(DISABLE_UNINSTALLED|TOP_BUILD_DIR|DEBUG_SPEW)$])
-AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility])
-AC_ARG_VAR([PKG_CONFIG_PATH], [directories to add to pkg-config's search path])
-AC_ARG_VAR([PKG_CONFIG_LIBDIR], [path overriding pkg-config's built-in search path])
-
-if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
-	AC_PATH_TOOL([PKG_CONFIG], [pkg-config])
-fi
-if test -n "$PKG_CONFIG"; then
-	_pkg_min_version=m4_default([$1], [0.9.0])
-	AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version])
-	if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then
-		AC_MSG_RESULT([yes])
-	else
-		AC_MSG_RESULT([no])
-		PKG_CONFIG=""
-	fi
-fi[]dnl
-])dnl PKG_PROG_PKG_CONFIG
-
-dnl PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
-dnl -------------------------------------------------------------------
-dnl Since: 0.18
-dnl
-dnl Check to see whether a particular set of modules exists. Similar to
-dnl PKG_CHECK_MODULES(), but does not set variables or print errors.
-dnl
-dnl Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG])
-dnl only at the first occurence in configure.ac, so if the first place
-dnl it's called might be skipped (such as if it is within an "if", you
-dnl have to call PKG_CHECK_EXISTS manually
-AC_DEFUN([PKG_CHECK_EXISTS],
-[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
-if test -n "$PKG_CONFIG" && \
-    AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then
-  m4_default([$2], [:])
-m4_ifvaln([$3], [else
-  $3])dnl
-fi])
-
-dnl _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES])
-dnl ---------------------------------------------
-dnl Internal wrapper calling pkg-config via PKG_CONFIG and setting
-dnl pkg_failed based on the result.
-m4_define([_PKG_CONFIG],
-[if test -n "$$1"; then
-    pkg_cv_[]$1="$$1"
- elif test -n "$PKG_CONFIG"; then
-    PKG_CHECK_EXISTS([$3],
-                     [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null`
-		      test "x$?" != "x0" && pkg_failed=yes ],
-		     [pkg_failed=yes])
- else
-    pkg_failed=untried
-fi[]dnl
-])dnl _PKG_CONFIG
-
-dnl _PKG_SHORT_ERRORS_SUPPORTED
-dnl ---------------------------
-dnl Internal check to see if pkg-config supports short errors.
-AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED],
-[AC_REQUIRE([PKG_PROG_PKG_CONFIG])
-if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
-        _pkg_short_errors_supported=yes
-else
-        _pkg_short_errors_supported=no
-fi[]dnl
-])dnl _PKG_SHORT_ERRORS_SUPPORTED
-
-
-dnl PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
-dnl   [ACTION-IF-NOT-FOUND])
-dnl --------------------------------------------------------------
-dnl Since: 0.4.0
-dnl
-dnl Note that if there is a possibility the first call to
-dnl PKG_CHECK_MODULES might not happen, you should be sure to include an
-dnl explicit call to PKG_PROG_PKG_CONFIG in your configure.ac
-AC_DEFUN([PKG_CHECK_MODULES],
-[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
-AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl
-AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl
-
-pkg_failed=no
-AC_MSG_CHECKING([for $1])
-
-_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2])
-_PKG_CONFIG([$1][_LIBS], [libs], [$2])
-
-m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS
-and $1[]_LIBS to avoid the need to call pkg-config.
-See the pkg-config man page for more details.])
-
-if test $pkg_failed = yes; then
-   	AC_MSG_RESULT([no])
-        _PKG_SHORT_ERRORS_SUPPORTED
-        if test $_pkg_short_errors_supported = yes; then
-	        $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1`
-        else 
-	        $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1`
-        fi
-	# Put the nasty error message in config.log where it belongs
-	echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD
-
-	m4_default([$4], [AC_MSG_ERROR(
-[Package requirements ($2) were not met:
-
-$$1_PKG_ERRORS
-
-Consider adjusting the PKG_CONFIG_PATH environment variable if you
-installed software in a non-standard prefix.
-
-_PKG_TEXT])[]dnl
-        ])
-elif test $pkg_failed = untried; then
-     	AC_MSG_RESULT([no])
-	m4_default([$4], [AC_MSG_FAILURE(
-[The pkg-config script could not be found or is too old.  Make sure it
-is in your PATH or set the PKG_CONFIG environment variable to the full
-path to pkg-config.
-
-_PKG_TEXT
-
-To get pkg-config, see <http://pkg-config.freedesktop.org/>.])[]dnl
-        ])
-else
-	$1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS
-	$1[]_LIBS=$pkg_cv_[]$1[]_LIBS
-        AC_MSG_RESULT([yes])
-	$3
-fi[]dnl
-])dnl PKG_CHECK_MODULES
-
-
-dnl PKG_CHECK_MODULES_STATIC(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
-dnl   [ACTION-IF-NOT-FOUND])
-dnl ---------------------------------------------------------------------
-dnl Since: 0.29
-dnl
-dnl Checks for existence of MODULES and gathers its build flags with
-dnl static libraries enabled. Sets VARIABLE-PREFIX_CFLAGS from --cflags
-dnl and VARIABLE-PREFIX_LIBS from --libs.
-dnl
-dnl Note that if there is a possibility the first call to
-dnl PKG_CHECK_MODULES_STATIC might not happen, you should be sure to
-dnl include an explicit call to PKG_PROG_PKG_CONFIG in your
-dnl configure.ac.
-AC_DEFUN([PKG_CHECK_MODULES_STATIC],
-[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
-_save_PKG_CONFIG=$PKG_CONFIG
-PKG_CONFIG="$PKG_CONFIG --static"
-PKG_CHECK_MODULES($@)
-PKG_CONFIG=$_save_PKG_CONFIG[]dnl
-])dnl PKG_CHECK_MODULES_STATIC
-
-
-dnl PKG_INSTALLDIR([DIRECTORY])
-dnl -------------------------
-dnl Since: 0.27
-dnl
-dnl Substitutes the variable pkgconfigdir as the location where a module
-dnl should install pkg-config .pc files. By default the directory is
-dnl $libdir/pkgconfig, but the default can be changed by passing
-dnl DIRECTORY. The user can override through the --with-pkgconfigdir
-dnl parameter.
-AC_DEFUN([PKG_INSTALLDIR],
-[m4_pushdef([pkg_default], [m4_default([$1], ['${libdir}/pkgconfig'])])
-m4_pushdef([pkg_description],
-    [pkg-config installation directory @<:@]pkg_default[@:>@])
-AC_ARG_WITH([pkgconfigdir],
-    [AS_HELP_STRING([--with-pkgconfigdir], pkg_description)],,
-    [with_pkgconfigdir=]pkg_default)
-AC_SUBST([pkgconfigdir], [$with_pkgconfigdir])
-m4_popdef([pkg_default])
-m4_popdef([pkg_description])
-])dnl PKG_INSTALLDIR
-
-
-dnl PKG_NOARCH_INSTALLDIR([DIRECTORY])
-dnl --------------------------------
-dnl Since: 0.27
-dnl
-dnl Substitutes the variable noarch_pkgconfigdir as the location where a
-dnl module should install arch-independent pkg-config .pc files. By
-dnl default the directory is $datadir/pkgconfig, but the default can be
-dnl changed by passing DIRECTORY. The user can override through the
-dnl --with-noarch-pkgconfigdir parameter.
-AC_DEFUN([PKG_NOARCH_INSTALLDIR],
-[m4_pushdef([pkg_default], [m4_default([$1], ['${datadir}/pkgconfig'])])
-m4_pushdef([pkg_description],
-    [pkg-config arch-independent installation directory @<:@]pkg_default[@:>@])
-AC_ARG_WITH([noarch-pkgconfigdir],
-    [AS_HELP_STRING([--with-noarch-pkgconfigdir], pkg_description)],,
-    [with_noarch_pkgconfigdir=]pkg_default)
-AC_SUBST([noarch_pkgconfigdir], [$with_noarch_pkgconfigdir])
-m4_popdef([pkg_default])
-m4_popdef([pkg_description])
-])dnl PKG_NOARCH_INSTALLDIR
-
-
-dnl PKG_CHECK_VAR(VARIABLE, MODULE, CONFIG-VARIABLE,
-dnl [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
-dnl -------------------------------------------
-dnl Since: 0.28
-dnl
-dnl Retrieves the value of the pkg-config variable for the given module.
-AC_DEFUN([PKG_CHECK_VAR],
-[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
-AC_ARG_VAR([$1], [value of $3 for $2, overriding pkg-config])dnl
-
-_PKG_CONFIG([$1], [variable="][$3]["], [$2])
-AS_VAR_COPY([$1], [pkg_cv_][$1])
-
-AS_VAR_IF([$1], [""], [$5], [$4])dnl
-])dnl PKG_CHECK_VAR
diff --git a/configure b/configure
index cec133e..fb79e73 100755
--- a/configure
+++ b/configure
@@ -643,9 +643,6 @@ DEBUG_PREFIX_CFLAGS_FOR_TARGET
 SYSROOT_CFLAGS_FOR_TARGET
 extra_host_zlib_configure_flags
 extra_host_libiberty_configure_flags
-PKG_CONFIG_LIBDIR
-PKG_CONFIG_PATH
-PKG_CONFIG
 stage1_languages
 extra_linker_plugin_flags
 extra_linker_plugin_configure_flags
@@ -811,9 +808,6 @@ CPPFLAGS
 CXX
 CXXFLAGS
 CCC
-PKG_CONFIG
-PKG_CONFIG_PATH
-PKG_CONFIG_LIBDIR
 build_configargs
 host_configargs
 target_configargs
@@ -1587,11 +1581,6 @@ Some influential environment variables:
               you have headers in a nonstandard directory <include dir>
   CXX         C++ compiler command
   CXXFLAGS    C++ compiler flags
-  PKG_CONFIG  path to pkg-config utility
-  PKG_CONFIG_PATH
-              directories to add to pkg-config's search path
-  PKG_CONFIG_LIBDIR
-              path overriding pkg-config's built-in search path
   build_configargs
               additional configure arguments for build directories
   host_configargs
@@ -6441,137 +6430,8 @@ case ,${enable_languages},:${enable_objc_gc} in *,objc,*:yes|*,objc,*:auto)
   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for bdw garbage collector" >&5
 $as_echo_n "checking for bdw garbage collector... " >&6; }
   if test "x$with_target_bdw_gc$with_target_bdw_gc_include$with_target_bdw_gc_lib" = x; then
-
-
-
-
-
-
-
-if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
-	if test -n "$ac_tool_prefix"; then
-  # Extract the first word of "${ac_tool_prefix}pkg-config", so it can be a program name with args.
-set dummy ${ac_tool_prefix}pkg-config; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_path_PKG_CONFIG+set}" = set; then :
-  $as_echo_n "(cached) " >&6
-else
-  case $PKG_CONFIG in
-  [\\/]* | ?:[\\/]*)
-  ac_cv_path_PKG_CONFIG="$PKG_CONFIG" # Let the user override the test with a path.
-  ;;
-  *)
-  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
-  IFS=$as_save_IFS
-  test -z "$as_dir" && as_dir=.
-    for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
-    ac_cv_path_PKG_CONFIG="$as_dir/$ac_word$ac_exec_ext"
-    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
-    break 2
-  fi
-done
-  done
-IFS=$as_save_IFS
-
-  ;;
-esac
-fi
-PKG_CONFIG=$ac_cv_path_PKG_CONFIG
-if test -n "$PKG_CONFIG"; then
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $PKG_CONFIG" >&5
-$as_echo "$PKG_CONFIG" >&6; }
-else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
-fi
-if test -z "$ac_cv_path_PKG_CONFIG"; then
-  ac_pt_PKG_CONFIG=$PKG_CONFIG
-  # Extract the first word of "pkg-config", so it can be a program name with args.
-set dummy pkg-config; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_path_ac_pt_PKG_CONFIG+set}" = set; then :
-  $as_echo_n "(cached) " >&6
-else
-  case $ac_pt_PKG_CONFIG in
-  [\\/]* | ?:[\\/]*)
-  ac_cv_path_ac_pt_PKG_CONFIG="$ac_pt_PKG_CONFIG" # Let the user override the test with a path.
-  ;;
-  *)
-  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
-  IFS=$as_save_IFS
-  test -z "$as_dir" && as_dir=.
-    for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
-    ac_cv_path_ac_pt_PKG_CONFIG="$as_dir/$ac_word$ac_exec_ext"
-    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
-    break 2
-  fi
-done
-  done
-IFS=$as_save_IFS
-
-  ;;
-esac
-fi
-ac_pt_PKG_CONFIG=$ac_cv_path_ac_pt_PKG_CONFIG
-if test -n "$ac_pt_PKG_CONFIG"; then
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_PKG_CONFIG" >&5
-$as_echo "$ac_pt_PKG_CONFIG" >&6; }
-else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-  if test "x$ac_pt_PKG_CONFIG" = x; then
-    PKG_CONFIG=""
-  else
-    case $cross_compiling:$ac_tool_warned in
-yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
-$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
-ac_tool_warned=yes ;;
-esac
-    PKG_CONFIG=$ac_pt_PKG_CONFIG
-  fi
-else
-  PKG_CONFIG="$ac_cv_path_PKG_CONFIG"
-fi
-
-fi
-if test -n "$PKG_CONFIG"; then
-	_pkg_min_version=0.9.0
-	{ $as_echo "$as_me:${as_lineno-$LINENO}: checking pkg-config is at least version $_pkg_min_version" >&5
-$as_echo_n "checking pkg-config is at least version $_pkg_min_version... " >&6; }
-	if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then
-		{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-	else
-		{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-		PKG_CONFIG=""
-	fi
-fi
-if test -n "$PKG_CONFIG" && \
-    { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"bdw-gc\""; } >&5
-  ($PKG_CONFIG --exists --print-errors "bdw-gc") 2>&5
-  ac_status=$?
-  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; }; then
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: using bdw-gc pkg-config module" >&5
-$as_echo "using bdw-gc pkg-config module" >&6; }
-else
-  as_fn_error "no --with-target-bdw-gc options and no bdw-gc pkg-config module found" "$LINENO" 5
-fi
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: using bdw-gc in default locations" >&5
+$as_echo "using bdw-gc in default locations" >&6; }
   else
         if test "x$with_target_bdw_gc_include" = x && test "x$with_target_bdw_gc_lib" != x; then
       as_fn_error "found --with-target-bdw-gc-lib but --with-target-bdw-gc-include missing" "$LINENO" 5
diff --git a/configure.ac b/configure.ac
index ed89ea2..51ee705 100644
--- a/configure.ac
+++ b/configure.ac
@@ -29,7 +29,6 @@ m4_include([ltsugar.m4])
 m4_include([ltversion.m4])
 m4_include([lt~obsolete.m4])
 m4_include([config/isl.m4])
-m4_include([config/pkg.m4])
 
 AC_INIT(move-if-change)
 AC_PREREQ(2.64)
@@ -2076,10 +2075,8 @@ AC_ARG_WITH([target-bdw-gc-lib],
 case ,${enable_languages},:${enable_objc_gc} in *,objc,*:yes|*,objc,*:auto)
   AC_MSG_CHECKING([for bdw garbage collector])
   if test "x$with_target_bdw_gc$with_target_bdw_gc_include$with_target_bdw_gc_lib" = x; then
-    dnl no bdw-gw options, fall back to the bdw-gc pkg-config module
-    PKG_CHECK_EXISTS(bdw-gc,
-      AC_MSG_RESULT([using bdw-gc pkg-config module]),
-      AC_MSG_ERROR([no --with-target-bdw-gc options and no bdw-gc pkg-config module found]))
+    dnl no bdw-gc options, assume default locations
+    AC_MSG_RESULT([using bdw-gc in default locations])
   else
     dnl bdw-gw options, first error checking, complete checking in libobjc
     if test "x$with_target_bdw_gc_include" = x && test "x$with_target_bdw_gc_lib" != x; then
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 19cb0ce..a164a01 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,7 @@
+2016-12-01  Matthias Klose  <doko@ubuntu.com>
+
+	* doc/install.texi: Don't use pkg-config to check for bdw-gc.
+
 2016-12-01  Richard Biener  <rguenther@suse.de>
 
 	* tree-ssa-alias.c (indirect_refs_may_alias_p): Do not
@@ -34,6 +38,7 @@
 	PR rtl-optimization/78607
 	* combine.c (try_combine): Emit a barrier after a unconditional trap.
 
+
 2016-11-30  Michael Meissner  <meissner@linux.vnet.ibm.com>
 
 	PR target/78602
diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 140ff80..b911d76 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -2204,8 +2204,7 @@ The options @option{--with-target-bdw-gc-include} and
 @option{--with-target-bdw-gc-lib} must always be specified together
 for each multilib variant and they take precedence over
 @option{--with-target-bdw-gc}.  If none of these options are
-specified, the values are taken from the @command{pkg-config}
-@samp{bdw-gc} module.
+specified, the library is assumed to be in the default locations.
 @end table
 
 @html
diff --git a/libobjc/ChangeLog b/libobjc/ChangeLog
index f6eadaf..60dfc16 100644
--- a/libobjc/ChangeLog
+++ b/libobjc/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-01  Matthias Klose  <doko@ubuntu.com>
+
+	* configure.ac: Don't use pkg-config to check for bdw-gc.
+	* configure: Regenerate.
+
 2016-11-30  Matthias Klose  <doko@ubuntu.com>
 
 	* configure.ac: Set BDW_GC_CFLAGS and BDW_GC_LIBS after checking
diff --git a/libobjc/configure b/libobjc/configure
index c617f27..62bdc2b 100755
--- a/libobjc/configure
+++ b/libobjc/configure
@@ -604,9 +604,6 @@ OBJC_BOEHM_GC_LIBS
 OBJC_BOEHM_GC_INCLUDES
 OBJC_BOEHM_GC
 OBJC_GCFLAGS
-PKG_CONFIG_LIBDIR
-PKG_CONFIG_PATH
-PKG_CONFIG
 SET_MAKE
 CPP
 OTOOL64
@@ -733,10 +730,7 @@ with_target_bdw_gc_lib
 host_alias
 target_alias
 CPP
-CPPFLAGS
-PKG_CONFIG
-PKG_CONFIG_PATH
-PKG_CONFIG_LIBDIR'
+CPPFLAGS'
 
 
 # Initialize some variables set by options.
@@ -1395,11 +1389,6 @@ Some influential environment variables:
   CPPFLAGS    C/C++/Objective C preprocessor flags, e.g. -I<include dir> if
               you have headers in a nonstandard directory <include dir>
   CPP         C preprocessor
-  PKG_CONFIG  path to pkg-config utility
-  PKG_CONFIG_PATH
-              directories to add to pkg-config's search path
-  PKG_CONFIG_LIBDIR
-              path overriding pkg-config's built-in search path
 
 Use these variables to override the choices made by `configure' or to help
 it to find libraries and programs with nonstandard names/locations.
@@ -10601,7 +10590,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 10604 "configure"
+#line 10593 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -10707,7 +10696,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 10710 "configure"
+#line 10699 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11540,139 +11529,8 @@ no)
   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for bdw garbage collector" >&5
 $as_echo_n "checking for bdw garbage collector... " >&6; }
   if test "x$with_target_bdw_gc$with_target_bdw_gc_include$with_target_bdw_gc_lib" = x; then
-
-
-
-
-
-
-
-if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
-	if test -n "$ac_tool_prefix"; then
-  # Extract the first word of "${ac_tool_prefix}pkg-config", so it can be a program name with args.
-set dummy ${ac_tool_prefix}pkg-config; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_path_PKG_CONFIG+set}" = set; then :
-  $as_echo_n "(cached) " >&6
-else
-  case $PKG_CONFIG in
-  [\\/]* | ?:[\\/]*)
-  ac_cv_path_PKG_CONFIG="$PKG_CONFIG" # Let the user override the test with a path.
-  ;;
-  *)
-  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
-  IFS=$as_save_IFS
-  test -z "$as_dir" && as_dir=.
-    for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
-    ac_cv_path_PKG_CONFIG="$as_dir/$ac_word$ac_exec_ext"
-    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
-    break 2
-  fi
-done
-  done
-IFS=$as_save_IFS
-
-  ;;
-esac
-fi
-PKG_CONFIG=$ac_cv_path_PKG_CONFIG
-if test -n "$PKG_CONFIG"; then
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $PKG_CONFIG" >&5
-$as_echo "$PKG_CONFIG" >&6; }
-else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-
-fi
-if test -z "$ac_cv_path_PKG_CONFIG"; then
-  ac_pt_PKG_CONFIG=$PKG_CONFIG
-  # Extract the first word of "pkg-config", so it can be a program name with args.
-set dummy pkg-config; ac_word=$2
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
-$as_echo_n "checking for $ac_word... " >&6; }
-if test "${ac_cv_path_ac_pt_PKG_CONFIG+set}" = set; then :
-  $as_echo_n "(cached) " >&6
-else
-  case $ac_pt_PKG_CONFIG in
-  [\\/]* | ?:[\\/]*)
-  ac_cv_path_ac_pt_PKG_CONFIG="$ac_pt_PKG_CONFIG" # Let the user override the test with a path.
-  ;;
-  *)
-  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
-for as_dir in $PATH
-do
-  IFS=$as_save_IFS
-  test -z "$as_dir" && as_dir=.
-    for ac_exec_ext in '' $ac_executable_extensions; do
-  if { test -f "$as_dir/$ac_word$ac_exec_ext" && $as_test_x "$as_dir/$ac_word$ac_exec_ext"; }; then
-    ac_cv_path_ac_pt_PKG_CONFIG="$as_dir/$ac_word$ac_exec_ext"
-    $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5
-    break 2
-  fi
-done
-  done
-IFS=$as_save_IFS
-
-  ;;
-esac
-fi
-ac_pt_PKG_CONFIG=$ac_cv_path_ac_pt_PKG_CONFIG
-if test -n "$ac_pt_PKG_CONFIG"; then
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_pt_PKG_CONFIG" >&5
-$as_echo "$ac_pt_PKG_CONFIG" >&6; }
-else
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-
-  if test "x$ac_pt_PKG_CONFIG" = x; then
-    PKG_CONFIG=""
-  else
-    case $cross_compiling:$ac_tool_warned in
-yes:)
-{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5
-$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;}
-ac_tool_warned=yes ;;
-esac
-    PKG_CONFIG=$ac_pt_PKG_CONFIG
-  fi
-else
-  PKG_CONFIG="$ac_cv_path_PKG_CONFIG"
-fi
-
-fi
-if test -n "$PKG_CONFIG"; then
-	_pkg_min_version=0.9.0
-	{ $as_echo "$as_me:${as_lineno-$LINENO}: checking pkg-config is at least version $_pkg_min_version" >&5
-$as_echo_n "checking pkg-config is at least version $_pkg_min_version... " >&6; }
-	if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then
-		{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-	else
-		{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-		PKG_CONFIG=""
-	fi
-fi
-if test -n "$PKG_CONFIG" && \
-    { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"bdw-gc\""; } >&5
-  ($PKG_CONFIG --exists --print-errors "bdw-gc") 2>&5
-  ac_status=$?
-  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
-  test $ac_status = 0; }; then
-  { $as_echo "$as_me:${as_lineno-$LINENO}: result: using bdw-gc pkg-config module" >&5
-$as_echo "using bdw-gc pkg-config module" >&6; }
-       BDW_GC_CFLAGS=`$PKG_CONFIG --cflags bdw-gc`
-       BDW_GC_LIBS=`$PKG_CONFIG --libs bdw-gc`
-else
-  as_fn_error "no --with-target-bdw-gc options and no bdw-gc pkg-config module found" "$LINENO" 5
-fi
+        BDW_GC_CFLAGS=
+    BDW_GC_LIBS="-lgc"
   else
         if test "x$with_target_bdw_gc_include" = x && test "x$with_target_bdw_gc_lib" != x; then
       as_fn_error "found --with-target-bdw-gc-lib but --with-target-bdw-gc-include missing" "$LINENO" 5
diff --git a/libobjc/configure.ac b/libobjc/configure.ac
index 2303143..3c13b33 100644
--- a/libobjc/configure.ac
+++ b/libobjc/configure.ac
@@ -18,8 +18,6 @@
 #along with GCC; see the file COPYING3.  If not see
 #<http://www.gnu.org/licenses/>.
 
-m4_include([../config/pkg.m4])
-
 AC_PREREQ(2.64)
 AC_INIT(package-unused, version-unused,, libobjc)
 AC_CONFIG_SRCDIR([objc/objc.h])
@@ -223,12 +221,9 @@ no)
 *)
   AC_MSG_CHECKING([for bdw garbage collector])
   if test "x$with_target_bdw_gc$with_target_bdw_gc_include$with_target_bdw_gc_lib" = x; then
-    dnl no bdw-gw options, fall back to the bdw-gc pkg-config module
-    PKG_CHECK_EXISTS(bdw-gc,
-      [AC_MSG_RESULT([using bdw-gc pkg-config module])
-       BDW_GC_CFLAGS=`$PKG_CONFIG --cflags bdw-gc`
-       BDW_GC_LIBS=`$PKG_CONFIG --libs bdw-gc`],
-      AC_MSG_ERROR([no --with-target-bdw-gc options and no bdw-gc pkg-config module found]))
+    dnl no bdw-gc options, assuming bdw-gc in default locations
+    BDW_GC_CFLAGS=
+    BDW_GC_LIBS="-lgc"
   else
     dnl bdw-gw options passed by configure flags
     if test "x$with_target_bdw_gc_include" = x && test "x$with_target_bdw_gc_lib" != x; then
-- 
cgit v1.1


From 8e9c33cd3bd55c2364a2efc7fc2de0957d7b991d Mon Sep 17 00:00:00 2001
From: David Edelsohn <dje@gcc.gnu.org>
Date: Thu, 1 Dec 2016 08:48:22 -0500
Subject: Remove svn conflict marker.

From-SVN: r243110
---
 gcc/ChangeLog | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a164a01..4d15a73 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -38,7 +38,6 @@
 	PR rtl-optimization/78607
 	* combine.c (try_combine): Emit a barrier after a unconditional trap.
 
->>>>>>> .r243107
 2016-11-30  Michael Meissner  <meissner@linux.vnet.ibm.com>
 
 	PR target/78602
-- 
cgit v1.1


From 67586d38f5b1858fba96973e9341f7d65f64ea9c Mon Sep 17 00:00:00 2001
From: Markus Trippelsdorf <markus@trippelsdorf.de>
Date: Thu, 1 Dec 2016 14:04:13 +0000
Subject: Fix rtl-optimization/78596 - combine.c:12561:14: runtime error: left
 shift of negative value

	PR rtl-optimization/78596
	* combine.c (simplify_comparison): Cast to unsigned to avoid
	left shifting of negative value.

From-SVN: r243111
---
 gcc/ChangeLog | 6 ++++++
 gcc/combine.c | 3 ++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 4d15a73..b90cbc6 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-01  Markus Trippelsdorf  <markus@trippelsdorf.de>
+
+	PR rtl-optimization/78596
+	* combine.c (simplify_comparison): Cast to unsigned to avoid
+	left shifting of negative value.
+
 2016-12-01  Matthias Klose  <doko@ubuntu.com>
 
 	* doc/install.texi: Don't use pkg-config to check for bdw-gc.
diff --git a/gcc/combine.c b/gcc/combine.c
index faafcb7..b429453 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -12561,7 +12561,8 @@ simplify_comparison (enum rtx_code code, rtx *pop0, rtx *pop1)
 		  if (GET_CODE (op0) == LSHIFTRT)
 		    code = unsigned_condition (code);
 
-		  const_op <<= INTVAL (XEXP (op0, 1));
+		  const_op = (unsigned HOST_WIDE_INT) const_op
+			      << INTVAL (XEXP (op0, 1));
 		  if (low_bits != 0
 		      && (code == GT || code == GTU
 			  || code == LE || code == LEU))
-- 
cgit v1.1


From be5ddbb86fbc4d7651f4c748528ecab6d31cd035 Mon Sep 17 00:00:00 2001
From: Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
Date: Thu, 1 Dec 2016 14:48:49 +0000
Subject: Import libcilkrts Build 4467 (PR target/68945)

	PR target/68945
	Merge from upstream, version 2.0.4467.0.
	Fix typo in git URL.
	* aclocal.m4, configure, Makefile.in: Regenerate.

From-SVN: r243112
---
 libcilkrts/ChangeLog                             |   7 ++
 libcilkrts/Makefile.am                           |   2 +-
 libcilkrts/Makefile.in                           |   4 +-
 libcilkrts/README                                | 104 +++++++++++++++++---
 libcilkrts/configure                             |  58 ++++++++++--
 libcilkrts/configure.ac                          |  14 ++-
 libcilkrts/configure.tgt                         |   6 +-
 libcilkrts/include/cilk/cilk_undocumented.h      |   3 -
 libcilkrts/include/internal/cilk_version.h       |  10 +-
 libcilkrts/include/internal/rev.mk               |   2 +-
 libcilkrts/runtime/cilk-abi.c                    |   1 -
 libcilkrts/runtime/cilk_fiber-unix.cpp           |   2 +-
 libcilkrts/runtime/cilk_fiber.h                  |   5 +-
 libcilkrts/runtime/config/arm/os-fence.h         |  19 +++-
 libcilkrts/runtime/config/sparc/cilk-abi-vla.c   | 115 +++++++++++++++++++++++
 libcilkrts/runtime/config/sparc/os-fence.h       |  64 +++++++++++++
 libcilkrts/runtime/config/sparc/os-unix-sysdep.c | 115 +++++++++++++++++++++++
 libcilkrts/runtime/except-gcc.cpp                |   2 +
 libcilkrts/runtime/global_state.cpp              |   1 -
 libcilkrts/runtime/global_state.h                |   1 -
 libcilkrts/runtime/jmpbuf.h                      |  14 ++-
 libcilkrts/runtime/linux-symbols.ver             |   1 -
 libcilkrts/runtime/local_state.h                 |   7 ++
 libcilkrts/runtime/mac-symbols.txt               |   1 -
 libcilkrts/runtime/os-unix.c                     |   5 +-
 libcilkrts/runtime/record-replay.cpp             |   2 +-
 libcilkrts/runtime/scheduler.c                   |  18 +++-
 libcilkrts/runtime/scheduler.h                   |   3 +
 libcilkrts/runtime/sysdep-unix.c                 |  12 +--
 29 files changed, 530 insertions(+), 68 deletions(-)
 create mode 100644 libcilkrts/runtime/config/sparc/cilk-abi-vla.c
 create mode 100644 libcilkrts/runtime/config/sparc/os-fence.h
 create mode 100644 libcilkrts/runtime/config/sparc/os-unix-sysdep.c

diff --git a/libcilkrts/ChangeLog b/libcilkrts/ChangeLog
index 68ca82a..443f0a2 100644
--- a/libcilkrts/ChangeLog
+++ b/libcilkrts/ChangeLog
@@ -1,3 +1,10 @@
+2016-12-01  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>
+
+	PR target/68945
+	Merge from upstream, version 2.0.4467.0.
+	Fix typo in git URL.
+	* aclocal.m4, configure, Makefile.in: Regenerate.
+
 2016-11-15  Matthias Klose  <doko@ubuntu.com>
 
 	* configure: Regenerate.
diff --git a/libcilkrts/Makefile.am b/libcilkrts/Makefile.am
index 3736a63..1dec6aa 100644
--- a/libcilkrts/Makefile.am
+++ b/libcilkrts/Makefile.am
@@ -54,7 +54,7 @@ GENERAL_FLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/runtime -I$(top_srcdir)/
 # Enable Intel Cilk Plus extension
 GENERAL_FLAGS += -fcilkplus
 
-# Always generate unwind tables
+#Always generate unwind tables
 GENERAL_FLAGS += -funwind-tables
 
 AM_CFLAGS = $(XCFLAGS) $(GENERAL_FLAGS) -std=c99
diff --git a/libcilkrts/Makefile.in b/libcilkrts/Makefile.in
index ff88e9d..6520b16 100644
--- a/libcilkrts/Makefile.in
+++ b/libcilkrts/Makefile.in
@@ -396,7 +396,7 @@ ACLOCAL_AMFLAGS = -I .. -I ../config
 
 # Enable Intel Cilk Plus extension
 
-# Always generate unwind tables
+#Always generate unwind tables
 GENERAL_FLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/runtime \
 	-I$(top_srcdir)/runtime/config/$(config_dir) \
 	-I$(top_srcdir)/runtime/sslib -DIN_CILK_RUNTIME=1 -fcilkplus \
@@ -455,7 +455,7 @@ libcilkrts_la_SOURCES = \
   runtime/sysdep-unix.c            \
   runtime/worker_mutex.c
 
-CILK_REVISION = 4420
+CILK_REVISION = 4467
 
 # Load the $(REVISION) value.
 
diff --git a/libcilkrts/README b/libcilkrts/README
index d3503f4..54f8b04 100644
--- a/libcilkrts/README
+++ b/libcilkrts/README
@@ -1,14 +1,16 @@
-Intel(R) Cilk(TM) Plus runtime library
+Intel(R) Cilk(TM) Plus Runtime Library
 
 Index:
-1. BUILDING
-2. USING
-3. DOXYGEN DOCUMENTATION
-4. QUESTIONS OR BUGS
-5. CONTRIBUTIONS
+1. BUILDING WITH AUTOMAKE
+2. BUILDING WITH CMAKE
+3. INSTALLING TO VXWORKS
+4. USING
+5. DOXYGEN DOCUMENTATION
+6. QUESTIONS OR BUGS
+7. CONTRIBUTIONS
 
 #
-#  1. BUILDING:
+#  1. BUILDING WITH AUTOMAKE:
 #
 
 To distribute applications that use the Intel Cilk Plus language
@@ -40,22 +42,87 @@ configure script:
 
 % ./configure --prefix=/your/path/to/lib
 
-It is also possible to use CMake if the above method does not apply
-well in your environment. Instruction is available in CMakeLists.txt.
+#
+#  2. BUILDING WITH CMAKE:
+#
+
+To distribute applications that use the Intel Cilk Plus language
+extensions to non-development systems, you need to build the Intel
+Cilk Plus runtime library and distribute it with your application.
+These instructions describe the build process using CMake*, which
+supports Linux*, Windows*, and OS X*.  It is fine to use this process
+to build a Linux library, but it is highly recommended to use the
+more mature build process described above when building on Linux.
+
+You need the CMake tool and a C/C++ compiler that supports the Intel
+Cilk Plus language extensions, and the requirements for each operating
+system are:
+
+Common:
+    CMake 3.0.0 or later
+    Make tools such as make (Linux, OS X) or nmake (Windows)
+Linux:
+    GCC* 4.9.2 or later, or Intel(R) C++ Compiler v12.1 or later
+Windows:
+    Intel C++ Compiler v12.1 or later
+    Visual Studio* 2010 or later
+OS X:
+    Cilk-enabled branch of Clang*/LLVM* (http://cilkplus.github.io),
+    or Intel C++ Compiler v12.1 or later
+
+The common steps to build the libraries are 1) invoke cmake with
+appropriate options, 2) invoke a make tool available on the system.
+The following examples show build processes on OS X and Windows.
+
+OS X:
+    % mkdir ./build && cd ./build
+    % cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ \
+            -DCMAKE_INSTALL_PREFIX=./install ..
+    % make && make install
+
+Windows:
+    % mkdir .\build && cd .\build
+    % cmake -G "NMake Makefiles" -DCMAKE_C_COMPILER=icl \
+            -DCMAKE_CXX_COMPILER=icl -DCMAKE_INSTALL_PREFIX=.\install ..
+    % nmake && nmake install
+
+#
+#  3. INSTALLING TO VXWORKS:
+#
+
+For Windows host, run VxWorks_Install.bat.
+For Linux host, run VxWorks_Install.sh.
+
+You may need to set the environment variable WIND_BASE to the VxWorks
+installation path.
+
+Create a VSB project, and you will see a layer named "CILKPLUS_KERNEL".
+Enable it and build the project.
+
+Create a VIP project with ICC and add component INCLUDE_CILKPLUS, then you
+will get support of Intel Cilk Plus features in VxWorks.
 
 #
-#  2. USING:
+#  4. USING:
 #
 
 The Intel(R) C++ Compiler will automatically try to bring in the
 Intel Cilk Plus runtime in any program that uses the relevant
-features.  GCC requires explicit linking of both the library and
-its dependencies (libpthread, libdl).  For example:
+features.  GCC and Clang require an explicit compiler option,
+-fcilkplus, to enable Intel Cilk Plus language extensions.
+For example,
+
+% gcc -fcilkplus -o foo.exe foo.c
+% clang -fcilkplus -o foo.exe foo.c 
+
+Older GCC versions (e.g., 4.8 cilkplus branch) require explicit linking
+of both the library and its dependencies (libpthread, libdl).
+For example:
 
 % gcc foo.c -lcilkrts -lpthread -ldl
 
 #
-#  3. DOXYGEN DOCUMENTATION:
+#  5. DOXYGEN DOCUMENTATION:
 #
 
 The library source has Doxygen markup.  Generate HTML documentation
@@ -64,7 +131,7 @@ based on the markup by changing directory into runtime and running:
 % doxygen doxygen.cfg
 
 #
-#  4. QUESTIONS OR BUGS:
+#  6. QUESTIONS OR BUGS:
 #
 
 Issues with the Intel Cilk Plus runtime can be addressed in the Intel
@@ -72,7 +139,7 @@ Cilk Plus forums:
 http://software.intel.com/en-us/forums/intel-cilk-plus/
 
 #
-#  5. CONTRIBUTIONS:
+#  7. CONTRIBUTIONS:
 #
 
 The Intel Cilk Plus runtime library is dual licensed. The upstream copy
@@ -85,6 +152,13 @@ contributed to the upstream version via http://cilkplus.org/.
 Thanks to Tobias Burnus for showing us the magic to make gcc and g++
 automatically include the Cilk Plus runtime.
 
+Thanks to Eric Olson for sharing his patch for Raspberry Pi* with us.
+
+Thanks to Rainer Orth for submitting patches for exception handling and
+enabling Cilk Plus on the SPARC* architecture.
+
 ------------------------
 Intel and Cilk are trademarks of Intel Corporation in the U.S. and/or
 other countries.
+
+*Other names and brands may be claimed as the property of others.
diff --git a/libcilkrts/configure b/libcilkrts/configure
index c04df2b..ecf88ee 100644
--- a/libcilkrts/configure
+++ b/libcilkrts/configure
@@ -5676,6 +5676,50 @@ _ACEOF
 fi
 
 
+# Check for dl functions
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for dladdr in -ldl" >&5
+$as_echo_n "checking for dladdr in -ldl... " >&6; }
+if test "${ac_cv_lib_dl_dladdr+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-ldl  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char dladdr ();
+int
+main ()
+{
+return dladdr ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_dl_dladdr=yes
+else
+  ac_cv_lib_dl_dladdr=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dladdr" >&5
+$as_echo "$ac_cv_lib_dl_dladdr" >&6; }
+if test "x$ac_cv_lib_dl_dladdr" = x""yes; then :
+
+$as_echo "#define HAVE_DLADDR 1" >>confdefs.h
+
+fi
+
+
 # Check whether the target supports protected visibility.
 save_CFLAGS="$CFLAGS"
 CFLAGS="$CFLAGS -Werror"
@@ -5766,11 +5810,7 @@ esac
 # contains information on what's needed
 case "${target}" in
 
-  x86_64-*-*)
-    config_dir="x86"
-    ;;
-
-  i?86-*-*)
+  i?86-*-* | x86_64-*-*)
     config_dir="x86"
     ;;
 
@@ -5778,6 +5818,10 @@ case "${target}" in
     config_dir="arm"
     ;;
 
+  sparc*-*-*)
+    config_dir="sparc"
+    ;;
+
   *)
     config_dir="generic"
     ;;
@@ -11893,7 +11937,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 11896 "configure"
+#line 11940 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -11999,7 +12043,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 12002 "configure"
+#line 12046 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
diff --git a/libcilkrts/configure.ac b/libcilkrts/configure.ac
index 39efeb4..8270e0b 100644
--- a/libcilkrts/configure.ac
+++ b/libcilkrts/configure.ac
@@ -69,6 +69,10 @@ AC_PROG_CXX
 AC_CONFIG_FILES([Makefile libcilkrts.spec])
 AC_FUNC_ALLOCA
 
+# Check for dl functions
+AC_CHECK_LIB(dl, dladdr,
+             [AC_DEFINE(HAVE_DLADDR, 1, [Define if you have dladdr()])])
+
 # Check whether the target supports protected visibility.
 save_CFLAGS="$CFLAGS"
 CFLAGS="$CFLAGS -Werror"
@@ -141,11 +145,7 @@ esac
 # contains information on what's needed
 case "${target}" in
 
-  x86_64-*-*)
-    config_dir="x86"
-    ;;
-
-  i?86-*-*)
+  i?86-*-* | x86_64-*-*)
     config_dir="x86"
     ;;
 
@@ -153,6 +153,10 @@ case "${target}" in
     config_dir="arm"
     ;;
 
+  sparc*-*-*)
+    config_dir="sparc"
+    ;;
+
   *)
     config_dir="generic"
     ;;
diff --git a/libcilkrts/configure.tgt b/libcilkrts/configure.tgt
index 7f0befc..71f69b0 100644
--- a/libcilkrts/configure.tgt
+++ b/libcilkrts/configure.tgt
@@ -44,12 +44,12 @@
 
 # Disable Cilk Runtime library for unsupported architectures.
 case "${target}" in
-  x86_64-*-*)
-    ;;
-  i?86-*-*)
+  i?86-*-* | x86_64-*-*)
     ;;
   arm-*-*)
     ;;
+  sparc*-*-*)
+    ;;
   *-*-*)
     UNSUPPORTED=1
     ;;
diff --git a/libcilkrts/include/cilk/cilk_undocumented.h b/libcilkrts/include/cilk/cilk_undocumented.h
index 5f4a8c5..71a51ec 100644
--- a/libcilkrts/include/cilk/cilk_undocumented.h
+++ b/libcilkrts/include/cilk/cilk_undocumented.h
@@ -106,9 +106,6 @@ size_t __cilkrts_get_stack_size(void);
 CILK_EXPORT __CILKRTS_NOTHROW
 void __cilkrts_dump_stats(void);
 
-CILK_EXPORT __CILKRTS_NOTHROW
-int __cilkrts_irml_version(void);
-
 struct __cilk_tbb_unwatch_thunk;
 struct __cilk_tbb_stack_op_thunk;
 
diff --git a/libcilkrts/include/internal/cilk_version.h b/libcilkrts/include/internal/cilk_version.h
index 95e1f2e..c997b02 100644
--- a/libcilkrts/include/internal/cilk_version.h
+++ b/libcilkrts/include/internal/cilk_version.h
@@ -49,10 +49,10 @@
 
 #define VERSION_MAJOR 2
 #define VERSION_MINOR 0
-#define VERSION_BUILD 4420
+#define VERSION_BUILD 4467
 #define VERSION_REV 0
-#define VERSION_STRING "2,0,4420,0"
-#define VERSION_HASH "3b2d6aa9059c"
+#define VERSION_STRING "2,0,4467,0"
+#define VERSION_HASH "b7e54d87bd17"
 #define VERSION_BRANCH "eng"
-#define TBB_REV_NUMBER "14788"
-#define VERSION_YEAR "2015"
+#define TBB_REV_NUMBER ""
+#define VERSION_YEAR "2016"
diff --git a/libcilkrts/include/internal/rev.mk b/libcilkrts/include/internal/rev.mk
index 96ffdc4..cd78865 100644
--- a/libcilkrts/include/internal/rev.mk
+++ b/libcilkrts/include/internal/rev.mk
@@ -49,4 +49,4 @@
 #
 # It was automatically generated by cilkrts/include/internal/Makefile
 
-CILK_REVISION = 4420
+CILK_REVISION = 4467
diff --git a/libcilkrts/runtime/cilk-abi.c b/libcilkrts/runtime/cilk-abi.c
index 35bb413..8487873 100644
--- a/libcilkrts/runtime/cilk-abi.c
+++ b/libcilkrts/runtime/cilk-abi.c
@@ -709,7 +709,6 @@ __cilkrts_watch_stack(__cilk_tbb_unwatch_thunk *u,
     return 0;   /* Success! */
 }
 
-
 // This function must be called only within a continuation, within the stack
 // frame of the continuation itself.
 CILK_API_INT __cilkrts_synched(void)
diff --git a/libcilkrts/runtime/cilk_fiber-unix.cpp b/libcilkrts/runtime/cilk_fiber-unix.cpp
index d59bfca..c38c49f 100644
--- a/libcilkrts/runtime/cilk_fiber-unix.cpp
+++ b/libcilkrts/runtime/cilk_fiber-unix.cpp
@@ -220,7 +220,7 @@ NORETURN cilk_fiber_sysdep::run()
         // enough extra space from the top of the stack we are
         // switching to for any temporaries required for this run()
         // function.
-        JMPBUF_SP(m_resume_jmpbuf) = m_stack_base - frame_size;
+        JMPBUF_SP(m_resume_jmpbuf) = CILK_ADJUST_SP(m_stack_base - frame_size);
 
         // GCC doesn't allow us to call __builtin_longjmp in the same function
         // that calls __builtin_setjmp, so it's been moved into it's own
diff --git a/libcilkrts/runtime/cilk_fiber.h b/libcilkrts/runtime/cilk_fiber.h
index d91687a..43057f2 100644
--- a/libcilkrts/runtime/cilk_fiber.h
+++ b/libcilkrts/runtime/cilk_fiber.h
@@ -73,9 +73,12 @@
  *
  * A value of 0 means no debugging.
  * Higher values generate more debugging output.
+ *
  */
-#define FIBER_DEBUG 0
 
+#ifndef FIBER_DEBUG
+#define FIBER_DEBUG 0
+#endif
 /**
  * @brief Flag for validating reference counts.
  * 
diff --git a/libcilkrts/runtime/config/arm/os-fence.h b/libcilkrts/runtime/config/arm/os-fence.h
index 67e157a..779a2dc 100644
--- a/libcilkrts/runtime/config/arm/os-fence.h
+++ b/libcilkrts/runtime/config/arm/os-fence.h
@@ -47,6 +47,14 @@
  *  for your assistance in helping us improve Cilk Plus.
  **************************************************************************/
 
+// __atomic_* intrinsics are available since GCC 4.7.
+#define HAVE_ATOMIC_INTRINSICS defined(__GNUC__) && \
+                               (__GNUC__ * 10 + __GNUC_MINOR__ >= 47)
+
+// GCC before 4.4 does not implement __sync_synchronize properly
+#define HAVE_SYNC_INTRINSICS defined(__GNUC__) && \
+                             (__GNUC__ * 10 + __GNUC_MINOR__ >= 44)
+
 /*
  * void __cilkrts_fence(void)
  *
@@ -60,5 +68,12 @@
  * the CPUID instruction).
  */
 
-// COMMON_SYSDEP void __cilkrts_fence(void); ///< MFENCE instruction
-# define __cilkrts_fence() __asm__ __volatile__ ("mcr   p15,0,%[t],c7,c10,4\n" :: [t] "r" (0) : "memory");
+#if HAVE_ATOMIC_INTRINSICS
+#   define __cilkrts_fence() __atomic_thread_fence(__ATOMIC_SEQ_CST)
+#elif HAVE_SYNC_INTRINSICS
+#   define __cilkrts_fence() __sync_synchronize()
+#else
+#   define __cilkrts_fence()
+// Leaving this code just in case.
+//# define __cilkrts_fence() __asm__ __volatile__ ("mcr   p15,0,%[t],c7,c10,4\n" :: [t] "r" (0) : "memory");
+#endif
diff --git a/libcilkrts/runtime/config/sparc/cilk-abi-vla.c b/libcilkrts/runtime/config/sparc/cilk-abi-vla.c
new file mode 100644
index 0000000..cf88d99
--- /dev/null
+++ b/libcilkrts/runtime/config/sparc/cilk-abi-vla.c
@@ -0,0 +1,115 @@
+/* cilk-abi-vla.c                    -*-C-*-
+ *
+ *************************************************************************
+ *
+ *  Copyright (C) 2013-2016, Intel Corporation
+ *  All rights reserved.
+ *  
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *  
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in
+ *      the documentation and/or other materials provided with the
+ *      distribution.
+ *    * Neither the name of Intel Corporation nor the names of its
+ *      contributors may be used to endorse or promote products derived
+ *      from this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ *  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ *  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ *  WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  *********************************************************************
+ *  
+ *  PLEASE NOTE: This file is a downstream copy of a file maintained in
+ *  a repository at cilkplus.org. Changes made to this file that are not
+ *  submitted through the contribution process detailed at
+ *  http://www.cilkplus.org/submit-cilk-contribution will be lost the next
+ *  time that a new version is released. Changes only submitted to the
+ *  GNU compiler collection or posted to the git repository at
+ *  https://bitbucket.org/intelcilkruntime/intel-cilk-runtime.git are
+ *  not tracked.
+ *  
+ *  We welcome your contributions to this open source project. Thank you
+ *  for your assistance in helping us improve Cilk Plus.
+ *
+ **************************************************************************/
+
+/*
+ * Implementation of Variable Length Array (VLA) ABI.
+ *
+ * The compiler calls these functions to allocate Variable Length Arrays
+ * at runtime.  The compiler must guarantee that __cilkrts_stack_free() is
+ * called to cleanup any memory allocated by __cilkrts_stack_alloc().
+ *
+ * This generic implementation always allocates the memory from the heap.
+ * Optimally, the implementation should expand the frame of the calling
+ * function if possible, since that will be faster.  See the x86 version
+ * for one possible implementation.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#include "internal/abi.h"
+#include "cilk-abi-vla-internal.h"
+
+// Allocate space for a variable length array
+CILK_ABI(__cilkrts_void_ptr)
+__cilkrts_stack_alloc(
+    __cilkrts_stack_frame *sf,
+    size_t size,
+    size_t distance_from_sp_to_alloca_area,
+    uint32_t align,     // align is always >= minimum stack alignment and
+                        // >= ptr_size as well, and must be a power of 2.
+    uint32_t needs_tag  // non-zero if the pointer being returned needs to
+                        // be tagged
+)
+{
+    // full_size is size rounded up to the next multiple of align; the
+    // mask trick below relies on align being a power of 2.
+    size_t full_size = (size + align - 1) & ~(align - 1);
+
+    // Allocate memory from the heap.  The compiler is responsible
+    // for guaranteeing us a chance to free it before the function
+    // exits
+
+    return (void *)vla_internal_heap_alloc(sf, full_size, align);
+}
+
+// Free the space allocated for a variable length array.
+CILK_ABI(void)
+__cilkrts_stack_free(
+    __cilkrts_stack_frame *sf,
+    void *p,
+    size_t size,
+    size_t distance_from_sp_to_alloca_area,
+    uint32_t align, // same requirements as for align in allocation,
+                    // and must match alignment that was passed when
+                    // doing the allocation 
+    uint32_t known_from_stack  // non-zero if this is known to be allocated
+                               // on the stack, and therefore has no tag
+)
+{
+    // full_size is size rounded up to the next multiple of align; the
+    // mask trick below relies on align being a power of 2.
+    size_t full_size = (size + align - 1) & ~(align - 1);
+
+    // Just free the allocated memory to the heap since we don't know
+    // how to expand/contract the calling frame
+    vla_internal_heap_free(p, full_size);
+}
diff --git a/libcilkrts/runtime/config/sparc/os-fence.h b/libcilkrts/runtime/config/sparc/os-fence.h
new file mode 100644
index 0000000..24e7993
--- /dev/null
+++ b/libcilkrts/runtime/config/sparc/os-fence.h
@@ -0,0 +1,64 @@
+/* os-fence.h                  -*-C++-*-
+ *
+ *************************************************************************
+ *
+ *  Copyright (C) 2009-2016, Intel Corporation
+ *  All rights reserved.
+ *  
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *  
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in
+ *      the documentation and/or other materials provided with the
+ *      distribution.
+ *    * Neither the name of Intel Corporation nor the names of its
+ *      contributors may be used to endorse or promote products derived
+ *      from this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ *  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ *  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ *  WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  *********************************************************************
+ *  
+ *  PLEASE NOTE: This file is a downstream copy of a file maintained in
+ *  a repository at cilkplus.org. Changes made to this file that are not
+ *  submitted through the contribution process detailed at
+ *  http://www.cilkplus.org/submit-cilk-contribution will be lost the next
+ *  time that a new version is released. Changes only submitted to the
+ *  GNU compiler collection or posted to the git repository at
+ *  https://bitbucket.org/intelcilkruntime/intel-cilk-runtime.git are
+ *  not tracked.
+ *  
+ *  We welcome your contributions to this open source project. Thank you
+ *  for your assistance in helping us improve Cilk Plus.
+ **************************************************************************/
+
+// GCC before 4.4 does not implement __sync_synchronize properly.  Do not use
+// defined() here: it is undefined behavior when produced by macro expansion
+// inside #if.  Identifiers that are not macros evaluate to 0 there anyway.
+#define HAVE_SYNC_INTRINSICS (__GNUC__ * 10 + __GNUC_MINOR__ >= 44)
+
+/*
+ * void __cilkrts_fence(void)
+ */
+#if HAVE_SYNC_INTRINSICS
+#   define __cilkrts_fence() __sync_synchronize()
+#elif defined(__GNUC__)
+#   define __cilkrts_fence() __asm__ volatile ("membar #StoreLoad" ::: "memory")
+#else
+COMMON_SYSDEP void __cilkrts_fence(void);
+#endif
diff --git a/libcilkrts/runtime/config/sparc/os-unix-sysdep.c b/libcilkrts/runtime/config/sparc/os-unix-sysdep.c
new file mode 100644
index 0000000..997e9f6
--- /dev/null
+++ b/libcilkrts/runtime/config/sparc/os-unix-sysdep.c
@@ -0,0 +1,115 @@
+/* os-unix-sysdep.c                  -*-C-*-
+ *
+ *************************************************************************
+ *
+ *  Copyright (C) 2009-2016, Intel Corporation
+ *  All rights reserved.
+ *  
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *  
+ *    * Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    * Redistributions in binary form must reproduce the above copyright
+ *      notice, this list of conditions and the following disclaimer in
+ *      the documentation and/or other materials provided with the
+ *      distribution.
+ *    * Neither the name of Intel Corporation nor the names of its
+ *      contributors may be used to endorse or promote products derived
+ *      from this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
+ *  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ *  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
+ *  WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  *********************************************************************
+ *  
+ *  PLEASE NOTE: This file is a downstream copy of a file maintained in
+ *  a repository at cilkplus.org. Changes made to this file that are not
+ *  submitted through the contribution process detailed at
+ *  http://www.cilkplus.org/submit-cilk-contribution will be lost the next
+ *  time that a new version is released. Changes only submitted to the
+ *  GNU compiler collection or posted to the git repository at
+ *  https://bitbucket.org/intelcilkruntime/intel-cilk-runtime.git are
+ *  not tracked.
+ *  
+ *  We welcome your contributions to this open source project. Thank you
+ *  for your assistance in helping us improve Cilk Plus.
+ *************************************************************************
+ *
+ * This file contains system-specific code for sparc-based systems
+ */
+
+#include "os.h"
+#include "sysdep.h"
+
+/*
+ * Read the %tick register.  The cycle counter is used for debugging; this is
+ * only called if CILK_PROFILE is defined when the runtime is built.
+ */
+COMMON_SYSDEP unsigned long long __cilkrts_getticks(void)
+{
+    unsigned long long tick;
+#ifdef __sparcv9
+    __asm__ volatile("rd %%tick, %0" : "=r"(tick));
+#else /* presumably 32-bit: tick is filled through its %L0 and %H0 parts */
+    __asm__ volatile("rd %%tick, %L0\n"
+                     "srlx %L0, 32, %H0"
+                     : "=r"(tick));
+#endif
+    return tick;
+}
+
+/*
+ * A "short pause" - called from the Cilk runtime's spinloops.
+ */
+COMMON_SYSDEP void __cilkrts_short_pause(void)
+{
+    /* Spin around for 8 cycles: four reads of %ccr with %g0 as target.  */
+    __asm__ volatile("rd %ccr, %g0");
+    __asm__ volatile("rd %ccr, %g0");
+    __asm__ volatile("rd %ccr, %g0");
+    __asm__ volatile("rd %ccr, %g0");
+}
+
+/*
+ * Atomic swap used by the Cilk runtime's spinloops: stores x into *ptr.
+ */
+COMMON_SYSDEP int __cilkrts_xchg(volatile int *ptr, int x)
+{
+    /* The builtin returns the value *ptr held before the store.  */
+    return __sync_lock_test_and_set(ptr, x);
+}
+
+
+/*
+ * Restore the floating point state that is stored in a stack frame at each
+ * spawn.  This should be called each time a frame is resumed.
+ *
+ * Only valid for IA32 and Intel64 processors, so this sparc version is empty.
+ */
+void restore_x86_fp_state (__cilkrts_stack_frame *sf)
+{
+}
+
+
+/*
+ * Save the floating point state to the __cilkrts_stack_frame at each spawn.
+ *
+ * Architecture-specific - Should only be needed on IA32 and Intel64
+ * processors, so this sparc version is empty.
+ */
+void sysdep_save_fp_ctrl_state(__cilkrts_stack_frame *sf)
+{
+}
+
diff --git a/libcilkrts/runtime/except-gcc.cpp b/libcilkrts/runtime/except-gcc.cpp
index 4940acb..7fc6e6b 100644
--- a/libcilkrts/runtime/except-gcc.cpp
+++ b/libcilkrts/runtime/except-gcc.cpp
@@ -60,7 +60,9 @@
 #include <stdint.h>
 #include <typeinfo>
 
+#ifndef DEBUG_EXCEPTIONS
 #define DEBUG_EXCEPTIONS 0
+#endif
 
 struct pending_exception_info
 {
diff --git a/libcilkrts/runtime/global_state.cpp b/libcilkrts/runtime/global_state.cpp
index 2af6697..6c77b5f 100644
--- a/libcilkrts/runtime/global_state.cpp
+++ b/libcilkrts/runtime/global_state.cpp
@@ -579,7 +579,6 @@ global_state_t* cilkg_init_global_state()
     g->stack_size = cilkos_validate_stack_size(g->stack_size);
     g->failure_to_allocate_stack = 0;
 
-
     return g;
 }
 
diff --git a/libcilkrts/runtime/global_state.h b/libcilkrts/runtime/global_state.h
index 527a4b5..f5937b8 100644
--- a/libcilkrts/runtime/global_state.h
+++ b/libcilkrts/runtime/global_state.h
@@ -215,7 +215,6 @@ struct global_state_t { /* COMMON_PORTABLE */
     /// Global fiber pool
     cilk_fiber_pool fiber_pool;
 
-
     /**
      * @brief Track whether the runtime has failed to allocate a
      * stack.
diff --git a/libcilkrts/runtime/jmpbuf.h b/libcilkrts/runtime/jmpbuf.h
index 0ce7ff8..9ae2fd2 100644
--- a/libcilkrts/runtime/jmpbuf.h
+++ b/libcilkrts/runtime/jmpbuf.h
@@ -107,6 +107,18 @@
  */
 #define SP(SF) JMPBUF_SP((SF)->ctx)
 
+/**
+ * @brief Some architecture-dependent stack adjustment.
+ */
+#if defined(__sparcv9)
+    // Subtract sparc v9 stack bias so the actual stack starts at the
+    // allocated area.
+#   define CILK_ADJUST_SP(SP) ((SP) - 2047)
+#   define CILK_UNADJUST_SP(SP) ((SP) + 2047)
+#else
+#   define CILK_ADJUST_SP(SP) (SP)
+#   define CILK_UNADJUST_SP(SP) (SP)
+#endif
 
 __CILKRTS_BEGIN_EXTERN_C
 
@@ -120,7 +132,7 @@ __CILKRTS_BEGIN_EXTERN_C
  */
 inline char *__cilkrts_get_sp(__cilkrts_stack_frame *sf)
 {
-    return (char *)SP(sf);
+    return (char *)CILK_UNADJUST_SP(SP(sf));
 }
 
 /**
diff --git a/libcilkrts/runtime/linux-symbols.ver b/libcilkrts/runtime/linux-symbols.ver
index d656842..57b4e8a 100644
--- a/libcilkrts/runtime/linux-symbols.ver
+++ b/libcilkrts/runtime/linux-symbols.ver
@@ -71,7 +71,6 @@ CILKABI0
     __cilkrts_hyperobject_dealloc;
     __cilkrts_hyperobject_noop_destroy;
     __cilkrts_init;
-    __cilkrts_irml_version;
     __cilkrts_leave_frame;
     __cilkrts_metacall;
     __cilkrts_rethrow;
diff --git a/libcilkrts/runtime/local_state.h b/libcilkrts/runtime/local_state.h
index d16599f..9b28685 100644
--- a/libcilkrts/runtime/local_state.h
+++ b/libcilkrts/runtime/local_state.h
@@ -360,6 +360,13 @@ struct local_state  /* COMMON_PORTABLE */
     unsigned int steal_failure_count;
 
     /**
+     * Record indicating that the worker stole work at least once.
+     *
+     * [local read/write]
+     */
+    int has_stolen;
+
+    /**
      * 1 if work was stolen from another worker.  When true, this will flag
      * setup_for_execution_pedigree to increment the pedigree when we resume
      * execution to match the increment that would have been done on a return
diff --git a/libcilkrts/runtime/mac-symbols.txt b/libcilkrts/runtime/mac-symbols.txt
index efec3e9..29bf481 100644
--- a/libcilkrts/runtime/mac-symbols.txt
+++ b/libcilkrts/runtime/mac-symbols.txt
@@ -33,7 +33,6 @@ ___cilkrts_hyperobject_alloc
 ___cilkrts_hyperobject_dealloc
 ___cilkrts_hyperobject_noop_destroy
 ___cilkrts_init
-
 ___cilkrts_leave_frame
 ___cilkrts_metacall
 ___cilkrts_resume
diff --git a/libcilkrts/runtime/os-unix.c b/libcilkrts/runtime/os-unix.c
index d339daf..c419fb6 100644
--- a/libcilkrts/runtime/os-unix.c
+++ b/libcilkrts/runtime/os-unix.c
@@ -432,7 +432,9 @@ COMMON_SYSDEP void __cilkrts_idle(void)
 #elif defined(__MIC__)
     _mm_delay_32(1024);
 #elif defined(__linux__) || \
-      defined(__APPLE__)
+      defined(__APPLE__) || \
+      defined(__CYGWIN__)
+      
     usleep(10000);
 #else
 # error "Unsupported architecture"
@@ -452,6 +454,7 @@ COMMON_SYSDEP void __cilkrts_yield(void)
 {
 #if defined(__ANDROID__)  || \
     defined(__APPLE__)    || \
+    defined(__CYGWIN__)   || \
     defined(__FreeBSD__)  || \
     defined(__VXWORKS__)  || \
     (defined(__sun__) && defined(__svr4__))
diff --git a/libcilkrts/runtime/record-replay.cpp b/libcilkrts/runtime/record-replay.cpp
index 293c99a..d92d28f 100644
--- a/libcilkrts/runtime/record-replay.cpp
+++ b/libcilkrts/runtime/record-replay.cpp
@@ -262,7 +262,7 @@ char * walk_pedigree_nodes(char *p, const __cilkrts_pedigree *pnode)
     if (pnode->parent)
     {
         p = walk_pedigree_nodes(p, pnode->parent);
-        p += cilk_snprintf_s(p, PEDIGREE_BUFF_SIZE, "%s", (char *) "_");
+        p += cilk_snprintf_s(p, PEDIGREE_BUFF_SIZE, "%s", (char *)"_");
     }
     return p + cilk_snprintf_l(p, PEDIGREE_BUFF_SIZE, "%" PRIu64, pnode->rank);
 }
diff --git a/libcilkrts/runtime/scheduler.c b/libcilkrts/runtime/scheduler.c
index 538c431..82c9e02 100644
--- a/libcilkrts/runtime/scheduler.c
+++ b/libcilkrts/runtime/scheduler.c
@@ -1789,20 +1789,27 @@ static full_frame* check_for_work(__cilkrts_worker *w)
         if (NULL == ff) {
             // Punish the worker for failing to steal.
             // No quantum for you!
-            if (w->l->steal_failure_count > 30000) {
-                // Punish more if the worker has been doing unsuccessful steals
-                // for a long time. After return from the idle state, it will
-                // be given a grace period to react quickly.
+            unsigned int max_fails = w->g->max_steal_failures << 1;
+            if (w->l->has_stolen == 0 &&
+                w->l->steal_failure_count % max_fails == max_fails - 1) {
+                // Idle briefly if the worker has never stolen anything for
+                // the given grace period
                 __cilkrts_idle();
-                w->l->steal_failure_count -= 300;
             } else {
                 __cilkrts_yield();
             }
             w->l->steal_failure_count++;
+            if (w->l->steal_failure_count > (max_fails << 8)) {
+                // Reset the flag after certain amount of failures
+                // - This will reduce cpu time in top-level synched regions
+                // - max_fails can be controlled by user (CILK_STEAL_FAILURES)
+                w->l->has_stolen = 0;
+            }
         } else {
             // Reset steal_failure_count since there is obviously still work to
             // be done.
             w->l->steal_failure_count = 0;
+            w->l->has_stolen = 1;
         }
     }
     return ff;
@@ -2912,6 +2919,7 @@ __cilkrts_worker *make_worker(global_state_t *g,
     w->l->stats = NULL;
 #endif    
     w->l->steal_failure_count = 0;
+    w->l->has_stolen = 0;
 
     w->l->work_stolen = 0;
 
diff --git a/libcilkrts/runtime/scheduler.h b/libcilkrts/runtime/scheduler.h
index 74c4509..9546001 100644
--- a/libcilkrts/runtime/scheduler.h
+++ b/libcilkrts/runtime/scheduler.h
@@ -85,7 +85,10 @@ __CILKRTS_BEGIN_EXTERN_C
  * Print debugging messages and assertions for parallel reducers. 0 is
  * no debugging.  A higher value generates more output.
  */
+
+#ifndef REDPAR_DEBUG
 #define REDPAR_DEBUG 0
+#endif
 
 /**
  * @brief Lock the worker mutex to allow exclusive access to the
diff --git a/libcilkrts/runtime/sysdep-unix.c b/libcilkrts/runtime/sysdep-unix.c
index 611934a..0b99008 100644
--- a/libcilkrts/runtime/sysdep-unix.c
+++ b/libcilkrts/runtime/sysdep-unix.c
@@ -465,7 +465,7 @@ char* sysdep_reset_jump_buffers_for_resume(cilk_fiber* fiber,
 
     CILK_ASSERT(fiber);
     void* sp = (void*)get_sp_for_executing_sf(cilk_fiber_get_stack_base(fiber), ff, sf);
-    SP(sf) = sp;
+    SP(sf) = CILK_ADJUST_SP(sp);
 
     /* Debugging: make sure stack is accessible. */
     ((volatile char *)sp)[-1];
@@ -495,7 +495,7 @@ NORETURN sysdep_longjmp_to_sf(char* new_sp,
 #endif
 
     // Set the stack pointer.
-    SP(sf) = new_sp;
+    SP(sf) = CILK_ADJUST_SP(new_sp);
 
 #ifdef RESTORE_X86_FP_STATE
     if (CILK_FRAME_VERSION_VALUE(sf->flags) >= 1) {
@@ -568,7 +568,7 @@ static const char *get_runtime_path ()
 {
     // dladdr is a glibc extension. If it's available, use it to find the path
     // for libcilkrts.so
-#ifdef _GNU_SOURCE
+#if HAVE_DLADDR
     Dl_info info;
     if (0 != dladdr(dummy_function, &info))
         return info.dli_fname;
@@ -689,7 +689,6 @@ static void write_version_file (global_state_t *g, int n)
     // ==================
     // System cores: 8
     // Cilk workers requested: 8
-    // Thread creator: Private
 
     fprintf(fp, "\nThread information\n");
     fprintf(fp, "==================\n");
@@ -699,11 +698,6 @@ static void write_version_file (global_state_t *g, int n)
     fprintf(fp, "System cores: %d\n", (int)sysconf(_SC_NPROCESSORS_ONLN));
 #endif    
     fprintf(fp, "Cilk workers requested: %d\n", n);
-#if (PARALLEL_THREAD_CREATE)
-        fprintf(fp, "Thread creator: Private (parallel)\n");
-#else
-        fprintf(fp, "Thread creator: Private\n");
-#endif
 
     if (fp != stderr && fp != stdout) fclose(fp);
     else fflush(fp); // flush the handle buffer if it is stdout or stderr.
-- 
cgit v1.1


From 56e1a4d7127256bb3f476a6d93954b1948b03985 Mon Sep 17 00:00:00 2001
From: Markus Trippelsdorf <markus@trippelsdorf.de>
Date: Thu, 1 Dec 2016 14:59:03 +0000
Subject: Fix PR tree-optimization/78598 - tree-ssa-loop-prefetch.c:835:16:
 runtime error: signed integer overflow

Using bootstrap-ubsan gcc to build mplayer shows:

tree-ssa-loop-prefetch.c:835:16: runtime error: signed integer overflow:
288230376151711743 * 64 cannot be represented in type 'long int'

Here signed and unsigned integers are mixed in a division resulting in
bogus values: (-83 + 64ULL -1) / 64ULL) == 288230376151711743

Fixed by casting the unsigned parameter to signed.

	PR tree-optimization/78598
	* tree-ssa-loop-prefetch.c (ddown): Cast to signed to avoid
	overflows.

From-SVN: r243113
---
 gcc/ChangeLog                | 6 ++++++
 gcc/tree-ssa-loop-prefetch.c | 4 ++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b90cbc6..17e9831 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,11 @@
 2016-12-01  Markus Trippelsdorf  <markus@trippelsdorf.de>
 
+	PR tree-optimization/78598
+	* tree-ssa-loop-prefetch.c (ddown): Cast to signed to avoid
+	overflows.
+
+2016-12-01  Markus Trippelsdorf  <markus@trippelsdorf.de>
+
 	PR rtl-optimization/78596
 	* combine.c (simplify_comparison): Cast to unsigned to avoid
 	left shifting of negative value.
diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c
index 0a2ee5e..ead2543 100644
--- a/gcc/tree-ssa-loop-prefetch.c
+++ b/gcc/tree-ssa-loop-prefetch.c
@@ -700,9 +700,9 @@ ddown (HOST_WIDE_INT x, unsigned HOST_WIDE_INT by)
   gcc_assert (by > 0);
 
   if (x >= 0)
-    return x / by;
+    return x / (HOST_WIDE_INT) by;
   else
-    return (x + by - 1) / by;
+    return (x + (HOST_WIDE_INT) by - 1) / (HOST_WIDE_INT) by;
 }
 
 /* Given a CACHE_LINE_SIZE and two inductive memory references
-- 
cgit v1.1


From d95fe8017d40a6f0df671fd3e6a5fdc5b1d5319d Mon Sep 17 00:00:00 2001
From: James Greenhalgh <james.greenhalgh@arm.com>
Date: Thu, 1 Dec 2016 15:33:29 +0000
Subject: [Patch testsuite obvious] Use setjmp, not sigsetjmp in
 gcc.dg/pr78582.c

gcc/testsuite/

	* gcc.dg/pr78582.c (main): Call setjmp, not sigsetjmp.

From-SVN: r243116
---
 gcc/testsuite/ChangeLog        | 4 ++++
 gcc/testsuite/gcc.dg/pr78582.c | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 447d9fb..2a04091 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2016-12-01  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	* gcc.dg/pr78582.c (main): Call setjmp, not sigsetjmp.
+
 2016-12-01  Richard Biener  <rguenther@suse.de>
 
 	* gcc.dg/torture/alias-2.c: New testcase.
diff --git a/gcc/testsuite/gcc.dg/pr78582.c b/gcc/testsuite/gcc.dg/pr78582.c
index 3084e3b..5284e3f 100644
--- a/gcc/testsuite/gcc.dg/pr78582.c
+++ b/gcc/testsuite/gcc.dg/pr78582.c
@@ -10,7 +10,7 @@ int
 main (int argc, char argv, char env)
 {
   int a;
-  sigsetjmp (0, 0);
+  setjmp (0);
   argc = a = argc;
   reader_loop ();
 
-- 
cgit v1.1


From a9c21e2a4f44175eab8588d794a3ea3ce2fa8d0b Mon Sep 17 00:00:00 2001
From: Georg-Johann Lay <avr@gjlay.de>
Date: Thu, 1 Dec 2016 15:56:58 +0000
Subject: avr.c: Fix coding rule glitches.

	* config/avr/avr.c: Fix coding rule glitches.

From-SVN: r243118
---
 gcc/ChangeLog        |   4 ++
 gcc/config/avr/avr.c | 152 +++++++++++++++++++++++++--------------------------
 2 files changed, 80 insertions(+), 76 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 17e9831..5b2570b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,7 @@
+2016-12-01  Georg-Johann Lay  <avr@gjlay.de>
+
+	* config/avr/avr.c: Fix coding rule glitches.
+
 2016-12-01  Markus Trippelsdorf  <markus@trippelsdorf.de>
 
 	PR tree-optimization/78598
diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c
index db3c55f..b7fd8798 100644
--- a/gcc/config/avr/avr.c
+++ b/gcc/config/avr/avr.c
@@ -388,7 +388,7 @@ avr_parallel_insn_from_insns (rtx_insn *i[6])
    If this is the case, fill in the insns from casesi to INSNS[1..5] and
    the SImode extension to INSNS[0].  Moreover, extract the operands of
    pattern casesi_<mode>_sequence forged from the sequence to recog_data.  */
-   
+
 static bool
 avr_is_casesi_sequence (basic_block bb, rtx_insn *insn, rtx_insn *insns[6])
 {
@@ -702,7 +702,7 @@ avr_set_core_architecture (void)
           break;
         }
       else if (0 == strcmp (mcu->name, avr_mmcu)
-               // Is this a proper architecture ? 
+               // Is this a proper architecture ?
                && NULL == mcu->macro)
         {
           avr_arch = &avr_arch_types[mcu->arch_id];
@@ -1078,7 +1078,7 @@ avr_set_current_function (tree decl)
 
       if (!STR_PREFIX_P (name, "__vector"))
         warning_at (loc, OPT_Wmisspelled_isr, "%qs appears to be a misspelled "
-                           "%s handler, missing __vector prefix", name, isr);
+                    "%s handler, missing __vector prefix", name, isr);
     }
 
   /* Don't print the above diagnostics more than once.  */
@@ -1163,7 +1163,7 @@ avr_regs_to_save (HARD_REG_SET *set)
               /* Don't record frame pointer registers here.  They are treated
                  indivitually in prologue.  */
               && !(frame_pointer_needed
-                   && (reg == REG_Y || reg == (REG_Y+1)))))
+                   && (reg == REG_Y || reg == REG_Y + 1))))
         {
           if (set)
             SET_HARD_REG_BIT (*set, reg);
@@ -1374,7 +1374,7 @@ sequent_regs_live (void)
       else
         cur_seq = 0;
 
-      if (df_regs_ever_live_p (REG_Y+1))
+      if (df_regs_ever_live_p (REG_Y + 1))
         {
           ++live_seq;
           ++cur_seq;
@@ -1807,7 +1807,8 @@ avr_expand_prologue (void)
   avr_prologue_setup_frame (size, set);
 
   if (flag_stack_usage_info)
-    current_function_static_stack_size = cfun->machine->stack_usage + INCOMING_FRAME_SP_OFFSET;
+    current_function_static_stack_size
+      = cfun->machine->stack_usage + INCOMING_FRAME_SP_OFFSET;
 }
 
 
@@ -1840,9 +1841,9 @@ avr_asm_function_end_prologue (FILE *file)
              avr_outgoing_args_size());
 
   fprintf (file, "/* frame size = " HOST_WIDE_INT_PRINT_DEC " */\n",
-                 get_frame_size());
+           get_frame_size());
   fprintf (file, "/* stack size = %d */\n",
-                 cfun->machine->stack_usage);
+           cfun->machine->stack_usage);
   /* Create symbol stack offset here so all functions have it. Add 1 to stack
      usage for offset so that SP + .L__stack_offset = return address.  */
   fprintf (file, ".L__stack_usage = %d\n", cfun->machine->stack_usage);
@@ -2522,7 +2523,7 @@ avr_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
           rtx x = addr;
           if (GET_CODE (x) == CONST)
             x = XEXP (x, 0);
-          if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x,1)))
+          if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)))
             {
               /* Assembler gs() will implant word address.  Make offset
                  a byte offset inside gs() for assembler.  This is
@@ -2532,14 +2533,14 @@ avr_print_operand_address (FILE *file, machine_mode /*mode*/, rtx addr)
                  from symbol which may not be what the user really wanted.  */
 
               fprintf (file, "gs(");
-              output_addr_const (file, XEXP (x,0));
+              output_addr_const (file, XEXP (x, 0));
               fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC ")",
                        2 * INTVAL (XEXP (x, 1)));
               if (AVR_3_BYTE_PC)
                 if (warning (0, "pointer offset from symbol maybe incorrect"))
                   {
                     output_addr_const (stderr, addr);
-                    fprintf(stderr,"\n");
+                    fprintf (stderr, "\n");
                   }
             }
           else
@@ -2617,12 +2618,12 @@ avr_print_operand (FILE *file, rtx x, int code)
     }
   else if (code == 'E' || code == 'F')
     {
-      rtx op = XEXP(x, 0);
+      rtx op = XEXP (x, 0);
       fprintf (file, "%s", reg_names[REGNO (op) + ef]);
     }
   else if (code == 'I' || code == 'J')
     {
-      rtx op = XEXP(XEXP(x, 0), 0);
+      rtx op = XEXP (XEXP (x, 0), 0);
       fprintf (file, "%s", reg_names[REGNO (op) + ij]);
     }
   else if (REG_P (x))
@@ -2714,12 +2715,12 @@ avr_print_operand (FILE *file, rtx x, int code)
         }
       else if (GET_CODE (addr) == PLUS)
         {
-          avr_print_operand_address (file, VOIDmode, XEXP (addr,0));
+          avr_print_operand_address (file, VOIDmode, XEXP (addr, 0));
           if (REGNO (XEXP (addr, 0)) == REG_X)
             fatal_insn ("internal compiler error.  Bad address:"
                         ,addr);
           fputc ('+', file);
-          avr_print_operand (file, XEXP (addr,1), code);
+          avr_print_operand (file, XEXP (addr, 1), code);
         }
       else
         avr_print_operand_address (file, VOIDmode, addr);
@@ -2753,7 +2754,7 @@ avr_print_operand (FILE *file, rtx x, int code)
                                 code);
       fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
     }
-  else if (GET_CODE (x) == CONST_DOUBLE)
+  else if (CONST_DOUBLE_P (x))
     {
       long val;
       if (GET_MODE (x) != SFmode)
@@ -2781,15 +2782,15 @@ avr_print_operand (FILE *file, rtx x, int code)
 
 static bool
 avr_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
-				     unsigned int align ATTRIBUTE_UNUSED,
-				     enum by_pieces_operation op,
-				     bool speed_p)
+                                    unsigned int align ATTRIBUTE_UNUSED,
+                                    enum by_pieces_operation op,
+                                    bool speed_p)
 {
-
-  if (op != MOVE_BY_PIECES || (speed_p && (size > (MOVE_MAX_PIECES))))
+  if (op != MOVE_BY_PIECES
+      || (speed_p && size > MOVE_MAX_PIECES))
     return default_use_by_pieces_infrastructure_p (size, align, op, speed_p);
 
-  return size <= (MOVE_MAX_PIECES);
+  return size <= MOVE_MAX_PIECES;
 }
 
 
@@ -2951,9 +2952,9 @@ avr_jump_mode (rtx x, rtx_insn *insn)
   int cur_addr = INSN_ADDRESSES (INSN_UID (insn));
   int jump_distance = cur_addr - dest_addr;
 
-  if (-63 <= jump_distance && jump_distance <= 62)
+  if (IN_RANGE (jump_distance, -63, 62))
     return 1;
-  else if (-2046 <= jump_distance && jump_distance <= 2045)
+  else if (IN_RANGE (jump_distance, -2046, 2045))
     return 2;
   else if (AVR_HAVE_JMP_CALL)
     return 3;
@@ -3113,9 +3114,9 @@ avr_simplify_comparison_p (machine_mode mode, RTX_CODE op, rtx x)
    register in which function arguments are sometimes passed.  */
 
 int
-avr_function_arg_regno_p(int r)
+avr_function_arg_regno_p (int r)
 {
-  return (AVR_TINY ? r >= 20 && r <= 25 : r >= 8 && r <= 25);
+  return AVR_TINY ? IN_RANGE (r, 20, 25) : IN_RANGE (r, 8, 25);
 }
 
 
@@ -3526,8 +3527,8 @@ avr_out_lpm (rtx_insn *insn, rtx *op, int *plen)
         }
       else
         {
-          avr_asm_len ("mov %5,%2"         CR_TAB
-                       "ldi %2,%4"         CR_TAB
+          avr_asm_len ("mov %5,%2"   CR_TAB
+                       "ldi %2,%4"   CR_TAB
                        "out %i6,%2"  CR_TAB
                        "mov %2,%5", xop, plen, 4);
         }
@@ -3595,7 +3596,7 @@ avr_out_lpm (rtx_insn *insn, rtx *op, int *plen)
 
           if (REGNO (dest) == REG_Z - 2)
             return avr_asm_len ("%4lpm %5,%a2+" CR_TAB
-                                "%4lpm %C0,%a2"          CR_TAB
+                                "%4lpm %C0,%a2" CR_TAB
                                 "mov %D0,%5", xop, plen, 3);
           else
             {
@@ -3801,8 +3802,8 @@ avr_out_movqi_r_mr_reg_disp_tiny (rtx_insn *insn, rtx op[], int *plen)
   avr_asm_len (TINY_ADIW (%I1, %J1, %o1) CR_TAB
                "ld %0,%b1" , op, plen, -3);
 
-  if (!reg_overlap_mentioned_p (dest, XEXP (x,0))
-      && !reg_unused_after (insn, XEXP (x,0)))
+  if (!reg_overlap_mentioned_p (dest, XEXP (x, 0))
+      && !reg_unused_after (insn, XEXP (x, 0)))
     avr_asm_len (TINY_SBIW (%I1, %J1, %o1), op, plen, 2);
 
   return "";
@@ -3858,8 +3859,8 @@ out_movqi_r_mr (rtx_insn *insn, rtx op[], int *plen)
           avr_asm_len ("adiw r26,%o1" CR_TAB
                        "ld %0,X", op, plen, -2);
 
-          if (!reg_overlap_mentioned_p (dest, XEXP (x,0))
-              && !reg_unused_after (insn, XEXP (x,0)))
+          if (!reg_overlap_mentioned_p (dest, XEXP (x, 0))
+              && !reg_unused_after (insn, XEXP (x, 0)))
             {
               avr_asm_len ("sbiw r26,%o1", op, plen, 1);
             }
@@ -3891,7 +3892,7 @@ avr_out_movhi_r_mr_reg_no_disp_tiny (rtx_insn *insn, rtx op[], int *plen)
 			"ld %B0,%1"          CR_TAB
 			"mov %A0,__tmp_reg__", op, plen, -3);
 
-  avr_asm_len ("ld %A0,%1+"                  CR_TAB
+  avr_asm_len ("ld %A0,%1+" CR_TAB
                "ld %B0,%1", op, plen, -2);
 
   if (!reg_unused_after (insn, base))
@@ -4228,12 +4229,12 @@ out_movsi_r_mr (rtx_insn *insn, rtx op[], int *l)
                           "ld %D0,X"           CR_TAB
                           "mov %C0,__tmp_reg__");
           else if (reg_unused_after (insn, base))
-            return  *l=4, ("ld %A0,X+"  CR_TAB
+            return  *l=4, ("ld %A0,X+" CR_TAB
                            "ld %B0,X+" CR_TAB
                            "ld %C0,X+" CR_TAB
                            "ld %D0,X");
           else
-            return  *l=5, ("ld %A0,X+"  CR_TAB
+            return  *l=5, ("ld %A0,X+" CR_TAB
                            "ld %B0,X+" CR_TAB
                            "ld %C0,X+" CR_TAB
                            "ld %D0,X"  CR_TAB
@@ -4873,7 +4874,7 @@ avr_out_load_psi (rtx_insn *insn, rtx *op, int *plen)
 
           return avr_asm_len ("subi r28,lo8(-%o1)" CR_TAB
                               "sbci r29,hi8(-%o1)" CR_TAB
-                              "ld  %A0,Y"           CR_TAB
+                              "ld  %A0,Y"          CR_TAB
                               "ldd %B0,Y+1"        CR_TAB
                               "ldd %C0,Y+2"        CR_TAB
                               "subi r28,lo8(%o1)"  CR_TAB
@@ -5196,7 +5197,7 @@ avr_out_movqi_mr_r_reg_disp_tiny (rtx_insn *insn, rtx op[], int *plen)
                    "st %b0,%1", op, plen, -3);
     }
 
-  if (!reg_unused_after (insn, XEXP (x,0)))
+  if (!reg_unused_after (insn, XEXP (x, 0)))
       avr_asm_len (TINY_SBIW (%I0, %J0, %o0), op, plen, 2);
 
   return "";
@@ -5243,7 +5244,7 @@ out_movqi_mr_r (rtx_insn *insn, rtx op[], int *plen)
                               "subi r28,lo8(%o0)"  CR_TAB
                               "sbci r29,hi8(%o0)", op, plen, -5);
         }
-      else if (REGNO (XEXP (x,0)) == REG_X)
+      else if (REGNO (XEXP (x, 0)) == REG_X)
         {
           if (reg_overlap_mentioned_p (src, XEXP (x, 0)))
             {
@@ -5257,7 +5258,7 @@ out_movqi_mr_r (rtx_insn *insn, rtx op[], int *plen)
                            "st X,%1", op, plen, -2);
             }
 
-          if (!reg_unused_after (insn, XEXP (x,0)))
+          if (!reg_unused_after (insn, XEXP (x, 0)))
             avr_asm_len ("sbiw r26,%o0", op, plen, 1);
 
           return "";
@@ -5403,7 +5404,7 @@ avr_out_movhi_mr_r_reg_no_disp_tiny (rtx_insn *insn, rtx op[], int *plen)
                        "st %0,__tmp_reg__", op, plen, -5)
         : avr_asm_len ("mov __tmp_reg__,%B1"   CR_TAB
                        TINY_ADIW (%E0, %F0, 1) CR_TAB
-                       "st %0,__tmp_reg__"      CR_TAB
+                       "st %0,__tmp_reg__"     CR_TAB
                        TINY_SBIW (%E0, %F0, 1) CR_TAB
                        "st %0, %A1", op, plen, -7);
     }
@@ -6200,9 +6201,9 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *len)
 	      return ("swap %A0"    CR_TAB
 		      "swap %B0"    CR_TAB
 		      "ldi %3,0xf0" CR_TAB
-		      "and %B0,%3"      CR_TAB
+		      "and %B0,%3"  CR_TAB
 		      "eor %B0,%A0" CR_TAB
-		      "and %A0,%3"      CR_TAB
+		      "and %A0,%3"  CR_TAB
 		      "eor %B0,%A0");
 	    }
 	  break;  /* optimize_size ? 6 : 8 */
@@ -6230,9 +6231,9 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *len)
 		      "swap %A0"    CR_TAB
 		      "swap %B0"    CR_TAB
 		      "ldi %3,0xf0" CR_TAB
-		      "and %B0,%3"      CR_TAB
+		      "and %B0,%3"  CR_TAB
 		      "eor %B0,%A0" CR_TAB
-		      "and %A0,%3"      CR_TAB
+		      "and %A0,%3"  CR_TAB
 		      "eor %B0,%A0");
 	    }
 	  break;  /* 10 */
@@ -6344,7 +6345,7 @@ ashlhi3_out (rtx_insn *insn, rtx operands[], int *len)
 	  if (AVR_HAVE_MUL)
 	    {
 	      *len = 6;
-	      return ("set"            CR_TAB
+	      return ("set"        CR_TAB
 		      "bld r1,5"   CR_TAB
 		      "mul %A0,r1" CR_TAB
 		      "mov %B0,r0" CR_TAB
@@ -7095,9 +7096,9 @@ lshrhi3_out (rtx_insn *insn, rtx operands[], int *len)
 	      return ("swap %B0"    CR_TAB
 		      "swap %A0"    CR_TAB
 		      "ldi %3,0x0f" CR_TAB
-		      "and %A0,%3"      CR_TAB
+		      "and %A0,%3"  CR_TAB
 		      "eor %A0,%B0" CR_TAB
-		      "and %B0,%3"      CR_TAB
+		      "and %B0,%3"  CR_TAB
 		      "eor %A0,%B0");
 	    }
 	  break;  /* optimize_size ? 6 : 8 */
@@ -7125,9 +7126,9 @@ lshrhi3_out (rtx_insn *insn, rtx operands[], int *len)
 		      "swap %B0"    CR_TAB
 		      "swap %A0"    CR_TAB
 		      "ldi %3,0x0f" CR_TAB
-		      "and %A0,%3"      CR_TAB
+		      "and %A0,%3"  CR_TAB
 		      "eor %A0,%B0" CR_TAB
-		      "and %B0,%3"      CR_TAB
+		      "and %B0,%3"  CR_TAB
 		      "eor %A0,%B0");
 	    }
 	  break;  /* 10 */
@@ -7239,7 +7240,7 @@ lshrhi3_out (rtx_insn *insn, rtx operands[], int *len)
 	  if (AVR_HAVE_MUL)
 	    {
 	      *len = 6;
-	      return ("set"            CR_TAB
+	      return ("set"        CR_TAB
 		      "bld r1,3"   CR_TAB
 		      "mul %B0,r1" CR_TAB
 		      "mov %A0,r1" CR_TAB
@@ -7575,7 +7576,7 @@ avr_out_plus_1 (rtx *xop, int *plen, enum rtx_code code, int *pcc,
          where this must be done is when NEG overflowed in case [2s] because
          the V computation needs the right sign of the subtrahend.  */
 
-      rtx msb = simplify_gen_subreg (QImode, xop[0], mode, n_bytes-1);
+      rtx msb = simplify_gen_subreg (QImode, xop[0], mode, n_bytes - 1);
 
       avr_asm_len ("subi %0,128" CR_TAB
                    "brmi 0f", &msb, plen, 2);
@@ -8257,9 +8258,9 @@ avr_out_sign_extend (rtx_insn *insn, rtx *xop, int *plen)
       avr_asm_len ("mov __tmp_reg__,%0", &r_msb, plen, 1);
       r_msb = tmp_reg_rtx;
     }
-  
+
   avr_asm_len ("lsl %0", &r_msb, plen, 1);
-                   
+
   // ...and propagate it to all the new sign bits
 
   for (unsigned n = n_src; n < n_dest; n++)
@@ -8374,7 +8375,7 @@ avr_out_insert_notbit (rtx_insn *insn, rtx operands[], rtx xbitno, int *plen)
 
       avr_asm_len ("bld %0,%1", op, plen, 1);
     }
-              
+
   return "";
 }
 
@@ -9236,7 +9237,7 @@ int
 reg_unused_after (rtx_insn *insn, rtx reg)
 {
   return (dead_or_set_p (insn, reg)
-	  || (REG_P(reg) && _reg_unused_after (insn, reg)));
+	  || (REG_P (reg) && _reg_unused_after (insn, reg)));
 }
 
 /* Return nonzero if REG is not used after INSN.
@@ -9253,7 +9254,7 @@ _reg_unused_after (rtx_insn *insn, rtx reg)
      case.  Disregard the case where this is a store to memory, since
      we are checking a register used in the store address.  */
   set = single_set (insn);
-  if (set && GET_CODE (SET_DEST (set)) != MEM
+  if (set && !MEM_P (SET_DEST (set))
       && reg_overlap_mentioned_p (reg, SET_DEST (set)))
     return 1;
 
@@ -9305,7 +9306,7 @@ _reg_unused_after (rtx_insn *insn, rtx reg)
 		return 0;
 	      if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
 		{
-		  if (GET_CODE (SET_DEST (set)) != MEM)
+		  if (!MEM_P (SET_DEST (set)))
 		    retval = 1;
 		  else
 		    return 0;
@@ -9337,7 +9338,7 @@ _reg_unused_after (rtx_insn *insn, rtx reg)
       if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
 	return 0;
       if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
-	return GET_CODE (SET_DEST (set)) != MEM;
+	return !MEM_P (SET_DEST (set));
       if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
 	return 0;
     }
@@ -9640,7 +9641,7 @@ avr_attribute_table[] =
 /* Return true if we support address space AS for the architecture in effect
    and false, otherwise.  If LOC is not UNKNOWN_LOCATION then also issue
    a respective error.  */
-   
+
 bool
 avr_addr_space_supported_p (addr_space_t as, location_t loc)
 {
@@ -10582,7 +10583,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
 	      *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1,
 					      speed);
 	    }
-	  else if (INTVAL (XEXP (x, 1)) >= -63 && INTVAL (XEXP (x, 1)) <= 63)
+	  else if (IN_RANGE (INTVAL (XEXP (x, 1)), -63, 63))
 	    *total = COSTS_N_INSNS (1);
 	  else
 	    *total = COSTS_N_INSNS (2);
@@ -10595,7 +10596,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
               *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1,
                                               speed);
             }
-          else if (INTVAL (XEXP (x, 1)) >= -63 && INTVAL (XEXP (x, 1)) <= 63)
+          else if (IN_RANGE (INTVAL (XEXP (x, 1)), -63, 63))
             *total = COSTS_N_INSNS (2);
           else
             *total = COSTS_N_INSNS (3);
@@ -10608,7 +10609,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
 	      *total += avr_operand_rtx_cost (XEXP (x, 1), mode, code, 1,
 					      speed);
 	    }
-	  else if (INTVAL (XEXP (x, 1)) >= -63 && INTVAL (XEXP (x, 1)) <= 63)
+	  else if (IN_RANGE (INTVAL (XEXP (x, 1)), -63, 63))
 	    *total = COSTS_N_INSNS (1);
 	  else
 	    *total = COSTS_N_INSNS (4);
@@ -11323,8 +11324,7 @@ static bool
 avr_rtx_costs (rtx x, machine_mode mode, int outer_code,
 	       int opno, int *total, bool speed)
 {
-  bool done = avr_rtx_costs_1 (x, mode, outer_code,
-                               opno, total, speed);
+  bool done = avr_rtx_costs_1 (x, mode, outer_code, opno, total, speed);
 
   if (avr_log.rtx_costs)
     {
@@ -11658,7 +11658,7 @@ avr_reorg (void)
             {
               rtx x = XEXP (pattern, 0);
               rtx src = SET_SRC (pat);
-              rtx t = XEXP (src,0);
+              rtx t = XEXP (src, 0);
               PUT_CODE (t, swap_condition (GET_CODE (t)));
               XEXP (pattern, 0) = XEXP (pattern, 1);
               XEXP (pattern, 1) = x;
@@ -11669,7 +11669,7 @@ avr_reorg (void)
             {
               /* This is a tst insn, we can reverse it.  */
               rtx src = SET_SRC (pat);
-              rtx t = XEXP (src,0);
+              rtx t = XEXP (src, 0);
 
               PUT_CODE (t, swap_condition (GET_CODE (t)));
               XEXP (pattern, 1) = XEXP (pattern, 0);
@@ -11682,7 +11682,7 @@ avr_reorg (void)
             {
               rtx x = XEXP (pattern, 1);
               rtx src = SET_SRC (pat);
-              rtx t = XEXP (src,0);
+              rtx t = XEXP (src, 0);
               machine_mode mode = GET_MODE (XEXP (pattern, 0));
 
               if (avr_simplify_comparison_p (mode, GET_CODE (t), x))
@@ -11889,8 +11889,8 @@ avr_hard_regno_call_part_clobbered (unsigned regno, machine_mode mode)
   /* Return true if any of the following boundaries is crossed:
      17/18 or 19/20 (if AVR_TINY), 27/28 and 29/30.  */
 
-  return ((regno <= LAST_CALLEE_SAVED_REG &&
-           regno + GET_MODE_SIZE (mode) > (LAST_CALLEE_SAVED_REG + 1))
+  return ((regno <= LAST_CALLEE_SAVED_REG
+           && regno + GET_MODE_SIZE (mode) > 1 + LAST_CALLEE_SAVED_REG)
           || (regno < REG_Y && regno + GET_MODE_SIZE (mode) > REG_Y)
           || (regno < REG_Z && regno + GET_MODE_SIZE (mode) > REG_Z));
 }
@@ -12309,7 +12309,7 @@ avr_output_addr_vec_elt (FILE *stream, int value)
 }
 
 static void
-avr_conditional_register_usage(void)
+avr_conditional_register_usage (void)
 {
   if (AVR_TINY)
     {
@@ -13191,13 +13191,13 @@ avr_expand_delay_cycles (rtx operands0)
 
   while (cycles >= 2)
     {
-      emit_insn (gen_nopv (GEN_INT(2)));
+      emit_insn (gen_nopv (GEN_INT (2)));
       cycles -= 2;
     }
 
   if (cycles == 1)
     {
-      emit_insn (gen_nopv (GEN_INT(1)));
+      emit_insn (gen_nopv (GEN_INT (1)));
       cycles--;
     }
 }
@@ -13807,7 +13807,7 @@ avr_default_expand_builtin (enum insn_code icode, tree exp, rtx target)
       tree arg = CALL_EXPR_ARG (exp, n);
       rtx op = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
       machine_mode opmode = GET_MODE (op);
-      machine_mode mode = insn_data[icode].operand[n+1].mode;
+      machine_mode mode = insn_data[icode].operand[n + 1].mode;
 
       if ((opmode == SImode || opmode == VOIDmode) && mode == HImode)
         {
@@ -13820,7 +13820,7 @@ avr_default_expand_builtin (enum insn_code icode, tree exp, rtx target)
 
       gcc_assert (opmode == mode || opmode == VOIDmode);
 
-      if (!insn_data[icode].operand[n+1].predicate (op, mode))
+      if (!insn_data[icode].operand[n + 1].predicate (op, mode))
         op = copy_to_mode_reg (mode, op);
 
       xop[n] = op;
@@ -13870,7 +13870,7 @@ avr_expand_builtin (tree exp, rtx target,
   switch (id)
     {
     case AVR_BUILTIN_NOP:
-      emit_insn (gen_nopv (GEN_INT(1)));
+      emit_insn (gen_nopv (GEN_INT (1)));
       return 0;
 
     case AVR_BUILTIN_DELAY_CYCLES:
-- 
cgit v1.1


From a2863bde755d39626ee25e3b7a8875e0d93d7217 Mon Sep 17 00:00:00 2001
From: Ville Voutilainen <ville.voutilainen@gmail.com>
Date: Thu, 1 Dec 2016 18:23:21 +0200
Subject: Implement LWG 2766,

Swapping non-swappable types and LWG 2749,
swappable traits for variants.
* include/bits/move.h (swap(_Tp&, _Tp&)): Constrain
with __is_tuple_like.
* include/bits/stl_pair.h (swap(pair<_T1, _T2>&, pair<_T1, _T2>&)):
Add a deleted overload.
* include/bits/unique_ptr.h
(swap(unique_ptr<_Tp, _Dp>&, unique_ptr<_Tp, _Dp>&)): Likewise.
* include/std/array
(swap(array<_Tp, _Nm>&, array<_Tp, _Nm>&)): Likewise.
* include/std/optional
(swap(optional<_Tp>&, optional<_Tp>&)): Likewise.
* include/std/tuple (__is_tuple_like_impl, __is_tuple_like):
Move to type_traits.
(swap(tuple<_Elements...>&, tuple<_Elements...>&)): Add a deleted
overload.
* include/std/type_traits (__is_tuple_like_impl, __is_tuple_like):
New.
(swap(_Tp&, _Tp&)): Constrain with __is_tuple_like.
* include/std/utility (__is_tuple_like_impl): Move to type_traits.
* include/std/variant
(swap(variant<_Types...>&, variant<_Types...>&)):
Add a deleted overload.
* testsuite/20_util/optional/swap/2.cc: Add tests for disabled
swaps.
* testsuite/20_util/pair/swap_cxx17.cc: New.
* testsuite/20_util/tuple/swap_cxx17.cc: Likewise.
* testsuite/20_util/unique_ptr/specialized_algorithms/swap_cxx17.cc:
Likewise.
* testsuite/20_util/variant/compile.cc: Add tests for disabled
swaps.
* testsuite/23_containers/array/specialized_algorithms/swap_cxx17.cc:
New.
* testsuite/23_containers/array/tuple_interface/get_neg.cc: Adjust.
* testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc:
Likewise.

From-SVN: r243120
---
 libstdc++-v3/ChangeLog                             | 40 ++++++++++++++++++++
 libstdc++-v3/include/bits/move.h                   |  3 +-
 libstdc++-v3/include/bits/stl_pair.h               |  8 ++++
 libstdc++-v3/include/bits/unique_ptr.h             |  8 ++++
 libstdc++-v3/include/std/array                     |  8 ++++
 libstdc++-v3/include/std/optional                  |  4 ++
 libstdc++-v3/include/std/tuple                     | 18 ++++-----
 libstdc++-v3/include/std/type_traits               | 21 ++++++++++-
 libstdc++-v3/include/std/utility                   |  4 --
 libstdc++-v3/include/std/variant                   | 11 +++++-
 libstdc++-v3/testsuite/20_util/optional/swap/2.cc  |  4 +-
 libstdc++-v3/testsuite/20_util/pair/swap_cxx17.cc  | 35 ++++++++++++++++++
 libstdc++-v3/testsuite/20_util/tuple/swap_cxx17.cc | 43 ++++++++++++++++++++++
 .../specialized_algorithms/swap_cxx17.cc           | 33 +++++++++++++++++
 libstdc++-v3/testsuite/20_util/variant/compile.cc  | 15 ++++++++
 .../array/specialized_algorithms/swap_cxx17.cc     | 33 +++++++++++++++++
 .../23_containers/array/tuple_interface/get_neg.cc |  4 +-
 .../array/tuple_interface/tuple_element_neg.cc     |  2 +-
 18 files changed, 270 insertions(+), 24 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/20_util/pair/swap_cxx17.cc
 create mode 100644 libstdc++-v3/testsuite/20_util/tuple/swap_cxx17.cc
 create mode 100644 libstdc++-v3/testsuite/20_util/unique_ptr/specialized_algorithms/swap_cxx17.cc
 create mode 100644 libstdc++-v3/testsuite/23_containers/array/specialized_algorithms/swap_cxx17.cc

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index cc0b3ae..21404f16 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,5 +1,45 @@
 2016-12-01  Ville Voutilainen  <ville.voutilainen@gmail.com>
 
+	Implement LWG 2766,
+	Swapping non-swappable types and LWG 2749,
+	swappable traits for variants.
+	* include/bits/move.h (swap(_Tp&, _Tp&)): Constrain
+	with __is_tuple_like.
+	* include/bits/stl_pair.h (swap(pair<_T1, _T2>&, pair<_T1, _T2>&)):
+	Add a deleted overload.
+	* include/bits/unique_ptr.h
+	(swap(unique_ptr<_Tp, _Dp>&, unique_ptr<_Tp, _Dp>&)): Likewise.
+	* include/std/array
+	(swap(array<_Tp, _Nm>&, array<_Tp, _Nm>&)): Likewise.
+	* include/std/optional
+	(swap(optional<_Tp>&, optional<_Tp>&)): Likewise.
+	* include/std/tuple (__is_tuple_like_impl, __is_tuple_like):
+	Move to type_traits.
+	(swap(tuple<_Elements...>&, tuple<_Elements...>&)): Add a deleted
+	overload.
+	* include/std/type_traits (__is_tuple_like_impl, __is_tuple_like):
+	New.
+	(swap(_Tp&, _Tp&)): Constrain with __is_tuple_like.
+	* include/std/utility (__is_tuple_like_impl): Move to type_traits.
+	* include/std/variant
+	(swap(variant<_Types...>&, variant<_Types...>&)):
+	Add a deleted overload.
+	* testsuite/20_util/optional/swap/2.cc: Add tests for disabled
+	swaps.
+	* testsuite/20_util/pair/swap_cxx17.cc: New.
+	* testsuite/20_util/tuple/swap_cxx17.cc: Likewise.
+	* testsuite/20_util/unique_ptr/specialized_algorithms/swap_cxx17.cc:
+	Likewise.
+	* testsuite/20_util/variant/compile.cc: Add tests for disabled
+	swaps.
+	* testsuite/23_containers/array/specialized_algorithms/swap_cxx17.cc:
+	New.
+	* testsuite/23_containers/array/tuple_interface/get_neg.cc: Adjust.
+	* testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc:
+	Likewise.
+
+2016-12-01  Ville Voutilainen  <ville.voutilainen@gmail.com>
+
 	The convertible_to traits need to use a variadic catch-all for the
 	false-cases.
 	* include/std/istream (__is_convertible_to_basic_istream):
diff --git a/libstdc++-v3/include/bits/move.h b/libstdc++-v3/include/bits/move.h
index d0aefe7..0bd11d6 100644
--- a/libstdc++-v3/include/bits/move.h
+++ b/libstdc++-v3/include/bits/move.h
@@ -181,7 +181,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename _Tp>
     inline
 #if __cplusplus >= 201103L
-    typename enable_if<__and_<is_move_constructible<_Tp>,
+    typename enable_if<__and_<__not_<__is_tuple_like<_Tp>>,
+			      is_move_constructible<_Tp>,
 			      is_move_assignable<_Tp>>::value>::type
     swap(_Tp& __a, _Tp& __b)
     noexcept(__and_<is_nothrow_move_constructible<_Tp>,
diff --git a/libstdc++-v3/include/bits/stl_pair.h b/libstdc++-v3/include/bits/stl_pair.h
index ef52538..981dbeb 100644
--- a/libstdc++-v3/include/bits/stl_pair.h
+++ b/libstdc++-v3/include/bits/stl_pair.h
@@ -478,6 +478,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     swap(pair<_T1, _T2>& __x, pair<_T1, _T2>& __y)
     noexcept(noexcept(__x.swap(__y)))
     { __x.swap(__y); }
+
+#if __cplusplus > 201402L || !defined(__STRICT_ANSI__) // c++1z or gnu++11
+  template<typename _T1, typename _T2>
+    inline
+    typename enable_if<!__and_<__is_swappable<_T1>,
+			       __is_swappable<_T2>>::value>::type
+    swap(pair<_T1, _T2>&, pair<_T1, _T2>&) = delete;
+#endif
 #endif // __cplusplus >= 201103L
 
   /**
diff --git a/libstdc++-v3/include/bits/unique_ptr.h b/libstdc++-v3/include/bits/unique_ptr.h
index f9ec60f..03f9bfc 100644
--- a/libstdc++-v3/include/bits/unique_ptr.h
+++ b/libstdc++-v3/include/bits/unique_ptr.h
@@ -650,6 +650,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	 unique_ptr<_Tp, _Dp>& __y) noexcept
     { __x.swap(__y); }
 
+#if __cplusplus > 201402L || !defined(__STRICT_ANSI__) // c++1z or gnu++11
+  template<typename _Tp, typename _Dp>
+    inline
+    typename enable_if<!__is_swappable<_Dp>::value>::type
+    swap(unique_ptr<_Tp, _Dp>&,
+	 unique_ptr<_Tp, _Dp>&) = delete;
+#endif
+
   template<typename _Tp, typename _Dp,
 	   typename _Up, typename _Ep>
     inline bool
diff --git a/libstdc++-v3/include/std/array b/libstdc++-v3/include/std/array
index 3ab0355..fa7bac6 100644
--- a/libstdc++-v3/include/std/array
+++ b/libstdc++-v3/include/std/array
@@ -288,6 +288,14 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
     noexcept(noexcept(__one.swap(__two)))
     { __one.swap(__two); }
 
+#if __cplusplus > 201402L || !defined(__STRICT_ANSI__) // c++1z or gnu++11
+  template<typename _Tp, std::size_t _Nm>
+    inline
+    typename enable_if<
+      !_GLIBCXX_STD_C::__array_traits<_Tp, _Nm>::_Is_swappable::value>::type
+    swap(array<_Tp, _Nm>&, array<_Tp, _Nm>&) = delete;
+#endif
+
   template<std::size_t _Int, typename _Tp, std::size_t _Nm>
     constexpr _Tp&
     get(array<_Tp, _Nm>& __arr) noexcept
diff --git a/libstdc++-v3/include/std/optional b/libstdc++-v3/include/std/optional
index ea673cc..191d64b 100644
--- a/libstdc++-v3/include/std/optional
+++ b/libstdc++-v3/include/std/optional
@@ -930,6 +930,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     { __lhs.swap(__rhs); }
 
   template<typename _Tp>
+    inline enable_if_t<!(is_move_constructible_v<_Tp> && is_swappable_v<_Tp>)>
+    swap(optional<_Tp>&, optional<_Tp>&) = delete;
+
+  template<typename _Tp>
     constexpr optional<decay_t<_Tp>>
     make_optional(_Tp&& __t)
     { return optional<decay_t<_Tp>> { std::forward<_Tp>(__t) }; }
diff --git a/libstdc++-v3/include/std/tuple b/libstdc++-v3/include/std/tuple
index 63cacd4..fb2fd17 100644
--- a/libstdc++-v3/include/std/tuple
+++ b/libstdc++-v3/include/std/tuple
@@ -1442,17 +1442,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     forward_as_tuple(_Elements&&... __args) noexcept
     { return tuple<_Elements&&...>(std::forward<_Elements>(__args)...); }
 
-  template<typename... _Tps>
-    struct __is_tuple_like_impl<tuple<_Tps...>> : true_type
-    { };
-
-  // Internal type trait that allows us to sfinae-protect tuple_cat.
-  template<typename _Tp>
-    struct __is_tuple_like
-    : public __is_tuple_like_impl<typename std::remove_cv
-            <typename std::remove_reference<_Tp>::type>::type>::type
-    { };
-
   template<size_t, typename, typename, size_t>
     struct __make_tuple_impl;
 
@@ -1597,6 +1586,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     noexcept(noexcept(__x.swap(__y)))
     { __x.swap(__y); }
 
+#if __cplusplus > 201402L || !defined(__STRICT_ANSI__) // c++1z or gnu++11
+  template<typename... _Elements>
+    inline
+    typename enable_if<!__and_<__is_swappable<_Elements>...>::value>::type
+    swap(tuple<_Elements...>&, tuple<_Elements...>&) = delete;
+#endif
+
   // A class (and instance) which can be used in 'tie' when an element
   // of a tuple is not required
   struct _Swallow_assign
diff --git a/libstdc++-v3/include/std/type_traits b/libstdc++-v3/include/std/type_traits
index e5f2bba..f164f71 100644
--- a/libstdc++-v3/include/std/type_traits
+++ b/libstdc++-v3/include/std/type_traits
@@ -2593,9 +2593,28 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template <typename _Tp>
     struct __is_nothrow_swappable;
 
+  template<typename... _Elements>
+    class tuple;
+
+  template<typename>
+    struct __is_tuple_like_impl : false_type
+    { };
+
+  template<typename... _Tps>
+    struct __is_tuple_like_impl<tuple<_Tps...>> : true_type
+    { };
+
+  // Internal type trait that allows us to sfinae-protect tuple_cat.
+  template<typename _Tp>
+    struct __is_tuple_like
+    : public __is_tuple_like_impl<typename remove_cv<
+      typename remove_reference<_Tp>::type>::type>::type
+    { };
+
   template<typename _Tp>
     inline
-    typename enable_if<__and_<is_move_constructible<_Tp>,
+    typename enable_if<__and_<__not_<__is_tuple_like<_Tp>>,
+			      is_move_constructible<_Tp>,
 			      is_move_assignable<_Tp>>::value>::type
     swap(_Tp&, _Tp&)
     noexcept(__and_<is_nothrow_move_constructible<_Tp>,
diff --git a/libstdc++-v3/include/std/utility b/libstdc++-v3/include/std/utility
index 3982156..8e02f0e 100644
--- a/libstdc++-v3/include/std/utility
+++ b/libstdc++-v3/include/std/utility
@@ -140,10 +140,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     using tuple_element_t = typename tuple_element<__i, _Tp>::type;
 #endif
 
-  template<typename>
-    struct __is_tuple_like_impl : false_type
-    { };
-
   // Various functions which give std::pair a tuple-like interface.
 
   /// Partial specialization for std::pair
diff --git a/libstdc++-v3/include/std/variant b/libstdc++-v3/include/std/variant
index 34ad3fd..89ca979 100644
--- a/libstdc++-v3/include/std/variant
+++ b/libstdc++-v3/include/std/variant
@@ -889,10 +889,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return false; }
 
   template<typename... _Types>
-    inline auto swap(variant<_Types...>& __lhs, variant<_Types...>& __rhs)
-    noexcept(noexcept(__lhs.swap(__rhs))) -> decltype(__lhs.swap(__rhs))
+    inline enable_if_t<__and_<is_move_constructible<_Types>...,
+			      is_swappable<_Types>...>::value>
+    swap(variant<_Types...>& __lhs, variant<_Types...>& __rhs)
+    noexcept(noexcept(__lhs.swap(__rhs)))
     { __lhs.swap(__rhs); }
 
+  template<typename... _Types>
+    inline enable_if_t<!__and_<is_move_constructible<_Types>...,
+			       is_swappable<_Types>...>::value>
+    swap(variant<_Types...>&, variant<_Types...>&) = delete;
+
   class bad_variant_access : public exception
   {
   public:
diff --git a/libstdc++-v3/testsuite/20_util/optional/swap/2.cc b/libstdc++-v3/testsuite/20_util/optional/swap/2.cc
index 5793488..cb9291a 100644
--- a/libstdc++-v3/testsuite/20_util/optional/swap/2.cc
+++ b/libstdc++-v3/testsuite/20_util/optional/swap/2.cc
@@ -33,11 +33,11 @@ void swap(B&, B&) noexcept(false);
 static_assert( std::is_swappable_v<std::optional<B>> );
 static_assert( !std::is_nothrow_swappable_v<std::optional<B>> );
 
-// Not swappable, but optional<C> is swappable via the generic std::swap.
+// Not swappable, and optional<C> not swappable via the generic std::swap.
 struct C { };
 void swap(C&, C&) = delete;
 
-static_assert( std::is_swappable_v<std::optional<C>> );
+static_assert( !std::is_swappable_v<std::optional<C>> );
 
 // Not swappable, and optional<D> not swappable via the generic std::swap.
 struct D { D(D&&) = delete; };
diff --git a/libstdc++-v3/testsuite/20_util/pair/swap_cxx17.cc b/libstdc++-v3/testsuite/20_util/pair/swap_cxx17.cc
new file mode 100644
index 0000000..6b09f42
--- /dev/null
+++ b/libstdc++-v3/testsuite/20_util/pair/swap_cxx17.cc
@@ -0,0 +1,35 @@
+// { dg-options "-std=gnu++17" }
+// { dg-do compile }
+
+// Copyright (C) 2016 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+
+#include <utility>
+
+// Not swappable, and pair not swappable via the generic std::swap.
+struct C { };
+void swap(C&, C&) = delete;
+
+static_assert( !std::is_swappable_v<std::pair<int, C>> );
+static_assert( !std::is_swappable_v<std::pair<C, int>> );
+
+// Not swappable, and pair not swappable via the generic std::swap.
+struct D { D(D&&) = delete; };
+
+static_assert( !std::is_swappable_v<std::pair<int, D>> );
+static_assert( !std::is_swappable_v<std::pair<D, int>> );
diff --git a/libstdc++-v3/testsuite/20_util/tuple/swap_cxx17.cc b/libstdc++-v3/testsuite/20_util/tuple/swap_cxx17.cc
new file mode 100644
index 0000000..d2a75ce
--- /dev/null
+++ b/libstdc++-v3/testsuite/20_util/tuple/swap_cxx17.cc
@@ -0,0 +1,43 @@
+// { dg-options "-std=gnu++17" }
+// { dg-do compile }
+
+// Copyright (C) 2016 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+
+// NOTE: This makes use of the fact that we know how moveable
+// is implemented on tuple.  If the implementation changed
+// this test may begin to fail.
+
+#include <tuple>
+
+// Not swappable, and tuple not swappable via the generic std::swap.
+struct C { };
+void swap(C&, C&) = delete;
+
+static_assert( !std::is_swappable_v<std::tuple<int, C>> );
+static_assert( !std::is_swappable_v<std::tuple<C, int>> );
+static_assert( !std::is_swappable_v<std::tuple<int, int, C>> );
+static_assert( !std::is_swappable_v<std::tuple<C, int, int>> );
+
+// Not swappable, and tuple not swappable via the generic std::swap.
+struct D { D(D&&) = delete; };
+
+static_assert( !std::is_swappable_v<std::tuple<int, D>> );
+static_assert( !std::is_swappable_v<std::tuple<D, int>> );
+static_assert( !std::is_swappable_v<std::tuple<int, int, D>> );
+static_assert( !std::is_swappable_v<std::tuple<D, int, int>> );
diff --git a/libstdc++-v3/testsuite/20_util/unique_ptr/specialized_algorithms/swap_cxx17.cc b/libstdc++-v3/testsuite/20_util/unique_ptr/specialized_algorithms/swap_cxx17.cc
new file mode 100644
index 0000000..bf106ec
--- /dev/null
+++ b/libstdc++-v3/testsuite/20_util/unique_ptr/specialized_algorithms/swap_cxx17.cc
@@ -0,0 +1,33 @@
+// { dg-options "-std=gnu++17" }
+// { dg-do compile }
+
+// Copyright (C) 2016 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+#include <memory>
+
+// Not swappable, and unique_ptr not swappable via the generic std::swap.
+struct C { };
+void swap(C&, C&) = delete;
+
+static_assert( !std::is_swappable_v<std::unique_ptr<int, C>> );
+
+// Not swappable, and unique_ptr not swappable via the generic std::swap.
+struct D { D(D&&) = delete; };
+
+static_assert( !std::is_swappable_v<std::unique_ptr<int, D>> );
+
diff --git a/libstdc++-v3/testsuite/20_util/variant/compile.cc b/libstdc++-v3/testsuite/20_util/variant/compile.cc
index e3330be..8250a95 100644
--- a/libstdc++-v3/testsuite/20_util/variant/compile.cc
+++ b/libstdc++-v3/testsuite/20_util/variant/compile.cc
@@ -219,6 +219,21 @@ void test_relational()
   }
 }
 
+// Not swappable, and variant<C> not swappable via the generic std::swap.
+struct C { };
+void swap(C&, C&) = delete;
+
+static_assert( !std::is_swappable_v<variant<C>> );
+static_assert( !std::is_swappable_v<variant<int, C>> );
+static_assert( !std::is_swappable_v<variant<C, int>> );
+
+// Not swappable, and variant<D> not swappable via the generic std::swap.
+struct D { D(D&&) = delete; };
+
+static_assert( !std::is_swappable_v<variant<D>> );
+static_assert( !std::is_swappable_v<variant<int, D>> );
+static_assert( !std::is_swappable_v<variant<D, int>> );
+
 void test_swap()
 {
   variant<int, string> a, b;
diff --git a/libstdc++-v3/testsuite/23_containers/array/specialized_algorithms/swap_cxx17.cc b/libstdc++-v3/testsuite/23_containers/array/specialized_algorithms/swap_cxx17.cc
new file mode 100644
index 0000000..2e93c4d
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/array/specialized_algorithms/swap_cxx17.cc
@@ -0,0 +1,33 @@
+// { dg-options "-std=gnu++17" }
+// { dg-do compile }
+
+// Copyright (C) 2016 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+#include <array>
+
+// Not swappable, and array not swappable via the generic std::swap.
+struct C { };
+void swap(C&, C&) = delete;
+
+static_assert( !std::is_swappable_v<std::array<C, 42>> );
+
+// Not swappable, and array not swappable via the generic std::swap.
+struct D { D(D&&) = delete; };
+
+static_assert( !std::is_swappable_v<std::array<D, 42>> );
+
diff --git a/libstdc++-v3/testsuite/23_containers/array/tuple_interface/get_neg.cc b/libstdc++-v3/testsuite/23_containers/array/tuple_interface/get_neg.cc
index f99bbf6..568ec85 100644
--- a/libstdc++-v3/testsuite/23_containers/array/tuple_interface/get_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/array/tuple_interface/get_neg.cc
@@ -27,6 +27,6 @@ int n1 = std::get<1>(a);
 int n2 = std::get<1>(std::move(a));
 int n3 = std::get<1>(ca);
 
-// { dg-error "static assertion failed" "" { target *-*-* } 295 }
-// { dg-error "static assertion failed" "" { target *-*-* } 304 }
+// { dg-error "static assertion failed" "" { target *-*-* } 303 }
 // { dg-error "static assertion failed" "" { target *-*-* } 312 }
+// { dg-error "static assertion failed" "" { target *-*-* } 320 }
diff --git a/libstdc++-v3/testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc b/libstdc++-v3/testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc
index 1941f3c..32cb10b 100644
--- a/libstdc++-v3/testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc
@@ -22,4 +22,4 @@
 
 typedef std::tuple_element<1, std::array<int, 1>>::type type;
 
-// { dg-error "static assertion failed" "" { target *-*-* } 343 }
+// { dg-error "static assertion failed" "" { target *-*-* } 351 }
-- 
cgit v1.1


From 1f0133ebb9e9209f0fd8e08ddbab2e9a117d1d1e Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Thu, 1 Dec 2016 18:10:58 +0100
Subject: i386.md (*andndi3_doubleword): Depend on TARGET_SSE2.

	* config/i386/i386.md (*andndi3_doubleword): Depend on TARGET_SSE2.

From-SVN: r243121
---
 gcc/ChangeLog           | 4 ++++
 gcc/config/i386/i386.md | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5b2570b..6768c5f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,7 @@
+2016-12-01  Uros Bizjak  <ubizjak@gmail.com>
+
+	* config/i386/i386.md (*andndi3_doubleword): Depend on TARGET_SSE2.
+
 2016-12-01  Georg-Johann Lay  <avr@gjlay.de>
 
 	* config/avr/avr.c: Fix coding rule glitches.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ed525b9..583d2bb 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -8540,7 +8540,7 @@
 	  (not:DI (match_operand:DI 1 "register_operand" "r"))
 	  (match_operand:DI 2 "nonimmediate_operand" "rm")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_BMI && !TARGET_64BIT && TARGET_STV && TARGET_SSE"
+  "TARGET_BMI && !TARGET_64BIT && TARGET_STV && TARGET_SSE2"
   "#"
   "&& reload_completed"
   [(parallel [(set (match_dup 0)
@@ -8575,7 +8575,7 @@
    (set_attr "btver2_decode" "direct")
    (set_attr "mode" "SI")])
 
-(define_insn "*bmi_andn_<mode>_ccno"
+(define_insn "*andn_<mode>_ccno"
   [(set (reg FLAGS_REG)
 	(compare
 	  (and:SWI48
-- 
cgit v1.1


From 3c7089946936a3e420f0e5db83212f15e0a7027a Mon Sep 17 00:00:00 2001
From: David Edelsohn <dje.gcc@gmail.com>
Date: Thu, 1 Dec 2016 18:11:56 +0000
Subject: * g++.dg/tls/pr77285-1.C: dg-add-options tls

From-SVN: r243124
---
 gcc/testsuite/ChangeLog              | 4 ++++
 gcc/testsuite/g++.dg/tls/pr77285-1.C | 1 +
 2 files changed, 5 insertions(+)

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 2a04091..d6e5ac4 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2016-12-01  David Edelsohn  <dje.gcc@gmail.com>
+
+	* g++.dg/tls/pr77285-1.C: dg-add-options tls
+
 2016-12-01  James Greenhalgh  <james.greenhalgh@arm.com>
 
 	* gcc.dg/pr78582.c (main): Call setjmp, not sigsetjmp.
diff --git a/gcc/testsuite/g++.dg/tls/pr77285-1.C b/gcc/testsuite/g++.dg/tls/pr77285-1.C
index d8f69b2..7a93414 100644
--- a/gcc/testsuite/g++.dg/tls/pr77285-1.C
+++ b/gcc/testsuite/g++.dg/tls/pr77285-1.C
@@ -1,5 +1,6 @@
 // { dg-do link { target c++11 } }
 // { dg-require-effective-target tls }
+// { dg-add-options tls }
 // { dg-additional-sources pr77285-2.C }
 
 struct __attribute__((abi_tag("tag"))) X { ~X () {} int i = 0; };
-- 
cgit v1.1


From 1281fc99115392eb3f19f5e0a5c9b604fc72b27a Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Thu, 1 Dec 2016 18:18:30 +0000
Subject: vec.h (vec<T, [...]): Guard call to memset if len-oldlen != 0.

2016-12-01  Richard Biener  <rguenther@suse.de>
	    Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>

	* vec.h (vec<T, A, vl_embed>::quick_grow_cleared): Guard call to
	memset if len-oldlen != 0.
	(vec<T, va_heap, vl_ptr>::safe_grow_cleared): Likewise.

Co-Authored-By: Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>

From-SVN: r243125
---
 gcc/ChangeLog | 7 +++++++
 gcc/vec.h     | 8 ++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6768c5f..b567324 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2016-12-01  Richard Biener  <rguenther@suse.de>
+	    Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
+
+	* vec.h (vec<T, A, vl_embed>::quick_grow_cleared): Guard call to
+	memset if len-oldlen != 0.
+	(vec<T, va_heap, vl_ptr>::safe_grow_cleared): Likewise.
+
 2016-12-01  Uros Bizjak  <ubizjak@gmail.com>
 
 	* config/i386/i386.md (*andndi3_doubleword): Depend on TARGET_SSE2.
diff --git a/gcc/vec.h b/gcc/vec.h
index 14fb2a6..aa93411 100644
--- a/gcc/vec.h
+++ b/gcc/vec.h
@@ -1092,8 +1092,10 @@ inline void
 vec<T, A, vl_embed>::quick_grow_cleared (unsigned len)
 {
   unsigned oldlen = length ();
+  size_t sz = sizeof (T) * (len - oldlen);
   quick_grow (len);
-  memset (&(address ()[oldlen]), 0, sizeof (T) * (len - oldlen));
+  if (sz != 0)
+    memset (&(address ()[oldlen]), 0, sz);
 }
 
 
@@ -1605,8 +1607,10 @@ inline void
 vec<T, va_heap, vl_ptr>::safe_grow_cleared (unsigned len MEM_STAT_DECL)
 {
   unsigned oldlen = length ();
+  size_t sz = sizeof (T) * (len - oldlen);
   safe_grow (len PASS_MEM_STAT);
-  memset (&(address ()[oldlen]), 0, sizeof (T) * (len - oldlen));
+  if (sz != 0)
+    memset (&(address ()[oldlen]), 0, sz);
 }
 
 
-- 
cgit v1.1


From d8a2f02ec6c25fb050e96048eb19ae218d79e74b Mon Sep 17 00:00:00 2001
From: David Edelsohn <dje.gcc@gmail.com>
Date: Thu, 1 Dec 2016 18:58:47 +0000
Subject: re PR debug/66149 (ICE: tree check: expected field_decl, have
 template_decl in int_bit_position, at tree.h:5012 with -std=c++14 -gstabs)

PR debug/66419
PR c++/78235
* dbxout.c (dbxout_type_fields): Skip TEMPLATE_DECLs.

From-SVN: r243126
---
 gcc/ChangeLog | 6 ++++++
 gcc/dbxout.c  | 1 +
 2 files changed, 7 insertions(+)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b567324..c70394e 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-01  David Edelsohn  <dje.gcc@gmail.com>
+
+	PR debug/66419
+	PR c++/78235
+	* dbxout.c (dbxout_type_fields): Skip TEMPLATE_DECLs.
+
 2016-12-01  Richard Biener  <rguenther@suse.de>
 	    Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
 
diff --git a/gcc/dbxout.c b/gcc/dbxout.c
index 3301417..658cc3d 100644
--- a/gcc/dbxout.c
+++ b/gcc/dbxout.c
@@ -1479,6 +1479,7 @@ dbxout_type_fields (tree type)
 
       /* Omit here local type decls until we know how to support them.  */
       if (TREE_CODE (tem) == TYPE_DECL
+	  || TREE_CODE (tem) == TEMPLATE_DECL
 	  /* Omit here the nameless fields that are used to skip bits.  */
 	  || DECL_IGNORED_P (tem)
 	  /* Omit fields whose position or size are variable or too large to
-- 
cgit v1.1


From 0269650d4a53dfdde90e90db701ee6233a61f837 Mon Sep 17 00:00:00 2001
From: David Edelsohn <dje.gcc@gmail.com>
Date: Thu, 1 Dec 2016 19:02:34 +0000
Subject: * testsuite/26_numerics/headers/cmath/hypot.cc: XFAIL on AIX.

From-SVN: r243127
---
 libstdc++-v3/ChangeLog                                    | 4 ++++
 libstdc++-v3/testsuite/26_numerics/headers/cmath/hypot.cc | 1 +
 2 files changed, 5 insertions(+)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 21404f16..08d9229 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,7 @@
+2016-12-01  David Edelsohn  <dje.gcc@gmail.com>
+
+	* testsuite/26_numerics/headers/cmath/hypot.cc: XFAIL on AIX.
+
 2016-12-01  Ville Voutilainen  <ville.voutilainen@gmail.com>
 
 	Implement LWG 2766,
diff --git a/libstdc++-v3/testsuite/26_numerics/headers/cmath/hypot.cc b/libstdc++-v3/testsuite/26_numerics/headers/cmath/hypot.cc
index ad9e77e..b4df3ba 100644
--- a/libstdc++-v3/testsuite/26_numerics/headers/cmath/hypot.cc
+++ b/libstdc++-v3/testsuite/26_numerics/headers/cmath/hypot.cc
@@ -17,6 +17,7 @@
 
 // { dg-options "-std=gnu++17" }
 // { dg-do run { target c++1z } }
+// { dg-xfail-run-if "AIX long double" { powerpc-ibm-aix* } }
 
 #include <cmath>
 #include <type_traits>
-- 
cgit v1.1


From 98934fac3bd8fd149387164ac3ee97795a5e6825 Mon Sep 17 00:00:00 2001
From: Ian Lance Taylor <ian@gcc.gnu.org>
Date: Thu, 1 Dec 2016 19:54:36 +0000
Subject: compiler: add slice initializers to the GC root list

    As of https://golang.org/cl/32917 we can put slice initializers in the
    .data section.  The program can still change the values in those
    slices.  That means that if the slice elements can contain pointers,
    we need to register the entire initializer as a GC root.

    This would be straightforward except that we only have a Bexpression
    for the slice initializer, not an Expression.  So introduce a
    Backend_expression type that wraps a Bexpression as an Expression.

    The test case for this is https://golang.org/cl/33790.

    Reviewed-on: https://go-review.googlesource.com/33792

From-SVN: r243129
---
 gcc/go/gofrontend/MERGE          |  2 +-
 gcc/go/gofrontend/expressions.cc | 36 ++++++++++++++++++++++++++
 gcc/go/gofrontend/expressions.h  | 55 +++++++++++++++++++++++++++++++++++++++-
 gcc/go/gofrontend/gogo.cc        | 28 +++++++++++++++++---
 gcc/go/gofrontend/gogo.h         | 12 +++++++++
 5 files changed, 128 insertions(+), 5 deletions(-)

diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE
index 8f5f542..5529002 100644
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
@@ -1,4 +1,4 @@
-97b949f249515a61d3c09e9e06f08c8af189e967
+b7bad96ce0af50a1129eaab9aa110d68a601917b
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
diff --git a/gcc/go/gofrontend/expressions.cc b/gcc/go/gofrontend/expressions.cc
index 0ab6726..9740d32 100644
--- a/gcc/go/gofrontend/expressions.cc
+++ b/gcc/go/gofrontend/expressions.cc
@@ -4295,6 +4295,20 @@ Unary_expression::do_get_backend(Translate_context* context)
 						      true, copy_to_heap, false,
 						      bexpr);
 	  bexpr = gogo->backend()->var_expression(implicit, loc);
+
+	  // If we are not copying a slice initializer to the heap,
+	  // then it can be changed by the program, so if it can
+	  // contain pointers we must register it as a GC root.
+	  if (this->is_slice_init_
+	      && !copy_to_heap
+	      && this->expr_->type()->has_pointer())
+	    {
+	      Bexpression* root =
+		gogo->backend()->var_expression(implicit, loc);
+	      root = gogo->backend()->address_expression(root, loc);
+	      Type* type = Type::make_pointer_type(this->expr_->type());
+	      gogo->add_gc_root(Expression::make_backend(root, type, loc));
+	    }
 	}
       else if ((this->expr_->is_composite_literal()
 		|| this->expr_->string_expression() != NULL)
@@ -15433,6 +15447,28 @@ Expression::make_compound(Expression* init, Expression* expr, Location location)
   return new Compound_expression(init, expr, location);
 }
 
+// Class Backend_expression.
+
+int
+Backend_expression::do_traverse(Traverse*)
+{
+  return TRAVERSE_CONTINUE;
+}
+
+void
+Backend_expression::do_dump_expression(Ast_dump_context* ast_dump_context) const
+{
+  ast_dump_context->ostream() << "backend_expression<";
+  ast_dump_context->dump_type(this->type_);
+  ast_dump_context->ostream() << ">";
+}
+
+Expression*
+Expression::make_backend(Bexpression* bexpr, Type* type, Location location)
+{
+  return new Backend_expression(bexpr, type, location);
+}
+
 // Import an expression.  This comes at the end in order to see the
 // various class definitions.
 
diff --git a/gcc/go/gofrontend/expressions.h b/gcc/go/gofrontend/expressions.h
index 96d314f..f31d4a6 100644
--- a/gcc/go/gofrontend/expressions.h
+++ b/gcc/go/gofrontend/expressions.h
@@ -137,7 +137,8 @@ class Expression
     EXPRESSION_STRUCT_FIELD_OFFSET,
     EXPRESSION_LABEL_ADDR,
     EXPRESSION_CONDITIONAL,
-    EXPRESSION_COMPOUND
+    EXPRESSION_COMPOUND,
+    EXPRESSION_BACKEND
   };
 
   Expression(Expression_classification, Location);
@@ -485,6 +486,10 @@ class Expression
   static Expression*
   make_compound(Expression*, Expression*, Location);
 
+  // Make a backend expression.
+  static Expression*
+  make_backend(Bexpression*, Type*, Location);
+
   // Return the expression classification.
   Expression_classification
   classification() const
@@ -3825,6 +3830,54 @@ class Compound_expression : public Expression
   Expression* expr_;
 };
 
+// A backend expression.  This is a backend expression wrapped in an
+// Expression, for convenience during backend generation.
+
+class Backend_expression : public Expression
+{
+ public:
+  Backend_expression(Bexpression* bexpr, Type* type, Location location)
+    : Expression(EXPRESSION_BACKEND, location), bexpr_(bexpr), type_(type)
+  {}
+
+ protected:
+  int
+  do_traverse(Traverse*);
+
+  // For now these are always valid static initializers.  If that
+  // changes we can change this.
+  bool
+  do_is_static_initializer() const
+  { return true; }
+
+  Type*
+  do_type()
+  { return this->type_; }
+
+  void
+  do_determine_type(const Type_context*)
+  { }
+
+  Expression*
+  do_copy()
+  {
+    return new Backend_expression(this->bexpr_, this->type_, this->location());
+  }
+
+  Bexpression*
+  do_get_backend(Translate_context*)
+  { return this->bexpr_; }
+
+  void
+  do_dump_expression(Ast_dump_context*) const;
+
+ private:
+  // The backend expression we are wrapping.
+  Bexpression* bexpr_;
+  // The type of the expression.
+  Type* type_;
+};
+
 // A numeric constant.  This is used both for untyped constants and
 // for constants that have a type.
 
diff --git a/gcc/go/gofrontend/gogo.cc b/gcc/go/gofrontend/gogo.cc
index b671ce5..d685bca 100644
--- a/gcc/go/gofrontend/gogo.cc
+++ b/gcc/go/gofrontend/gogo.cc
@@ -54,7 +54,9 @@ Gogo::Gogo(Backend* backend, Linemap* linemap, int, int pointer_size)
     interface_types_(),
     specific_type_functions_(),
     specific_type_functions_are_written_(false),
-    named_types_are_converted_(false)
+    named_types_are_converted_(false),
+    analysis_sets_(),
+    gc_roots_()
 {
   const Location loc = Linemap::predeclared_location();
 
@@ -750,10 +752,9 @@ Gogo::register_gc_vars(const std::vector<Named_object*>& var_gc,
 
   Expression_list* roots_init = new Expression_list();
 
-  size_t i = 0;
   for (std::vector<Named_object*>::const_iterator p = var_gc.begin();
        p != var_gc.end();
-       ++p, ++i)
+       ++p)
     {
       Expression_list* init = new Expression_list();
 
@@ -772,6 +773,27 @@ Gogo::register_gc_vars(const std::vector<Named_object*>& var_gc,
       roots_init->push_back(root_ctor);
     }
 
+  for (std::vector<Expression*>::const_iterator p = this->gc_roots_.begin();
+       p != this->gc_roots_.end();
+       ++p)
+    {
+      Expression_list *init = new Expression_list();
+
+      Expression* expr = *p;
+      Location eloc = expr->location();
+      init->push_back(expr);
+
+      Type* type = expr->type()->points_to();
+      go_assert(type != NULL);
+      Expression* size =
+	Expression::make_type_info(type, Expression::TYPE_INFO_SIZE);
+      init->push_back(size);
+
+      Expression* root_ctor =
+	Expression::make_struct_composite_literal(root_type, init, eloc);
+      roots_init->push_back(root_ctor);
+    }
+
   // The list ends with a NULL entry.
 
   Expression_list* null_init = new Expression_list();
diff --git a/gcc/go/gofrontend/gogo.h b/gcc/go/gofrontend/gogo.h
index 62bbf9e..7ddb3ce 100644
--- a/gcc/go/gofrontend/gogo.h
+++ b/gcc/go/gofrontend/gogo.h
@@ -19,6 +19,7 @@ class Typed_identifier;
 class Typed_identifier_list;
 class Function_type;
 class Expression;
+class Expression_list;
 class Statement;
 class Temporary_statement;
 class Block;
@@ -556,6 +557,15 @@ class Gogo
   specific_type_functions_are_written() const
   { return this->specific_type_functions_are_written_; }
 
+  // Add a pointer that needs to be added to the list of objects
+  // traversed by the garbage collector.  This should be an expression
+  // of pointer type that points to static storage.  It's not
+  // necessary to add global variables to this list, just global
+  // variable initializers that would otherwise not be seen.
+  void
+  add_gc_root(Expression* expr)
+  { this->gc_roots_.push_back(expr); }
+
   // Traverse the tree.  See the Traverse class.
   void
   traverse(Traverse*);
@@ -892,6 +902,8 @@ class Gogo
   // A list containing groups of possibly mutually recursive functions to be
   // considered during escape analysis.
   std::vector<Analysis_set> analysis_sets_;
+  // A list of objects to add to the GC roots.
+  std::vector<Expression*> gc_roots_;
 };
 
 // A block of statements.
-- 
cgit v1.1


From 96ad5df6db65383330cba79ed823a0256e750033 Mon Sep 17 00:00:00 2001
From: "Steven G. Kargl" <kargl@gcc.gnu.org>
Date: Thu, 1 Dec 2016 20:37:55 +0000
Subject: re PR fortran/78279 (ICE in identical_array_ref, at
 fortran/dependency.c:104)

2016-12-01  Steven G. Kargl  <kargl@gcc.gnu.org>

	PR fortran/78279
	* dependency.c (identical_array_ref): Convert gcc_assert to conditional
	and gfc_internal_error.

2016-12-01  Steven G. Kargl  <kargl@gcc.gnu.org>

	PR fortran/78279
	* gfortran.dg/pr78279.f90: New test.

From-SVN: r243131
---
 gcc/fortran/ChangeLog                 |  6 ++++++
 gcc/fortran/dependency.c              |  4 +++-
 gcc/testsuite/ChangeLog               |  5 +++++
 gcc/testsuite/gfortran.dg/pr78279.f90 | 10 ++++++++++
 4 files changed, 24 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gfortran.dg/pr78279.f90

diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index 278c08f..d410392 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-01  Steven G. Kargl  <kargl@gcc.gnu.org>
+
+	PR fortran/78279
+	* dependency.c (identical_array_ref): Convert gcc_assert to conditional
+	and gfc_internal_error.
+
 2016-11-30  Andre Vehreschild  <vehre@gcc.gnu.org>
 
 	* check.c (gfc_check_allocated): By pass the caf_get call and check on
diff --git a/gcc/fortran/dependency.c b/gcc/fortran/dependency.c
index 82c5e6b..4a3c1a7 100644
--- a/gcc/fortran/dependency.c
+++ b/gcc/fortran/dependency.c
@@ -101,7 +101,9 @@ identical_array_ref (gfc_array_ref *a1, gfc_array_ref *a2)
 
   if (a1->type == AR_ELEMENT && a2->type == AR_ELEMENT)
     {
-      gcc_assert (a1->dimen == a2->dimen);
+      if (a1->dimen != a2->dimen)
+	gfc_internal_error ("identical_array_ref(): inconsistent dimensions");
+
       for (i = 0; i < a1->dimen; i++)
 	{
 	  if (gfc_dep_compare_expr (a1->start[i], a2->start[i]) != 0)
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index d6e5ac4..321a48a 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-01  Steven G. Kargl  <kargl@gcc.gnu.org>
+
+	PR fortran/78279
+	* gfortran.dg/pr78279.f90: New test.
+
 2016-12-01  David Edelsohn  <dje.gcc@gmail.com>
 
 	* g++.dg/tls/pr77285-1.C: dg-add-options tls
diff --git a/gcc/testsuite/gfortran.dg/pr78279.f90 b/gcc/testsuite/gfortran.dg/pr78279.f90
new file mode 100644
index 0000000..cb01752
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr78279.f90
@@ -0,0 +1,10 @@
+! { dg-do compile }
+! { dg-options "-Ofast" }
+program p
+   integer :: i
+   real :: z(2,4)
+   z = 0.0
+   do i = 1, 3
+      if ( z(i) > z(1,i+1) ) print *, i   ! { dg-error "mismatch in array reference" }
+   end do
+end
-- 
cgit v1.1


From f99bd883fb0d051ff2d7cebe217f2d2a8ad16bfd Mon Sep 17 00:00:00 2001
From: Eric Botcazou <ebotcazou@adacore.com>
Date: Thu, 1 Dec 2016 21:41:10 +0000
Subject: sparc.opt (mlra): New target option.

	* config/sparc/sparc.opt (mlra): New target option.
	* config/sparc/sparc.c (TARGET_LRA_P): Define to...
	(sparc_lra_p): ...this.  New function.
	(D_MODES, DF_MODES): Add missing cast.
	* config/sparc/sparc.md (*movsi_lo_sum, *movsi_high): Do not
	provide these insns when flag_pic.
	(sethi_di_medlow, losum_di_medlow, seth44, setm44, setl44, sethh,
	setlm, sethm, setlo, embmedany_sethi, embmedany_losum,
	embmedany_brsum, embmedany_textuhi, embmedany_texthi,
	embmedany_textulo, embmedany_textlo): Likewise.
	(sethi_di_medlow_embmedany_pic): Provide it only when flag_pic.

Co-Authored-By: David S. Miller <davem@davemloft.net>

From-SVN: r243135
---
 gcc/ChangeLog              | 15 +++++++++++++++
 gcc/config/sparc/sparc.c   | 15 ++++++++++++---
 gcc/config/sparc/sparc.md  | 46 +++++++++++++++++++++-------------------------
 gcc/config/sparc/sparc.opt |  4 ++++
 4 files changed, 52 insertions(+), 28 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c70394e..5a55f42 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2016-12-01  Eric Botcazou  <ebotcazou@adacore.com>
+            David S. Miller  <davem@davemloft.net>
+
+	* config/sparc/sparc.opt (mlra): New target option.
+	* config/sparc/sparc.c (TARGET_LRA_P): Define to...
+	(sparc_lra_p): ...this.  New function.
+	(D_MODES, DF_MODES): Add missing cast.
+	* config/sparc/sparc.md (*movsi_lo_sum, *movsi_high): Do not
+	provide these insns when flag_pic.
+	(sethi_di_medlow, losum_di_medlow, seth44, setm44, setl44, sethh,
+	setlm, sethm, setlo, embmedany_sethi, embmedany_losum,
+	embmedany_brsum, embmedany_textuhi, embmedany_texthi,
+	embmedany_textulo, embmedany_textlo): Likewise.
+	(sethi_di_medlow_embmedany_pic): Provide it only with flag_pic.
+
 2016-12-01  David Edelsohn  <dje.gcc@gmail.com>
 
 	PR debug/66419
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index a70a0ad..e17552a 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -639,6 +639,7 @@ static const char *sparc_mangle_type (const_tree);
 static void sparc_trampoline_init (rtx, tree, rtx);
 static machine_mode sparc_preferred_simd_mode (machine_mode);
 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
+static bool sparc_lra_p (void);
 static bool sparc_print_operand_punct_valid_p (unsigned char);
 static void sparc_print_operand (FILE *, rtx, int);
 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
@@ -836,7 +837,7 @@ char sparc_hard_reg_printed[8];
 #endif
 
 #undef TARGET_LRA_P
-#define TARGET_LRA_P hook_bool_void_false
+#define TARGET_LRA_P sparc_lra_p
 
 #undef TARGET_LEGITIMATE_ADDRESS_P
 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
@@ -4787,7 +4788,7 @@ enum sparc_mode_class {
   ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
 
 /* Modes for double-word and smaller quantities.  */
-#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
+#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
 
 /* Modes for quad-word and smaller quantities.  */
 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
@@ -4799,7 +4800,7 @@ enum sparc_mode_class {
 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
 
 /* Modes for double-float and smaller quantities.  */
-#define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
+#define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
 
 /* Modes for quad-float and smaller quantities.  */
 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
@@ -12248,6 +12249,14 @@ sparc_preferred_reload_class (rtx x, reg_class_t rclass)
   return rclass;
 }
 
+/* Return true if we use LRA instead of reload pass.  */
+
+static bool
+sparc_lra_p (void)
+{
+  return TARGET_LRA;
+}
+
 /* Output a wide multiply instruction in V8+ mode.  INSN is the instruction,
    OPERANDS are its operands and OPCODE is the mnemonic to be used.  */
 
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index f33c391..896ce4b 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -1568,13 +1568,13 @@
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(lo_sum:SI (match_operand:SI 1 "register_operand" "r")
                    (match_operand:SI 2 "immediate_operand" "in")))]
-  ""
+  "!flag_pic"
   "or\t%1, %%lo(%a2), %0")
 
 (define_insn "*movsi_high"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(high:SI (match_operand:SI 1 "immediate_operand" "in")))]
-  ""
+  "!flag_pic"
   "sethi\t%%hi(%a1), %0")
 
 ;; The next two patterns must wrap the SYMBOL_REF in an UNSPEC
@@ -1846,27 +1846,27 @@
 (define_insn "*sethi_di_medlow_embmedany_pic"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (high:DI (match_operand:DI 1 "medium_pic_operand" "")))]
-  "(TARGET_CM_MEDLOW || TARGET_CM_EMBMEDANY) && check_pic (1)"
+  "(TARGET_CM_MEDLOW || TARGET_CM_EMBMEDANY) && flag_pic && check_pic (1)"
   "sethi\t%%hi(%a1), %0")
 
 (define_insn "*sethi_di_medlow"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (high:DI (match_operand:DI 1 "symbolic_operand" "")))]
-  "TARGET_CM_MEDLOW && check_pic (1)"
+  "TARGET_CM_MEDLOW && !flag_pic"
   "sethi\t%%hi(%a1), %0")
 
 (define_insn "*losum_di_medlow"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
                    (match_operand:DI 2 "symbolic_operand" "")))]
-  "TARGET_CM_MEDLOW"
+  "TARGET_CM_MEDLOW && !flag_pic"
   "or\t%1, %%lo(%a2), %0")
 
 (define_insn "seth44"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (high:DI (unspec:DI [(match_operand:DI 1 "symbolic_operand" "")]
 			    UNSPEC_SETH44)))]
-  "TARGET_CM_MEDMID"
+  "TARGET_CM_MEDMID && !flag_pic"
   "sethi\t%%h44(%a1), %0")
 
 (define_insn "setm44"
@@ -1874,28 +1874,28 @@
         (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
                    (unspec:DI [(match_operand:DI 2 "symbolic_operand" "")]
 			      UNSPEC_SETM44)))]
-  "TARGET_CM_MEDMID"
+  "TARGET_CM_MEDMID && !flag_pic"
   "or\t%1, %%m44(%a2), %0")
 
 (define_insn "setl44"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
                    (match_operand:DI 2 "symbolic_operand" "")))]
-  "TARGET_CM_MEDMID"
+  "TARGET_CM_MEDMID && !flag_pic"
   "or\t%1, %%l44(%a2), %0")
 
 (define_insn "sethh"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (high:DI (unspec:DI [(match_operand:DI 1 "symbolic_operand" "")]
 			    UNSPEC_SETHH)))]
-  "TARGET_CM_MEDANY"
+  "TARGET_CM_MEDANY && !flag_pic"
   "sethi\t%%hh(%a1), %0")
 
 (define_insn "setlm"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (high:DI (unspec:DI [(match_operand:DI 1 "symbolic_operand" "")]
 			    UNSPEC_SETLM)))]
-  "TARGET_CM_MEDANY"
+  "TARGET_CM_MEDANY && !flag_pic"
   "sethi\t%%lm(%a1), %0")
 
 (define_insn "sethm"
@@ -1903,49 +1903,49 @@
         (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
                    (unspec:DI [(match_operand:DI 2 "symbolic_operand" "")]
 			      UNSPEC_EMB_SETHM)))]
-  "TARGET_CM_MEDANY"
+  "TARGET_CM_MEDANY && !flag_pic"
   "or\t%1, %%hm(%a2), %0")
 
 (define_insn "setlo"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
                    (match_operand:DI 2 "symbolic_operand" "")))]
-  "TARGET_CM_MEDANY"
+  "TARGET_CM_MEDANY && !flag_pic"
   "or\t%1, %%lo(%a2), %0")
 
 (define_insn "embmedany_sethi"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (high:DI (unspec:DI [(match_operand:DI 1 "data_segment_operand" "")]
 			    UNSPEC_EMB_HISUM)))]
-  "TARGET_CM_EMBMEDANY && check_pic (1)"
+  "TARGET_CM_EMBMEDANY && !flag_pic"
   "sethi\t%%hi(%a1), %0")
 
 (define_insn "embmedany_losum"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
                    (match_operand:DI 2 "data_segment_operand" "")))]
-  "TARGET_CM_EMBMEDANY"
+  "TARGET_CM_EMBMEDANY && !flag_pic"
   "add\t%1, %%lo(%a2), %0")
 
 (define_insn "embmedany_brsum"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (unspec:DI [(match_operand:DI 1 "register_operand" "r")]
 	           UNSPEC_EMB_HISUM))]
-  "TARGET_CM_EMBMEDANY"
+  "TARGET_CM_EMBMEDANY && !flag_pic"
   "add\t%1, %_, %0")
 
 (define_insn "embmedany_textuhi"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (high:DI (unspec:DI [(match_operand:DI 1 "text_segment_operand" "")]
 			    UNSPEC_EMB_TEXTUHI)))]
-  "TARGET_CM_EMBMEDANY && check_pic (1)"
+  "TARGET_CM_EMBMEDANY && !flag_pic"
   "sethi\t%%uhi(%a1), %0")
 
 (define_insn "embmedany_texthi"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (high:DI (unspec:DI [(match_operand:DI 1 "text_segment_operand" "")]
 			    UNSPEC_EMB_TEXTHI)))]
-  "TARGET_CM_EMBMEDANY && check_pic (1)"
+  "TARGET_CM_EMBMEDANY && !flag_pic"
   "sethi\t%%hi(%a1), %0")
 
 (define_insn "embmedany_textulo"
@@ -1953,14 +1953,14 @@
         (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
                    (unspec:DI [(match_operand:DI 2 "text_segment_operand" "")]
 			      UNSPEC_EMB_TEXTULO)))]
-  "TARGET_CM_EMBMEDANY"
+  "TARGET_CM_EMBMEDANY && !flag_pic"
   "or\t%1, %%ulo(%a2), %0")
 
 (define_insn "embmedany_textlo"
   [(set (match_operand:DI 0 "register_operand" "=r")
         (lo_sum:DI (match_operand:DI 1 "register_operand" "r")
                    (match_operand:DI 2 "text_segment_operand" "")))]
-  "TARGET_CM_EMBMEDANY"
+  "TARGET_CM_EMBMEDANY && !flag_pic"
   "or\t%1, %%lo(%a2), %0")
 
 ;; Now some patterns to help reload out a bit.
@@ -1968,9 +1968,7 @@
   [(parallel [(match_operand:DI 0 "register_operand" "=r")
               (match_operand:DI 1 "immediate_operand" "")
               (match_operand:TI 2 "register_operand" "=&r")])]
-  "(TARGET_CM_MEDANY
-    || TARGET_CM_EMBMEDANY)
-   && !flag_pic"
+  "(TARGET_CM_MEDANY || TARGET_CM_EMBMEDANY) && !flag_pic"
 {
   sparc_emit_set_symbolic_const64 (operands[0], operands[1], operands[2]);
   DONE;
@@ -1980,9 +1978,7 @@
   [(parallel [(match_operand:DI 0 "register_operand" "=r")
               (match_operand:DI 1 "immediate_operand" "")
               (match_operand:TI 2 "register_operand" "=&r")])]
-  "(TARGET_CM_MEDANY
-    || TARGET_CM_EMBMEDANY)
-   && !flag_pic"
+  "(TARGET_CM_MEDANY || TARGET_CM_EMBMEDANY) && !flag_pic"
 {
   sparc_emit_set_symbolic_const64 (operands[0], operands[1], operands[2]);
   DONE;
diff --git a/gcc/config/sparc/sparc.opt b/gcc/config/sparc/sparc.opt
index 1be7800..973fe6f 100644
--- a/gcc/config/sparc/sparc.opt
+++ b/gcc/config/sparc/sparc.opt
@@ -57,6 +57,10 @@ msoft-quad-float
 Target Report RejectNegative InverseMask(HARD_QUAD)
 Do not use hardware quad fp instructions.
 
+mlra
+Target Report Mask(LRA)
+Enable Local Register Allocation.
+
 mv8plus
 Target Report Mask(V8PLUS)
 Compile for V8+ ABI.
-- 
cgit v1.1


From 859faa171ebabdddf364564acad99750cf2b6f56 Mon Sep 17 00:00:00 2001
From: David Malcolm <dmalcolm@redhat.com>
Date: Thu, 1 Dec 2016 21:56:09 +0000
Subject: dwarf2out.c: fix jit issue with early_dwarf_finished

All of the jit testcases that generate debuginfo appear to have been
failing since r240228 on their 2nd in-process iteration on this
assertion in set_early_dwarf's ctor:

      gcc_assert (! early_dwarf_finished);

Root cause is that the global is never reset at the end of compilation,
which this patch fixes in the obvious way.

gcc/ChangeLog:
	* dwarf2out.c (dwarf2out_c_finalize): Reset early_dwarf and
	early_dwarf_finished.

From-SVN: r243136
---
 gcc/ChangeLog   | 5 +++++
 gcc/dwarf2out.c | 3 +++
 2 files changed, 8 insertions(+)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5a55f42..b23481f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-01  David Malcolm  <dmalcolm@redhat.com>
+
+	* dwarf2out.c (dwarf2out_c_finalize): Reset early_dwarf and
+	early_dwarf_finished.
+
 2016-12-01  Eric Botcazou  <ebotcazou@adacore.com>
             David S. Miller  <davem@davemloft.net>
 
diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index bc328ab..8dc8523 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -29830,6 +29830,9 @@ dwarf2out_c_finalize (void)
   cold_text_section = NULL;
   current_unit_personality = NULL;
 
+  early_dwarf = false;
+  early_dwarf_finished = false;
+
   next_die_offset = 0;
   single_comp_unit_die = NULL;
   comdat_type_list = NULL;
-- 
cgit v1.1


From c1ff51dc9f743fb6ecd77a9374e543d285f98cb0 Mon Sep 17 00:00:00 2001
From: Jason Merrill <jason@gcc.gnu.org>
Date: Thu, 1 Dec 2016 17:10:57 -0500
Subject: fix PR number

From-SVN: r243137
---
 gcc/cp/ChangeLog | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog
index 35db0db..1a9a1ed 100644
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -216,7 +216,7 @@
 	IMAGPART_EXPR can be lvalues.
 
 	DR 374
-	PR c++/56840
+	PR c++/56480
 	* pt.c (check_specialization_namespace): Allow any enclosing
 	namespace.
 	(check_unqualified_spec_or_inst): New.
-- 
cgit v1.1


From 03e88100e14719f4e05dd379e88ae4daf68fa625 Mon Sep 17 00:00:00 2001
From: Jason Merrill <jason@redhat.com>
Date: Thu, 1 Dec 2016 17:13:06 -0500
Subject: call.c (add_function_candidate): Exclude inherited copy/move ctors.

	* call.c (add_function_candidate): Exclude inherited copy/move
	ctors.

From-SVN: r243138
---
 gcc/cp/ChangeLog                         |  5 +++++
 gcc/cp/call.c                            | 19 +++++++++++++++++++
 gcc/testsuite/g++.dg/cpp0x/inh-ctor15a.C | 14 --------------
 gcc/testsuite/g++.dg/cpp1z/inh-ctor36.C  | 18 ++++++++++++++++++
 4 files changed, 42 insertions(+), 14 deletions(-)
 delete mode 100644 gcc/testsuite/g++.dg/cpp0x/inh-ctor15a.C
 create mode 100644 gcc/testsuite/g++.dg/cpp1z/inh-ctor36.C

diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog
index 1a9a1ed..b407d17 100644
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-01  Jason Merrill  <jason@redhat.com>
+
+	* call.c (add_function_candidate): Exclude inherited copy/move
+	ctors.
+
 2016-11-29  David Malcolm  <dmalcolm@redhat.com>
 
 	PR c++/77922
diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index 97003e5..561cc83 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -2042,6 +2042,25 @@ add_function_candidate (struct z_candidate **candidates,
       reason = arity_rejection (first_arg, i + remaining, len);
     }
 
+  /* A constructor that is a direct member of a class C and has a first
+     parameter of type "reference to cv C" (including such a constructor
+     instantiated from a template) is excluded from the set of candidate
+     functions when used to construct an object of type derived from C (12.6.3
+     [class.inhctor.init]) with an argument list containing a single
+     argument.  */
+  if (viable && len == 1 && parmlist && DECL_CONSTRUCTOR_P (fn)
+      && flag_new_inheriting_ctors
+      && DECL_INHERITED_CTOR (fn))
+    {
+      tree ptype = non_reference (TREE_VALUE (parmlist));
+      tree ctype = DECL_INHERITED_CTOR_BASE (fn);
+      if (same_type_ignoring_top_level_qualifiers_p (ptype, ctype))
+	{
+	  viable = false;
+	  reason = inherited_ctor_rejection ();
+	}
+    }
+
   /* Second, for a function to be viable, its constraints must be
      satisfied. */
   if (flag_concepts && viable
diff --git a/gcc/testsuite/g++.dg/cpp0x/inh-ctor15a.C b/gcc/testsuite/g++.dg/cpp0x/inh-ctor15a.C
deleted file mode 100644
index a9abb84..0000000
--- a/gcc/testsuite/g++.dg/cpp0x/inh-ctor15a.C
+++ /dev/null
@@ -1,14 +0,0 @@
-// P0136 caused us to start inheriting base copy constructors.
-// { dg-do compile { target c++11 } }
-// { dg-options -fnew-inheriting-ctors }
-
-struct A { A(int); };
-struct B: public A
-{
-  using A::A;
-};
-
-A a (42);
-
-B b1 (24);			// inherited
-B b2 (a);			// also inherited now
diff --git a/gcc/testsuite/g++.dg/cpp1z/inh-ctor36.C b/gcc/testsuite/g++.dg/cpp1z/inh-ctor36.C
new file mode 100644
index 0000000..768a966
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/inh-ctor36.C
@@ -0,0 +1,18 @@
+// { dg-do link { target c++11 } }
+
+struct X { X(X &&); };
+struct A {
+  A() {}
+  A(const A&);       // #1
+  A(A &&) = default; // #2, defined as deleted (12.8 [class.copy])
+  template<typename T> A(T &&);	// #3
+  union { X x; };
+};
+struct B : A {
+  using A::A;
+  B(...) {}
+};
+
+int main() {
+  B b = A(); // calls B::B(...): #1, #2, and #3 are excluded from candidate set
+}
-- 
cgit v1.1


From c3a2f7405c2f88201b44d2edc3b8651109f0b142 Mon Sep 17 00:00:00 2001
From: Joseph Myers <joseph@codesourcery.com>
Date: Thu, 1 Dec 2016 22:36:49 +0000
Subject: * es.po: Update.

From-SVN: r243139
---
 gcc/po/ChangeLog |   4 +
 gcc/po/es.po     | 328 ++++++++++++-------------------------------------------
 2 files changed, 73 insertions(+), 259 deletions(-)

diff --git a/gcc/po/ChangeLog b/gcc/po/ChangeLog
index 340d7d4..4b6ca4b 100644
--- a/gcc/po/ChangeLog
+++ b/gcc/po/ChangeLog
@@ -1,3 +1,7 @@
+2016-12-01  Joseph Myers  <joseph@codesourcery.com>
+
+	* es.po: Update.
+
 2016-11-30  Joseph Myers  <joseph@codesourcery.com>
 
 	* es.po: Update.
diff --git a/gcc/po/es.po b/gcc/po/es.po
index e7bfcd0..cf04cb3 100644
--- a/gcc/po/es.po
+++ b/gcc/po/es.po
@@ -35,7 +35,7 @@ msgstr ""
 "Project-Id-Version: gcc 6.2.0\n"
 "Report-Msgid-Bugs-To: http://gcc.gnu.org/bugs.html\n"
 "POT-Creation-Date: 2016-08-19 21:03+0000\n"
-"PO-Revision-Date: 2016-11-30 22:55+0100\n"
+"PO-Revision-Date: 2016-12-01 23:28+0100\n"
 "Last-Translator: Antonio Ceballos <aceballos@gmail.com>\n"
 "Language-Team: Spanish <es@tp.org.es>\n"
 "Language: es\n"
@@ -8105,7 +8105,7 @@ msgstr "Genera código que use las instrucciones de coma flotante del 68881."
 
 #: config/m68k/m68k.opt:99
 msgid "Align variables on a 32-bit boundary."
-msgstr "Alínea las variables en límites de 32-bit."
+msgstr "Alinea las variables en límites de 32-bit."
 
 #: config/m68k/m68k.opt:103 config/arm/arm.opt:81 config/nios2/nios2.opt:570
 #: config/nds32/nds32.opt:66 config/c6x/c6x.opt:67
@@ -8504,152 +8504,102 @@ msgid "Use simple data speculation check for control speculation."
 msgstr "Usa la revisión de especulación de datos simple para el control de especulación."
 
 #: config/ia64/ia64.opt:174
-#, fuzzy
-#| msgid "Count speculative dependencies while calculating priority of instructions"
 msgid "Count speculative dependencies while calculating priority of instructions."
 msgstr "Cuenta las dependencias especulativas mientras se calcula la prioridad de las instrucciones."
 
 #: config/ia64/ia64.opt:178
-#, fuzzy
-#| msgid "Place a stop bit after every cycle when scheduling"
 msgid "Place a stop bit after every cycle when scheduling."
-msgstr "Coloca un bit de parada después de cada ciclo durante la calendarización."
+msgstr "Coloca un bit de parada después de cada ciclo durante la planificación."
 
 #: config/ia64/ia64.opt:182
-#, fuzzy
-#| msgid "Assume that floating-point stores and loads are not likely to cause conflict when placed into one instruction group"
 msgid "Assume that floating-point stores and loads are not likely to cause conflict when placed into one instruction group."
-msgstr "Asume que los stores y loads de coma flotante no pueden causar conflictos al colocarse en un grupo de instrucción."
+msgstr "Asume que los stores y loads de coma flotante no es probable que provoquen conflictos al colocarse en un grupo de instrucción."
 
 #: config/ia64/ia64.opt:186
-#, fuzzy
-#| msgid "Soft limit on number of memory insns per instruction group, giving lower priority to subsequent memory insns attempting to schedule in the same insn group. Frequently useful to prevent cache bank conflicts.  Default value is 1"
 msgid "Soft limit on number of memory insns per instruction group, giving lower priority to subsequent memory insns attempting to schedule in the same insn group. Frequently useful to prevent cache bank conflicts.  Default value is 1."
-msgstr "Límite suave en el número de insns de memoria por grupo de instrucción, dando una prioridad más baja a insns de memoria subsecuentes que intenten calendarizar en el mismo grupo insn. Frecuentemente útil para prevenir conflictos en el banco de caché.  El valor por defecto es 1."
+msgstr "Límite suave en el número de insns de memoria por grupo de instrucción, dando una prioridad más baja a insns de memoria subsiguientes que intenten planificar en el mismo grupo insn. Frecuentemente útil para prevenir conflictos en el banco de caché.  El valor por defecto es 1."
 
 #: config/ia64/ia64.opt:190
-#, fuzzy
-#| msgid "Disallow more than 'msched-max-memory-insns' in instruction group. Otherwise, limit is 'soft' (prefer non-memory operations when limit is reached)"
 msgid "Disallow more than 'msched-max-memory-insns' in instruction group. Otherwise, limit is 'soft' (prefer non-memory operations when limit is reached)."
 msgstr "Desactiva más de 'msched-max-memory-insns' en el grupo de instrucción.  De otra forma, el límite es 'soft' (se prefieren operaciones que no sean de memoria cuando se alcanza el límite)."
 
 #: config/ia64/ia64.opt:194
-#, fuzzy
-#| msgid "Don't generate checks for control speculation in selective scheduling"
 msgid "Don't generate checks for control speculation in selective scheduling."
-msgstr "No genera revisiones para el control de especulación en la calendarización selectiva."
+msgstr "No genera revisiones para el control de especulación en la planificación selectiva."
 
 #: config/spu/spu.opt:20
-#, fuzzy
-#| msgid "Emit warnings when run-time relocations are generated"
 msgid "Emit warnings when run-time relocations are generated."
 msgstr "Emite avisos cuando se generan las reubicaciones de tiempo de ejecución."
 
 #: config/spu/spu.opt:24
-#, fuzzy
-#| msgid "Emit errors when run-time relocations are generated"
 msgid "Emit errors when run-time relocations are generated."
 msgstr "Emite errores cuando se generan las reubicaciones de tiempo de ejecución."
 
 #: config/spu/spu.opt:28
-#, fuzzy
-#| msgid "Specify cost of branches (Default 20)"
 msgid "Specify cost of branches (Default 20)."
 msgstr "Especifica el costo de las ramificaciones (20 por defecto)."
 
 #: config/spu/spu.opt:32
-#, fuzzy
-#| msgid "Make sure loads and stores are not moved past DMA instructions"
 msgid "Make sure loads and stores are not moved past DMA instructions."
 msgstr "Se asegura que las instrucciones load y store no se mueven después de las instrucciones DMA."
 
 #: config/spu/spu.opt:36
-#, fuzzy
-#| msgid "volatile must be specified on any memory that is effected by DMA"
 msgid "volatile must be specified on any memory that is effected by DMA."
-msgstr "Se debe especificar volatile en cualquier memoria que sea afectada por DMA."
+msgstr "se debe especificar volatile en cualquier memoria que sea afectada por DMA."
 
 #: config/spu/spu.opt:40 config/spu/spu.opt:44
-#, fuzzy
-#| msgid "Insert nops when it might improve performance by allowing dual issue (default)"
 msgid "Insert nops when it might improve performance by allowing dual issue (default)."
 msgstr "Inserta nops cuando se puede mejorar el rendimiento permitiendo el asunto dual (por defecto)."
 
 #: config/spu/spu.opt:48
-#, fuzzy
-#| msgid "Use standard main function as entry for startup"
 msgid "Use standard main function as entry for startup."
 msgstr "Usa la función main estándar como entrada para el inicio."
 
 #: config/spu/spu.opt:52
-#, fuzzy
-#| msgid "Generate branch hints for branches"
 msgid "Generate branch hints for branches."
 msgstr "Genera pistas de ramificación para las ramificaciones."
 
 #: config/spu/spu.opt:56
-#, fuzzy
-#| msgid "Maximum number of nops to insert for a hint (Default 2)"
 msgid "Maximum number of nops to insert for a hint (Default 2)."
 msgstr "Número máximo de nops a insertar para una pista (Por defecto 2)."
 
 #: config/spu/spu.opt:60
-#, fuzzy
-#| msgid "Approximate maximum number of instructions to allow between a hint and its branch [125]"
 msgid "Approximate maximum number of instructions to allow between a hint and its branch [125]."
 msgstr "El número máximo aproximado de instrucciones a permitir entre una pista y su ramificación [125]."
 
 #: config/spu/spu.opt:64
-#, fuzzy
-#| msgid "Generate code for 18 bit addressing"
 msgid "Generate code for 18 bit addressing."
 msgstr "Genera código para direccionamiento de 18 bit."
 
 #: config/spu/spu.opt:68
-#, fuzzy
-#| msgid "Generate code for 32 bit addressing"
 msgid "Generate code for 32 bit addressing."
 msgstr "Genera código para direccionamiento de 32 bit."
 
 #: config/spu/spu.opt:76
-#, fuzzy
-#| msgid "Insert hbrp instructions after hinted branch targets to avoid the SPU hang issue"
 msgid "Insert hbrp instructions after hinted branch targets to avoid the SPU hang issue."
 msgstr "Inserta instrucciones hbrp después de los objetivos ramificados con pista para evitar el problema del colgado de SPU."
 
 #: config/spu/spu.opt:80 config/i386/i386.opt:247 config/s390/s390.opt:56
-#, fuzzy
-#| msgid "Generate code for given CPU"
 msgid "Generate code for given CPU."
-msgstr "Genera código para el CPU dado."
+msgstr "Genera código para la CPU dada."
 
 #: config/spu/spu.opt:88
-#, fuzzy
-#| msgid "Access variables in 32-bit PPU objects (default)"
 msgid "Access variables in 32-bit PPU objects (default)."
 msgstr "Accede a las variables en objetos PPU de 32-bit (por defecto)."
 
 #: config/spu/spu.opt:92
-#, fuzzy
-#| msgid "Access variables in 64-bit PPU objects"
 msgid "Access variables in 64-bit PPU objects."
-msgstr "Accede a las varialbes en objetos PPU de 64-bit."
+msgstr "Accede a las variables en objetos PPU de 64-bit."
 
 #: config/spu/spu.opt:96
-#, fuzzy
-#| msgid "Allow conversions between __ea and generic pointers (default)"
 msgid "Allow conversions between __ea and generic pointers (default)."
 msgstr "Permite las conversiones entre __ea y punteros genéricos (por defecto)."
 
 #: config/spu/spu.opt:100
-#, fuzzy
-#| msgid "Size (in KB) of software data cache"
 msgid "Size (in KB) of software data cache."
 msgstr "Tamaño (en KB) del caché de datos de software."
 
 #: config/spu/spu.opt:104
-#, fuzzy
-#| msgid "Atomically write back software data cache lines (default)"
 msgid "Atomically write back software data cache lines (default)."
 msgstr "Escribe hacia atrás atómicamente las líneas de caché de datos de software (por defecto)."
 
@@ -8662,8 +8612,6 @@ msgid "preferentially allocate registers that allow short instruction generation
 msgstr "aloja de preferencia registros que permitan la generación de instrucciones short."
 
 #: config/epiphany/epiphany.opt:32
-#, fuzzy
-#| msgid "Set branch cost"
 msgid "Set branch cost."
 msgstr "Establece el costo de ramificación."
 
@@ -8672,32 +8620,22 @@ msgid "enable conditional move instruction usage."
 msgstr "activa el uso de la instrucción move condicional."
 
 #: config/epiphany/epiphany.opt:40
-#, fuzzy
-#| msgid "set number of nops to emit before each insn pattern"
 msgid "set number of nops to emit before each insn pattern."
 msgstr "establece el número de nops a emitir antes de cada patrón insn."
 
 #: config/epiphany/epiphany.opt:52
-#, fuzzy
-#| msgid "Use software floating point comparisons"
 msgid "Use software floating point comparisons."
 msgstr "Usa comparaciones de coma flotante de software."
 
 #: config/epiphany/epiphany.opt:56
-#, fuzzy
-#| msgid "Enable split of 32 bit immediate loads into low / high part"
 msgid "Enable split of 32 bit immediate loads into low / high part."
 msgstr "Activa la división de loads inmediatos de 32 bit en partes low / high."
 
 #: config/epiphany/epiphany.opt:60
-#, fuzzy
-#| msgid "Enable use of POST_INC / POST_DEC"
 msgid "Enable use of POST_INC / POST_DEC."
 msgstr "Activa el uso de POST_INC / POST_DEC."
 
 #: config/epiphany/epiphany.opt:64
-#, fuzzy
-#| msgid "Enable use of POST_MODIFY"
 msgid "Enable use of POST_MODIFY."
 msgstr "Activa el uso de POST_MODIFY."
 
@@ -8707,17 +8645,13 @@ msgstr "Establece el número prealojado de bytes en la pila para que use el llam
 
 #: config/epiphany/epiphany.opt:72
 msgid "Assume round to nearest is selected for purposes of scheduling."
-msgstr "Asume que está seleccionado el redondeo al más cercano para propósitos de calendarización."
+msgstr "Asume que está seleccionado el redondeo al más cercano para propósitos de planificación."
 
 #: config/epiphany/epiphany.opt:76
-#, fuzzy
-#| msgid "Generate call insns as indirect calls"
 msgid "Generate call insns as indirect calls."
 msgstr "Genera las llamadas insns como llamadas indirectas."
 
 #: config/epiphany/epiphany.opt:80
-#, fuzzy
-#| msgid "Generate call insns as direct calls"
 msgid "Generate call insns as direct calls."
 msgstr "Genera las llamadas insns como llamadas directas."
 
@@ -8738,10 +8672,8 @@ msgid "Split unaligned 8 byte vector moves before post-modify address generation
 msgstr "Divide moves vectoriales de 8 byte sin alinear antes de post-modificar la generación de dirección."
 
 #: config/epiphany/epiphany.opt:132
-#, fuzzy
-#| msgid "Use hardware floating point conversion instructions"
 msgid "Use the floating point unit for integer add/subtract."
-msgstr "Usa instrucciones de conversión de coma flotante de hardware."
+msgstr "Usa la unidad de coma flotante para suma/resta de enteros."
 
 #: config/epiphany/epiphany.opt:136
 msgid "Set register to hold -1."
@@ -8749,252 +8681,174 @@ msgstr "Establece el registro para conservar -1."
 
 #: config/ft32/ft32.opt:23
 msgid "target the software simulator."
-msgstr ""
+msgstr "destina al simulador software."
 
 #: config/ft32/ft32.opt:27 config/s390/s390.opt:201 config/mips/mips.opt:385
-#, fuzzy
-#| msgid "Use ROM instead of RAM"
 msgid "Use LRA instead of reload."
-msgstr "Usa la ROM en lugar de la RAM."
+msgstr "Usa la LRA en lugar de recarga."
 
 #: config/ft32/ft32.opt:31
-#, fuzzy
-#| msgid "Allow the use of MDMX instructions"
 msgid "Avoid use of the DIV and MOD instructions"
-msgstr "Permite el uso de las instrucciones MDMX"
+msgstr "Evita el uso de las instrucciones DIV y MOD"
 
 #: config/h8300/h8300.opt:23
-#, fuzzy
-#| msgid "Generate H8S code"
 msgid "Generate H8S code."
 msgstr "Genera código H8S."
 
 #: config/h8300/h8300.opt:27
-#, fuzzy
-#| msgid "Generate H8SX code"
 msgid "Generate H8SX code."
 msgstr "Genera código H8SX."
 
 #: config/h8300/h8300.opt:31
-#, fuzzy
-#| msgid "Generate H8S/2600 code"
 msgid "Generate H8S/2600 code."
 msgstr "Genera código H8S/2600."
 
 #: config/h8300/h8300.opt:35
-#, fuzzy
-#| msgid "Make integers 32 bits wide"
 msgid "Make integers 32 bits wide."
 msgstr "Hace los enteros de 32 bits de anchura."
 
 #: config/h8300/h8300.opt:42
-#, fuzzy
-#| msgid "Use registers for argument passing"
 msgid "Use registers for argument passing."
 msgstr "Usa registros para el paso de parámetros."
 
 #: config/h8300/h8300.opt:46
-#, fuzzy
-#| msgid "Consider access to byte sized memory slow"
 msgid "Consider access to byte sized memory slow."
 msgstr "Considera lento el acceso a memoria de tamaño byte."
 
 #: config/h8300/h8300.opt:50
-#, fuzzy
-#| msgid "Enable linker relaxing"
 msgid "Enable linker relaxing."
 msgstr "Activa la relajación del enlazador."
 
 #: config/h8300/h8300.opt:54
-#, fuzzy
-#| msgid "Generate H8/300H code"
 msgid "Generate H8/300H code."
 msgstr "Genera código H8/300H."
 
 #: config/h8300/h8300.opt:58
-#, fuzzy
-#| msgid "Enable the normal mode"
 msgid "Enable the normal mode."
-msgstr "Activa el modelo normal."
+msgstr "Activa el modo normal."
 
 #: config/h8300/h8300.opt:62
-#, fuzzy
-#| msgid "Use H8/300 alignment rules"
 msgid "Use H8/300 alignment rules."
 msgstr "Usa las reglas de alineación H8/300."
 
 #: config/h8300/h8300.opt:66
 msgid "Push extended registers on stack in monitor functions."
-msgstr ""
+msgstr "Empuja los registros extendidos a la pila en las funciones de monitorización."
 
 #: config/h8300/h8300.opt:70
 msgid "Do not push extended registers on stack in monitor functions."
-msgstr ""
+msgstr "No empuja los registros extendidos a la pila en las funciones de monitorización."
 
 #: config/pdp11/pdp11.opt:23
-#, fuzzy
-#| msgid "Generate code for an 11/10"
 msgid "Generate code for an 11/10."
 msgstr "Genera código para un 11/10."
 
 #: config/pdp11/pdp11.opt:27
-#, fuzzy
-#| msgid "Generate code for an 11/40"
 msgid "Generate code for an 11/40."
 msgstr "Genera código para un 11/40."
 
 #: config/pdp11/pdp11.opt:31
-#, fuzzy
-#| msgid "Generate code for an 11/45"
 msgid "Generate code for an 11/45."
 msgstr "Genera código para un 11/45."
 
 #: config/pdp11/pdp11.opt:35
-#, fuzzy
-#| msgid "Return floating-point results in ac0 (fr0 in Unix assembler syntax)"
 msgid "Return floating-point results in ac0 (fr0 in Unix assembler syntax)."
 msgstr "Devuelve los resultados de coma flotante en ac0 (fr0 en sintaxis de ensamblador Unix)."
 
 #: config/pdp11/pdp11.opt:39
-#, fuzzy
-#| msgid "Do not use inline patterns for copying memory"
 msgid "Do not use inline patterns for copying memory."
 msgstr "No usa patrones incluidos en línea para copiado de memoria."
 
 #: config/pdp11/pdp11.opt:43
-#, fuzzy
-#| msgid "Use inline patterns for copying memory"
 msgid "Use inline patterns for copying memory."
 msgstr "Usa patrones incluidos en línea para copiado de memoria."
 
 #: config/pdp11/pdp11.opt:47
-#, fuzzy
-#| msgid "Do not pretend that branches are expensive"
 msgid "Do not pretend that branches are expensive."
 msgstr "No pretende que las ramificaciones son costosas."
 
 #: config/pdp11/pdp11.opt:51
-#, fuzzy
-#| msgid "Pretend that branches are expensive"
 msgid "Pretend that branches are expensive."
 msgstr "Pretende que las ramificaciones son costosas."
 
 #: config/pdp11/pdp11.opt:55
-#, fuzzy
-#| msgid "Use the DEC assembler syntax"
 msgid "Use the DEC assembler syntax."
 msgstr "Usa la sintaxis de ensamblador DEC."
 
 #: config/pdp11/pdp11.opt:59
-#, fuzzy
-#| msgid "Use 32 bit float"
 msgid "Use 32 bit float."
 msgstr "Usa float de 32 bit."
 
 #: config/pdp11/pdp11.opt:63
-#, fuzzy
-#| msgid "Use 64 bit float"
 msgid "Use 64 bit float."
 msgstr "Usa float de 64 bit."
 
 #: config/pdp11/pdp11.opt:67 config/rs6000/rs6000.opt:177
 #: config/frv/frv.opt:158
-#, fuzzy
-#| msgid "Use hardware floating point"
 msgid "Use hardware floating point."
 msgstr "Usa coma flotante de hardware."
 
 #: config/pdp11/pdp11.opt:71
-#, fuzzy
-#| msgid "Use 16 bit int"
 msgid "Use 16 bit int."
 msgstr "Usa int de 16 bit."
 
 #: config/pdp11/pdp11.opt:75
-#, fuzzy
-#| msgid "Use 32 bit int"
 msgid "Use 32 bit int."
 msgstr "Usa int de 32 bit."
 
 #: config/pdp11/pdp11.opt:79 config/rs6000/rs6000.opt:173
-#, fuzzy
-#| msgid "Do not use hardware floating point"
 msgid "Do not use hardware floating point."
 msgstr "No usa coma flotante de hardware."
 
 #: config/pdp11/pdp11.opt:83
-#, fuzzy
-#| msgid "Target has split I&D"
 msgid "Target has split I&D."
 msgstr "El objetivo tiene I&D dividido."
 
 #: config/pdp11/pdp11.opt:87
-#, fuzzy
-#| msgid "Use UNIX assembler syntax"
 msgid "Use UNIX assembler syntax."
 msgstr "Usa sintaxis de ensamblador UNIX."
 
 #: config/xtensa/xtensa.opt:23
-#, fuzzy
-#| msgid "Use CONST16 instruction to load constants"
 msgid "Use CONST16 instruction to load constants."
 msgstr "Usa la instrucción CONST16 para cargar constantes."
 
 #: config/xtensa/xtensa.opt:27
-#, fuzzy
-#| msgid "Disable position-independent code (PIC) for use in OS kernel code"
 msgid "Disable position-independent code (PIC) for use in OS kernel code."
 msgstr "Desactiva el código independiente de posición (PIC) para su uso en código de núcleo de SO."
 
 #: config/xtensa/xtensa.opt:31
-#, fuzzy
-#| msgid "Use indirect CALLXn instructions for large programs"
 msgid "Use indirect CALLXn instructions for large programs."
 msgstr "Usa las instrucciones CALLXn indirectas para programas grandes."
 
 #: config/xtensa/xtensa.opt:35
-#, fuzzy
-#| msgid "Automatically align branch targets to reduce branch penalties"
 msgid "Automatically align branch targets to reduce branch penalties."
-msgstr "Alínea automáticamente los objetivos de las ramificaciones para reducir las penas de ramificación."
+msgstr "Alinea automáticamente los objetivos de las ramificaciones para reducir las penas de ramificación."
 
 #: config/xtensa/xtensa.opt:39
-#, fuzzy
-#| msgid "Intersperse literal pools with code in the text section"
 msgid "Intersperse literal pools with code in the text section."
 msgstr "Dispersa los conjuntos de literales con código en la sección de texto."
 
 #: config/xtensa/xtensa.opt:43
 msgid "Relax literals in assembler and place them automatically in the text section."
-msgstr ""
+msgstr "Relaja los literales en ensamblador y los coloca automáticamente en la sección de texto."
 
 #: config/xtensa/xtensa.opt:47
-#, fuzzy
-#| msgid "-mno-serialize-volatile\tDo not serialize volatile memory references with MEMW instructions"
 msgid "-mno-serialize-volatile\tDo not serialize volatile memory references with MEMW instructions."
 msgstr "-mno-serialize-volatile\tNo serializa las referencias a memoria volátil con instrucciones MEMW."
 
 #: config/i386/cygming.opt:23
-#, fuzzy
-#| msgid "Create console application"
 msgid "Create console application."
 msgstr "Crea una aplicación de consola."
 
 #: config/i386/cygming.opt:27
-#, fuzzy
-#| msgid "Generate code for a DLL"
 msgid "Generate code for a DLL."
 msgstr "Genera código para una DLL."
 
 #: config/i386/cygming.opt:31
-#, fuzzy
-#| msgid "Ignore dllimport for functions"
 msgid "Ignore dllimport for functions."
 msgstr "Ignora dllimport para funciones."
 
 #: config/i386/cygming.opt:35
-#, fuzzy
-#| msgid "Use Mingw-specific thread support"
 msgid "Use Mingw-specific thread support."
 msgstr "Usa el soporte de hilos específico de Mingw."
 
@@ -9005,132 +8859,94 @@ msgid "Set Windows defines."
 msgstr "Establece las definiciones de Windows."
 
 #: config/i386/cygming.opt:43
-#, fuzzy
-#| msgid "Create GUI application"
 msgid "Create GUI application."
 msgstr "Crea una aplicación con interfaz gráfica de usuario (GUI)."
 
 #: config/i386/cygming.opt:47 config/i386/interix.opt:32
-#, fuzzy
-#| msgid "Use the GNU extension to the PE format for aligned common data"
 msgid "Use the GNU extension to the PE format for aligned common data."
 msgstr "Usa la extensión GNU para el formato PE para los datos comunes alineados."
 
 #: config/i386/cygming.opt:51
-#, fuzzy
-#| msgid "Compile code that relies on Cygwin DLL wrappers to support C++ operator new/delete replacement"
 msgid "Compile code that relies on Cygwin DLL wrappers to support C++ operator new/delete replacement."
 msgstr "Compila código que depende de las envolturas DLL de Cygwin para admitir el reemplazo de los operadores de C++ new/delete."
 
 #: config/i386/cygming.opt:58
 msgid "Put relocated read-only data into .data section."
-msgstr ""
+msgstr "Pone los datos de solo lectura reubicados en la sección .data."
 
 #: config/i386/mingw.opt:29
-#, fuzzy
-#| msgid "Warn about none ISO msvcrt scanf/printf width extensions"
 msgid "Warn about none ISO msvcrt scanf/printf width extensions."
-msgstr "Avisa sobre extensiones de anchura scanf/printf msvcrt que no son ISO."
+msgstr "Advierte de extensiones de anchura scanf/printf msvcrt que no son ISO."
 
 #: config/i386/mingw.opt:33
 msgid "For nested functions on stack executable permission is set."
 msgstr "Se establece el permiso ejecutable para las funciones anidadas en la pila."
 
 #: config/i386/mingw-w64.opt:23
-#, fuzzy
-#| msgid "Use unicode startup and define UNICODE macro"
 msgid "Use unicode startup and define UNICODE macro."
 msgstr "Usa el inicio de unicode y define la macro UNICODE."
 
 #: config/i386/i386.opt:182
-#, fuzzy
-#| msgid "sizeof(long double) is 16"
 msgid "sizeof(long double) is 16."
 msgstr "sizeof(long double) es 16."
 
 #: config/i386/i386.opt:186 config/i386/i386.opt:354
-#, fuzzy
-#| msgid "Use hardware fp"
 msgid "Use hardware fp."
 msgstr "Usa fp de hardware."
 
 #: config/i386/i386.opt:190
-#, fuzzy
-#| msgid "sizeof(long double) is 12"
 msgid "sizeof(long double) is 12."
 msgstr "sizeof(long double) es 12."
 
 #: config/i386/i386.opt:194
-#, fuzzy
-#| msgid "Use 128-bit long double"
 msgid "Use 80-bit long double."
-msgstr "Usa long doubles de 128 bits."
+msgstr "Usa long doubles de 80 bits."
 
 #: config/i386/i386.opt:198 config/s390/s390.opt:130
 #: config/sparc/long-double-switch.opt:27 config/alpha/alpha.opt:102
 #, fuzzy
-#| msgid "Use 64-bit long double"
+#| msgid "Use 80-bit long double."
 msgid "Use 64-bit long double."
-msgstr "Usa long doubles de 64 bits."
+msgstr "Usa long doubles de 64 bits."
 
 #: config/i386/i386.opt:202 config/s390/s390.opt:126
 #: config/sparc/long-double-switch.opt:23 config/alpha/alpha.opt:98
-#, fuzzy
-#| msgid "Use 128-bit long double"
 msgid "Use 128-bit long double."
 msgstr "Usa long doubles de 128 bits."
 
 #: config/i386/i386.opt:206 config/sh/sh.opt:209
-#, fuzzy
-#| msgid "Reserve space for outgoing arguments in the function prologue"
 msgid "Reserve space for outgoing arguments in the function prologue."
 msgstr "Reserva espacio para los argumentos de salida en el prólogo de la función."
 
 #: config/i386/i386.opt:210
-#, fuzzy
-#| msgid "Align some doubles on dword boundary"
 msgid "Align some doubles on dword boundary."
-msgstr "Alínea algunos doubles en límites de dword."
+msgstr "Alinea algunos doubles en límites de dword."
 
 #: config/i386/i386.opt:214
-#, fuzzy
-#| msgid "Function starts are aligned to this power of 2"
 msgid "Function starts are aligned to this power of 2."
 msgstr "Los inicios de las funciones se alinean a esta potencia de 2."
 
 #: config/i386/i386.opt:218
-#, fuzzy
-#| msgid "Jump targets are aligned to this power of 2"
 msgid "Jump targets are aligned to this power of 2."
 msgstr "Los objetivos de salto se alinean a esta potencia de 2."
 
 #: config/i386/i386.opt:222
-#, fuzzy
-#| msgid "Loop code aligned to this power of 2"
 msgid "Loop code aligned to this power of 2."
 msgstr "El código de ciclo se alinea a esta potencia de 2."
 
 #: config/i386/i386.opt:226
-#, fuzzy
-#| msgid "Align destination of the string operations"
 msgid "Align destination of the string operations."
-msgstr "Alínea el destino de las operaciones de cadenas."
+msgstr "Alinea el destino de las operaciones de cadenas."
 
 #: config/i386/i386.opt:230
-#, fuzzy
-#| msgid "Do not tune writable data alignment"
 msgid "Use the given data alignment."
-msgstr "No ajusta la alineación de los datos modificables."
+msgstr "Usa la alineación de los datos dada."
 
 #: config/i386/i386.opt:234
-#, fuzzy
-#| msgid "Known TLS dialects (for use with the -mtls-dialect= option):"
 msgid "Known data alignment choices (for use with the -malign-data= option):"
-msgstr "Dialectos TLS conocidos (para usar con la opción -mtls-dialect=):"
+msgstr "Las opciones conocidas para alineamiento de datos (para usar con la opción -malign-data=):"
 
 #: config/i386/i386.opt:251
-#, fuzzy
-#| msgid "Use given assembler dialect"
 msgid "Use given assembler dialect."
 msgstr "Usa el dialecto de ensamblador dado."
 
@@ -9139,20 +8955,14 @@ msgid "Known assembler dialects (for use with the -masm-dialect= option):"
 msgstr "Dialectos de ensamblador conocidos (para uso con la opción -masm-dialect=):"
 
 #: config/i386/i386.opt:265
-#, fuzzy
-#| msgid "Branches are this expensive (1-5, arbitrary units)"
 msgid "Branches are this expensive (1-5, arbitrary units)."
 msgstr "Las ramificaciones son así de caras (1-5, unidades arbitrarias)."
 
 #: config/i386/i386.opt:269
-#, fuzzy
-#| msgid "Data greater than given threshold will go into .ldata section in x86-64 medium model"
 msgid "Data greater than given threshold will go into .ldata section in x86-64 medium model."
 msgstr "Los datos más grandes que el límite dado irán a la sección .ldata en el modeolo medium del x86-64."
 
 #: config/i386/i386.opt:273
-#, fuzzy
-#| msgid "Use given x86-64 code model"
 msgid "Use given x86-64 code model."
 msgstr "Usa el modelo de código del x86-64 dado."
 
@@ -9297,7 +9107,7 @@ msgstr "Usa las convenciones de paso de registro SSE para los modos SF y DF."
 #, fuzzy
 #| msgid "Realign stack in prologue"
 msgid "Realign stack in prologue."
-msgstr "Realínea la pila en el prólogo."
+msgstr "Realinea la pila en el prólogo."
 
 #: config/i386/i386.opt:442
 #, fuzzy
@@ -9405,7 +9215,7 @@ msgstr ""
 #, fuzzy
 #| msgid "Do dispatch scheduling if processor is bdver1 or bdver2 and Haifa scheduling"
 msgid "Do dispatch scheduling if processor is bdver1, bdver2, bdver3, bdver4"
-msgstr "Despacha al planificador si el procesador es bdver1 o bdver2 y la calendarización es Haifa"
+msgstr "Despacha al planificador si el procesador es bdver1 o bdver2 y la planificación es Haifa"
 
 #: config/i386/i386.opt:582
 msgid "Use 128-bit AVX instructions instead of 256-bit AVX instructions in the auto-vectorizer."
@@ -9950,7 +9760,7 @@ msgstr "Usa convenciones de llamada transportable."
 #, fuzzy
 #| msgid "Specify CPU for scheduling purposes.  Valid arguments are 700, 7100, 7100LC, 7200, 7300, and 8000"
 msgid "Specify CPU for scheduling purposes.  Valid arguments are 700, 7100, 7100LC, 7200, 7300, and 8000."
-msgstr "Especifica el CPU por razones de calendarización.  Los argumentos válidos son 700, 7100, 7100LC, 7200, 7300, y 8000."
+msgstr "Especifica el CPU por razones de planificación.  Los argumentos válidos son 700, 7100, 7100LC, 7200, 7300, y 8000."
 
 #: config/pa/pa.opt:132 config/frv/frv.opt:215
 #, fuzzy
@@ -10343,7 +10153,7 @@ msgstr "Compila para el m32r."
 #, fuzzy
 #| msgid "Align all loops to 32 byte boundary"
 msgid "Align all loops to 32 byte boundary."
-msgstr "Alínea todos los bucles al límite de 32 byte."
+msgstr "Alinea todos los bucles al límite de 32 byte."
 
 #: config/m32r/m32r.opt:50
 #, fuzzy
@@ -10727,7 +10537,7 @@ msgstr "Almacena nombres de función en el código objeto."
 #, fuzzy
 #| msgid "Permit scheduling of a function's prologue sequence"
 msgid "Permit scheduling of a function's prologue sequence."
-msgstr "Permite la calendarización de un secuencia de los prólogos de función."
+msgstr "Permite la planificación de una secuencia de los prólogos de función."
 
 #: config/arm/arm.opt:179 config/rs6000/rs6000.opt:248
 #, fuzzy
@@ -11484,7 +11294,7 @@ msgstr "Argumentos válidos para -malign-:"
 #, fuzzy
 #| msgid "Specify scheduling priority for dispatch slot restricted insns"
 msgid "Specify scheduling priority for dispatch slot restricted insns."
-msgstr "Especifica la prioridad de calendarización para despachar insns restringidos por ranuras."
+msgstr "Especifica la prioridad de planificación para despachar insns restringidos por ranuras."
 
 #: config/rs6000/rs6000.opt:504
 #, fuzzy
@@ -11536,7 +11346,7 @@ msgstr ""
 #, fuzzy
 #| msgid "Align destination of the string operations"
 msgid "Allow sign extension in fusion operations."
-msgstr "Alínea el destino de las operaciones de cadenas."
+msgstr "Alinea el destino de las operaciones de cadenas."
 
 #: config/rs6000/rs6000.opt:562
 msgid "Use/do not use vector and scalar instructions added in ISA 2.07."
@@ -11658,7 +11468,7 @@ msgstr "Selecciona el método para el manejo de sdata."
 #, fuzzy
 #| msgid "Align to the base type of the bit-field"
 msgid "Align to the base type of the bit-field."
-msgstr "Alínea al tipo base del campo de bit."
+msgstr "Alinea al tipo base del campo de bit."
 
 #: config/rs6000/sysv4.opt:57 config/rs6000/sysv4.opt:61
 #, fuzzy
@@ -13286,7 +13096,7 @@ msgstr "Especifica el CPU para propósitos de generación de código."
 
 #: config/iq2000/iq2000.opt:47
 msgid "Specify CPU for scheduling purposes."
-msgstr "Especifica el CPU para propósitos de calendarización."
+msgstr "Especifica el CPU para propósitos de planificación."
 
 #: config/iq2000/iq2000.opt:51
 msgid "Known IQ2000 CPUs (for use with the -mcpu= option):"
@@ -13390,13 +13200,13 @@ msgstr "No ajusta la alineación del código y de datos de sólo lectura."
 #, fuzzy
 #| msgid "Align code and data to 32 bits"
 msgid "Align code and data to 32 bits."
-msgstr "Alínea código y datos a 32 bits."
+msgstr "Alinea código y datos a 32 bits."
 
 #: config/cris/cris.opt:133
 #, fuzzy
 #| msgid "Don't align items in code or data"
 msgid "Don't align items in code or data."
-msgstr "No alínea los elementos en el código o los datos."
+msgstr "No alinea los elementos en el código o los datos."
 
 #: config/cris/cris.opt:142
 #, fuzzy
@@ -13744,7 +13554,7 @@ msgstr "Permite evitar cut2 en SH5."
 #, fuzzy
 #| msgid "Align doubles at 64-bit boundaries"
 msgid "Align doubles at 64-bit boundaries."
-msgstr "Alínea doubles en límites de 64-bit."
+msgstr "Alinea doubles en límites de 64-bit."
 
 #: config/sh/sh.opt:257
 #, fuzzy
@@ -13970,7 +13780,7 @@ msgstr "Habilita el Coprocesador MeP con registros de 64-bit."
 #, fuzzy
 #| msgid "Enable IVC2 scheduling"
 msgid "Enable IVC2 scheduling."
-msgstr "Activa la calendarización IVC2."
+msgstr "Activa la planificación IVC2."
 
 #: config/mep/mep.opt:71
 #, fuzzy
@@ -15548,25 +15358,25 @@ msgstr ""
 #, fuzzy
 #| msgid "Align the start of functions"
 msgid "Align the start of functions."
-msgstr "Alínea el inicio de las funciones."
+msgstr "Alinea el inicio de las funciones."
 
 #: common.opt:899
 #, fuzzy
 #| msgid "Align labels which are only reached by jumping"
 msgid "Align labels which are only reached by jumping."
-msgstr "Alínea las etiquetas que solamente se alcanzan saltando."
+msgstr "Alinea las etiquetas que solamente se alcanzan saltando."
 
 #: common.opt:906
 #, fuzzy
 #| msgid "Align all labels"
 msgid "Align all labels."
-msgstr "Alínea todas las etiquetas."
+msgstr "Alinea todas las etiquetas."
 
 #: common.opt:913
 #, fuzzy
 #| msgid "Align the start of loops"
 msgid "Align the start of loops."
-msgstr "Alínea el inicio de los bucles."
+msgstr "Alinea el inicio de los bucles."
 
 #: common.opt:936
 #, fuzzy
@@ -16427,13 +16237,13 @@ msgstr "-fmessage-length=<número>\tLimita los diagnósticos a <número> caracte
 #, fuzzy
 #| msgid "Perform SMS based modulo scheduling before the first scheduling pass"
 msgid "Perform SMS based modulo scheduling before the first scheduling pass."
-msgstr "Realiza la calendarización SMS basada en módulo antes del primer paso de calendarización."
+msgstr "Realiza la planificación SMS basada en módulo antes del primer paso de planificación."
 
 #: common.opt:1747
 #, fuzzy
 #| msgid "Perform SMS based modulo scheduling with register moves allowed"
 msgid "Perform SMS based modulo scheduling with register moves allowed."
-msgstr "Realiza la calendarización módulo basada en SMS con movimientos permitidos de registros."
+msgstr "Realiza la planificación módulo basada en SMS con movimientos permitidos de registros."
 
 #: common.opt:1751
 #, fuzzy
@@ -16457,7 +16267,7 @@ msgstr "Usa la eliminación de almacenamiento muerto de RTL."
 #, fuzzy
 #| msgid "Enable/Disable the traditional scheduling in loops that already passed modulo scheduling"
 msgid "Enable/Disable the traditional scheduling in loops that already passed modulo scheduling."
-msgstr "Activa/Desactiva la calendarización tradicional en bucles que ya pasaron la calendarización módulo."
+msgstr "Activa/Desactiva la planificación tradicional en bucles que ya pasaron la planificación módulo."
 
 #: common.opt:1767
 #, fuzzy
@@ -16693,7 +16503,7 @@ msgstr ""
 #, fuzzy
 #| msgid "Enable register pressure sensitive insn scheduling"
 msgid "Relief of register pressure through live range shrinkage."
-msgstr "Activa la calendarización de insn sensible a la presión de registros."
+msgstr "Activa la planificación de insn sensible a la presión de registros."
 
 #: common.opt:1962
 #, fuzzy
@@ -16751,13 +16561,13 @@ msgstr "Desactiva las optimizaciones que asumen la conducta de un FP que redonde
 #, fuzzy
 #| msgid "Enable scheduling across basic blocks"
 msgid "Enable scheduling across basic blocks."
-msgstr "Activa la calendarización entre bloques básicos."
+msgstr "Activa la planificación entre bloques básicos."
 
 #: common.opt:2011
 #, fuzzy
 #| msgid "Enable register pressure sensitive insn scheduling"
 msgid "Enable register pressure sensitive insn scheduling."
-msgstr "Activa la calendarización de insn sensible a la presión de registros."
+msgstr "Activa la planificación de insn sensible a la presión de registros."
 
 #: common.opt:2015
 #, fuzzy
@@ -16787,7 +16597,7 @@ msgstr "-fsched-verbose=<número>\tEstablece el nivel de detalle del planificado
 #, fuzzy
 #| msgid "If scheduling post reload, do superblock scheduling"
 msgid "If scheduling post reload, do superblock scheduling."
-msgstr "Si se calendariza después de la recarga, hace la calendarización de superbloque."
+msgstr "Si se calendariza después de la recarga, hace la planificación de superbloque."
 
 #: common.opt:2039
 #, fuzzy
@@ -16805,25 +16615,25 @@ msgstr "Recalendariza las instrucciones después del alojamiento de registros."
 #, fuzzy
 #| msgid "Schedule instructions using selective scheduling algorithm"
 msgid "Schedule instructions using selective scheduling algorithm."
-msgstr "Calendariza instrucciones usando el algoritmo de calendarización selectivo."
+msgstr "Calendariza instrucciones usando el algoritmo de planificación selectivo."
 
 #: common.opt:2054
 #, fuzzy
 #| msgid "Run selective scheduling after reload"
 msgid "Run selective scheduling after reload."
-msgstr "Ejecuta la calendarización selectiva después de recargar."
+msgstr "Ejecuta la planificación selectiva después de recargar."
 
 #: common.opt:2058
 #, fuzzy
 #| msgid "Perform software pipelining of inner loops during selective scheduling"
 msgid "Perform software pipelining of inner loops during selective scheduling."
-msgstr "Realiza el `pipelining' de software de los bucles internos durante la calendarización selectiva."
+msgstr "Realiza el `pipelining' de software de los bucles internos durante la planificación selectiva."
 
 #: common.opt:2062
 #, fuzzy
 #| msgid "Perform software pipelining of outer loops during selective scheduling"
 msgid "Perform software pipelining of outer loops during selective scheduling."
-msgstr "Realiza el `pipelining' de software de los bucles externos durante la calendarización selectiva."
+msgstr "Realiza el `pipelining' de software de los bucles externos durante la planificación selectiva."
 
 #: common.opt:2066
 #, fuzzy
@@ -16839,7 +16649,7 @@ msgstr ""
 #, fuzzy
 #| msgid "Allow premature scheduling of queued insns"
 msgid "Allow premature scheduling of queued insns."
-msgstr "Permite la calendarización prematura de insns encoladas."
+msgstr "Permite la planificación prematura de insns encoladas."
 
 #: common.opt:2080
 #, fuzzy
@@ -16851,13 +16661,13 @@ msgstr "-fsched-stalled-insns=<número>\tEstablece el número de insns encoladas
 #, fuzzy
 #| msgid "Set dependence distance checking in premature scheduling of queued insns"
 msgid "Set dependence distance checking in premature scheduling of queued insns."
-msgstr "Establece la revisión de distancia de dependencias en la calendarización prematura de insns encoladas."
+msgstr "Establece la revisión de distancia de dependencias en la planificación prematura de insns encoladas."
 
 #: common.opt:2092
 #, fuzzy
 #| msgid "-fsched-stalled-insns-dep=<number>\tSet dependence distance checking in premature scheduling of queued insns"
 msgid "-fsched-stalled-insns-dep=<number>\tSet dependence distance checking in premature scheduling of queued insns."
-msgstr "-fsched-stalled-insns-dep=<número>\tEstablece la revisión de distancia de dependencias en la calendarización prematura de insns encoladas."
+msgstr "-fsched-stalled-insns-dep=<número>\tEstablece la revisión de distancia de dependencias en la planificación prematura de insns encoladas."
 
 #: common.opt:2096
 #, fuzzy
@@ -22813,7 +22623,7 @@ msgstr "-fname-mangling-version ya no tiene soporte"
 #: toplev.c:1316
 #, gcc-internal-format
 msgid "instruction scheduling not supported on this target machine"
-msgstr "no se admite la calendarización de instrucciones en este objetivo"
+msgstr "no se admite la planificación de instrucciones en este objetivo"
 
 #: toplev.c:1320
 #, gcc-internal-format
@@ -31375,7 +31185,7 @@ msgstr "la generación de instrucciones Probables a Ramificar está activada, pe
 #, fuzzy, gcc-internal-format
 #| msgid "instruction scheduling not supported on this target machine"
 msgid "CDX instructions are only supported with R2 architecture"
-msgstr "no se admite la calendarización de instrucciones en este objetivo"
+msgstr "no se admite la planificación de instrucciones en este objetivo"
 
 #: config/nios2/nios2.c:1383
 #, fuzzy, gcc-internal-format
@@ -63748,10 +63558,10 @@ msgstr "se crea un selector para el método %qE que no existe"
 #~ msgstr "Especifica el rango de registros a convertir en fijos"
 
 #~ msgid "If set, data speculative instructions will be chosen for schedule only if there are no other choices at the moment "
-#~ msgstr "Si está definido, se escogerán las instrucciones especulativas de datos para calendarización sólo si no hay otras opciones por el momento"
+#~ msgstr "Si está definido, se escogerán las instrucciones especulativas de datos para planificación sólo si no hay otras opciones por el momento"
 
 #~ msgid "If set, control speculative instructions will be chosen for schedule only if there are no other choices at the moment "
-#~ msgstr "Si está definido, se escogerán el control especulativo de instrucciones para calendarización sólo si no hay otras opciones por el momento"
+#~ msgstr "Si está definido, se escogerán el control especulativo de instrucciones para planificación sólo si no hay otras opciones por el momento"
 
 #~ msgid "Ignored (obsolete)"
 #~ msgstr "Se descarta (obsoleto)"
@@ -63812,7 +63622,7 @@ msgstr "se crea un selector para el método %qE que no existe"
 #~ msgstr "Soporte para el ABI Green Hills"
 
 #~ msgid "Specify CPU for scheduling purposes"
-#~ msgstr "Especifica el CPU para propósitos de calendarización"
+#~ msgstr "Especifica el CPU para propósitos de planificación"
 
 #~ msgid "Specify which type of AE to target. This option sets the mul-type and byte-access."
 #~ msgstr "Especifica a qué tipo de AE se apunta. Esta opción establece el tipo muly el acceso a byte."
@@ -66103,7 +65913,7 @@ msgstr "se crea un selector para el método %qE que no existe"
 #~ msgstr "faltan argumentos para \"-%s\""
 
 #~ msgid "If scheduling post reload, do trace scheduling"
-#~ msgstr "Si se calendariza después de la recarga, hace trazado de calendarización"
+#~ msgstr "Si se calendariza después de la recarga, hace trazado de planificación"
 
 #~ msgid "(Each undeclared identifier is reported only once"
 #~ msgstr "(Cada identificador sin declarar solamente se reporta una vez"
@@ -68112,7 +67922,7 @@ msgstr "se crea un selector para el método %qE que no existe"
 #~ "\n"
 
 #~ msgid "Maximum number of loops to perform swing modulo scheduling on (mainly for debugging)"
-#~ msgstr "Número máximo de bucles que realizan calendarización de cambio de módulo en (principalmente para depuración)"
+#~ msgstr "Número máximo de bucles que realizan planificación de cambio de módulo en (principalmente para depuración)"
 
 #~ msgid "Given N calls and V call-clobbered vars in a function.  Use .GLOBAL_VAR if NxV is larger than this limit"
 #~ msgstr "Dadas N llamadas y V variables sobreescritas por llamada en una función.  Use .GLOBAL_VAR si NxV es mayor que este límite"
@@ -69518,7 +69328,7 @@ msgstr "se crea un selector para el método %qE que no existe"
 #~ msgstr "No establece las definiciones de Windows"
 
 #~ msgid "Align doubles on word boundary"
-#~ msgstr "Alínea doubles en límites de word"
+#~ msgstr "Alinea doubles en límites de word"
 
 #~ msgid "Uninitialized locals in .data"
 #~ msgstr "Locales sin inicializar en .data"
@@ -69533,7 +69343,7 @@ msgstr "se crea un selector para el método %qE que no existe"
 #~ msgstr "No genera sin, cos, sqrt para FPU"
 
 #~ msgid "Do not align destination of the string operations"
-#~ msgstr "No alínea destino de las operaciones de cadenas"
+#~ msgstr "No alinea destino de las operaciones de cadenas"
 
 #~ msgid "Do not inline all known string operations"
 #~ msgstr "No convierte a inline todas las operaciones de cadenas conocidas"
@@ -69800,7 +69610,7 @@ msgstr "se crea un selector para el método %qE que no existe"
 #~ msgstr "Especifica sí/no si se utiliza la coma flotante en los GPRs"
 
 #~ msgid "Don't align to the base type of the bit-field"
-#~ msgstr "No alínea al tipo base del campo de bit"
+#~ msgstr "No alinea al tipo base del campo de bit"
 
 #~ msgid "Assume that unaligned accesses are handled by the system"
 #~ msgstr "Asume que los accesos sin alinear son manejados por el sistema"
-- 
cgit v1.1


From f13d510e361b10bc0c83e211671f158c6665cd8e Mon Sep 17 00:00:00 2001
From: Kelvin Nilsen <kelvin@gcc.gnu.org>
Date: Thu, 1 Dec 2016 22:52:07 +0000
Subject: re PR target/78577 (Fix define_insn operand types for vexturhlx,
 vexturhrx, vextuwlx, and vextuwrx patterns)

gcc/ChangeLog:

2016-12-01  Kelvin Nilsen  <kelvin@gcc.gnu.org>

	PR target/78577
	* config/rs6000/vsx.md (vextuhlx): Revise mode of operand 2.
	(vextuhrx): Likewise.
	(vextuwlx): Likewise.
	(vextuwrx): Likewise.

From-SVN: r243141
---
 gcc/ChangeLog            | 8 ++++++++
 gcc/config/rs6000/vsx.md | 8 ++++----
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b23481f..7f9dd0e 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2016-12-01  Kelvin Nilsen  <kelvin@gcc.gnu.org>
+
+	PR target/78577
+	* config/rs6000/vsx.md (vextuhlx): Revise mode of operand 2.
+	(vextuhrx): Likewise.
+	(vextuwlx): Likewise.
+	(vextuwrx): Likewise.
+
 2016-12-01  David Malcolm  <dmalcolm@redhat.com>
 
 	* dwarf2out.c (dwarf2out_c_finalize): Reset early_dwarf and
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 01d275d..1801bc0 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3648,7 +3648,7 @@
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(unspec:SI
 	 [(match_operand:SI 1 "register_operand" "r")
-	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
+	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
 	 UNSPEC_VEXTUHLX))]
   "TARGET_P9_VECTOR"
   "vextuhlx %0,%1,%2"
@@ -3659,7 +3659,7 @@
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(unspec:SI
 	 [(match_operand:SI 1 "register_operand" "r")
-	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
+	  (match_operand:V8HI 2 "altivec_register_operand" "v")]
 	 UNSPEC_VEXTUHRX))]
   "TARGET_P9_VECTOR"
   "vextuhrx %0,%1,%2"
@@ -3670,7 +3670,7 @@
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(unspec:SI
 	 [(match_operand:SI 1 "register_operand" "r")
-	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
+	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
 	 UNSPEC_VEXTUWLX))]
   "TARGET_P9_VECTOR"
   "vextuwlx %0,%1,%2"
@@ -3681,7 +3681,7 @@
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(unspec:SI
 	 [(match_operand:SI 1 "register_operand" "r")
-	  (match_operand:V16QI 2 "altivec_register_operand" "v")]
+	  (match_operand:V4SI 2 "altivec_register_operand" "v")]
 	 UNSPEC_VEXTUWRX))]
   "TARGET_P9_VECTOR"
   "vextuwrx %0,%1,%2"
-- 
cgit v1.1


From b55e6680dec27409fe0e6dc800500564f1a06b53 Mon Sep 17 00:00:00 2001
From: Ma Jiang <ma.jiang@zte.com.cn>
Date: Thu, 1 Dec 2016 23:02:51 +0000
Subject: acx.m4: Change "tail +16c" to "tail -c +17".

	* config/acx.m4: Change "tail +16c" to "tail -c +17".
	* configure: Regenerated.

From-SVN: r243142
---
 ChangeLog     | 5 +++++
 config/acx.m4 | 2 +-
 configure     | 2 +-
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index a3320f1..bd2ad55 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-01  Ma Jiang  <ma.jiang@zte.com.cn>
+
+	* config/acx.m4: Change "tail +16c" to "tail -c +17".
+	* configure: Regenerated.
+
 2016-12-01  Matthias Klose  <doko@ubuntu.com>
 
 	* configure.ac: Don't use pkg-config to check for bdw-gc.
diff --git a/config/acx.m4 b/config/acx.m4
index 9ff31eb..ab42972 100644
--- a/config/acx.m4
+++ b/config/acx.m4
@@ -404,7 +404,7 @@ AC_DEFUN([ACX_PROG_CMP_IGNORE_INITIAL],
 [AC_CACHE_CHECK([how to compare bootstrapped objects], gcc_cv_prog_cmp_skip,
 [ echo abfoo >t1
   echo cdfoo >t2
-  gcc_cv_prog_cmp_skip='tail +16c $$f1 > tmp-foo1; tail +16c $$f2 > tmp-foo2; cmp tmp-foo1 tmp-foo2'
+  gcc_cv_prog_cmp_skip='tail -c +17 $$f1 > tmp-foo1; tail -c +17 $$f2 > tmp-foo2; cmp tmp-foo1 tmp-foo2'
   if cmp t1 t2 2 2 > /dev/null 2>&1; then
     if cmp t1 t2 1 1 > /dev/null 2>&1; then
       :
diff --git a/configure b/configure
index fb79e73..b6389e4 100755
--- a/configure
+++ b/configure
@@ -5273,7 +5273,7 @@ if test "${gcc_cv_prog_cmp_skip+set}" = set; then :
 else
    echo abfoo >t1
   echo cdfoo >t2
-  gcc_cv_prog_cmp_skip='tail +16c $$f1 > tmp-foo1; tail +16c $$f2 > tmp-foo2; cmp tmp-foo1 tmp-foo2'
+  gcc_cv_prog_cmp_skip='tail -c +17 $$f1 > tmp-foo1; tail -c +17 $$f2 > tmp-foo2; cmp tmp-foo1 tmp-foo2'
   if cmp t1 t2 2 2 > /dev/null 2>&1; then
     if cmp t1 t2 1 1 > /dev/null 2>&1; then
       :
-- 
cgit v1.1


From 90ee6453b254cd77819bc30d9d13a3c9828fd1c5 Mon Sep 17 00:00:00 2001
From: Elizebeth Punnoose <elizebeth.punnoose@hpe.com>
Date: Thu, 1 Dec 2016 23:11:35 +0000
Subject: re PR fortran/77505 (Negative character length not treated as LEN=0)

2016-12-01  Elizebeth Punnoose  <elizebeth.punnoose@hpe.com>

	PR fortran/77505
	* trans-array.c (trans_array_constructor): Treat negative character
	length as LEN = 0.


2016-12-01  Elizebeth Punnoose  <elizebeth.punnoose@hpe.com>

	PR fortran/77505
	* gfortran.dg/char_length_20.f90: New test.
	* gfortran.dg/char_length_21.f90: Ditto.

From-SVN: r243143
---
 gcc/fortran/ChangeLog                        |  6 ++++++
 gcc/fortran/trans-array.c                    | 25 +++++++++++++++++++++++++
 gcc/testsuite/ChangeLog                      |  6 ++++++
 gcc/testsuite/gfortran.dg/char_length_20.f90 | 13 +++++++++++++
 gcc/testsuite/gfortran.dg/char_length_21.f90 | 11 +++++++++++
 5 files changed, 61 insertions(+)
 create mode 100644 gcc/testsuite/gfortran.dg/char_length_20.f90
 create mode 100644 gcc/testsuite/gfortran.dg/char_length_21.f90

diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index d410392..20a9f2e 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-01  Elizebeth Punnoose  <elizebeth.punnoose@hpe.com>
+
+	PR fortran/77505
+	* trans-array.c (trans_array_constructor): Treat negative character
+	length as LEN = 0.
+
 2016-12-01  Steven G. Kargl  <kargl@gcc.gnu.org>
 
 	PR fortran/78279
diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c
index 803462a4..ac90a4b 100644
--- a/gcc/fortran/trans-array.c
+++ b/gcc/fortran/trans-array.c
@@ -2226,6 +2226,8 @@ trans_array_constructor (gfc_ss * ss, locus * where)
   gfc_ss_info *ss_info;
   gfc_expr *expr;
   gfc_ss *s;
+  tree neg_len;
+  char *msg;
 
   /* Save the old values for nested checking.  */
   old_first_len = first_len;
@@ -2271,6 +2273,29 @@ trans_array_constructor (gfc_ss * ss, locus * where)
 	  gfc_conv_expr_type (&length_se, expr->ts.u.cl->length,
 			      gfc_charlen_type_node);
 	  ss_info->string_length = length_se.expr;
+
+	  /* Check if the character length is negative.  If it is, then
+	     set LEN = 0.  */
+	  neg_len = fold_build2_loc (input_location, LT_EXPR,
+				     boolean_type_node, ss_info->string_length,
+				     build_int_cst (gfc_charlen_type_node, 0));
+	  /* Print a warning if bounds checking is enabled.  */
+	  if (gfc_option.rtcheck & GFC_RTCHECK_BOUNDS)
+	    {
+	      msg = xasprintf ("Negative character length treated as LEN = 0");
+	      gfc_trans_runtime_check (false, true, neg_len, &length_se.pre,
+				       where, msg);
+	      free (msg);
+	    }
+
+	  ss_info->string_length
+	    = fold_build3_loc (input_location, COND_EXPR,
+			       gfc_charlen_type_node, neg_len,
+			       build_int_cst (gfc_charlen_type_node, 0),
+			       ss_info->string_length);
+	  ss_info->string_length = gfc_evaluate_now (ss_info->string_length,
+						     &length_se.pre);
+
 	  gfc_add_block_to_block (&outer_loop->pre, &length_se.pre);
 	  gfc_add_block_to_block (&outer_loop->post, &length_se.post);
 	}
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 321a48a..dcbdf56 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-01  Elizebeth Punnoose  <elizebeth.punnoose@hpe.com>
+
+	PR fortran/77505
+	* gfortran.dg/char_length_20.f90: New test.
+	* gfortran.dg/char_length_21.f90: Ditto.
+
 2016-12-01  Steven G. Kargl  <kargl@gcc.gnu.org>
 
 	PR fortran/78279
diff --git a/gcc/testsuite/gfortran.dg/char_length_20.f90 b/gcc/testsuite/gfortran.dg/char_length_20.f90
new file mode 100644
index 0000000..38a19c5
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/char_length_20.f90
@@ -0,0 +1,13 @@
+! { dg-do run }
+! { dg-options "-fcheck=bounds" }
+program rabbithole
+   implicit none
+   character(len=:), allocatable :: text_block(:)
+   integer i, ii
+   character(len=10) :: cten='abcdefghij'
+   character(len=20) :: ctwenty='abcdefghijabcdefghij'
+   ii = -6
+   text_block=[ character(len=ii) :: cten, ctwenty ]
+   if (any(len_trim(text_block) /= 0)) call abort
+end program rabbithole
+! { dg-output "At line 10 of file .*char_length_20.f90.*Fortran runtime warning: Negative character length treated as LEN = 0" }
diff --git a/gcc/testsuite/gfortran.dg/char_length_21.f90 b/gcc/testsuite/gfortran.dg/char_length_21.f90
new file mode 100644
index 0000000..76b7e8e
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/char_length_21.f90
@@ -0,0 +1,11 @@
+! { dg-do run }
+program rabbithole
+   implicit none
+   character(len=:), allocatable :: text_block(:)
+   integer i, ii
+   character(len=10) :: cten='abcdefghij'
+   character(len=20) :: ctwenty='abcdefghijabcdefghij'
+   ii = -6
+   text_block = [character(len=ii) :: cten, ctwenty]
+   if (any(len_trim(text_block) /= 0)) call abort
+end program rabbithole
-- 
cgit v1.1


From 0e81719703bf681533220f3629cc4f1a24110778 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Fri, 2 Dec 2016 00:15:57 +0100
Subject: re PR tree-optimization/78586 (Wrong code caused by
 printf-return-value)

	PR tree-optimization/78586
	* gimple-ssa-sprintf.c (format_integer): Don't handle NOP_EXPR,
	CONVERT_EXPR or COMPONENT_REF here.  Formatting fix.  For
	SSA_NAME_DEF_STMT with NOP_EXPR only change argtype if the rhs1's
	type is INTEGER_TYPE or POINTER_TYPE.

From-SVN: r243145
---
 gcc/ChangeLog            |  8 ++++++++
 gcc/gimple-ssa-sprintf.c | 30 ++++++++++++------------------
 2 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 7f9dd0e..c3170c0 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2016-12-02  Jakub Jelinek  <jakub@redhat.com>
+
+	PR tree-optimization/78586
+	* gimple-ssa-sprintf.c (format_integer): Don't handle NOP_EXPR,
+	CONVERT_EXPR or COMPONENT_REF here.  Formatting fix.  For
+	SSA_NAME_DEF_STMT with NOP_EXPR only change argtype if the rhs1's
+	type is INTEGER_TYPE or POINTER_TYPE.
+
 2016-12-01  Kelvin Nilsen  <kelvin@gcc.gnu.org>
 
 	PR target/78577
diff --git a/gcc/gimple-ssa-sprintf.c b/gcc/gimple-ssa-sprintf.c
index 99a635a..e86c4dc 100644
--- a/gcc/gimple-ssa-sprintf.c
+++ b/gcc/gimple-ssa-sprintf.c
@@ -968,24 +968,13 @@ format_integer (const conversion_spec &spec, tree arg)
     }
   else if (TREE_CODE (TREE_TYPE (arg)) == INTEGER_TYPE
 	   || TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE)
-    {
-      /* Determine the type of the provided non-constant argument.  */
-      if (TREE_CODE (arg) == NOP_EXPR)
-	arg = TREE_OPERAND (arg, 0);
-      else if (TREE_CODE (arg) == CONVERT_EXPR)
-	arg = TREE_OPERAND (arg, 0);
-      if (TREE_CODE (arg) == COMPONENT_REF)
-	arg = TREE_OPERAND (arg, 1);
-
-      argtype = TREE_TYPE (arg);
-    }
+    /* Determine the type of the provided non-constant argument.  */
+    argtype = TREE_TYPE (arg);
   else
-    {
-      /* Don't bother with invalid arguments since they likely would
-	 have already been diagnosed, and disable any further checking
-	 of the format string by returning [-1, -1].  */
-      return fmtresult ();
-    }
+    /* Don't bother with invalid arguments since they likely would
+       have already been diagnosed, and disable any further checking
+       of the format string by returning [-1, -1].  */
+    return fmtresult ();
 
   fmtresult res;
 
@@ -1059,7 +1048,12 @@ format_integer (const conversion_spec &spec, tree arg)
 		}
 
 	      if (code == NOP_EXPR)
-		argtype = TREE_TYPE (gimple_assign_rhs1 (def));
+		{
+		  tree type = TREE_TYPE (gimple_assign_rhs1 (def));
+		  if (TREE_CODE (type) == INTEGER_TYPE
+		      || TREE_CODE (type) == POINTER_TYPE)
+		    argtype = type;
+		}
 	    }
 	}
     }
-- 
cgit v1.1


From f3adbf9e9355aff989bd5b0c6ba227cc01bf57ec Mon Sep 17 00:00:00 2001
From: GCC Administrator <gccadmin@gcc.gnu.org>
Date: Fri, 2 Dec 2016 00:16:20 +0000
Subject: Daily bump.

From-SVN: r243150
---
 gcc/DATESTAMP | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP
index 73f27d5..b720a20 100644
--- a/gcc/DATESTAMP
+++ b/gcc/DATESTAMP
@@ -1 +1 @@
-20161201
+20161202
-- 
cgit v1.1


From b06496b1617ffcaec0e82fd4cca9eae5e0301cd5 Mon Sep 17 00:00:00 2001
From: Jeff Law <law@redhat.com>
Date: Thu, 1 Dec 2016 23:40:57 -0700
Subject: * tree-ssa-threadedge.c 
 (record_temporary_equivalences_from_stmts_at_dest): Avoid temporary 
 propagation of operands if there are no operands.

From-SVN: r243152
---
 gcc/ChangeLog             | 6 ++++++
 gcc/tree-ssa-threadedge.c | 7 ++++---
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c3170c0..75881ee 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-01  Jeff Law  <law@redhat.com>
+
+	* tree-ssa-threadedge.c
+	(record_temporary_equivalences_from_stmts_at_dest): Avoid temporary
+	propagation of operands if there are no operands.
+
 2016-12-02  Jakub Jelinek  <jakub@redhat.com>
 
 	PR tree-optimization/78586
diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c
index 534292c..3fdd59e 100644
--- a/gcc/tree-ssa-threadedge.c
+++ b/gcc/tree-ssa-threadedge.c
@@ -328,9 +328,10 @@ record_temporary_equivalences_from_stmts_at_dest (edge e,
 	     SSA_NAME_VALUE in addition to its own lattice.  */
 	  cached_lhs = gimple_fold_stmt_to_constant_1 (stmt,
 						       threadedge_valueize);
-          if (!cached_lhs
-              || (TREE_CODE (cached_lhs) != SSA_NAME
-                  && !is_gimple_min_invariant (cached_lhs)))
+          if (NUM_SSA_OPERANDS (stmt, SSA_OP_ALL_USES) != 0
+	      && (!cached_lhs
+                  || (TREE_CODE (cached_lhs) != SSA_NAME
+                      && !is_gimple_min_invariant (cached_lhs))))
 	    {
 	      /* We're going to temporarily copy propagate the operands
 		 and see if that allows us to simplify this statement.  */
-- 
cgit v1.1


From 84b0769e335819050ecdd86301a5f5d41fa5df8b Mon Sep 17 00:00:00 2001
From: Maxim Ostapenko <m.ostapenko@samsung.com>
Date: Fri, 2 Dec 2016 07:39:27 +0000
Subject: Add support for ASan odr_indicator.

config/

        * bootstrap-asan.mk: Replace LSAN_OPTIONS=detect_leaks=0 with
        ASAN_OPTIONS=detect_leaks=0:use_odr_indicator=1.

gcc/

        * asan.c (asan_global_struct): Refactor.
        (create_odr_indicator): New function.
        (asan_needs_odr_indicator_p): Likewise.
        (is_odr_indicator): Likewise.
        (asan_add_global): Introduce odr_indicator_ptr. Pass it into global's
        constructor.
        (asan_protect_global): Do not protect odr indicators.

gcc/c-family/

	* c-attribs.c (asan odr indicator): New attribute.
	(handle_asan_odr_indicator_attribute): New function.

gcc/testsuite/

        * c-c++-common/asan/no-redundant-odr-indicators-1.c: New test.

From-SVN: r243153
---
 config/ChangeLog                                   |  5 ++
 config/bootstrap-asan.mk                           |  2 +-
 gcc/ChangeLog                                      | 10 +++
 gcc/asan.c                                         | 87 ++++++++++++++++++++--
 gcc/c-family/ChangeLog                             |  5 ++
 gcc/c-family/c-attribs.c                           | 14 ++++
 gcc/testsuite/ChangeLog                            |  4 +
 .../asan/no-redundant-odr-indicators-1.c           | 17 +++++
 8 files changed, 135 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/asan/no-redundant-odr-indicators-1.c

diff --git a/config/ChangeLog b/config/ChangeLog
index 8dcb483..a823d21 100644
--- a/config/ChangeLog
+++ b/config/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-02  Maxim Ostapenko  <m.ostapenko@samsung.com>
+
+	* bootstrap-asan.mk: Replace LSAN_OPTIONS=detect_leaks=0 with
+	ASAN_OPTIONS=detect_leaks=0:use_odr_indicator=1.
+
 2016-12-01  Matthias Klose  <doko@ubuntu.com>
 
 	* pkg.m4: Remove.
diff --git a/config/bootstrap-asan.mk b/config/bootstrap-asan.mk
index 70baaf9..e73d4c2 100644
--- a/config/bootstrap-asan.mk
+++ b/config/bootstrap-asan.mk
@@ -1,7 +1,7 @@
 # This option enables -fsanitize=address for stage2 and stage3.
 
 # Suppress LeakSanitizer in bootstrap.
-export LSAN_OPTIONS="detect_leaks=0"
+export ASAN_OPTIONS=detect_leaks=0:use_odr_indicator=1
 
 STAGE2_CFLAGS += -fsanitize=address
 STAGE3_CFLAGS += -fsanitize=address
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 75881ee..ef080c7 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2016-12-02  Maxim Ostapenko  <m.ostapenko@samsung.com>
+
+	* asan.c (asan_global_struct): Refactor.
+	(create_odr_indicator): New function.
+	(asan_needs_odr_indicator_p): Likewise.
+	(is_odr_indicator): Likewise.
+	(asan_add_global): Introduce odr_indicator_ptr. Pass it into global's
+	constructor.
+	(asan_protect_global): Do not protect odr indicators.
+
 2016-12-01  Jeff Law  <law@redhat.com>
 
 	* tree-ssa-threadedge.c
diff --git a/gcc/asan.c b/gcc/asan.c
index cb5d615..5af9547 100644
--- a/gcc/asan.c
+++ b/gcc/asan.c
@@ -1388,6 +1388,16 @@ asan_needs_local_alias (tree decl)
   return DECL_WEAK (decl) || !targetm.binds_local_p (decl);
 }
 
+/* Return true if DECL, a global var, is an artificial ODR indicator symbol
+   and therefore doesn't need protection.  */
+
+static bool
+is_odr_indicator (tree decl)
+{
+  return (DECL_ARTIFICIAL (decl)
+	  && lookup_attribute ("asan odr indicator", DECL_ATTRIBUTES (decl)));
+}
+
 /* Return true if DECL is a VAR_DECL that should be protected
    by Address Sanitizer, by appending a red zone with protected
    shadow memory after it and aligning it to at least
@@ -1436,7 +1446,8 @@ asan_protect_global (tree decl)
       || ASAN_RED_ZONE_SIZE * BITS_PER_UNIT > MAX_OFILE_ALIGNMENT
       || !valid_constant_size_p (DECL_SIZE_UNIT (decl))
       || DECL_ALIGN_UNIT (decl) > 2 * ASAN_RED_ZONE_SIZE
-      || TREE_TYPE (decl) == ubsan_get_source_location_type ())
+      || TREE_TYPE (decl) == ubsan_get_source_location_type ()
+      || is_odr_indicator (decl))
     return false;
 
   rtl = DECL_RTL (decl);
@@ -2266,14 +2277,15 @@ asan_dynamic_init_call (bool after_p)
 static tree
 asan_global_struct (void)
 {
-  static const char *field_names[8]
+  static const char *field_names[]
     = { "__beg", "__size", "__size_with_redzone",
-	"__name", "__module_name", "__has_dynamic_init", "__location", "__odr_indicator"};
-  tree fields[8], ret;
-  int i;
+	"__name", "__module_name", "__has_dynamic_init", "__location",
+	"__odr_indicator" };
+  tree fields[ARRAY_SIZE (field_names)], ret;
+  unsigned i;
 
   ret = make_node (RECORD_TYPE);
-  for (i = 0; i < 8; i++)
+  for (i = 0; i < ARRAY_SIZE (field_names); i++)
     {
       fields[i]
 	= build_decl (UNKNOWN_LOCATION, FIELD_DECL,
@@ -2295,6 +2307,63 @@ asan_global_struct (void)
   return ret;
 }
 
+/* Create and return odr indicator symbol for DECL.
+   TYPE is __asan_global struct type as returned by asan_global_struct.  */
+
+static tree
+create_odr_indicator (tree decl, tree type)
+{
+  char *name;
+  tree uptr = TREE_TYPE (DECL_CHAIN (TYPE_FIELDS (type)));
+  tree decl_name
+    = (HAS_DECL_ASSEMBLER_NAME_P (decl) ? DECL_ASSEMBLER_NAME (decl)
+					: DECL_NAME (decl));
+  /* DECL_NAME theoretically might be NULL.  Bail out with 0 in this case.  */
+  if (decl_name == NULL_TREE)
+    return build_int_cst (uptr, 0);
+  size_t len = strlen (IDENTIFIER_POINTER (decl_name)) + sizeof ("__odr_asan_");
+  name = XALLOCAVEC (char, len);
+  snprintf (name, len, "__odr_asan_%s", IDENTIFIER_POINTER (decl_name));
+#ifndef NO_DOT_IN_LABEL
+  name[sizeof ("__odr_asan") - 1] = '.';
+#elif !defined(NO_DOLLAR_IN_LABEL)
+  name[sizeof ("__odr_asan") - 1] = '$';
+#endif
+  tree var = build_decl (UNKNOWN_LOCATION, VAR_DECL, get_identifier (name),
+			 char_type_node);
+  TREE_ADDRESSABLE (var) = 1;
+  TREE_READONLY (var) = 0;
+  TREE_THIS_VOLATILE (var) = 1;
+  DECL_GIMPLE_REG_P (var) = 0;
+  DECL_ARTIFICIAL (var) = 1;
+  DECL_IGNORED_P (var) = 1;
+  TREE_STATIC (var) = 1;
+  TREE_PUBLIC (var) = 1;
+  DECL_VISIBILITY (var) = DECL_VISIBILITY (decl);
+  DECL_VISIBILITY_SPECIFIED (var) = DECL_VISIBILITY_SPECIFIED (decl);
+
+  TREE_USED (var) = 1;
+  tree ctor = build_constructor_va (TREE_TYPE (var), 1, NULL_TREE,
+				    build_int_cst (unsigned_type_node, 0));
+  TREE_CONSTANT (ctor) = 1;
+  TREE_STATIC (ctor) = 1;
+  DECL_INITIAL (var) = ctor;
+  DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("asan odr indicator"),
+				     NULL, DECL_ATTRIBUTES (var));
+  make_decl_rtl (var);
+  varpool_node::finalize_decl (var);
+  return fold_convert (uptr, build_fold_addr_expr (var));
+}
+
+/* Return true if DECL, a global var, might be overridden and needs
+   an additional odr indicator symbol.  */
+
+static bool
+asan_needs_odr_indicator_p (tree decl)
+{
+  return !DECL_ARTIFICIAL (decl) && !DECL_WEAK (decl) && TREE_PUBLIC (decl);
+}
+
 /* Append description of a single global DECL into vector V.
    TYPE is __asan_global struct type as returned by asan_global_struct.  */
 
@@ -2335,6 +2404,9 @@ asan_add_global (tree decl, tree type, vec<constructor_elt, va_gc> *v)
       assemble_alias (refdecl, DECL_ASSEMBLER_NAME (decl));
     }
 
+  tree odr_indicator_ptr
+    = (asan_needs_odr_indicator_p (decl) ? create_odr_indicator (decl, type)
+					 : build_int_cst (uptr, 0));
   CONSTRUCTOR_APPEND_ELT (vinner, NULL_TREE,
 			  fold_convert (const_ptr_type_node,
 					build_fold_addr_expr (refdecl)));
@@ -2382,8 +2454,7 @@ asan_add_global (tree decl, tree type, vec<constructor_elt, va_gc> *v)
   else
     locptr = build_int_cst (uptr, 0);
   CONSTRUCTOR_APPEND_ELT (vinner, NULL_TREE, locptr);
-  /* TODO: support ODR indicators.  */
-  CONSTRUCTOR_APPEND_ELT (vinner, NULL_TREE, build_int_cst (uptr, 0));
+  CONSTRUCTOR_APPEND_ELT (vinner, NULL_TREE, odr_indicator_ptr);
   init = build_constructor (type, vinner);
   CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init);
 }
diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog
index 183493d..5890798 100644
--- a/gcc/c-family/ChangeLog
+++ b/gcc/c-family/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-02  Maxim Ostapenko  <m.ostapenko@samsung.com>
+
+	* c-attribs.c (asan odr indicator): New attribute.
+	(handle_asan_odr_indicator_attribute): New function.
+
 2016-11-26  Prathamesh Kulkarni  <prathamesh.kulkarni@linaro.org>
 
 	* c-common.c (c_common_nodes_and_builtins): Remove initialization of
diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c
index 964efe9..f5adade 100644
--- a/gcc/c-family/c-attribs.c
+++ b/gcc/c-family/c-attribs.c
@@ -57,6 +57,8 @@ static tree handle_no_address_safety_analysis_attribute (tree *, tree, tree,
 							 int, bool *);
 static tree handle_no_sanitize_undefined_attribute (tree *, tree, tree, int,
 						    bool *);
+static tree handle_asan_odr_indicator_attribute (tree *, tree, tree, int,
+						 bool *);
 static tree handle_stack_protect_attribute (tree *, tree, tree, int, bool *);
 static tree handle_noinline_attribute (tree *, tree, tree, int, bool *);
 static tree handle_noclone_attribute (tree *, tree, tree, int, bool *);
@@ -292,6 +294,9 @@ const struct attribute_spec c_common_attribute_table[] =
   { "no_sanitize_undefined",  0, 0, true, false, false,
 			      handle_no_sanitize_undefined_attribute,
 			      false },
+  { "asan odr indicator",     0, 0, true, false, false,
+			      handle_asan_odr_indicator_attribute,
+			      false },
   { "warning",		      1, 1, true,  false, false,
 			      handle_error_attribute, false },
   { "error",		      1, 1, true,  false, false,
@@ -591,6 +596,15 @@ handle_no_sanitize_undefined_attribute (tree *node, tree name, tree, int,
   return NULL_TREE;
 }
 
+/* Handle an "asan odr indicator" attribute; arguments as in
+   struct attribute_spec.handler.  */
+
+static tree
+handle_asan_odr_indicator_attribute (tree *, tree, tree, int, bool *)
+{
+  return NULL_TREE;
+}
+
 /* Handle a "stack_protect" attribute; arguments as in
    struct attribute_spec.handler.  */
 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index dcbdf56..c000e07 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2016-12-02  Maxim Ostapenko  <m.ostapenko@samsung.com>
+
+	* c-c++-common/asan/no-redundant-odr-indicators-1.c: New test.
+
 2016-12-01  Elizebeth Punnoose  <elizebeth.punnoose@hpe.com>
 
 	PR fortran/77505
diff --git a/gcc/testsuite/c-c++-common/asan/no-redundant-odr-indicators-1.c b/gcc/testsuite/c-c++-common/asan/no-redundant-odr-indicators-1.c
new file mode 100644
index 0000000..9231264
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/asan/no-redundant-odr-indicators-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-skip-if "" { *-*-* } { "*" } { "-O0" } } */
+
+/* Local variables should not have odr indicators.  */
+static int a = 2;
+/* Thread local variables should not have odr indicators.  */
+__thread int b = 3;
+/* Externally visible  variables should have odr indicators.  */
+int c = 1;
+
+int main () {
+    return 0;
+}
+
+/* { dg-final { scan-assembler-not "odr_asan\[\.\$\]a" } } */
+/* { dg-final { scan-assembler-not "odr_asan\[\.\$\]b" } } */
+/* { dg-final { scan-assembler "odr_asan\[\.\$\]c" } } */
-- 
cgit v1.1


From a6a2b532f9c4e92277e390febc8c07f773becb1b Mon Sep 17 00:00:00 2001
From: Andreas Krebbel <krebbel@linux.vnet.ibm.com>
Date: Fri, 2 Dec 2016 08:21:43 +0000
Subject: S/390: Fix vector all/any cc modes.

This fixes a problem with the vector compares producing CC mode
results.

The instructions produce condition code modes which can be
interpreted to check either an ALL elements or an ANY element result.
As the modes were used before, they could not be inverted by the
middle-end by inverting the comparison code (e.g. eq to ne).  The
result usually was just wrong.

In fact, inverting a comparison code on a CCVALL mode would also
require changing the mode to CCVANY, but this cannot be done easily in
the middle-end.  With this patch the meaning of an ALL cc mode only
refers to the not-inverted comparison code (e.g. eq, gt, ge).  With
that change, inverting the comparison code matches a NOT operation on
the condition code mask again.

Bootstrapped and regression tested on s390 and s390x.

Bye,

-Andreas-

gcc/testsuite/ChangeLog:

2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	* gcc.target/s390/vector/vec-scalar-cmp-1.c: Fix and harden the
	pattern checks.
	* gcc.target/s390/zvector/vec-cmp-1.c: New test.

gcc/ChangeLog:

2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	* config/s390/s390-modes.def (CCVEQANY, CCVH, CCVHANY, CCVHU)
	(CCVHUANY): Remove modes.
	(CCVIH, CCVIHU, CCVIALL, CCVIANY, CCVFALL, CCVFANY): Add modes and
	documentation.
	* config/s390/s390.c (s390_match_ccmode_set): Rename cc modes.
	(s390_expand_vec_compare_scalar): Pick one of the cc consumer
	modes.
	(s390_branch_condition_mask): Adjust to use the new cc consumer
	modes.  The new modes allow for proper reversal in the middle-end.
	(s390_expand_vec_compare_cc): Determine the proper cc producer and
	consumer modes for a comparison.
	* config/s390/s390.md: Rename CCVH to CCVIH and CCVHU to CCVIHU
	throughout the file.
	* config/s390/vx-builtins.md: Likewise.

From-SVN: r243154
---
 gcc/ChangeLog                                      |  17 ++
 gcc/config/s390/s390-modes.def                     |  72 ++++---
 gcc/config/s390/s390.c                             | 226 +++++++++++----------
 gcc/config/s390/s390.md                            |   2 +-
 gcc/config/s390/vx-builtins.md                     |  44 ++--
 gcc/testsuite/ChangeLog                            |   6 +
 .../gcc.target/s390/vector/vec-scalar-cmp-1.c      |  24 ++-
 gcc/testsuite/gcc.target/s390/zvector/vec-cmp-1.c  | 173 ++++++++++++++++
 8 files changed, 388 insertions(+), 176 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/zvector/vec-cmp-1.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ef080c7..2d55409 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,20 @@
+2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/s390-modes.def (CCVEQANY, CCVH, CCVHANY, CCVHU)
+	(CCVHUANY): Remove modes.
+	(CCVIH, CCVIHU, CCVIALL, CCVIANY, CCVFALL, CCVFANY): Add modes and
+	documentation.
+	* config/s390/s390.c (s390_match_ccmode_set): Rename cc modes.
+	(s390_expand_vec_compare_scalar): Pick one of the cc consumer
+	modes.
+	(s390_branch_condition_mask): Adjust to use the new cc consumer
+	modes.  The new modes allow for proper reversal in the middle-end.
+	(s390_expand_vec_compare_cc): Determine the proper cc producer and
+	consumer modes for a comparison.
+	* config/s390/s390.md: Rename CCVH to CCVIH and CCVHU to CCVIHU
+	throughout the file.
+	* config/s390/vx-builtins.md: Likewise.
+
 2016-12-02  Maxim Ostapenko  <m.ostapenko@samsung.com>
 
 	* asan.c (asan_global_struct): Refactor.
diff --git a/gcc/config/s390/s390-modes.def b/gcc/config/s390/s390-modes.def
index 69235b6..15ff903 100644
--- a/gcc/config/s390/s390-modes.def
+++ b/gcc/config/s390/s390-modes.def
@@ -84,22 +84,6 @@ Requested mode            -> Destination CC register mode
 CCS, CCU, CCT, CCSR, CCUR -> CCZ
 CCA                       -> CCAP, CCAN
 
-Vector comparison modes
-
-CCVEQ  	  EQ	  - 	       - 	   NE	      (VCEQ)
-CCVEQANY  EQ	  EQ	       - 	   NE	      (VCEQ)
-
-CCVH	  GT	  - 	       - 	   LE	      (VCH)
-CCVHANY	  GT	  GT	       - 	   LE	      (VCH)
-CCVHU	  GTU	  -  	       -  	   LEU	      (VCHL)
-CCVHUANY  GTU	  GTU	       -  	   LEU	      (VCHL)
-
-CCVFH	  GT	  -   	       -   	   UNLE	      (VFCH)
-CCVFHANY  GT	  GT	       -   	   UNLE	      (VFCH)
-CCVFHE	  GE	  -   	       -   	   UNLT	      (VFCHE)
-CCVFHEANY GE	  GE	       -   	   UNLT	      (VFCHE)
-
-
 
 
 *** Comments ***
@@ -169,14 +153,40 @@ The compare and swap instructions sets the condition code to 0/1 if the
 operands were equal/unequal. The CCZ1 mode ensures the result can be
 effectively placed into a register.
 
-
-CCV*
-
-The variants with and without ANY are generated by the same
-instructions and therefore are holding the same information.  However,
-when generating a condition code mask they require checking different
-bits of CC.  In that case the variants without ANY represent the
-results for *all* elements.
+CCVIH, CCVIHU, CCVFH, CCVFHE
+
+These are condition code modes used in instructions setting the
+condition code.  The mode determines which comparison to perform (H -
+high, HU - high unsigned, HE - high or equal) and whether it is a
+floating point comparison or not (I - int, F - float).
+
+The comparison operation to be performed needs to be encoded into the
+condition code mode since the comparison operator is not available in
+compare style patterns (set cc (compare (op0) (op1))).  So the
+condition code mode is the only information to determine the
+instruction to be used.
+
+CCVIALL, CCVIANY, CCVFALL, CCVFANY
+
+These modes are used in instructions reading the condition code.
+As opposed to the CC producer patterns, the comparison operator is
+available.  Hence the comparison operation does not need to be part of
+the CC mode.  However, we still need to know whether CC has been
+generated by a float or an integer comparison in order to be able to
+invert the condition correctly (int: GT -> LE, float: GT -> UNLE).
+
+The ALL and ANY variants differ only in the usage of CC1 which
+indicates a mixed result across the vector elements.  Be aware that
+depending on the comparison code the ALL and ANY variants might
+actually refer to their opposite meaning.  I.e. while inverting the
+comparison in (EQ (reg:CCVIALL 33) (const_int 0)) results in (NE
+(reg:CCVIALL 33) (const_int 0)) it in fact describes an ANY comparison
+(inverting "all equal" should be "any not equal").  However, the
+middle-end does invert only the comparison operator without touching
+the mode.
+Hence, the ALL/ANY in the mode names refer to the meaning in the
+context of EQ, GT, GE while for the inverted codes it actually means
+ANY/ALL.
 
 CCRAW
 
@@ -209,18 +219,18 @@ CC_MODE (CCT3);
 CC_MODE (CCRAW);
 
 CC_MODE (CCVEQ);
-CC_MODE (CCVEQANY);
 
-CC_MODE (CCVH);
-CC_MODE (CCVHANY);
-CC_MODE (CCVHU);
-CC_MODE (CCVHUANY);
+CC_MODE (CCVIH);
+CC_MODE (CCVIHU);
 
 CC_MODE (CCVFH);
-CC_MODE (CCVFHANY);
 CC_MODE (CCVFHE);
-CC_MODE (CCVFHEANY);
 
+CC_MODE (CCVIALL);
+CC_MODE (CCVIANY);
+
+CC_MODE (CCVFALL);
+CC_MODE (CCVFANY);
 
 /* Vector modes.  */
 
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index dc82fb6..445c147 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -1275,6 +1275,11 @@ s390_match_ccmode_set (rtx set, machine_mode req_mode)
 
   gcc_assert (GET_CODE (set) == SET);
 
+  /* These modes are supposed to be used only in CC consumer
+     patterns.  */
+  gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
+	      && req_mode != CCVFALLmode && req_mode != CCVFANYmode);
+
   if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
     return 1;
 
@@ -1293,8 +1298,8 @@ s390_match_ccmode_set (rtx set, machine_mode req_mode)
     case CCT2mode:
     case CCT3mode:
     case CCVEQmode:
-    case CCVHmode:
-    case CCVHUmode:
+    case CCVIHmode:
+    case CCVIHUmode:
     case CCVFHmode:
     case CCVFHEmode:
       if (req_mode != set_mode)
@@ -1752,14 +1757,20 @@ s390_expand_vec_compare_scalar (enum rtx_code *code, rtx cmp1, rtx cmp2,
       cmp2 = cmp1;
       cmp1 = tmp;
     }
-  *cc = gen_rtx_REG (cmp_mode, CC_REGNUM);
+
   emit_insn (gen_rtx_PARALLEL (VOIDmode,
 	       gen_rtvec (2,
-			  gen_rtx_SET (*cc,
+			  gen_rtx_SET (gen_rtx_REG (cmp_mode, CC_REGNUM),
 				       gen_rtx_COMPARE (cmp_mode, cmp1,
 							cmp2)),
 			  gen_rtx_CLOBBER (VOIDmode,
 					   gen_rtx_SCRATCH (V2DImode)))));
+
+  /* This is the cc reg as it will be used in the cc mode consumer.
+     It either needs to be CCVFALL or CCVFANY.  However, CC1 will
+     never be set by the scalar variants.  So it actually doesn't
+     matter which one we choose here.  */
+  *cc = gen_rtx_REG (CCVFALLmode, CC_REGNUM);
   return true;
 }
 
@@ -2021,92 +2032,63 @@ s390_branch_condition_mask (rtx code)
       break;
 
       /* Vector comparison modes.  */
-
-    case CCVEQmode:
-      switch (GET_CODE (code))
-	{
-	case EQ:        return CC0;
-	case NE:        return CC3;
-	default:        return -1;
-	}
-
-    case CCVEQANYmode:
-      switch (GET_CODE (code))
-	{
-	case EQ:        return CC0 | CC1;
-	case NE:        return CC3 | CC1;
-	default:        return -1;
-	}
-
-      /* Integer vector compare modes.  */
-
-    case CCVHmode:
-      switch (GET_CODE (code))
-	{
-	case GT:        return CC0;
-	case LE:        return CC3;
-	default:        return -1;
-	}
-
-    case CCVHANYmode:
-      switch (GET_CODE (code))
-	{
-	case GT:        return CC0 | CC1;
-	case LE:        return CC3 | CC1;
-	default:        return -1;
-	}
-
-    case CCVHUmode:
-      switch (GET_CODE (code))
-	{
-	case GTU:       return CC0;
-	case LEU:       return CC3;
-	default:        return -1;
-	}
-
-    case CCVHUANYmode:
-      switch (GET_CODE (code))
-	{
-	case GTU:       return CC0 | CC1;
-	case LEU:       return CC3 | CC1;
-	default:        return -1;
-	}
-
-      /* FP vector compare modes.  */
-
-    case CCVFHmode:
+      /* CC2 will never be set.  It however is part of the negated
+	 masks.  */
+    case CCVIALLmode:
       switch (GET_CODE (code))
 	{
-	case GT:        return CC0;
-	case UNLE:      return CC3;
+	case EQ:
+	case GTU:
+	case GT:
+	case GE:        return CC0;
+	  /* The inverted modes are in fact *any* modes.  */
+	case NE:
+	case LEU:
+	case LE:
+	case LT:        return CC3 | CC1 | CC2;
 	default:        return -1;
 	}
 
-    case CCVFHANYmode:
+    case CCVIANYmode:
       switch (GET_CODE (code))
 	{
-	case GT:        return CC0 | CC1;
-	case UNLE:      return CC3 | CC1;
+	case EQ:
+	case GTU:
+	case GT:
+	case GE:        return CC0 | CC1;
+	  /* The inverted modes are in fact *all* modes.  */
+	case NE:
+	case LEU:
+	case LE:
+	case LT:        return CC3 | CC2;
 	default:        return -1;
 	}
-
-    case CCVFHEmode:
+    case CCVFALLmode:
       switch (GET_CODE (code))
 	{
+	case EQ:
+	case GT:
 	case GE:        return CC0;
-	case UNLT:      return CC3;
+	  /* The inverted modes are in fact *any* modes.  */
+	case NE:
+	case UNLE:
+	case UNLT:      return CC3 | CC1 | CC2;
 	default:        return -1;
 	}
 
-    case CCVFHEANYmode:
+    case CCVFANYmode:
       switch (GET_CODE (code))
 	{
+	case EQ:
+	case GT:
 	case GE:        return CC0 | CC1;
-	case UNLT:      return CC3 | CC1;
+	  /* The inverted modes are in fact *all* modes.  */
+	case NE:
+	case UNLE:
+	case UNLT:      return CC3 | CC2;
 	default:        return -1;
 	}
 
-
     case CCRAWmode:
       switch (GET_CODE (code))
 	{
@@ -6256,13 +6238,15 @@ s390_expand_vec_compare (rtx target, enum rtx_code cond,
 
 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
    TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
-   elements in CMP1 and CMP2 fulfill the comparison.  */
+   elements in CMP1 and CMP2 fulfill the comparison.
+   This function is only used to emit patterns for the vx builtins and
+   therefore only handles comparison codes required by the
+   builtins.  */
 void
 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
 			    rtx cmp1, rtx cmp2, bool all_p)
 {
-  enum rtx_code new_code = code;
-  machine_mode cmp_mode, full_cmp_mode, scratch_mode;
+  machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
   rtx tmp_reg = gen_reg_rtx (SImode);
   bool swap_p = false;
 
@@ -6270,53 +6254,71 @@ s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
     {
       switch (code)
 	{
-	case EQ:  cmp_mode = CCVEQmode; break;
-	case NE:  cmp_mode = CCVEQmode; break;
-	case GT:  cmp_mode = CCVHmode;  break;
-	case GE:  cmp_mode = CCVHmode;  new_code = LE; swap_p = true; break;
-	case LT:  cmp_mode = CCVHmode;  new_code = GT; swap_p = true; break;
-	case LE:  cmp_mode = CCVHmode;  new_code = LE; break;
-	case GTU: cmp_mode = CCVHUmode; break;
-	case GEU: cmp_mode = CCVHUmode; new_code = LEU; swap_p = true; break;
-	case LTU: cmp_mode = CCVHUmode; new_code = GTU; swap_p = true; break;
-	case LEU: cmp_mode = CCVHUmode; new_code = LEU; break;
-	default: gcc_unreachable ();
+	case EQ:
+	case NE:
+	  cc_producer_mode = CCVEQmode;
+	  break;
+	case GE:
+	case LT:
+	  code = swap_condition (code);
+	  swap_p = true;
+	  /* fallthrough */
+	case GT:
+	case LE:
+	  cc_producer_mode = CCVIHmode;
+	  break;
+	case GEU:
+	case LTU:
+	  code = swap_condition (code);
+	  swap_p = true;
+	  /* fallthrough */
+	case GTU:
+	case LEU:
+	  cc_producer_mode = CCVIHUmode;
+	  break;
+	default:
+	  gcc_unreachable ();
 	}
+
       scratch_mode = GET_MODE (cmp1);
+      /* These codes represent inverted CC interpretations.  Inverting
+	 an ALL CC mode results in an ANY CC mode and the other way
+	 around.  Invert the all_p flag here to compensate for
+	 that.  */
+      if (code == NE || code == LE || code == LEU)
+	all_p = !all_p;
+
+      cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
     }
-  else if (GET_MODE (cmp1) == V2DFmode)
+  else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
     {
+      bool inv_p = false;
+
       switch (code)
 	{
-	case EQ:   cmp_mode = CCVEQmode;  break;
-	case NE:   cmp_mode = CCVEQmode;  break;
-	case GT:   cmp_mode = CCVFHmode;  break;
-	case GE:   cmp_mode = CCVFHEmode; break;
-	case UNLE: cmp_mode = CCVFHmode;  break;
-	case UNLT: cmp_mode = CCVFHEmode; break;
-	case LT:   cmp_mode = CCVFHmode;  new_code = GT; swap_p = true; break;
-	case LE:   cmp_mode = CCVFHEmode; new_code = GE; swap_p = true; break;
+	case EQ:   cc_producer_mode = CCVEQmode;  break;
+	case NE:   cc_producer_mode = CCVEQmode;  inv_p = true; break;
+	case GT:   cc_producer_mode = CCVFHmode;  break;
+	case GE:   cc_producer_mode = CCVFHEmode; break;
+	case UNLE: cc_producer_mode = CCVFHmode;  inv_p = true; break;
+	case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
+	case LT:   cc_producer_mode = CCVFHmode;  code = GT; swap_p = true; break;
+	case LE:   cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
 	default: gcc_unreachable ();
 	}
-      scratch_mode = V2DImode;
+      scratch_mode = mode_for_vector (
+		       int_mode_for_mode (GET_MODE_INNER (GET_MODE (cmp1))),
+		       GET_MODE_NUNITS (GET_MODE (cmp1)));
+      gcc_assert (scratch_mode != BLKmode);
+
+      if (inv_p)
+	all_p = !all_p;
+
+      cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
     }
   else
     gcc_unreachable ();
 
-  if (!all_p)
-    switch (cmp_mode)
-      {
-      case CCVEQmode:  full_cmp_mode = CCVEQANYmode;  break;
-      case CCVHmode:   full_cmp_mode = CCVHANYmode;   break;
-      case CCVHUmode:  full_cmp_mode = CCVHUANYmode;  break;
-      case CCVFHmode:  full_cmp_mode = CCVFHANYmode;  break;
-      case CCVFHEmode: full_cmp_mode = CCVFHEANYmode; break;
-      default: gcc_unreachable ();
-      }
-  else
-    /* The modes without ANY match the ALL modes.  */
-    full_cmp_mode = cmp_mode;
-
   if (swap_p)
     {
       rtx tmp = cmp2;
@@ -6326,8 +6328,8 @@ s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
 
   emit_insn (gen_rtx_PARALLEL (VOIDmode,
 	       gen_rtvec (2, gen_rtx_SET (
-			       gen_rtx_REG (cmp_mode, CC_REGNUM),
-			       gen_rtx_COMPARE (cmp_mode, cmp1, cmp2)),
+			       gen_rtx_REG (cc_producer_mode, CC_REGNUM),
+			       gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
 			  gen_rtx_CLOBBER (VOIDmode,
 					   gen_rtx_SCRATCH (scratch_mode)))));
   emit_move_insn (target, const0_rtx);
@@ -6335,10 +6337,10 @@ s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
 
   emit_move_insn (target,
 		  gen_rtx_IF_THEN_ELSE (SImode,
-		    gen_rtx_fmt_ee (new_code, VOIDmode,
-				    gen_rtx_REG (full_cmp_mode, CC_REGNUM),
+		    gen_rtx_fmt_ee (code, VOIDmode,
+				    gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
 				    const0_rtx),
-		      target, tmp_reg));
+					tmp_reg, target));
 }
 
 /* Generate a vector comparison expression loading either elements of
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index a449b03..5844e28 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -782,7 +782,7 @@
 ; Used with VFCMP to expand part of the mnemonic
 ; For fp we have a mismatch: eq in the insn name - e in asm
 (define_mode_attr asm_fcmp [(CCVEQ "e") (CCVFH "h") (CCVFHE "he")])
-(define_mode_attr insn_cmp [(CCVEQ "eq") (CCVH "h") (CCVHU "hl") (CCVFH "h") (CCVFHE "he")])
+(define_mode_attr insn_cmp [(CCVEQ "eq") (CCVIH "h") (CCVIHU "hl") (CCVFH "h") (CCVFHE "he")])
 
 ;; Subst pattern definitions
 (include "subst.md")
diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
index c6ac44c..51d022c 100644
--- a/gcc/config/s390/vx-builtins.md
+++ b/gcc/config/s390/vx-builtins.md
@@ -36,7 +36,7 @@
 			      (V1DF "DI") (V2DF "DI")])
 
 ; Condition code modes generated by int comparisons
-(define_mode_iterator VICMP [CCVEQ CCVH CCVHU])
+(define_mode_iterator VICMP [CCVEQ CCVIH CCVIHU])
 
 ; Comparisons supported by the vec_cmp* builtins
 (define_code_iterator intcmp [eq gt gtu ge geu lt ltu le leu])
@@ -1900,24 +1900,24 @@
 
 (define_expand "vec_cmph<VI_HW:mode>_cc"
   [(parallel
-    [(set (reg:CCVH CC_REGNUM)
-	  (compare:CCVH (match_operand:VI_HW 1 "register_operand" "v")
-			(match_operand:VI_HW 2 "register_operand" "v")))
+    [(set (reg:CCVIH CC_REGNUM)
+	  (compare:CCVIH (match_operand:VI_HW 1 "register_operand" "v")
+			 (match_operand:VI_HW 2 "register_operand" "v")))
      (set (match_operand:VI_HW 0 "register_operand" "=v")
 	  (gt:VI_HW (match_dup 1) (match_dup 2)))])
    (set (match_operand:SI 3 "memory_operand" "")
-	(unspec:SI [(reg:CCVH CC_REGNUM)] UNSPEC_CC_TO_INT))]
+	(unspec:SI [(reg:CCVIH CC_REGNUM)] UNSPEC_CC_TO_INT))]
   "TARGET_VX")
 
 (define_expand "vec_cmphl<VI_HW:mode>_cc"
   [(parallel
-    [(set (reg:CCVHU CC_REGNUM)
-	  (compare:CCVHU (match_operand:VI_HW 1 "register_operand" "v")
-			 (match_operand:VI_HW 2 "register_operand" "v")))
+    [(set (reg:CCVIHU CC_REGNUM)
+	  (compare:CCVIHU (match_operand:VI_HW 1 "register_operand" "v")
+			  (match_operand:VI_HW 2 "register_operand" "v")))
      (set (match_operand:VI_HW 0 "register_operand" "=v")
 	  (gtu:VI_HW (match_dup 1) (match_dup 2)))])
    (set (match_operand:SI 3 "memory_operand" "")
-	(unspec:SI [(reg:CCVHU CC_REGNUM)] UNSPEC_CC_TO_INT))]
+	(unspec:SI [(reg:CCVIHU CC_REGNUM)] UNSPEC_CC_TO_INT))]
   "TARGET_VX")
 
 
@@ -1932,9 +1932,9 @@
   [(set_attr "op_type" "VRR")])
 
 (define_insn "*vec_cmph<VI_HW:mode>_cc"
-  [(set (reg:CCVH CC_REGNUM)
-	(compare:CCVH (match_operand:VI_HW 0 "register_operand"  "v")
-		      (match_operand:VI_HW 1 "register_operand"  "v")))
+  [(set (reg:CCVIH CC_REGNUM)
+	(compare:CCVIH (match_operand:VI_HW 0 "register_operand"  "v")
+		       (match_operand:VI_HW 1 "register_operand"  "v")))
    (set (match_operand:VI_HW               2 "register_operand" "=v")
 	(gt:VI_HW (match_dup 0) (match_dup 1)))]
   "TARGET_VX"
@@ -1942,9 +1942,9 @@
   [(set_attr "op_type" "VRR")])
 
 (define_insn "*vec_cmphl<VI_HW:mode>_cc"
-  [(set (reg:CCVHU CC_REGNUM)
-	(compare:CCVHU (match_operand:VI_HW 0 "register_operand"  "v")
-		       (match_operand:VI_HW 1 "register_operand"  "v")))
+  [(set (reg:CCVIHU CC_REGNUM)
+	(compare:CCVIHU (match_operand:VI_HW 0 "register_operand"  "v")
+			(match_operand:VI_HW 1 "register_operand"  "v")))
    (set (match_operand:VI_HW                2 "register_operand" "=v")
 	(gtu:VI_HW (match_dup 0) (match_dup 1)))]
   "TARGET_VX"
@@ -1978,13 +1978,13 @@
 
 (define_expand "vec_cmphv2df_cc"
   [(parallel
-    [(set (reg:CCVH CC_REGNUM)
-	  (compare:CCVH (match_operand:V2DF 1 "register_operand"  "v")
-			(match_operand:V2DF 2 "register_operand"  "v")))
+    [(set (reg:CCVIH CC_REGNUM)
+	  (compare:CCVIH (match_operand:V2DF 1 "register_operand"  "v")
+			 (match_operand:V2DF 2 "register_operand"  "v")))
      (set (match_operand:V2DI 0 "register_operand" "=v")
 	  (gt:V2DI (match_dup 1) (match_dup 2)))])
    (set (match_operand:SI 3 "memory_operand" "")
-	(unspec:SI [(reg:CCVH CC_REGNUM)] UNSPEC_CC_TO_INT))]
+	(unspec:SI [(reg:CCVIH CC_REGNUM)] UNSPEC_CC_TO_INT))]
   "TARGET_VX")
 
 (define_expand "vec_cmphev2df_cc"
@@ -2010,9 +2010,9 @@
   [(set_attr "op_type" "VRR")])
 
 (define_insn "*vec_cmphv2df_cc"
-  [(set (reg:CCVH CC_REGNUM)
-	(compare:CCVH (match_operand:V2DF 0 "register_operand"  "v")
-		      (match_operand:V2DF 1 "register_operand"  "v")))
+  [(set (reg:CCVIH CC_REGNUM)
+	(compare:CCVIH (match_operand:V2DF 0 "register_operand"  "v")
+		       (match_operand:V2DF 1 "register_operand"  "v")))
    (set (match_operand:V2DI               2 "register_operand" "=v")
 	(gt:V2DI (match_dup 0) (match_dup 1)))]
   "TARGET_VX"
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c000e07..2ecf8f9 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* gcc.target/s390/vector/vec-scalar-cmp-1.c: Fix and harden the
+	pattern checks.
+	* gcc.target/s390/zvector/vec-cmp-1.c: New test.
+
 2016-12-02  Maxim Ostapenko  <m.ostapenko@samsung.com>
 
 	* c-c++-common/asan/no-redundant-odr-indicators-1.c: New test.
diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-scalar-cmp-1.c b/gcc/testsuite/gcc.target/s390/vector/vec-scalar-cmp-1.c
index 5f63eda..46a261f 100644
--- a/gcc/testsuite/gcc.target/s390/vector/vec-scalar-cmp-1.c
+++ b/gcc/testsuite/gcc.target/s390/vector/vec-scalar-cmp-1.c
@@ -1,16 +1,7 @@
 /* Check that we use the scalar variants of vector compares.  */
 
 /* { dg-do compile { target { s390*-*-* } } } */
-/* { dg-options "-O3 -mzarch -march=z13" } */
-
-/* { dg-final { scan-assembler-times "wfcedbs\t%v\[0-9\]*,%v0,%v2" 2 } } */
-/* { dg-final { scan-assembler-times "wfchdbs\t%v\[0-9\]*,%v0,%v2" 1 } } */
-/* { dg-final { scan-assembler-times "wfchedbs\t%v\[0-9\]*,%v2,%v0" 1 } } */
-/* { dg-final { scan-assembler-times "wfchdbs\t%v\[0-9\]*,%v2,%v0" 1 } } */
-/* { dg-final { scan-assembler-times "wfchedbs\t%v\[0-9\]*,%v2,%v0" 1 } } */
-/* { dg-final { scan-assembler-times "lochine" 5 } } */
-/* { dg-final { scan-assembler-times "lochino" 1 } } */
-
+/* { dg-options "-O3 -mzarch -march=z13 -fno-asynchronous-unwind-tables" } */
 
 int
 eq (double a, double b)
@@ -18,32 +9,45 @@ eq (double a, double b)
   return a == b;
 }
 
+/* { dg-final { scan-assembler "eq:\n\twfcedbs\t%v\[0-9\]*,%v0,%v2\n\tlhi\t%r2,1\n\tlochine\t%r2,0" } } */
+
 int
 ne (double a, double b)
 {
   return a != b;
 }
 
+/* { dg-final { scan-assembler "ne:\n\twfcedbs\t%v\[0-9\]*,%v0,%v2\n\tlhi\t%r2,1\n\tlochie\t%r2,0" } } */
+
 int
 gt (double a, double b)
 {
   return a > b;
 }
 
+/* { dg-final { scan-assembler "gt:\n\twfchdbs\t%v\[0-9\]*,%v0,%v2\n\tlhi\t%r2,1\n\tlochine\t%r2,0" } } */
+
 int
 ge (double a, double b)
 {
   return a >= b;
 }
 
+/* { dg-final { scan-assembler "ge:\n\twfchedbs\t%v\[0-9\]*,%v0,%v2\n\tlhi\t%r2,1\n\tlochine\t%r2,0" } } */
+
 int
 lt (double a, double b)
 {
   return a < b;
 }
 
+/* { dg-final { scan-assembler "lt:\n\twfchdbs\t%v\[0-9\]*,%v2,%v0\n\tlhi\t%r2,1\n\tlochine\t%r2,0" } } */
+
 int
 le (double a, double b)
 {
   return a <= b;
 }
+
+/* { dg-final { scan-assembler "le:\n\twfchedbs\t%v\[0-9\]*,%v2,%v0\n\tlhi\t%r2,1\n\tlochine\t%r2,0" } } */
+
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-1.c b/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-1.c
new file mode 100644
index 0000000..58bc39f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-1.c
@@ -0,0 +1,173 @@
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z13 -mzvector -fno-asynchronous-unwind-tables" } */
+
+#include <vecintrin.h>
+
+int __attribute__((noinline,noclone))
+all_eq_double (vector double a, vector double b)
+{
+	return vec_all_eq (a, b);
+}
+/* { dg-final { scan-assembler-times all_eq_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tlhi\t%r2,1\n\tlochine\t%r2,0 1 } } */
+
+int __attribute__((noinline,noclone))
+all_ne_double (vector double a, vector double b)
+{
+	return vec_all_ne (a, b);
+}
+/* { dg-final { scan-assembler-times all_ne_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tlhi\t%r2,1\n\tlochile\t%r2,0 1 } } */
+
+int __attribute__((noinline,noclone))
+all_gt_double (vector double a, vector double b)
+{
+	return vec_all_gt (a, b);
+}
+/* { dg-final { scan-assembler-times all_gt_double:\n\tvfchdbs\t%v\[0-9\]*,%v24,%v26\n\tlhi\t%r2,1\n\tlochine\t%r2,0 1 } } */
+
+int __attribute__((noinline,noclone))
+all_lt_double (vector double a, vector double b)
+{
+	return vec_all_lt (a, b);
+}
+/* { dg-final { scan-assembler-times all_lt_double:\n\tvfchdbs\t%v\[0-9\]*,%v26,%v24\n\tlhi\t%r2,1\n\tlochine\t%r2,0 1 } } */
+
+int __attribute__((noinline,noclone))
+all_ge_double (vector double a, vector double b)
+{
+	return vec_all_ge (a, b);
+}
+/* { dg-final { scan-assembler-times all_ge_double:\n\tvfchedbs\t%v\[0-9\]*,%v24,%v26\n\tlhi\t%r2,1\n\tlochine\t%r2,0 1 } } */
+
+int __attribute__((noinline,noclone))
+all_le_double (vector double a, vector double b)
+{
+	return vec_all_le (a, b);
+}
+/* { dg-final { scan-assembler-times all_le_double:\n\tvfchedbs\t%v\[0-9\]*,%v26,%v24\n\tlhi\t%r2,1\n\tlochine\t%r2,0 1 } } */
+
+int __attribute__((noinline,noclone))
+any_eq_double (vector double a, vector double b)
+{
+	return vec_any_eq (a, b);
+}
+/* { dg-final { scan-assembler-times any_eq_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tlhi\t%r2,1\n\tlochinle\t%r2,0 1 } } */
+
+int __attribute__((noinline,noclone))
+any_ne_double (vector double a, vector double b)
+{
+	return vec_any_ne (a, b);
+}
+/* { dg-final { scan-assembler-times any_ne_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tlhi\t%r2,1\n\tlochie\t%r2,0 1 } } */
+
+int __attribute__((noinline,noclone))
+any_gt_double (vector double a, vector double b)
+{
+	return vec_any_gt (a, b);
+}
+/* { dg-final { scan-assembler-times any_gt_double:\n\tvfchdbs\t%v\[0-9\]*,%v24,%v26\n\tlhi\t%r2,1\n\tlochinle\t%r2,0 1 } } */
+
+int __attribute__((noinline,noclone))
+any_lt_double (vector double a, vector double b)
+{
+	return vec_any_lt (a, b);
+}
+/* { dg-final { scan-assembler-times any_lt_double:\n\tvfchdbs\t%v\[0-9\]*,%v26,%v24\n\tlhi\t%r2,1\n\tlochinle\t%r2,0 1 } } */
+
+int __attribute__((noinline,noclone))
+any_ge_double (vector double a, vector double b)
+{
+	return vec_any_ge (a, b);
+}
+/* { dg-final { scan-assembler-times any_ge_double:\n\tvfchedbs\t%v\[0-9\]*,%v24,%v26\n\tlhi\t%r2,1\n\tlochinle\t%r2,0 1 } } */
+
+int __attribute__((noinline,noclone))
+any_le_double (vector double a, vector double b)
+{
+	return vec_any_le (a, b);
+}
+/* { dg-final { scan-assembler-times any_le_double:\n\tvfchedbs\t%v\[0-9\]*,%v26,%v24\n\tlhi\t%r2,1\n\tlochinle\t%r2,0 1 } } */
+
+int __attribute__((noinline,noclone))
+all_eq_int (vector int a, vector int b)
+{
+	return vec_all_eq (a, b);
+}
+/* { dg-final { scan-assembler-times all_eq_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tlhi\t%r2,1\n\tlochine\t%r2,0 1 } } */
+
+int __attribute__((noinline,noclone))
+all_ne_int (vector int a, vector int b)
+{
+	return vec_all_ne (a, b);
+}
+/* { dg-final { scan-assembler-times all_ne_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tlhi\t%r2,1\n\tlochile\t%r2,0 1 } } */
+
+int __attribute__((noinline,noclone))
+all_gt_int (vector int a, vector int b)
+{
+	return vec_all_gt (a, b);
+}
+/* { dg-final { scan-assembler-times all_gt_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tlhi\t%r2,1\n\tlochine\t%r2,0 1 } } */
+
+int __attribute__((noinline,noclone))
+all_lt_int (vector int a, vector int b)
+{
+	return vec_all_lt (a, b);
+}
+/* { dg-final { scan-assembler-times all_lt_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tlhi\t%r2,1\n\tlochine\t%r2,0 1 } } */
+
+int __attribute__((noinline,noclone))
+all_ge_int (vector int a, vector int b)
+{
+	return vec_all_ge (a, b);
+}
+/* { dg-final { scan-assembler-times all_ge_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tlhi\t%r2,1\n\tlochile\t%r2,0 1 } } */
+
+int __attribute__((noinline,noclone))
+all_le_int (vector int a, vector int b)
+{
+	return vec_all_le (a, b);
+}
+/* { dg-final { scan-assembler-times all_le_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tlhi\t%r2,1\n\tlochile\t%r2,0 1 } } */
+
+int __attribute__((noinline,noclone))
+any_eq_int (vector int a, vector int b)
+{
+	return vec_any_eq (a, b);
+}
+/* { dg-final { scan-assembler-times any_eq_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tlhi\t%r2,1\n\tlochinle\t%r2,0 1 } } */
+
+int __attribute__((noinline,noclone))
+any_ne_int (vector int a, vector int b)
+{
+	return vec_any_ne (a, b);
+}
+/* { dg-final { scan-assembler-times any_ne_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tlhi\t%r2,1\n\tlochie\t%r2,0 1 } } */
+
+int __attribute__((noinline,noclone))
+any_gt_int (vector int a, vector int b)
+{
+	return vec_any_gt (a, b);
+}
+/* { dg-final { scan-assembler-times any_gt_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tlhi\t%r2,1\n\tlochinle\t%r2,0 1 } } */
+
+int __attribute__((noinline,noclone))
+any_lt_int (vector int a, vector int b)
+{
+	return vec_any_lt (a, b);
+}
+/* { dg-final { scan-assembler-times any_lt_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tlhi\t%r2,1\n\tlochinle\t%r2,0 1 } } */
+
+int __attribute__((noinline,noclone))
+any_ge_int (vector int a, vector int b)
+{
+	return vec_any_ge (a, b);
+}
+/* { dg-final { scan-assembler-times any_ge_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tlhi\t%r2,1\n\tlochie\t%r2,0 1 } } */
+
+int __attribute__((noinline,noclone))
+any_le_int (vector int a, vector int b)
+{
+	return vec_any_le (a, b);
+}
+/* { dg-final { scan-assembler-times any_le_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tlhi\t%r2,1\n\tlochie\t%r2,0 1 } } */
+
-- 
cgit v1.1


From eca9803844ddf459d3f5992aa88353603f0cb731 Mon Sep 17 00:00:00 2001
From: Andreas Krebbel <krebbel@linux.vnet.ibm.com>
Date: Fri, 2 Dec 2016 08:22:34 +0000
Subject: S/390: Merge compare of compare results

With this patch EQ and NE compares on CC mode reader patterns are
folded.  This allows using the result of the vec_all_* and vec_any_*
builtins directly in a conditional jump instruction as in the attached
testcase.

gcc/ChangeLog:

2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	* config/s390/s390-protos.h (s390_reverse_condition): New
	prototype.
	* config/s390/s390.c (s390_canonicalize_comparison): Fold compares
	of CC mode values.
	(s390_reverse_condition): New function.
	* config/s390/s390.h (REVERSIBLE_CC_MODE, REVERSE_CONDITION): Define
	target macros.

gcc/testsuite/ChangeLog:

2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	* gcc.target/s390/zvector/vec-cmp-2.c: New test.

From-SVN: r243155
---
 gcc/ChangeLog                 | 10 ++++++++++
 gcc/config/s390/s390-protos.h |  1 +
 gcc/config/s390/s390.c        | 42 ++++++++++++++++++++++++++++++++++++++++++
 gcc/config/s390/s390.h        | 12 ++++++++++++
 gcc/testsuite/ChangeLog       |  4 ++++
 5 files changed, 69 insertions(+)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 2d55409..d06661e 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,15 @@
 2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
 
+	* config/s390/s390-protos.h (s390_reverse_condition): New
+	prototype.
+	* config/s390/s390.c (s390_canonicalize_comparison): Fold compares
+	of CC mode values.
+	(s390_reverse_condition): New function.
+	* config/s390/s390.h (REVERSIBLE_CC_MODE, REVERSE_CONDITION): Define
+	target macros.
+
+2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
 	* config/s390/s390-modes.def (CCVEQANY, CCVH, CCVHANY, CCVHU)
 	(CCVHUANY): Remove modes.
 	(CCVIH, CCVIHU, CCVIALL, CCVIANY, CCVFALL, CCVFANY): Add modes and
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index 7ae98d4..000a677 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -119,6 +119,7 @@ extern void s390_expand_atomic (machine_mode, enum rtx_code,
 extern void s390_expand_tbegin (rtx, rtx, rtx, bool);
 extern void s390_expand_vec_compare (rtx, enum rtx_code, rtx, rtx);
 extern void s390_expand_vec_compare_cc (rtx, enum rtx_code, rtx, rtx, bool);
+extern enum rtx_code s390_reverse_condition (machine_mode, enum rtx_code);
 extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx);
 extern void s390_expand_vec_init (rtx, rtx);
 extern rtx s390_return_addr_rtx (int, rtx);
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 445c147..dab4f43 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -1722,6 +1722,31 @@ s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
 	}
       tmp = *op0; *op0 = *op1; *op1 = tmp;
     }
+
+  /* A comparison result is compared against zero.  Replace it with
+     the (perhaps inverted) original comparison.
+     This probably should be done by simplify_relational_operation.  */
+  if ((*code == EQ || *code == NE)
+      && *op1 == const0_rtx
+      && COMPARISON_P (*op0)
+      && CC_REG_P (XEXP (*op0, 0)))
+    {
+      enum rtx_code new_code;
+
+      if (*code == EQ)	/* (cmp == 0) holds iff cmp is false: reverse it.  */
+	new_code = reversed_comparison_code_parts (GET_CODE (*op0),
+						   XEXP (*op0, 0),
+						   XEXP (*op0, 1), NULL);
+      else
+	new_code = GET_CODE (*op0);
+
+      if (new_code != UNKNOWN)	/* UNKNOWN: keep the nested compare as is.  */
+	{
+	  *code = new_code;
+	  *op1 = XEXP (*op0, 1);
+	  *op0 = XEXP (*op0, 0);
+	}
+    }
 }
 
 /* Helper function for s390_emit_compare.  If possible emit a 64 bit
@@ -6343,6 +6368,23 @@ s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
 					tmp_reg, target));
 }
 
+/* Return the inverse of comparison CODE applied to a CC value of mode
+   MODE.  This is only safe if we know whether the result was created
+   by a floating point compare or not; the CCV modes encode that in the
+   mode.  Any mode other than CCV{F,I}{ALL,ANY} hits gcc_unreachable.  */
+enum rtx_code
+s390_reverse_condition (machine_mode mode, enum rtx_code code)
+{
+  /* Reversal of FP compares needs care -- an ordered compare
+     becomes an unordered compare and vice versa.  */
+  if (mode == CCVFALLmode || mode == CCVFANYmode)
+    return reverse_condition_maybe_unordered (code);
+  else if (mode == CCVIALLmode || mode == CCVIANYmode)
+    return reverse_condition (code);
+  else
+    gcc_unreachable ();
+}
+
 /* Generate a vector comparison expression loading either elements of
    THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
    and CMP_OP2.  */
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index 6be4d34..1d6d7b2 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -513,6 +513,18 @@ extern const char *s390_host_detect_local_cpu (int argc, const char **argv);
 #define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS)		        \
   s390_cannot_change_mode_class ((FROM), (TO), (CLASS))
 
+/* We can reverse a CC mode safely if we know whether it comes from a
+   floating point compare or not.  With the vector modes it is encoded
+   as part of the mode.
+   FIXME: It might make sense to do this for other cc modes as well.  */
+#define REVERSIBLE_CC_MODE(MODE)				\
+  ((MODE) == CCVIALLmode || (MODE) == CCVIANYmode		\
+   || (MODE) == CCVFALLmode || (MODE) == CCVFANYmode)
+
+/* Return the inverse of CODE for CC mode MODE (helper takes MODE first).  */
+#define REVERSE_CONDITION(CODE, MODE) s390_reverse_condition (MODE, CODE)
+
+
 /* Register classes.  */
 
 /* We use the following register classes:
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 2ecf8f9..dc269ef 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,9 @@
 2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
 
+	* gcc.target/s390/zvector/vec-cmp-2.c: New test.
+
+2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
 	* gcc.target/s390/vector/vec-scalar-cmp-1.c: Fix and harden the
 	pattern checks.
 	* gcc.target/s390/zvector/vec-cmp-1.c: New test.
-- 
cgit v1.1


From f00bc26c002e81a23e84c8c359a97c88b6b95447 Mon Sep 17 00:00:00 2001
From: Andreas Krebbel <krebbel@gcc.gnu.org>
Date: Fri, 2 Dec 2016 08:23:19 +0000
Subject: Add testcase missing in last commit.

gcc/testsuite/ChangeLog:

2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	* gcc.target/s390/zvector/vec-cmp-2.c: New test.

From-SVN: r243156
---
 gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c | 203 ++++++++++++++++++++++
 1 file changed, 203 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c

diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c b/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c
new file mode 100644
index 0000000..0711f9c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/zvector/vec-cmp-2.c
@@ -0,0 +1,203 @@
+/* Similar to vec-cmp-1.c but requires that
+   s390_canonicalize_comparison is able to merge the two nested
+   compares.  */
+
+/* { dg-do compile { target { s390*-*-* } } } */
+/* { dg-options "-O3 -mzarch -march=z13 -mzvector -fno-asynchronous-unwind-tables" } */
+
+#include <vecintrin.h>
+
+extern void foo (void);
+
+int __attribute__((noinline,noclone))
+all_eq_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_all_eq (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_eq_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */
+
+int __attribute__((noinline,noclone))
+all_ne_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_all_ne (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_ne_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tjle 1 } } */
+
+int __attribute__((noinline,noclone))
+all_gt_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_all_gt (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_gt_double:\n\tvfchdbs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */
+
+int __attribute__((noinline,noclone))
+all_lt_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_all_lt (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_lt_double:\n\tvfchdbs\t%v\[0-9\]*,%v26,%v24\n\tjne 1 } } */
+
+int __attribute__((noinline,noclone))
+all_ge_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_all_ge (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_ge_double:\n\tvfchedbs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */
+
+int __attribute__((noinline,noclone))
+all_le_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_all_le (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_le_double:\n\tvfchedbs\t%v\[0-9\]*,%v26,%v24\n\tjne 1 } } */
+
+int __attribute__((noinline,noclone))
+any_eq_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_any_eq (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_eq_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */
+
+int __attribute__((noinline,noclone))
+any_ne_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_any_ne (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_ne_double:\n\tvfcedbs\t%v\[0-9\]*,%v24,%v26\n\tje 1 } } */
+
+int __attribute__((noinline,noclone))
+any_gt_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_any_gt (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_gt_double:\n\tvfchdbs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */
+
+int __attribute__((noinline,noclone))
+any_lt_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_any_lt (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_lt_double:\n\tvfchdbs\t%v\[0-9\]*,%v26,%v24\n\tjnle 1 } } */
+
+int __attribute__((noinline,noclone))
+any_ge_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_any_ge (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_ge_double:\n\tvfchedbs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */
+
+int __attribute__((noinline,noclone))
+any_le_double (vector double a, vector double b)
+{
+  if (__builtin_expect (vec_any_le (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_le_double:\n\tvfchedbs\t%v\[0-9\]*,%v26,%v24\n\tjnle 1 } } */
+
+int __attribute__((noinline,noclone))
+all_eq_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_all_eq (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_eq_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */
+
+int __attribute__((noinline,noclone))
+all_ne_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_all_ne (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_ne_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tjle 1 } } */
+
+int __attribute__((noinline,noclone))
+all_gt_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_all_gt (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_gt_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tjne 1 } } */
+
+int __attribute__((noinline,noclone))
+all_lt_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_all_lt (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_lt_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tjne 1 } } */
+
+int __attribute__((noinline,noclone))
+all_ge_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_all_ge (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_ge_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tjle 1 } } */
+
+int __attribute__((noinline,noclone))
+all_le_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_all_le (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times all_le_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tjle 1 } } */
+
+int __attribute__((noinline,noclone))
+any_eq_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_any_eq (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_eq_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */
+
+int __attribute__((noinline,noclone))
+any_ne_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_any_ne (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_ne_int:\n\tvceqfs\t%v\[0-9\]*,%v24,%v26\n\tje 1 } } */
+
+int __attribute__((noinline,noclone))
+any_gt_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_any_gt (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_gt_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tjnle 1 } } */
+
+int __attribute__((noinline,noclone))
+any_lt_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_any_lt (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_lt_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tjnle 1 } } */
+
+int __attribute__((noinline,noclone))
+any_ge_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_any_ge (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_ge_int:\n\tvchfs\t%v\[0-9\]*,%v26,%v24\n\tje 1 } } */
+
+int __attribute__((noinline,noclone))
+any_le_int (vector int a, vector int b)
+{
+  if (__builtin_expect (vec_any_le (a, b), 1))
+    foo ();
+}
+/* { dg-final { scan-assembler-times any_le_int:\n\tvchfs\t%v\[0-9\]*,%v24,%v26\n\tje 1 } } */
+
-- 
cgit v1.1


From 9ad49cdb5340bcaab0ecc0574ed754af9ba71283 Mon Sep 17 00:00:00 2001
From: Andreas Krebbel <krebbel@linux.vnet.ibm.com>
Date: Fri, 2 Dec 2016 08:24:27 +0000
Subject: S/390: Add vector pack/unpack patterns.

gcc/ChangeLog:

2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	* config/s390/vector.md (vec_halfhalf): New mode iterator.
	("vec_pack_trunc_<mode>", "vec_pack_ssat_<mode>")
	("vec_pack_usat_<mode>", "vec_unpacks_hi_v16qi")
	("vec_unpacks_low_v16qi", "vec_unpacku_hi_v16qi")
	("vec_unpacku_low_v16qi", "vec_unpacks_hi_v8hi")
	("vec_unpacks_lo_v8hi", "vec_unpacku_hi_v8hi")
	("vec_unpacku_lo_v8hi", "vec_unpacks_hi_v4si")
	("vec_unpacks_lo_v4si", "vec_unpacku_hi_v4si")
	("vec_unpacku_lo_v4si"): New pattern definitions.
	* config/s390/vx-builtins.md: Move VI_HW_HSD mode iterator to
	vector.md.

From-SVN: r243157
---
 gcc/ChangeLog                  |  14 +++
 gcc/config/s390/vector.md      | 198 +++++++++++++++++++++++++++++++++++++++--
 gcc/config/s390/vx-builtins.md |   1 -
 3 files changed, 203 insertions(+), 10 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d06661e..d2ecd0f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,19 @@
 2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
 
+	* config/s390/vector.md (vec_halfhalf): New mode iterator.
+	("vec_pack_trunc_<mode>", "vec_pack_ssat_<mode>")
+	("vec_pack_usat_<mode>", "vec_unpacks_hi_v16qi")
+	("vec_unpacks_low_v16qi", "vec_unpacku_hi_v16qi")
+	("vec_unpacku_low_v16qi", "vec_unpacks_hi_v8hi")
+	("vec_unpacks_lo_v8hi", "vec_unpacku_hi_v8hi")
+	("vec_unpacku_lo_v8hi", "vec_unpacks_hi_v4si")
+	("vec_unpacks_lo_v4si", "vec_unpacku_hi_v4si")
+	("vec_unpacku_lo_v4si"): New pattern definitions.
+	* config/s390/vx-builtins.md: Move VI_HW_HSD mode iterator to
+	vector.md.
+
+2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
 	* config/s390/s390-protos.h (s390_reverse_condition): New
 	prototype.
 	* config/s390/s390.c (s390_canonicalize_comparison): Fold compares
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index bc4f8da..d446d5f 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -38,7 +38,8 @@
 (define_mode_iterator VIT_HW    [V16QI V8HI V4SI V2DI V1TI TI])
 (define_mode_iterator VI_HW     [V16QI V8HI V4SI V2DI])
 (define_mode_iterator VI_HW_QHS [V16QI V8HI V4SI])
-(define_mode_iterator VI_HW_HS  [V8HI V4SI])
+(define_mode_iterator VI_HW_HSD [V8HI  V4SI V2DI])
+(define_mode_iterator VI_HW_HS  [V8HI  V4SI])
 (define_mode_iterator VI_HW_QH  [V16QI V8HI])
 
 ; All integer vector modes supported in a vector register + TImode
@@ -114,6 +115,13 @@
 			    (V1DF "V2SF") (V2DF "V4SF")
 			    (V1TF "V1DF")])
 
+; Vector with half the element size AND half the number of elements.
+(define_mode_attr vec_halfhalf
+  [(V2HI "V2QI") (V4HI "V4QI") (V8HI "V8QI")
+   (V2SI "V2HI") (V4SI "V4HI")
+   (V2DI "V2SI")
+   (V2DF "V2SF")])
+
 ; The comparisons not setting CC iterate over the rtx code.
 (define_code_iterator VFCMP_HW_OP [eq gt ge])
 (define_code_attr asm_fcmp_op [(eq "e") (gt "h") (ge "he")])
@@ -1223,6 +1231,185 @@
   "vsel\t%v0,%2,%1,%3"
   [(set_attr "op_type" "VRR")])
 
+; vec_pack_trunc
+
+; vpkh, vpkf, vpkg
+(define_insn "vec_pack_trunc_<mode>"
+  [(set (match_operand:<vec_half> 0 "register_operand" "=v")
+	(vec_concat:<vec_half>
+	 (truncate:<vec_halfhalf>
+	  (match_operand:VI_HW_HSD 1 "register_operand" "v"))
+	 (truncate:<vec_halfhalf>
+	  (match_operand:VI_HW_HSD 2 "register_operand" "v"))))]
+  "TARGET_VX"
+  "vpk<bhfgq>\t%0,%1,%2"
+  [(set_attr "op_type" "VRR")])
+
+; vpksh, vpksf, vpksg
+(define_insn "vec_pack_ssat_<mode>"
+  [(set (match_operand:<vec_half> 0 "register_operand" "=v")
+	(vec_concat:<vec_half>
+	 (ss_truncate:<vec_halfhalf>
+	  (match_operand:VI_HW_HSD 1 "register_operand" "v"))
+	 (ss_truncate:<vec_halfhalf>
+	  (match_operand:VI_HW_HSD 2 "register_operand" "v"))))]
+  "TARGET_VX"
+  "vpks<bhfgq>\t%0,%1,%2"
+  [(set_attr "op_type" "VRR")])
+
+; vpklsh, vpklsf, vpklsg
+(define_insn "vec_pack_usat_<mode>"
+  [(set (match_operand:<vec_half> 0 "register_operand" "=v")
+	(vec_concat:<vec_half>
+	 (us_truncate:<vec_halfhalf>
+	  (match_operand:VI_HW_HSD 1 "register_operand" "v"))
+	 (us_truncate:<vec_halfhalf>
+	  (match_operand:VI_HW_HSD 2 "register_operand" "v"))))]
+  "TARGET_VX"
+  "vpkls<bhfgq>\t%0,%1,%2"
+  [(set_attr "op_type" "VRR")])
+
+;; vector unpack v16qi
+
+; signed
+
+(define_insn "vec_unpacks_hi_v16qi"
+  [(set (match_operand:V8HI 0 "register_operand" "=v")
+	(sign_extend:V8HI
+	 (vec_select:V8QI
+	  (match_operand:V16QI 1 "register_operand" "v")
+	  (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)
+		     (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
+  "TARGET_VX"
+  "vuphb\t%0,%1"
+  [(set_attr "op_type" "VRR")])
+
+(define_insn "vec_unpacks_low_v16qi" ; NOTE(review): v8hi/v4si siblings use _lo_, not _low_ -- confirm
+  [(set (match_operand:V8HI 0 "register_operand" "=v")
+	(sign_extend:V8HI
+	 (vec_select:V8QI
+	  (match_operand:V16QI 1 "register_operand" "v")
+	  (parallel [(const_int 8) (const_int 9) (const_int 10)(const_int 11)
+		     (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))))]
+  "TARGET_VX"
+  "vuplb\t%0,%1"
+  [(set_attr "op_type" "VRR")])
+
+; unsigned
+
+(define_insn "vec_unpacku_hi_v16qi"
+  [(set (match_operand:V8HI 0 "register_operand" "=v")
+	(zero_extend:V8HI
+	 (vec_select:V8QI
+	  (match_operand:V16QI 1 "register_operand" "v")
+	  (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)
+		     (const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
+  "TARGET_VX"
+  "vuplhb\t%0,%1"
+  [(set_attr "op_type" "VRR")])
+
+(define_insn "vec_unpacku_low_v16qi" ; NOTE(review): v8hi/v4si siblings use _lo_, not _low_ -- confirm
+  [(set (match_operand:V8HI 0 "register_operand" "=v")
+	(zero_extend:V8HI
+	 (vec_select:V8QI
+	  (match_operand:V16QI 1 "register_operand" "v")
+	  (parallel [(const_int 8) (const_int 9) (const_int 10)(const_int 11)
+		     (const_int 12)(const_int 13)(const_int 14)(const_int 15)]))))]
+  "TARGET_VX"
+  "vupllb\t%0,%1"
+  [(set_attr "op_type" "VRR")])
+
+;; vector unpack v8hi
+
+; signed
+
+(define_insn "vec_unpacks_hi_v8hi"
+  [(set (match_operand:V4SI 0 "register_operand" "=v")
+	(sign_extend:V4SI
+	 (vec_select:V4HI
+	  (match_operand:V8HI 1 "register_operand" "v")
+	  (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))))]
+  "TARGET_VX"
+  "vuphh\t%0,%1"
+  [(set_attr "op_type" "VRR")])
+
+(define_insn "vec_unpacks_lo_v8hi"
+  [(set (match_operand:V4SI 0 "register_operand" "=v")
+	(sign_extend:V4SI
+	 (vec_select:V4HI
+	  (match_operand:V8HI 1 "register_operand" "v")
+	  (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
+  "TARGET_VX"
+  "vuplhw\t%0,%1"
+  [(set_attr "op_type" "VRR")])
+
+; unsigned
+
+(define_insn "vec_unpacku_hi_v8hi"
+  [(set (match_operand:V4SI 0 "register_operand" "=v")
+	(zero_extend:V4SI
+	 (vec_select:V4HI
+	  (match_operand:V8HI 1 "register_operand" "v")
+	  (parallel [(const_int 0)(const_int 1)(const_int 2)(const_int 3)]))))]
+  "TARGET_VX"
+  "vuplhh\t%0,%1"
+  [(set_attr "op_type" "VRR")])
+
+(define_insn "vec_unpacku_lo_v8hi"
+  [(set (match_operand:V4SI 0 "register_operand" "=v")
+	(zero_extend:V4SI
+	 (vec_select:V4HI
+	  (match_operand:V8HI 1 "register_operand" "v")
+	  (parallel [(const_int 4)(const_int 5)(const_int 6)(const_int 7)]))))]
+  "TARGET_VX"
+  "vupllh\t%0,%1"
+  [(set_attr "op_type" "VRR")])
+
+;; vector unpack v4si
+
+; signed
+
+(define_insn "vec_unpacks_hi_v4si"
+  [(set (match_operand:V2DI 0 "register_operand" "=v")
+	(sign_extend:V2DI
+	 (vec_select:V2SI
+	  (match_operand:V4SI 1 "register_operand" "v")
+	  (parallel [(const_int 0)(const_int 1)]))))]
+  "TARGET_VX"
+  "vuphf\t%0,%1"
+  [(set_attr "op_type" "VRR")])
+
+(define_insn "vec_unpacks_lo_v4si"
+  [(set (match_operand:V2DI 0 "register_operand" "=v")
+	(sign_extend:V2DI
+	 (vec_select:V2SI
+	  (match_operand:V4SI 1 "register_operand" "v")
+	  (parallel [(const_int 2)(const_int 3)]))))]
+  "TARGET_VX"
+  "vuplf\t%0,%1"
+  [(set_attr "op_type" "VRR")])
+
+; unsigned
+
+(define_insn "vec_unpacku_hi_v4si"
+  [(set (match_operand:V2DI 0 "register_operand" "=v")
+	(zero_extend:V2DI
+	 (vec_select:V2SI
+	  (match_operand:V4SI 1 "register_operand" "v")
+	  (parallel [(const_int 0)(const_int 1)]))))]
+  "TARGET_VX"
+  "vuplhf\t%0,%1"
+  [(set_attr "op_type" "VRR")])
+
+(define_insn "vec_unpacku_lo_v4si"
+  [(set (match_operand:V2DI 0 "register_operand" "=v")
+	(zero_extend:V2DI
+	 (vec_select:V2SI
+	  (match_operand:V4SI 1 "register_operand" "v")
+	  (parallel [(const_int 2)(const_int 3)]))))]
+  "TARGET_VX"
+  "vupllf\t%0,%1"
+  [(set_attr "op_type" "VRR")])
 
 
 ; reduc_smin
@@ -1233,15 +1420,8 @@
 ; vec_shl vrep + vsl
 ; vec_shr
 
-; vec_pack_trunc
-; vec_pack_ssat
-; vec_pack_usat
-; vec_pack_sfix_trunc
+; vec_pack_sfix_trunc: convert + pack ?
 ; vec_pack_ufix_trunc
-; vec_unpacks_hi
-; vec_unpacks_low
-; vec_unpacku_hi
-; vec_unpacku_low
 ; vec_unpacks_float_hi
 ; vec_unpacks_float_lo
 ; vec_unpacku_float_hi
diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
index 51d022c..b3818ee 100644
--- a/gcc/config/s390/vx-builtins.md
+++ b/gcc/config/s390/vx-builtins.md
@@ -24,7 +24,6 @@
 (define_mode_iterator V_HW_32_64 [V4SI V2DI V2DF])
 (define_mode_iterator VI_HW_SD [V4SI V2DI])
 (define_mode_iterator V_HW_HSD [V8HI V4SI V2DI V2DF])
-(define_mode_iterator VI_HW_HSD [V8HI V4SI V2DI])
 
 ; The element type of the vector with floating point modes translated
 ; to int modes of the same size.
-- 
cgit v1.1


From 7f5fc63362a675a216f1475bb08fd42d8274f8cf Mon Sep 17 00:00:00 2001
From: Andreas Krebbel <krebbel@linux.vnet.ibm.com>
Date: Fri, 2 Dec 2016 08:25:27 +0000
Subject: S/390: Define vectorization_cost hook

Define the vectorization_cost hook.  The only change right now
compared to the default implementation is the reduced costs for
unaligned loads/stores.  This is supposed to prevent unnecessary loop
peeling performed to reach better alignments.

Further tuning of this hook is required.

-Andreas-

gcc/ChangeLog:

2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	* gcc/config/s390/s390.c (s390_builtin_vectorization_cost): New
	function.
	(TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Define target
	macro.

gcc/testsuite/ChangeLog:

2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	* gcc.target/s390/vector/vec-nopeel-1.c: New test.

From-SVN: r243158
---
 gcc/ChangeLog                                      |  7 ++++
 gcc/config/s390/s390.c                             | 37 ++++++++++++++++++++++
 gcc/testsuite/ChangeLog                            |  4 +++
 .../gcc.target/s390/vector/vec-nopeel-1.c          | 17 ++++++++++
 4 files changed, 65 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-nopeel-1.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d2ecd0f..e5f8345 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,12 @@
 2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
 
+	* gcc/config/s390/s390.c (s390_builtin_vectorization_cost): New
+	function.
+	(TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Define target
+	macro.
+
+2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
 	* config/s390/vector.md (vec_halfhalf): New mode iterator.
 	("vec_pack_trunc_<mode>", "vec_pack_ssat_<mode>")
 	("vec_pack_usat_<mode>", "vec_unpacks_hi_v16qi")
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index dab4f43..767666e 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -3674,6 +3674,40 @@ s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
   return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
 }
 
+/* Implement targetm.vectorize.builtin_vectorization_cost.  */
+static int
+s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
+				 tree vectype,
+				 int misalign ATTRIBUTE_UNUSED)
+{
+  switch (type_of_cost)
+    {
+      case scalar_stmt:
+      case scalar_load:
+      case scalar_store:
+      case vector_stmt:
+      case vector_load:
+      case vector_store:
+      case vec_to_scalar:
+      case scalar_to_vec:
+      case cond_branch_not_taken:
+      case vec_perm:
+      case vec_promote_demote:
+      case unaligned_load:
+      case unaligned_store:
+	return 1;
+
+      case cond_branch_taken:
+	return 3;
+
+      case vec_construct:
+	return TYPE_VECTOR_SUBPARTS (vectype) - 1;
+
+      default:
+	gcc_unreachable ();
+    }
+}
+
 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
    otherwise return 0.  */
 
@@ -15428,6 +15462,9 @@ s390_excess_precision (enum excess_precision_type type)
 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
 #undef TARGET_MEMORY_MOVE_COST
 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
+  s390_builtin_vectorization_cost
 
 #undef TARGET_MACHINE_DEPENDENT_REORG
 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index dc269ef..e39ab1c 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,9 @@
 2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
 
+	* gcc.target/s390/vector/vec-nopeel-1.c: New test.
+
+2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
 	* gcc.target/s390/zvector/vec-cmp-2.c: New test.
 
 2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-nopeel-1.c b/gcc/testsuite/gcc.target/s390/vector/vec-nopeel-1.c
new file mode 100644
index 0000000..581c371
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/vec-nopeel-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+/* { dg-require-effective-target vector } */
+
+int
+foo (int * restrict a, int n)
+{
+  int i, result = 0;
+
+  for (i = 0; i < n * 4; i++)
+    result += a[i];
+  return result;
+}
+
+/* We do NOT want this loop to get peeled.  Without peeling no scalar
+   memory add should appear.  */
+/* { dg-final { scan-assembler-not "\ta\t" } } */
-- 
cgit v1.1


From 8f61415f1f776d35f0d616bd662019a659a6e536 Mon Sep 17 00:00:00 2001
From: Dominik Vogt <vogt@linux.vnet.ibm.com>
Date: Fri, 2 Dec 2016 08:26:19 +0000
Subject: PR target/77822: Add helper macro EXTRACT_ARGS_IN_RANGE to rtl.h.

The macro can be used to validate the arguments of zero_extract and
sign_extract to fix this problem:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77822

gcc/ChangeLog:

2016-12-02  Dominik Vogt  <vogt@linux.vnet.ibm.com>

	PR target/77822
	* rtl.h (EXTRACT_ARGS_IN_RANGE): New.

From-SVN: r243159
---
 gcc/ChangeLog |  5 +++++
 gcc/rtl.h     | 10 ++++++++++
 2 files changed, 15 insertions(+)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e5f8345..8c71c21 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-02  Dominik Vogt  <vogt@linux.vnet.ibm.com>
+
+	PR target/77822
+	* rtl.h (EXTRACT_ARGS_IN_RANGE): New.
+
 2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
 
 	* gcc/config/s390/s390.c (s390_builtin_vectorization_cost): New
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 5fde698..a5efa28 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -2694,6 +2694,16 @@ get_full_set_src_cost (rtx x, machine_mode mode, struct full_rtx_costs *c)
 }
 #endif
 
+/* A convenience macro to validate the arguments of a zero_extract
+   expression.  It determines whether SIZE lies inclusively within
+   [1, RANGE], POS lies inclusively within [0, RANGE - 1]
+   and the sum lies inclusively within [1, RANGE].  RANGE must be
+   >= 1, but SIZE and POS may be negative.  */
+#define EXTRACT_ARGS_IN_RANGE(SIZE, POS, RANGE) \
+  (IN_RANGE ((POS), 0, (unsigned HOST_WIDE_INT) (RANGE) - 1) \
+   && IN_RANGE ((SIZE), 1, (unsigned HOST_WIDE_INT) (RANGE) \
+			   - (unsigned HOST_WIDE_INT)(POS)))
+
 /* In explow.c */
 extern HOST_WIDE_INT trunc_int_for_mode	(HOST_WIDE_INT, machine_mode);
 extern rtx plus_constant (machine_mode, rtx, HOST_WIDE_INT, bool = false);
-- 
cgit v1.1


From 0f6f72e80525f14e91d4d1ee6d3bd91fd7c96859 Mon Sep 17 00:00:00 2001
From: Dominik Vogt <vogt@linux.vnet.ibm.com>
Date: Fri, 2 Dec 2016 08:30:16 +0000
Subject: PR target/77822: S390: Validate argument range of
 {zero,sign}_extract.

With some undefined code, combine generates patterns where the arguments to
*_extract are out of range, e.g. a negative bit position.  If the s390 backend
accepts these, they lead to not just undefined behaviour but invalid assembly
instructions (argument out of the allowed range).  So this patch makes sure
that the rtl expressions with out of range arguments are rejected.

gcc/ChangeLog:

2016-12-02  Dominik Vogt  <vogt@linux.vnet.ibm.com>

	PR target/77822
	* config/s390/s390.md ("extzv")
	("*extzv<mode><clobbercc_or_nocc>")
	("*extzvdi<clobbercc_or_nocc>_lshiftrt")
	("*<risbg_n>_ior_and_sr_ze")
	("*extract1bitdi<clobbercc_or_nocc>")
	("*insv<mode><clobbercc_or_nocc>", "*insv_rnsbg_noshift")
	("*insv_rnsbg_srl", "*insv<mode>_mem_reg")
	("*insvdi_mem_reghigh", "*insvdi_reg_imm"): Use EXTRACT_ARGS_IN_RANGE
	to validate the arguments of zero_extract and sign_extract.

gcc/testsuite/ChangeLog:

2016-12-02  Dominik Vogt  <vogt@linux.vnet.ibm.com>

	PR target/77822
	* gcc.target/s390/s390.exp: Support .C tests.
	* gcc.target/s390/pr77822-2.c: New test.
	* gcc.target/s390/pr77822-1.C: New test.

From-SVN: r243160
---
 gcc/ChangeLog                             |  13 ++
 gcc/config/s390/s390.md                   |  20 +-
 gcc/testsuite/ChangeLog                   |   7 +
 gcc/testsuite/gcc.target/s390/pr77822-1.C |  21 ++
 gcc/testsuite/gcc.target/s390/pr77822-2.c | 307 ++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/s390/s390.exp    |   8 +-
 6 files changed, 369 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/pr77822-1.C
 create mode 100644 gcc/testsuite/gcc.target/s390/pr77822-2.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8c71c21..e357932 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,6 +1,19 @@
 2016-12-02  Dominik Vogt  <vogt@linux.vnet.ibm.com>
 
 	PR target/77822
+	* config/s390/s390.md ("extzv")
+	("*extzv<mode><clobbercc_or_nocc>")
+	("*extzvdi<clobbercc_or_nocc>_lshiftrt")
+	("*<risbg_n>_ior_and_sr_ze")
+	("*extract1bitdi<clobbercc_or_nocc>")
+	("*insv<mode><clobbercc_or_nocc>", "*insv_rnsbg_noshift")
+	("*insv_rnsbg_srl", "*insv<mode>_mem_reg")
+	("*insvdi_mem_reghigh", "*insvdi_reg_imm"): Use EXTRACT_ARGS_IN_RANGE
+	to validate the arguments of zero_extract and sign_extract.
+
+2016-12-02  Dominik Vogt  <vogt@linux.vnet.ibm.com>
+
+	PR target/77822
 	* rtl.h (EXTRACT_ARGS_IN_RANGE): New.
 
 2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 5844e28..4f2effd 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -3741,6 +3741,8 @@
      (clobber (reg:CC CC_REGNUM))])]
   "TARGET_Z10"
 {
+  if (! EXTRACT_ARGS_IN_RANGE (INTVAL (operands[2]), INTVAL (operands[3]), 64))
+    FAIL;
   /* Starting with zEC12 there is risbgn not clobbering CC.  */
   if (TARGET_ZEC12)
     {
@@ -3760,7 +3762,9 @@
         (match_operand 2 "const_int_operand" "")   ; size
         (match_operand 3 "const_int_operand" ""))) ; start
   ]
-  "<z10_or_zEC12_cond>"
+  "<z10_or_zEC12_cond>
+   && EXTRACT_ARGS_IN_RANGE (INTVAL (operands[2]), INTVAL (operands[3]),
+			     GET_MODE_BITSIZE (<MODE>mode))"
   "<risbg_n>\t%0,%1,64-%2,128+63,<bitoff_plus>%3+%2" ; dst, src, start, end, shift
   [(set_attr "op_type" "RIE")
    (set_attr "z10prop" "z10_super_E1")])
@@ -3773,6 +3777,7 @@
 	(lshiftrt:DI (match_operand:DI 3 "register_operand" "d")
 		     (match_operand:DI 4 "nonzero_shift_count_operand" "")))]
   "<z10_or_zEC12_cond>
+   && EXTRACT_ARGS_IN_RANGE (INTVAL (operands[1]), INTVAL (operands[2]), 64)
    && 64 - UINTVAL (operands[4]) >= UINTVAL (operands[1])"
   "<risbg_n>\t%0,%3,%2,%2+%1-1,128-%2-%1-%4"
   [(set_attr "op_type" "RIE")
@@ -3791,6 +3796,7 @@
 		  (match_operand 5 "const_int_operand" "")) ; start
 		 4)))]
   "<z10_or_zEC12_cond>
+   && EXTRACT_ARGS_IN_RANGE (INTVAL (operands[4]), INTVAL (operands[5]), 64)
    && UINTVAL (operands[2]) == (~(0ULL) << UINTVAL (operands[4]))"
   "<risbg_n>\t%0,%3,64-%4,63,%4+%5"
   [(set_attr "op_type" "RIE")
@@ -3804,7 +3810,8 @@
 		(const_int 1)  ; size
 		(match_operand 2 "const_int_operand" "")) ; start
 	       (const_int 0)))]
-  "<z10_or_zEC12_cond>"
+  "<z10_or_zEC12_cond>
+   && EXTRACT_ARGS_IN_RANGE (1, INTVAL (operands[2]), 64)"
   "<risbg_n>\t%0,%1,64-1,128+63,%2+1" ; dst, src, start, end, shift
   [(set_attr "op_type" "RIE")
    (set_attr "z10prop" "z10_super_E1")])
@@ -3919,6 +3926,8 @@
 			  (match_operand 2 "const_int_operand"    "I")) ; pos
 	(match_operand:GPR 3 "nonimmediate_operand" "d"))]
   "<z10_or_zEC12_cond>
+   && EXTRACT_ARGS_IN_RANGE (INTVAL (operands[1]), INTVAL (operands[2]),
+			     GET_MODE_BITSIZE (<MODE>mode))
    && (INTVAL (operands[1]) + INTVAL (operands[2])) <= <bitsize>"
   "<risbg_n>\t%0,%3,<bitoff_plus>%2,<bitoff_plus>%2+%1-1,<bitsize>-%2-%1"
   [(set_attr "op_type" "RIE")
@@ -4214,6 +4223,7 @@
 	  (match_operand:DI 3 "nonimmediate_operand" "d")))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_Z10
+   && EXTRACT_ARGS_IN_RANGE (INTVAL (operands[1]), INTVAL (operands[2]), 64)
    && INTVAL (operands[1]) + INTVAL (operands[2]) == 64"
   "rnsbg\t%0,%3,%2,63,0"
   [(set_attr "op_type" "RIE")])
@@ -4230,6 +4240,7 @@
 	  (match_operand:DI 4 "nonimmediate_operand" "d")))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_Z10
+   && EXTRACT_ARGS_IN_RANGE (INTVAL (operands[1]), INTVAL (operands[2]), 64)
    && INTVAL (operands[3]) == 64 - INTVAL (operands[1]) - INTVAL (operands[2])"
   "rnsbg\t%0,%4,%2,%2+%1-1,%3"
   [(set_attr "op_type" "RIE")])
@@ -4239,7 +4250,8 @@
 			(match_operand 1 "const_int_operand" "n,n")
 			(const_int 0))
 	(match_operand:W 2 "register_operand" "d,d"))]
-  "INTVAL (operands[1]) > 0
+  "EXTRACT_ARGS_IN_RANGE (INTVAL (operands[1]), 0, 64)
+   && INTVAL (operands[1]) > 0
    && INTVAL (operands[1]) <= GET_MODE_BITSIZE (SImode)
    && INTVAL (operands[1]) % BITS_PER_UNIT == 0"
 {
@@ -4260,6 +4272,7 @@
 	(lshiftrt:DI (match_operand:DI 2 "register_operand" "d")
 		     (const_int 32)))]
   "TARGET_ZARCH
+   && EXTRACT_ARGS_IN_RANGE (INTVAL (operands[1]), 0, 64)
    && INTVAL (operands[1]) > 0
    && INTVAL (operands[1]) <= GET_MODE_BITSIZE (SImode)
    && INTVAL (operands[1]) % BITS_PER_UNIT == 0"
@@ -4278,6 +4291,7 @@
 			 (match_operand 1 "const_int_operand" "n"))
 	(match_operand:DI 2 "const_int_operand" "n"))]
   "TARGET_ZARCH
+   && EXTRACT_ARGS_IN_RANGE (16, INTVAL (operands[1]), 64)
    && INTVAL (operands[1]) >= 0
    && INTVAL (operands[1]) < BITS_PER_WORD
    && INTVAL (operands[1]) % 16 == 0"
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index e39ab1c..447aaf6 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2016-12-02  Dominik Vogt  <vogt@linux.vnet.ibm.com>
+
+	PR target/77822
+	* gcc.target/s390/s390.exp: Support .C tests.
+	* gcc.target/s390/pr77822-2.c: New test.
+	* gcc.target/s390/pr77822-1.C: New test.
+
 2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
 
 	* gcc.target/s390/vector/vec-nopeel-1.c: New test.
diff --git a/gcc/testsuite/gcc.target/s390/pr77822-1.C b/gcc/testsuite/gcc.target/s390/pr77822-1.C
new file mode 100644
index 0000000..bd5a9b4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/pr77822-1.C
@@ -0,0 +1,21 @@
+/* Regression test for PR/77822.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=zEC12" } */
+
+class A {
+  void m_fn1();
+  char m_datawidth;
+  char m_subunits;
+  int m_subunit_infos[];
+};
+int a;
+long b;
+void A::m_fn1() {
+  int c = 32, d = m_datawidth / c;
+  for (int e = 0; e < d; e++) {
+    int f = e * 32;
+    if (b >> f & 1)
+      m_subunit_infos[m_subunits] = a;
+  }
+}
diff --git a/gcc/testsuite/gcc.target/s390/pr77822-2.c b/gcc/testsuite/gcc.target/s390/pr77822-2.c
new file mode 100644
index 0000000..6789152
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/pr77822-2.c
@@ -0,0 +1,307 @@
+/* This testcase checks that the shift operand of r*sbg instructions is in
+   range.  */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -march=zEC12 -Wno-shift-count-overflow" } */
+
+int g;
+
+void pos_ll_129 (long long b)
+{
+  if (b >> 129 & 1)
+    g = b;
+}
+
+void sizepos_ll_134 (long long b)
+{
+  if (b >> 134 & 1)
+    g = b;
+}
+
+void pos_ll_65 (long long b)
+{
+  if (b >> 65 & 1)
+    g = b;
+}
+
+void sizepos_ll_70 (long long b)
+{
+  if (b >> 70 & 1)
+    g = b;
+}
+
+void pos_ll_33 (long long b)
+{
+  if (b >> 33 & 1)
+    g = b;
+}
+
+void sizepos_ll_38 (long long b)
+{
+  if (b >> 38 & 1)
+    g = b;
+}
+
+void pos_ll_17 (long long b)
+{
+  if (b >> 17 & 1)
+    g = b;
+}
+
+void sizepos_ll_22 (long long b)
+{
+  if (b >> 22 & 1)
+    g = b;
+}
+
+void pos_ll_8 (long long b)
+{
+  if (b >> 8 & 1)
+    g = b;
+}
+
+void sizepos_ll_13 (long long b)
+{
+  if (b >> 13 & 1)
+    g = b;
+}
+
+void pos_l_129 (long b)
+{
+  if (b >> 129 & 1)
+    g = b;
+}
+
+void sizepos_l_134 (long b)
+{
+  if (b >> 134 & 1)
+    g = b;
+}
+
+void pos_l_65 (long b)
+{
+  if (b >> 65 & 1)
+    g = b;
+}
+
+void sizepos_l_70 (long b)
+{
+  if (b >> 70 & 1)
+    g = b;
+}
+
+void pos_l_33 (long b)
+{
+  if (b >> 33 & 1)
+    g = b;
+}
+
+void sizepos_l_38 (long b)
+{
+  if (b >> 38 & 1)
+    g = b;
+}
+
+void pos_l_17 (long b)
+{
+  if (b >> 17 & 1)
+    g = b;
+}
+
+void sizepos_l_22 (long b)
+{
+  if (b >> 22 & 1)
+    g = b;
+}
+
+void pos_l_8 (long b)
+{
+  if (b >> 8 & 1)
+    g = b;
+}
+
+void sizepos_l_13 (long b)
+{
+  if (b >> 13 & 1)
+    g = b;
+}
+
+void pos_i_129 (int b)
+{
+  if (b >> 129 & 1)
+    g = b;
+}
+
+void sizepos_i_134 (int b)
+{
+  if (b >> 134 & 1)
+    g = b;
+}
+
+void pos_i_65 (int b)
+{
+  if (b >> 65 & 1)
+    g = b;
+}
+
+void sizepos_i_70 (int b)
+{
+  if (b >> 70 & 1)
+    g = b;
+}
+
+void pos_i_33 (int b)
+{
+  if (b >> 33 & 1)
+    g = b;
+}
+
+void sizepos_i_38 (int b)
+{
+  if (b >> 38 & 1)
+    g = b;
+}
+
+void pos_i_17 (int b)
+{
+  if (b >> 17 & 1)
+    g = b;
+}
+
+void sizepos_i_22 (int b)
+{
+  if (b >> 22 & 1)
+    g = b;
+}
+
+void pos_i_8 (int b)
+{
+  if (b >> 8 & 1)
+    g = b;
+}
+
+void sizepos_i_13 (int b)
+{
+  if (b >> 13 & 1)
+    g = b;
+}
+
+void pos_s_129 (short b)
+{
+  if (b >> 129 & 1)
+    g = b;
+}
+
+void sizepos_s_134 (short b)
+{
+  if (b >> 134 & 1)
+    g = b;
+}
+
+void pos_s_65 (short b)
+{
+  if (b >> 65 & 1)
+    g = b;
+}
+
+void sizepos_s_70 (short b)
+{
+  if (b >> 70 & 1)
+    g = b;
+}
+
+void pos_s_33 (short b)
+{
+  if (b >> 33 & 1)
+    g = b;
+}
+
+void sizepos_s_38 (short b)
+{
+  if (b >> 38 & 1)
+    g = b;
+}
+
+void pos_s_17 (short b)
+{
+  if (b >> 17 & 1)
+    g = b;
+}
+
+void sizepos_s_22 (short b)
+{
+  if (b >> 22 & 1)
+    g = b;
+}
+
+void pos_s_8 (short b)
+{
+  if (b >> 8 & 1)
+    g = b;
+}
+
+void sizepos_s_13 (short b)
+{
+  if (b >> 13 & 1)
+    g = b;
+}
+
+void pos_c_129 (signed char b)
+{
+  if (b >> 129 & 1)
+    g = b;
+}
+
+void sizepos_c_134 (signed char b)
+{
+  if (b >> 134 & 1)
+    g = b;
+}
+
+void pos_c_65 (signed char b)
+{
+  if (b >> 65 & 1)
+    g = b;
+}
+
+void sizepos_c_70 (signed char b)
+{
+  if (b >> 70 & 1)
+    g = b;
+}
+
+void pos_c_33 (signed char b)
+{
+  if (b >> 33 & 1)
+    g = b;
+}
+
+void sizepos_c_38 (signed char b)
+{
+  if (b >> 38 & 1)
+    g = b;
+}
+
+void pos_c_17 (signed char b)
+{
+  if (b >> 17 & 1)
+    g = b;
+}
+
+void sizepos_c_22 (signed char b)
+{
+  if (b >> 22 & 1)
+    g = b;
+}
+
+void pos_c_8 (signed char b)
+{
+  if (b >> 8 & 1)
+    g = b;
+}
+
+void sizepos_c_13 (signed char b)
+{
+  if (b >> 13 & 1)
+    g = b;
+}
diff --git a/gcc/testsuite/gcc.target/s390/s390.exp b/gcc/testsuite/gcc.target/s390/s390.exp
index f4ad7a1..450dcaf 100644
--- a/gcc/testsuite/gcc.target/s390/s390.exp
+++ b/gcc/testsuite/gcc.target/s390/s390.exp
@@ -90,16 +90,16 @@ dg-init
 set md_tests $srcdir/$subdir/md/*.c
 
 # Main loop.
-dg-runtest [lsort [prune [glob -nocomplain $srcdir/$subdir/*.\[cS\]] \
+dg-runtest [lsort [prune [glob -nocomplain $srcdir/$subdir/*.{c,S,C}] \
 			 $md_tests]] "" $DEFAULT_CFLAGS
 
-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*vector*/*.\[cS\]]] \
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*vector*/*.{c,S,C}]] \
 	"" $DEFAULT_CFLAGS
 
-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/target-attribute/*.\[cS\]]] \
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/target-attribute/*.{c,S,C}]] \
 	"" $DEFAULT_CFLAGS
 
-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/md/*.\[cS\]]] \
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/md/*.{c,S,C}]] \
 	"" $DEFAULT_CFLAGS
 
 # Additional hotpatch torture tests.
-- 
cgit v1.1


From bba13c0c4359649687894823900e091576cbc2e6 Mon Sep 17 00:00:00 2001
From: Dominik Vogt <vogt@linux.vnet.ibm.com>
Date: Fri, 2 Dec 2016 08:31:09 +0000
Subject: S/390: Fix litpool-r3-1.c.

gcc/testsuite/ChangeLog:

2016-12-02  Dominik Vogt  <vogt@linux.vnet.ibm.com>

	* gcc.target/s390/litpool-r3-1.c: Fix label number test.

From-SVN: r243161
---
 gcc/testsuite/ChangeLog                      | 4 ++++
 gcc/testsuite/gcc.target/s390/litpool-r3-1.c | 2 +-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 447aaf6..14296a3 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,9 @@
 2016-12-02  Dominik Vogt  <vogt@linux.vnet.ibm.com>
 
+	* gcc.target/s390/litpool-r3-1.c: Fix label number test.
+
+2016-12-02  Dominik Vogt  <vogt@linux.vnet.ibm.com>
+
 	PR target/77822
 	* gcc.target/s390/s390.exp: Support .C tests.
 	* gcc.target/s390/pr77822-2.c: New test.
diff --git a/gcc/testsuite/gcc.target/s390/litpool-r3-1.c b/gcc/testsuite/gcc.target/s390/litpool-r3-1.c
index 8ee50cf..91e4807 100644
--- a/gcc/testsuite/gcc.target/s390/litpool-r3-1.c
+++ b/gcc/testsuite/gcc.target/s390/litpool-r3-1.c
@@ -13,4 +13,4 @@ int foo(void)
 	return c;
 }
 
-/* { dg-final { scan-assembler-times "\tlarl\t%r3,.L3" 1 } } */
+/* { dg-final { scan-assembler-times "\tlarl\t%r3,.L\[0-9\]" 1 } } */
-- 
cgit v1.1


From 32ff7e39c15206dada1406bdc06e18db9e02d248 Mon Sep 17 00:00:00 2001
From: Dominik Vogt <vogt@linux.vnet.ibm.com>
Date: Fri, 2 Dec 2016 08:32:40 +0000
Subject: Do not simplify "(and (reg) (const bit)" to if_then_else.

combine_simplify_rtx() tries to replace rtx expressions with just two
possible values with an expression that uses if_then_else:

  (if_then_else (condition) (value1) (value2))

If the original expression is e.g.

  (and (reg) (const_int 2))

where the constant is the mask for a single bit, the replacement results
in a more complex expression than before:

  (if_then_else (ne (zero_extract (reg) (1) (31))) (2) (0))

Similar replacements are done for

  (signextend (and ...))
  (zeroextend (and ...))

Suppress the replacement in this special case, before if_then_else_cond() is called.

gcc/ChangeLog:

2016-12-02  Dominik Vogt  <vogt@linux.vnet.ibm.com>

	* combine.c (combine_simplify_rtx):  Suppress replacement of
	"(and (reg) (const_int bit))" with "if_then_else".

From-SVN: r243162
---
 gcc/ChangeLog |  5 +++++
 gcc/combine.c | 12 ++++++++++++
 2 files changed, 17 insertions(+)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e357932..a3fcd8b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,10 @@
 2016-12-02  Dominik Vogt  <vogt@linux.vnet.ibm.com>
 
+	* combine.c (combine_simplify_rtx):  Suppress replacement of
+	"(and (reg) (const_int bit))" with "if_then_else".
+
+2016-12-02  Dominik Vogt  <vogt@linux.vnet.ibm.com>
+
 	PR target/77822
 	* config/s390/s390.md ("extzv")
 	("*extzv<mode><clobbercc_or_nocc>")
diff --git a/gcc/combine.c b/gcc/combine.c
index b429453..7ba634a 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -5602,6 +5602,18 @@ combine_simplify_rtx (rtx x, machine_mode op0_mode, int in_dest,
 		     && OBJECT_P (SUBREG_REG (XEXP (x, 0)))))))
     {
       rtx cond, true_rtx, false_rtx;
+      unsigned HOST_WIDE_INT nz;
+
+      /* If the operation is an AND wrapped in a SIGN_EXTEND or ZERO_EXTEND with
+	 either operand being just a constant single bit value, do nothing since
+	 IF_THEN_ELSE is likely to increase the expression's complexity.  */
+      if (HWI_COMPUTABLE_MODE_P (mode)
+	  && pow2p_hwi (nz = nonzero_bits (x, mode))
+	  && ! ((code == SIGN_EXTEND || code == ZERO_EXTEND)
+		&& GET_CODE (XEXP (x, 0)) == AND
+		&& CONST_INT_P (XEXP (XEXP (x, 0), 0))
+		&& UINTVAL (XEXP (XEXP (x, 0), 0)) == nz))
+	      return x;
 
       cond = if_then_else_cond (x, &true_rtx, &false_rtx);
       if (cond != 0
-- 
cgit v1.1


From a4f2895465da4c8856b119a5787b95db345567a9 Mon Sep 17 00:00:00 2001
From: Martin Liska <mliska@suse.cz>
Date: Fri, 2 Dec 2016 09:36:01 +0100
Subject: Fix runtime error: left shift of negative value (PR ipa/78555)

	PR ipa/78555
	* sreal.c (sreal::to_int): Make absolute value before shifting.
	(sreal::operator/): Likewise.
	(sreal_verify_negative_division): New test.
	(void sreal_c_tests): Call the new test.
	* sreal.h (sreal::normalize_up): Use new SREAL_ABS and
	SREAL_SIGN macros.
	(sreal::normalize_down): Likewise.

From-SVN: r243163
---
 gcc/ChangeLog | 11 +++++++++++
 gcc/sreal.c   | 20 +++++++++++++++++---
 gcc/sreal.h   |  9 +++++----
 3 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a3fcd8b..4453842 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2016-12-02  Martin Liska  <mliska@suse.cz>
+
+	PR ipa/78555
+	* sreal.c (sreal::to_int): Make absolute value before shifting.
+	(sreal::operator/): Likewise.
+	(sreal_verify_negative_division): New test.
+	(void sreal_c_tests): Call the new test.
+	* sreal.h (sreal::normalize_up): Use new SREAL_ABS and
+	SREAL_SIGN macros.
+	(sreal::normalize_down): Likewise.
+
 2016-12-02  Dominik Vogt  <vogt@linux.vnet.ibm.com>
 
 	* combine.c (combine_simplify_rtx):  Suppress replacement of
diff --git a/gcc/sreal.c b/gcc/sreal.c
index 9c43b4e..52e530d 100644
--- a/gcc/sreal.c
+++ b/gcc/sreal.c
@@ -102,14 +102,14 @@ sreal::shift_right (int s)
 int64_t
 sreal::to_int () const
 {
-  int64_t sign = m_sig < 0 ? -1 : 1;
+  int64_t sign = SREAL_SIGN (m_sig);
 
   if (m_exp <= -SREAL_BITS)
     return 0;
   if (m_exp >= SREAL_PART_BITS)
     return sign * INTTYPE_MAXIMUM (int64_t);
   if (m_exp > 0)
-    return m_sig << m_exp;
+    return sign * (SREAL_ABS (m_sig) << m_exp);
   if (m_exp < 0)
     return m_sig >> -m_exp;
   return m_sig;
@@ -229,7 +229,8 @@ sreal::operator/ (const sreal &other) const
 {
   gcc_checking_assert (other.m_sig != 0);
   sreal r;
-  r.m_sig = (m_sig << SREAL_PART_BITS) / other.m_sig;
+  r.m_sig
+    = SREAL_SIGN (m_sig) * (SREAL_ABS (m_sig) << SREAL_PART_BITS) / other.m_sig;
   r.m_exp = m_exp - other.m_exp - SREAL_PART_BITS;
   r.normalize ();
   return r;
@@ -334,6 +335,18 @@ sreal_verify_shifting (void)
     verify_shifting (values[i]);
 }
 
+/* Verify division by (of) a negative value.  */
+
+static void
+sreal_verify_negative_division (void)
+{
+  ASSERT_EQ (sreal (1) / sreal (1), sreal (1));
+  ASSERT_EQ (sreal (-1) / sreal (-1), sreal (1));
+  ASSERT_EQ (sreal (-1234567) / sreal (-1234567), sreal (1));
+  ASSERT_EQ (sreal (-1234567) / sreal (1234567), sreal (-1));
+  ASSERT_EQ (sreal (1234567) / sreal (-1234567), sreal (-1));
+}
+
 /* Run all of the selftests within this file.  */
 
 void sreal_c_tests ()
@@ -341,6 +354,7 @@ void sreal_c_tests ()
   sreal_verify_basics ();
   sreal_verify_arithmetics ();
   sreal_verify_shifting ();
+  sreal_verify_negative_division ();
 }
 
 } // namespace selftest
diff --git a/gcc/sreal.h b/gcc/sreal.h
index ce9cdbb..21f14b0 100644
--- a/gcc/sreal.h
+++ b/gcc/sreal.h
@@ -31,6 +31,9 @@ along with GCC; see the file COPYING3.  If not see
 
 #define SREAL_BITS SREAL_PART_BITS
 
+#define SREAL_SIGN(v) (v < 0 ? -1: 1)
+#define SREAL_ABS(v) (v < 0 ? -v: v)
+
 /* Structure for holding a simple real number.  */
 class sreal
 {
@@ -193,7 +196,6 @@ inline sreal operator>> (const sreal &a, int exp)
 inline void
 sreal::normalize_up ()
 {
-  int64_t s = m_sig < 0 ? -1 : 1;
   unsigned HOST_WIDE_INT sig = absu_hwi (m_sig);
   int shift = SREAL_PART_BITS - 2 - floor_log2 (sig);
 
@@ -208,7 +210,7 @@ sreal::normalize_up ()
       m_exp = -SREAL_MAX_EXP;
       sig = 0;
     }
-  if (s == -1)
+  if (SREAL_SIGN (m_sig) == -1)
     m_sig = -sig;
   else
     m_sig = sig;
@@ -221,7 +223,6 @@ sreal::normalize_up ()
 inline void
 sreal::normalize_down ()
 {
-  int64_t s = m_sig < 0 ? -1 : 1;
   int last_bit;
   unsigned HOST_WIDE_INT sig = absu_hwi (m_sig);
   int shift = floor_log2 (sig) - SREAL_PART_BITS + 2;
@@ -246,7 +247,7 @@ sreal::normalize_down ()
       m_exp = SREAL_MAX_EXP;
       sig = SREAL_MAX_SIG;
     }
-  if (s == -1)
+  if (SREAL_SIGN (m_sig) == -1)
     m_sig = -sig;
   else
     m_sig = sig;
-- 
cgit v1.1


From a717444986a981b21b42fccfd982dcb6ebe42254 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Fri, 2 Dec 2016 09:42:12 +0100
Subject: re PR rtl-optimization/78575 (ICE: in trunc_int_for_mode, at
 explow.c:55 with -O2 -g)

	PR rtl-optimization/78575
	* config/i386/i386.c (timode_scalar_chain::fix_debug_reg_uses): Use
	DF infrastructure to wrap all V1TImode reg uses into TImode subreg
	if not already wrapped in a subreg.  Make sure df_insn_rescan does not
	affect further iterations.

	* gcc.dg/pr78575.c: New test.

From-SVN: r243164
---
 gcc/ChangeLog                  |  8 ++++++++
 gcc/config/i386/i386.c         | 38 ++++++++++++++++++++------------------
 gcc/testsuite/ChangeLog        |  5 +++++
 gcc/testsuite/gcc.dg/pr78575.c | 16 ++++++++++++++++
 4 files changed, 49 insertions(+), 18 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr78575.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 4453842..a83f528 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2016-12-02  Jakub Jelinek  <jakub@redhat.com>
+
+	PR rtl-optimization/78575
+	* config/i386/i386.c (timode_scalar_chain::fix_debug_reg_uses): Use
+	DF infrastructure to wrap all V1TImode reg uses into TImode subreg
+	if not already wrapped in a subreg.  Make sure df_insn_rescan does not
+	affect further iterations.
+
 2016-12-02  Martin Liska  <mliska@suse.cz>
 
 	PR ipa/78555
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index a5f5339..5226e454 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -3831,30 +3831,32 @@ timode_scalar_chain::fix_debug_reg_uses (rtx reg)
   if (!flag_var_tracking)
     return;
 
-  df_ref ref;
-  for (ref = DF_REG_USE_CHAIN (REGNO (reg));
-       ref;
-       ref = DF_REF_NEXT_REG (ref))
+  df_ref ref, next;
+  for (ref = DF_REG_USE_CHAIN (REGNO (reg)); ref; ref = next)
     {
       rtx_insn *insn = DF_REF_INSN (ref);
+      /* Make sure the next ref is for a different instruction,
+         so that we're not affected by the rescan.  */
+      next = DF_REF_NEXT_REG (ref);
+      while (next && DF_REF_INSN (next) == insn)
+	next = DF_REF_NEXT_REG (next);
+
       if (DEBUG_INSN_P (insn))
 	{
 	  /* It may be a debug insn with a TImode variable in
 	     register.  */
-	  rtx val = PATTERN (insn);
-	  if (GET_MODE (val) != TImode)
-	    continue;
-	  gcc_assert (GET_CODE (val) == VAR_LOCATION);
-	  rtx loc = PAT_VAR_LOCATION_LOC (val);
-	  /* It may have been converted to TImode already.  */
-	  if (GET_MODE (loc) == TImode)
-	    continue;
-	  gcc_assert (REG_P (loc)
-		      && GET_MODE (loc) == V1TImode);
-	  /* Convert V1TImode register, which has been updated by a SET
-	     insn before, to SUBREG TImode.  */
-	  PAT_VAR_LOCATION_LOC (val) = gen_rtx_SUBREG (TImode, loc, 0);
-	  df_insn_rescan (insn);
+	  bool changed = false;
+	  for (; ref != next; ref = DF_REF_NEXT_REG (ref))
+	    {
+	      rtx *loc = DF_REF_LOC (ref);
+	      if (REG_P (*loc) && GET_MODE (*loc) == V1TImode)
+		{
+		  *loc = gen_rtx_SUBREG (TImode, *loc, 0);
+		  changed = true;
+		}
+	    }
+	  if (changed)
+	    df_insn_rescan (insn);
 	}
     }
 }
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 14296a3..6ee4b13 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-02  Jakub Jelinek  <jakub@redhat.com>
+
+	PR rtl-optimization/78575
+	* gcc.dg/pr78575.c: New test.
+
 2016-12-02  Dominik Vogt  <vogt@linux.vnet.ibm.com>
 
 	* gcc.target/s390/litpool-r3-1.c: Fix label number test.
diff --git a/gcc/testsuite/gcc.dg/pr78575.c b/gcc/testsuite/gcc.dg/pr78575.c
new file mode 100644
index 0000000..6b27f10
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr78575.c
@@ -0,0 +1,16 @@
+/* PR rtl-optimization/78575 */
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O2 -g -Wno-psabi" } */
+
+typedef unsigned __int128 V __attribute__((vector_size(64)));
+
+V g;
+
+void
+foo (V v)
+{
+  unsigned __int128 x = 1;
+  int c = v[1] <= ~x;
+  v &= v[1];
+  g = v;
+}
-- 
cgit v1.1


From 60ebe8ce1d029cbff8ef80c967f98ba43d746f3b Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Fri, 2 Dec 2016 09:44:42 +0100
Subject: re PR rtl-optimization/78547 (ICE: in loc_cmp, at var-tracking.c:3417
 with -Os -g -mstringop-strategy=libcall -freorder-blocks-algorithm=simple)

	PR rtl-optimization/78547
	* emit-rtl.c (unshare_all_rtl): Make sure DECL_RTL and
	DECL_INCOMING_RTL are not shared.
	* config/i386/i386.c (convert_scalars_to_vector): If any
	insns have been converted, adjust all parameters' DECL_RTL and
	DECL_INCOMING_RTL back from V1TImode to TImode if the parameters have
	TImode.

	* gcc.dg/pr78547.c: New test.

From-SVN: r243165
---
 gcc/ChangeLog                  |  8 ++++++++
 gcc/config/i386/i386.c         | 22 ++++++++++++++++++++++
 gcc/emit-rtl.c                 |  8 ++++++++
 gcc/testsuite/ChangeLog        |  3 +++
 gcc/testsuite/gcc.dg/pr78547.c | 18 ++++++++++++++++++
 5 files changed, 59 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr78547.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a83f528..260a66d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,13 @@
 2016-12-02  Jakub Jelinek  <jakub@redhat.com>
 
+	PR rtl-optimization/78547
+	* emit-rtl.c (unshare_all_rtl): Make sure DECL_RTL and
+	DECL_INCOMING_RTL are not shared.
+	* config/i386/i386.c (convert_scalars_to_vector): If any
+	insns have been converted, adjust all parameters' DECL_RTL and
+	DECL_INCOMING_RTL back from V1TImode to TImode if the parameters have
+	TImode.
+
 	PR rtl-optimization/78575
 	* config/i386/i386.c (timode_scalar_chain::fix_debug_reg_uses): Use
 	DF infrastructure to wrap all V1TImode reg uses into TImode subreg
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 5226e454..5678fa2 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -4075,6 +4075,28 @@ convert_scalars_to_vector ()
 	crtl->stack_alignment_needed = 128;
       if (crtl->stack_alignment_estimated < 128)
 	crtl->stack_alignment_estimated = 128;
+      /* Fix up DECL_RTL/DECL_INCOMING_RTL of arguments.  */
+      if (TARGET_64BIT)
+	for (tree parm = DECL_ARGUMENTS (current_function_decl);
+	     parm; parm = DECL_CHAIN (parm))
+	  {
+	    if (TYPE_MODE (TREE_TYPE (parm)) != TImode)
+	      continue;
+	    if (DECL_RTL_SET_P (parm)
+		&& GET_MODE (DECL_RTL (parm)) == V1TImode)
+	      {
+		rtx r = DECL_RTL (parm);
+		if (REG_P (r))
+		  SET_DECL_RTL (parm, gen_rtx_SUBREG (TImode, r, 0));
+	      }
+	    if (DECL_INCOMING_RTL (parm)
+		&& GET_MODE (DECL_INCOMING_RTL (parm)) == V1TImode)
+	      {
+		rtx r = DECL_INCOMING_RTL (parm);
+		if (REG_P (r))
+		  DECL_INCOMING_RTL (parm) = gen_rtx_SUBREG (TImode, r, 0);
+	      }
+	  }
     }
 
   return 0;
diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
index 02512d3..d2ac88b 100644
--- a/gcc/emit-rtl.c
+++ b/gcc/emit-rtl.c
@@ -2668,6 +2668,14 @@ unsigned int
 unshare_all_rtl (void)
 {
   unshare_all_rtl_1 (get_insns ());
+
+  for (tree decl = DECL_ARGUMENTS (cfun->decl); decl; decl = DECL_CHAIN (decl))
+    {
+      if (DECL_RTL_SET_P (decl))
+	SET_DECL_RTL (decl, copy_rtx_if_shared (DECL_RTL (decl)));
+      DECL_INCOMING_RTL (decl) = copy_rtx_if_shared (DECL_INCOMING_RTL (decl));
+    }
+
   return 0;
 }
 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 6ee4b13..490d081 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,8 @@
 2016-12-02  Jakub Jelinek  <jakub@redhat.com>
 
+	PR rtl-optimization/78547
+	* gcc.dg/pr78547.c: New test.
+
 	PR rtl-optimization/78575
 	* gcc.dg/pr78575.c: New test.
 
diff --git a/gcc/testsuite/gcc.dg/pr78547.c b/gcc/testsuite/gcc.dg/pr78547.c
new file mode 100644
index 0000000..9300cbc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr78547.c
@@ -0,0 +1,18 @@
+/* PR rtl-optimization/78547 */
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-Os -g -freorder-blocks-algorithm=simple -Wno-psabi" } */
+/* { dg-additional-options "-mstringop-strategy=libcall" { target i?86-*-* x86_64-*-* } } */
+
+typedef unsigned __int128 u128;
+typedef unsigned __int128 V __attribute__ ((vector_size (64)));
+
+V
+foo (u128 a, u128 b, u128 c, V d)
+{
+  V e = (V) {a};
+  V f = e & 1;
+  e = 0 != e;
+  c = c;
+  f = f << ((V) {c} & 7);
+  return f + e;
+}
-- 
cgit v1.1


From 46f87aa3e06d9db97f21e25af4be91efc3176000 Mon Sep 17 00:00:00 2001
From: Georg-Johann Lay <avr@gjlay.de>
Date: Fri, 2 Dec 2016 09:05:56 +0000
Subject: invoke.texi (AVR Options): Point to absdata.

	* doc/invoke.texi (AVR Options) [-mabsdata]: Point to absdata.
	* doc/extend.texi (AVR Variable Attributes) [progmem]: Hint
	about linker description to avoid progmem altogether.
	[absdata]: Point to -mabsdata option.

From-SVN: r243170
---
 gcc/ChangeLog       |  7 +++++++
 gcc/doc/extend.texi | 26 ++++++++++++++++++++++++++
 gcc/doc/invoke.texi |  3 ++-
 3 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 260a66d..d50f546 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2016-12-02  Georg-Johann Lay  <avr@gjlay.de>
+
+	* doc/invoke.texi (AVR Options) [-mabsdata]: Point to absdata.
+	* doc/extend.texi (AVR Variable Attributes) [progmem]: Hint
+	about linker description to avoid progmem altogether.
+	[absdata]: Point to -mabsdata option.
+
 2016-12-02  Jakub Jelinek  <jakub@redhat.com>
 
 	PR rtl-optimization/78547
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index d873403..c40e289 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -5929,6 +5929,30 @@ int read_var (int i)
 @}
 @end smallexample
 
+Please note that on these devices, there is no need for @code{progmem}
+at all.  Just use an appropriate linker description file as outlined below.
+
+@smallexample
+  .text :
+  @{ ...
+  @} > text
+  /* Leave .rodata in flash and add an offset of 0x4000 to all
+     addresses so that respective objects can be accessed by LD
+     instructions and open coded C/C++.  This means there is no
+     need for progmem in the source and no overhead by read-only
+     data in RAM.  */
+  .rodata ADDR(.text) + SIZEOF (.text) + 0x4000 :
+  @{
+    *(.rodata)
+    *(.rodata*)
+    *(.gnu.linkonce.r*)
+  @} AT> text
+  /* No more need to put .rodata into .data:
+     Removed all .rodata entries from .data.  */
+  .data :
+  @{ ...
+@end smallexample
+
 @end table
 
 @item io
@@ -6001,6 +6025,8 @@ warning like
 
 @end itemize
 
+See also the @option{-mabsdata} @ref{AVR Options,command-line option}.
+
 @end table
 
 @node Blackfin Variable Attributes
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 8a0cad7..fd549ec 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -15402,7 +15402,8 @@ GCC supports the following AVR devices and ISAs:
 
 Assume that all data in static storage can be accessed by LDS / STS
 instructions.  This option has only an effect on reduced Tiny devices like
-ATtiny40.
+ATtiny40.  See also the @code{absdata}
+@ref{AVR Variable Attributes,variable attribute}.
 
 @item -maccumulate-args
 @opindex maccumulate-args
-- 
cgit v1.1


From 45d5091d5cfa98d0a6906b9333e0396344040810 Mon Sep 17 00:00:00 2001
From: Georg-Johann Lay <avr@gjlay.de>
Date: Fri, 2 Dec 2016 09:12:22 +0000
Subject: avr-arch.h (avr_mcu_t): Remove field.

	* config/avr/avr-arch.h (avr_mcu_t) [n_flash]: Remove field.
	* config/avr/avr-devices.c (AVR_MCU): Remove N_FLASH macro argument.
	* config/avr/avr-mcus.def (AVR_MCU): Remove initializer for n_flash.
	* config/avr/avr.c (avr_set_core_architecture) [avr_n_flash]: Use
	avr_mcu_types.flash_size to compute default value.
	* config/avr/gen-avr-mmcu-specs.c (print_mcu) [cc1_n_flash]: Use
	mcu->flash_size to compute value for spec.

From-SVN: r243171
---
 gcc/ChangeLog                       |  10 +
 gcc/config/avr/avr-arch.h           |   3 -
 gcc/config/avr/avr-devices.c        |   6 +-
 gcc/config/avr/avr-mcus.def         | 551 ++++++++++++++++++------------------
 gcc/config/avr/avr.c                |   2 +-
 gcc/config/avr/gen-avr-mmcu-specs.c |   4 +-
 6 files changed, 291 insertions(+), 285 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d50f546..a0cefa7 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,15 @@
 2016-12-02  Georg-Johann Lay  <avr@gjlay.de>
 
+	* config/avr/avr-arch.h (avr_mcu_t) [n_flash]: Remove field.
+	* config/avr/avr-devices.c (AVR_MCU): Remove N_FLASH macro argument.
+	* config/avr/avr-mcus.def (AVR_MCU): Remove initializer for n_flash.
+	* config/avr/avr.c (avr_set_core_architecture) [avr_n_flash]: Use
+	avr_mcu_types.flash_size to compute default value.
+	* config/avr/gen-avr-mmcu-specs.c (print_mcu) [cc1_n_flash]: Use
+	mcu->flash_size to compute value for spec.
+
+2016-12-02  Georg-Johann Lay  <avr@gjlay.de>
+
 	* doc/invoke.texi (AVR Options) [-mabsdata]: Point to absdata.
 	* doc/extend.texi (AVR Variable Attributes) [progmem]: Hint
 	about linker description to avoid progmem altogether.
diff --git a/gcc/config/avr/avr-arch.h b/gcc/config/avr/avr-arch.h
index e6a2d75..98b394f 100644
--- a/gcc/config/avr/avr-arch.h
+++ b/gcc/config/avr/avr-arch.h
@@ -120,9 +120,6 @@ typedef struct
   /* Start of text section. */
   int text_section_start;
 
-  /* Number of 64k segments in the flash.  */
-  int n_flash;
-
   /* Flash size in bytes.  */
   int flash_size;
 } avr_mcu_t;
diff --git a/gcc/config/avr/avr-devices.c b/gcc/config/avr/avr-devices.c
index cef3b9a..1bd3e5f 100644
--- a/gcc/config/avr/avr-devices.c
+++ b/gcc/config/avr/avr-devices.c
@@ -111,12 +111,12 @@ avr_texinfo[] =
 const avr_mcu_t
 avr_mcu_types[] =
 {
-#define AVR_MCU(NAME, ARCH, DEV_ATTRIBUTE, MACRO, DATA_SEC, TEXT_SEC, N_FLASH, FLASH_SIZE)\
-  { NAME, ARCH, DEV_ATTRIBUTE, MACRO, DATA_SEC, TEXT_SEC, N_FLASH, FLASH_SIZE },
+#define AVR_MCU(NAME, ARCH, DEV_ATTRIBUTE, MACRO, DATA_SEC, TEXT_SEC, FLASH_SIZE)\
+  { NAME, ARCH, DEV_ATTRIBUTE, MACRO, DATA_SEC, TEXT_SEC, FLASH_SIZE },
 #include "avr-mcus.def"
 #undef AVR_MCU
     /* End of list.  */
-  { NULL, ARCH_UNKNOWN, AVR_ISA_NONE, NULL, 0, 0, 0, 0 }
+  { NULL, ARCH_UNKNOWN, AVR_ISA_NONE, NULL, 0, 0, 0 }
 };
 
 
diff --git a/gcc/config/avr/avr-mcus.def b/gcc/config/avr/avr-mcus.def
index 4008741..aafa224 100644
--- a/gcc/config/avr/avr-mcus.def
+++ b/gcc/config/avr/avr-mcus.def
@@ -59,301 +59,298 @@
 
        TEXT_START    First address of Flash, used in -Ttext=<TEXT_START>.
 
-       N_FLASH       Number of 64 KiB flash segments, rounded up.  The default
-                     value for -mn-flash=<N_FLASH>.
-
        FLASH_SIZE    Flash size in bytes.
 
    "avr2" must be first for the "0" default to work as intended.  */
 
 /* Classic, <= 8K.  */
-AVR_MCU ("avr2",             ARCH_AVR2, AVR_ERRATA_SKIP, NULL,                     0x0060, 0x0, 6, 0x2000)
+AVR_MCU ("avr2",             ARCH_AVR2, AVR_ERRATA_SKIP, NULL,                     0x0060, 0x0, 0x60000)
 
-AVR_MCU ("at90s2313",        ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2313__",         0x0060, 0x0, 1, 0x800)
-AVR_MCU ("at90s2323",        ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2323__",         0x0060, 0x0, 1, 0x800)
-AVR_MCU ("at90s2333",        ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2333__",         0x0060, 0x0, 1, 0x800)
-AVR_MCU ("at90s2343",        ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2343__",         0x0060, 0x0, 1, 0x800)
-AVR_MCU ("attiny22",         ARCH_AVR2, AVR_SHORT_SP, "__AVR_ATtiny22__",          0x0060, 0x0, 1, 0x800)
-AVR_MCU ("attiny26",         ARCH_AVR2, AVR_SHORT_SP, "__AVR_ATtiny26__",          0x0060, 0x0, 1, 0x800)
-AVR_MCU ("at90s4414",        ARCH_AVR2, AVR_ISA_NONE, "__AVR_AT90S4414__",         0x0060, 0x0, 1, 0x1000)
-AVR_MCU ("at90s4433",        ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S4433__",         0x0060, 0x0, 1, 0x1000)
-AVR_MCU ("at90s4434",        ARCH_AVR2, AVR_ISA_NONE, "__AVR_AT90S4434__",         0x0060, 0x0, 1, 0x1000)
-AVR_MCU ("at90s8515",        ARCH_AVR2, AVR_ERRATA_SKIP, "__AVR_AT90S8515__",      0x0060, 0x0, 1, 0x2000)
-AVR_MCU ("at90c8534",        ARCH_AVR2, AVR_ISA_NONE, "__AVR_AT90C8534__",         0x0060, 0x0, 1, 0x2000)
-AVR_MCU ("at90s8535",        ARCH_AVR2, AVR_ISA_NONE, "__AVR_AT90S8535__",         0x0060, 0x0, 1, 0x2000)
+AVR_MCU ("at90s2313",        ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2313__",         0x0060, 0x0, 0x800)
+AVR_MCU ("at90s2323",        ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2323__",         0x0060, 0x0, 0x800)
+AVR_MCU ("at90s2333",        ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2333__",         0x0060, 0x0, 0x800)
+AVR_MCU ("at90s2343",        ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S2343__",         0x0060, 0x0, 0x800)
+AVR_MCU ("attiny22",         ARCH_AVR2, AVR_SHORT_SP, "__AVR_ATtiny22__",          0x0060, 0x0, 0x800)
+AVR_MCU ("attiny26",         ARCH_AVR2, AVR_SHORT_SP, "__AVR_ATtiny26__",          0x0060, 0x0, 0x800)
+AVR_MCU ("at90s4414",        ARCH_AVR2, AVR_ISA_NONE, "__AVR_AT90S4414__",         0x0060, 0x0, 0x1000)
+AVR_MCU ("at90s4433",        ARCH_AVR2, AVR_SHORT_SP, "__AVR_AT90S4433__",         0x0060, 0x0, 0x1000)
+AVR_MCU ("at90s4434",        ARCH_AVR2, AVR_ISA_NONE, "__AVR_AT90S4434__",         0x0060, 0x0, 0x1000)
+AVR_MCU ("at90s8515",        ARCH_AVR2, AVR_ERRATA_SKIP, "__AVR_AT90S8515__",      0x0060, 0x0, 0x2000)
+AVR_MCU ("at90c8534",        ARCH_AVR2, AVR_ISA_NONE, "__AVR_AT90C8534__",         0x0060, 0x0, 0x2000)
+AVR_MCU ("at90s8535",        ARCH_AVR2, AVR_ISA_NONE, "__AVR_AT90S8535__",         0x0060, 0x0, 0x2000)
 /* Classic + MOVW, <= 8K.  */
-AVR_MCU ("avr25",            ARCH_AVR25, AVR_ISA_NONE, NULL,                       0x0060, 0x0, 1, 0x2000)
-AVR_MCU ("ata5272",          ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATA5272__",          0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("ata6616c",         ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATA6616C__",         0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("attiny13",         ARCH_AVR25, AVR_SHORT_SP, "__AVR_ATtiny13__",         0x0060, 0x0, 1, 0x400)
-AVR_MCU ("attiny13a",        ARCH_AVR25, AVR_SHORT_SP, "__AVR_ATtiny13A__",        0x0060, 0x0, 1, 0x400)
-AVR_MCU ("attiny2313",       ARCH_AVR25, AVR_SHORT_SP, "__AVR_ATtiny2313__",       0x0060, 0x0, 1, 0x800)
-AVR_MCU ("attiny2313a",      ARCH_AVR25, AVR_SHORT_SP, "__AVR_ATtiny2313A__",      0x0060, 0x0, 1, 0x800)
-AVR_MCU ("attiny24",         ARCH_AVR25, AVR_SHORT_SP, "__AVR_ATtiny24__",         0x0060, 0x0, 1, 0x800)
-AVR_MCU ("attiny24a",        ARCH_AVR25, AVR_SHORT_SP, "__AVR_ATtiny24A__",        0x0060, 0x0, 1, 0x800)
-AVR_MCU ("attiny4313",       ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny4313__",       0x0060, 0x0, 1, 0x1000)
-AVR_MCU ("attiny44",         ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny44__",         0x0060, 0x0, 1, 0x1000)
-AVR_MCU ("attiny44a",        ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny44A__",        0x0060, 0x0, 1, 0x1000)
-AVR_MCU ("attiny441",        ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny441__",        0x0100, 0x0, 1, 0x1000)
-AVR_MCU ("attiny84",         ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny84__",         0x0060, 0x0, 1, 0x2000)
-AVR_MCU ("attiny84a",        ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny84A__",        0x0060, 0x0, 1, 0x2000)
-AVR_MCU ("attiny25",         ARCH_AVR25, AVR_SHORT_SP, "__AVR_ATtiny25__",         0x0060, 0x0, 1, 0x800)
-AVR_MCU ("attiny45",         ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny45__",         0x0060, 0x0, 1, 0x1000)
-AVR_MCU ("attiny85",         ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny85__",         0x0060, 0x0, 1, 0x2000)
-AVR_MCU ("attiny261",        ARCH_AVR25, AVR_SHORT_SP, "__AVR_ATtiny261__",        0x0060, 0x0, 1, 0x800)
-AVR_MCU ("attiny261a",       ARCH_AVR25, AVR_SHORT_SP, "__AVR_ATtiny261A__",       0x0060, 0x0, 1, 0x800)
-AVR_MCU ("attiny461",        ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny461__",        0x0060, 0x0, 1, 0x1000)
-AVR_MCU ("attiny461a",       ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny461A__",       0x0060, 0x0, 1, 0x1000)
-AVR_MCU ("attiny861",        ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny861__",        0x0060, 0x0, 1, 0x2000)
-AVR_MCU ("attiny861a",       ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny861A__",       0x0060, 0x0, 1, 0x2000)
-AVR_MCU ("attiny43u",        ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny43U__",        0x0060, 0x0, 1, 0x1000)
-AVR_MCU ("attiny87",         ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny87__",         0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("attiny48",         ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny48__",         0x0100, 0x0, 1, 0x1000)
-AVR_MCU ("attiny88",         ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny88__",         0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("attiny828",        ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny828__",        0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("attiny841",        ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny841__",        0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("at86rf401",        ARCH_AVR25, AVR_ISA_NONE, "__AVR_AT86RF401__",        0x0060, 0x0, 1, 0x800)
+AVR_MCU ("avr25",            ARCH_AVR25, AVR_ISA_NONE, NULL,                       0x0060, 0x0, 0x2000)
+AVR_MCU ("ata5272",          ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATA5272__",          0x0100, 0x0, 0x2000)
+AVR_MCU ("ata6616c",         ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATA6616C__",         0x0100, 0x0, 0x2000)
+AVR_MCU ("attiny13",         ARCH_AVR25, AVR_SHORT_SP, "__AVR_ATtiny13__",         0x0060, 0x0, 0x400)
+AVR_MCU ("attiny13a",        ARCH_AVR25, AVR_SHORT_SP, "__AVR_ATtiny13A__",        0x0060, 0x0, 0x400)
+AVR_MCU ("attiny2313",       ARCH_AVR25, AVR_SHORT_SP, "__AVR_ATtiny2313__",       0x0060, 0x0, 0x800)
+AVR_MCU ("attiny2313a",      ARCH_AVR25, AVR_SHORT_SP, "__AVR_ATtiny2313A__",      0x0060, 0x0, 0x800)
+AVR_MCU ("attiny24",         ARCH_AVR25, AVR_SHORT_SP, "__AVR_ATtiny24__",         0x0060, 0x0, 0x800)
+AVR_MCU ("attiny24a",        ARCH_AVR25, AVR_SHORT_SP, "__AVR_ATtiny24A__",        0x0060, 0x0, 0x800)
+AVR_MCU ("attiny4313",       ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny4313__",       0x0060, 0x0, 0x1000)
+AVR_MCU ("attiny44",         ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny44__",         0x0060, 0x0, 0x1000)
+AVR_MCU ("attiny44a",        ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny44A__",        0x0060, 0x0, 0x1000)
+AVR_MCU ("attiny441",        ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny441__",        0x0100, 0x0, 0x1000)
+AVR_MCU ("attiny84",         ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny84__",         0x0060, 0x0, 0x2000)
+AVR_MCU ("attiny84a",        ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny84A__",        0x0060, 0x0, 0x2000)
+AVR_MCU ("attiny25",         ARCH_AVR25, AVR_SHORT_SP, "__AVR_ATtiny25__",         0x0060, 0x0, 0x800)
+AVR_MCU ("attiny45",         ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny45__",         0x0060, 0x0, 0x1000)
+AVR_MCU ("attiny85",         ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny85__",         0x0060, 0x0, 0x2000)
+AVR_MCU ("attiny261",        ARCH_AVR25, AVR_SHORT_SP, "__AVR_ATtiny261__",        0x0060, 0x0, 0x800)
+AVR_MCU ("attiny261a",       ARCH_AVR25, AVR_SHORT_SP, "__AVR_ATtiny261A__",       0x0060, 0x0, 0x800)
+AVR_MCU ("attiny461",        ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny461__",        0x0060, 0x0, 0x1000)
+AVR_MCU ("attiny461a",       ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny461A__",       0x0060, 0x0, 0x1000)
+AVR_MCU ("attiny861",        ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny861__",        0x0060, 0x0, 0x2000)
+AVR_MCU ("attiny861a",       ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny861A__",       0x0060, 0x0, 0x2000)
+AVR_MCU ("attiny43u",        ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny43U__",        0x0060, 0x0, 0x1000)
+AVR_MCU ("attiny87",         ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny87__",         0x0100, 0x0, 0x2000)
+AVR_MCU ("attiny48",         ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny48__",         0x0100, 0x0, 0x1000)
+AVR_MCU ("attiny88",         ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny88__",         0x0100, 0x0, 0x2000)
+AVR_MCU ("attiny828",        ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny828__",        0x0100, 0x0, 0x2000)
+AVR_MCU ("attiny841",        ARCH_AVR25, AVR_ISA_NONE, "__AVR_ATtiny841__",        0x0100, 0x0, 0x2000)
+AVR_MCU ("at86rf401",        ARCH_AVR25, AVR_ISA_NONE, "__AVR_AT86RF401__",        0x0060, 0x0, 0x800)
 /* Classic, > 8K, <= 64K.  */
-AVR_MCU ("avr3",             ARCH_AVR3, AVR_ISA_NONE, NULL,                        0x0060, 0x0, 1, 0x6000)
-AVR_MCU ("at43usb355",       ARCH_AVR3, AVR_ISA_NONE, "__AVR_AT43USB355__",        0x0060, 0x0, 1, 0x6000)
-AVR_MCU ("at76c711",         ARCH_AVR3, AVR_ISA_NONE, "__AVR_AT76C711__",          0x0060, 0x0, 1, 0x4000)
+AVR_MCU ("avr3",             ARCH_AVR3, AVR_ISA_NONE, NULL,                        0x0060, 0x0, 0x6000)
+AVR_MCU ("at43usb355",       ARCH_AVR3, AVR_ISA_NONE, "__AVR_AT43USB355__",        0x0060, 0x0, 0x6000)
+AVR_MCU ("at76c711",         ARCH_AVR3, AVR_ISA_NONE, "__AVR_AT76C711__",          0x0060, 0x0, 0x4000)
 /* Classic, == 128K.  */
-AVR_MCU ("avr31",            ARCH_AVR31, AVR_ERRATA_SKIP, NULL,                    0x0060, 0x0, 2, 0x20000)
-AVR_MCU ("atmega103",        ARCH_AVR31, AVR_ERRATA_SKIP, "__AVR_ATmega103__",     0x0060, 0x0, 2, 0x20000)
-AVR_MCU ("at43usb320",       ARCH_AVR31, AVR_ISA_NONE, "__AVR_AT43USB320__",       0x0060, 0x0, 1, 0x10000)
+AVR_MCU ("avr31",            ARCH_AVR31, AVR_ERRATA_SKIP, NULL,                    0x0060, 0x0, 0x20000)
+AVR_MCU ("atmega103",        ARCH_AVR31, AVR_ERRATA_SKIP, "__AVR_ATmega103__",     0x0060, 0x0, 0x20000)
+AVR_MCU ("at43usb320",       ARCH_AVR31, AVR_ISA_NONE, "__AVR_AT43USB320__",       0x0060, 0x0, 0x10000)
 /* Classic + MOVW + JMP/CALL.  */
-AVR_MCU ("avr35",            ARCH_AVR35, AVR_ISA_NONE, NULL,                       0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("ata5505",          ARCH_AVR35, AVR_ISA_NONE, "__AVR_ATA5505__",          0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("ata6617c",         ARCH_AVR35, AVR_ISA_NONE, "__AVR_ATA6617C__",         0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("ata664251",        ARCH_AVR35, AVR_ISA_NONE, "__AVR_ATA664251__",        0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("at90usb82",        ARCH_AVR35, AVR_ISA_NONE, "__AVR_AT90USB82__",        0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("at90usb162",       ARCH_AVR35, AVR_ISA_NONE, "__AVR_AT90USB162__",       0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega8u2",        ARCH_AVR35, AVR_ISA_NONE, "__AVR_ATmega8U2__",        0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("atmega16u2",       ARCH_AVR35, AVR_ISA_NONE, "__AVR_ATmega16U2__",       0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega32u2",       ARCH_AVR35, AVR_ISA_NONE, "__AVR_ATmega32U2__",       0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("attiny167",        ARCH_AVR35, AVR_ISA_NONE, "__AVR_ATtiny167__",        0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("attiny1634",       ARCH_AVR35, AVR_ISA_NONE, "__AVR_ATtiny1634__",       0x0100, 0x0, 1, 0x4000)
+AVR_MCU ("avr35",            ARCH_AVR35, AVR_ISA_NONE, NULL,                       0x0100, 0x0, 0x4000)
+AVR_MCU ("ata5505",          ARCH_AVR35, AVR_ISA_NONE, "__AVR_ATA5505__",          0x0100, 0x0, 0x4000)
+AVR_MCU ("ata6617c",         ARCH_AVR35, AVR_ISA_NONE, "__AVR_ATA6617C__",         0x0100, 0x0, 0x4000)
+AVR_MCU ("ata664251",        ARCH_AVR35, AVR_ISA_NONE, "__AVR_ATA664251__",        0x0100, 0x0, 0x4000)
+AVR_MCU ("at90usb82",        ARCH_AVR35, AVR_ISA_NONE, "__AVR_AT90USB82__",        0x0100, 0x0, 0x2000)
+AVR_MCU ("at90usb162",       ARCH_AVR35, AVR_ISA_NONE, "__AVR_AT90USB162__",       0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega8u2",        ARCH_AVR35, AVR_ISA_NONE, "__AVR_ATmega8U2__",        0x0100, 0x0, 0x2000)
+AVR_MCU ("atmega16u2",       ARCH_AVR35, AVR_ISA_NONE, "__AVR_ATmega16U2__",       0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega32u2",       ARCH_AVR35, AVR_ISA_NONE, "__AVR_ATmega32U2__",       0x0100, 0x0, 0x8000)
+AVR_MCU ("attiny167",        ARCH_AVR35, AVR_ISA_NONE, "__AVR_ATtiny167__",        0x0100, 0x0, 0x4000)
+AVR_MCU ("attiny1634",       ARCH_AVR35, AVR_ISA_NONE, "__AVR_ATtiny1634__",       0x0100, 0x0, 0x4000)
 /* Enhanced, <= 8K.  */
-AVR_MCU ("avr4",             ARCH_AVR4, AVR_ISA_NONE,  NULL,                       0x0060, 0x0, 1, 0x2000)
-AVR_MCU ("ata6285",          ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATA6285__",          0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("ata6286",          ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATA6286__",          0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("ata6289",          ARCH_AVR4, AVR_ISA_NONE, "__AVR_ATA6289__",           0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("ata6612c",         ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATA6612C__",         0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("atmega8",          ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega8__",          0x0060, 0x0, 1, 0x2000)
-AVR_MCU ("atmega8a",         ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega8A__",         0x0060, 0x0, 1, 0x2000)
-AVR_MCU ("atmega48",         ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega48__",         0x0100, 0x0, 1, 0x1000)
-AVR_MCU ("atmega48a",        ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega48A__",        0x0100, 0x0, 1, 0x1000)
-AVR_MCU ("atmega48p",        ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega48P__",        0x0100, 0x0, 1, 0x1000)
-AVR_MCU ("atmega48pa",       ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega48PA__",       0x0100, 0x0, 1, 0x1000)
-AVR_MCU ("atmega48pb",       ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega48PB__",       0x0100, 0x0, 1, 0x1000)
-AVR_MCU ("atmega88",         ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega88__",         0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("atmega88a",        ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega88A__",        0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("atmega88p",        ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega88P__",        0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("atmega88pa",       ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega88PA__",       0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("atmega88pb",       ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega88PB__",       0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("atmega8515",       ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega8515__",       0x0060, 0x0, 1, 0x2000)
-AVR_MCU ("atmega8535",       ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega8535__",       0x0060, 0x0, 1, 0x2000)
-AVR_MCU ("atmega8hva",       ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega8HVA__",       0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("at90pwm1",         ARCH_AVR4, AVR_ISA_NONE,  "__AVR_AT90PWM1__",         0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("at90pwm2",         ARCH_AVR4, AVR_ISA_NONE,  "__AVR_AT90PWM2__",         0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("at90pwm2b",        ARCH_AVR4, AVR_ISA_NONE,  "__AVR_AT90PWM2B__",        0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("at90pwm3",         ARCH_AVR4, AVR_ISA_NONE,  "__AVR_AT90PWM3__",         0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("at90pwm3b",        ARCH_AVR4, AVR_ISA_NONE,  "__AVR_AT90PWM3B__",        0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("at90pwm81",        ARCH_AVR4, AVR_ISA_NONE,  "__AVR_AT90PWM81__",        0x0100, 0x0, 1, 0x2000)
+AVR_MCU ("avr4",             ARCH_AVR4, AVR_ISA_NONE,  NULL,                       0x0060, 0x0, 0x2000)
+AVR_MCU ("ata6285",          ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATA6285__",          0x0100, 0x0, 0x2000)
+AVR_MCU ("ata6286",          ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATA6286__",          0x0100, 0x0, 0x2000)
+AVR_MCU ("ata6289",          ARCH_AVR4, AVR_ISA_NONE, "__AVR_ATA6289__",           0x0100, 0x0, 0x2000)
+AVR_MCU ("ata6612c",         ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATA6612C__",         0x0100, 0x0, 0x2000)
+AVR_MCU ("atmega8",          ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega8__",          0x0060, 0x0, 0x2000)
+AVR_MCU ("atmega8a",         ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega8A__",         0x0060, 0x0, 0x2000)
+AVR_MCU ("atmega48",         ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega48__",         0x0100, 0x0, 0x1000)
+AVR_MCU ("atmega48a",        ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega48A__",        0x0100, 0x0, 0x1000)
+AVR_MCU ("atmega48p",        ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega48P__",        0x0100, 0x0, 0x1000)
+AVR_MCU ("atmega48pa",       ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega48PA__",       0x0100, 0x0, 0x1000)
+AVR_MCU ("atmega48pb",       ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega48PB__",       0x0100, 0x0, 0x1000)
+AVR_MCU ("atmega88",         ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega88__",         0x0100, 0x0, 0x2000)
+AVR_MCU ("atmega88a",        ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega88A__",        0x0100, 0x0, 0x2000)
+AVR_MCU ("atmega88p",        ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega88P__",        0x0100, 0x0, 0x2000)
+AVR_MCU ("atmega88pa",       ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega88PA__",       0x0100, 0x0, 0x2000)
+AVR_MCU ("atmega88pb",       ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega88PB__",       0x0100, 0x0, 0x2000)
+AVR_MCU ("atmega8515",       ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega8515__",       0x0060, 0x0, 0x2000)
+AVR_MCU ("atmega8535",       ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega8535__",       0x0060, 0x0, 0x2000)
+AVR_MCU ("atmega8hva",       ARCH_AVR4, AVR_ISA_NONE,  "__AVR_ATmega8HVA__",       0x0100, 0x0, 0x2000)
+AVR_MCU ("at90pwm1",         ARCH_AVR4, AVR_ISA_NONE,  "__AVR_AT90PWM1__",         0x0100, 0x0, 0x2000)
+AVR_MCU ("at90pwm2",         ARCH_AVR4, AVR_ISA_NONE,  "__AVR_AT90PWM2__",         0x0100, 0x0, 0x2000)
+AVR_MCU ("at90pwm2b",        ARCH_AVR4, AVR_ISA_NONE,  "__AVR_AT90PWM2B__",        0x0100, 0x0, 0x2000)
+AVR_MCU ("at90pwm3",         ARCH_AVR4, AVR_ISA_NONE,  "__AVR_AT90PWM3__",         0x0100, 0x0, 0x2000)
+AVR_MCU ("at90pwm3b",        ARCH_AVR4, AVR_ISA_NONE,  "__AVR_AT90PWM3B__",        0x0100, 0x0, 0x2000)
+AVR_MCU ("at90pwm81",        ARCH_AVR4, AVR_ISA_NONE,  "__AVR_AT90PWM81__",        0x0100, 0x0, 0x2000)
 /* Enhanced, > 8K, <= 64K.  */
-AVR_MCU ("avr5",             ARCH_AVR5, AVR_ISA_NONE, NULL,                        0x0060, 0x0, 1, 0x4000)
-AVR_MCU ("ata5702m322",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5702M322__",       0x0200, 0x0, 1, 0x10000)
-AVR_MCU ("ata5782",          ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5782__",           0x0200, 0x8000, 1, 0xd000)
-AVR_MCU ("ata5790",          ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5790__",           0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("ata5790n",         ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5790N__",          0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("ata5791",          ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5791__",           0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("ata5795",          ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5795__",           0x0100, 0x0, 1, 0x2000)
-AVR_MCU ("ata5831",          ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5831__",           0x0200, 0x8000, 1, 0xd000)
-AVR_MCU ("ata6613c",         ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA6613C__",          0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("ata6614q",         ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA6614Q__",          0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("ata8210",          ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA8210__",           0x0200, 0x8000, 1, 0xd000)
-AVR_MCU ("ata8510",          ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA8510__",           0x0200, 0x8000, 1, 0xd000)
-AVR_MCU ("atmega16",         ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega16__",          0x0060, 0x0, 1, 0x4000)
-AVR_MCU ("atmega16a",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega16A__",         0x0060, 0x0, 1, 0x4000)
-AVR_MCU ("atmega161",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega161__",         0x0060, 0x0, 1, 0x4000)
-AVR_MCU ("atmega162",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega162__",         0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega163",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega163__",         0x0060, 0x0, 1, 0x4000)
-AVR_MCU ("atmega164a",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega164A__",        0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega164p",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega164P__",        0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega164pa",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega164PA__",       0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega165",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega165__",         0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega165a",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega165A__",        0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega165p",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega165P__",        0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega165pa",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega165PA__",       0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega168",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega168__",         0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega168a",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega168A__",        0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega168p",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega168P__",        0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega168pa",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega168PA__",       0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega168pb",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega168PB__",       0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega169",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega169__",         0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega169a",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega169A__",        0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega169p",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega169P__",        0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega169pa",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega169PA__",       0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega16hvb",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega16HVB__",       0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega16hvbrevb",  ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega16HVBREVB__",   0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega16m1",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega16M1__",        0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega16u4",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega16U4__",        0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega32a",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega32A__",         0x0060, 0x0, 1, 0x8000)
-AVR_MCU ("atmega32",         ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega32__",          0x0060, 0x0, 1, 0x8000)
-AVR_MCU ("atmega323",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega323__",         0x0060, 0x0, 1, 0x8000)
-AVR_MCU ("atmega324a",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega324A__",        0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega324p",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega324P__",        0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega324pa",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega324PA__",       0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega325",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega325__",         0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega325a",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega325A__",        0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega325p",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega325P__",        0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega325pa",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega325PA__",       0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega3250",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega3250__",        0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega3250a",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega3250A__",       0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega3250p",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega3250P__",       0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega3250pa",     ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega3250PA__",      0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega328",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega328__",         0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega328p",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega328P__",        0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega328pb",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega328PB__",       0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega329",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega329__",         0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega329a",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega329A__",        0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega329p",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega329P__",        0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega329pa",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega329PA__",       0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega3290",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega3290__",        0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega3290a",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega3290A__",       0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega3290p",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega3290P__",       0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega3290pa",     ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega3290PA__",      0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega32c1",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega32C1__",        0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega32m1",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega32M1__",        0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega32u4",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega32U4__",        0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega32u6",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega32U6__",        0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega406",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega406__",         0x0100, 0x0, 1, 0xa000)
-AVR_MCU ("atmega64",         ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega64__",          0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("atmega64a",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega64A__",         0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("atmega640",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega640__",         0x0200, 0x0, 1, 0x10000)
-AVR_MCU ("atmega644",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega644__",         0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("atmega644a",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega644A__",        0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("atmega644p",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega644P__",        0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("atmega644pa",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega644PA__",       0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("atmega645",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega645__",         0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("atmega645a",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega645A__",        0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("atmega645p",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega645P__",        0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("atmega6450",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega6450__",        0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("atmega6450a",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega6450A__",       0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("atmega6450p",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega6450P__",       0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("atmega649",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega649__",         0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("atmega649a",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega649A__",        0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("atmega649p",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega649P__",        0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("atmega6490",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega6490__",        0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("atmega16hva",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega16HVA__",       0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega16hva2",     ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega16HVA2__",      0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("atmega32hvb",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega32HVB__",       0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("atmega6490a",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega6490A__",       0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("atmega6490p",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega6490P__",       0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("atmega64c1",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega64C1__",        0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("atmega64m1",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega64M1__",        0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("atmega64hve",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega64HVE__",       0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("atmega64hve2",     ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega64HVE2__",      0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("atmega64rfr2",     ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega64RFR2__",      0x0200, 0x0, 1, 0x10000)
-AVR_MCU ("atmega644rfr2",    ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega644RFR2__",     0x0200, 0x0, 1, 0x10000)
-AVR_MCU ("atmega32hvbrevb",  ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega32HVBREVB__",   0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("at90can32",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_AT90CAN32__",         0x0100, 0x0, 1, 0x8000)
-AVR_MCU ("at90can64",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_AT90CAN64__",         0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("at90pwm161",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_AT90PWM161__",        0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("at90pwm216",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_AT90PWM216__",        0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("at90pwm316",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_AT90PWM316__",        0x0100, 0x0, 1, 0x4000)
-AVR_MCU ("at90scr100",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_AT90SCR100__",        0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("at90usb646",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_AT90USB646__",        0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("at90usb647",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_AT90USB647__",        0x0100, 0x0, 1, 0x10000)
-AVR_MCU ("at94k",            ARCH_AVR5, AVR_ISA_NONE, "__AVR_AT94K__",             0x0060, 0x0, 1, 0x8000)
-AVR_MCU ("m3000",            ARCH_AVR5, AVR_ISA_NONE, "__AVR_M3000__",             0x1000, 0x0, 1, 0x10000)
+AVR_MCU ("avr5",             ARCH_AVR5, AVR_ISA_NONE, NULL,                        0x0060, 0x0, 0x4000)
+AVR_MCU ("ata5702m322",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5702M322__",       0x0200, 0x0, 0x10000)
+AVR_MCU ("ata5782",          ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5782__",           0x0200, 0x8000, 0xd000)
+AVR_MCU ("ata5790",          ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5790__",           0x0100, 0x0, 0x4000)
+AVR_MCU ("ata5790n",         ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5790N__",          0x0100, 0x0, 0x4000)
+AVR_MCU ("ata5791",          ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5791__",           0x0100, 0x0, 0x4000)
+AVR_MCU ("ata5795",          ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5795__",           0x0100, 0x0, 0x2000)
+AVR_MCU ("ata5831",          ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA5831__",           0x0200, 0x8000, 0xd000)
+AVR_MCU ("ata6613c",         ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA6613C__",          0x0100, 0x0, 0x4000)
+AVR_MCU ("ata6614q",         ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA6614Q__",          0x0100, 0x0, 0x8000)
+AVR_MCU ("ata8210",          ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA8210__",           0x0200, 0x8000, 0xd000)
+AVR_MCU ("ata8510",          ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATA8510__",           0x0200, 0x8000, 0xd000)
+AVR_MCU ("atmega16",         ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega16__",          0x0060, 0x0, 0x4000)
+AVR_MCU ("atmega16a",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega16A__",         0x0060, 0x0, 0x4000)
+AVR_MCU ("atmega161",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega161__",         0x0060, 0x0, 0x4000)
+AVR_MCU ("atmega162",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega162__",         0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega163",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega163__",         0x0060, 0x0, 0x4000)
+AVR_MCU ("atmega164a",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega164A__",        0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega164p",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega164P__",        0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega164pa",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega164PA__",       0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega165",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega165__",         0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega165a",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega165A__",        0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega165p",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega165P__",        0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega165pa",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega165PA__",       0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega168",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega168__",         0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega168a",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega168A__",        0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega168p",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega168P__",        0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega168pa",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega168PA__",       0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega168pb",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega168PB__",       0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega169",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega169__",         0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega169a",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega169A__",        0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega169p",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega169P__",        0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega169pa",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega169PA__",       0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega16hvb",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega16HVB__",       0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega16hvbrevb",  ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega16HVBREVB__",   0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega16m1",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega16M1__",        0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega16u4",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega16U4__",        0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega32a",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega32A__",         0x0060, 0x0, 0x8000)
+AVR_MCU ("atmega32",         ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega32__",          0x0060, 0x0, 0x8000)
+AVR_MCU ("atmega323",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega323__",         0x0060, 0x0, 0x8000)
+AVR_MCU ("atmega324a",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega324A__",        0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega324p",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega324P__",        0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega324pa",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega324PA__",       0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega325",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega325__",         0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega325a",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega325A__",        0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega325p",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega325P__",        0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega325pa",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega325PA__",       0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega3250",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega3250__",        0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega3250a",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega3250A__",       0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega3250p",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega3250P__",       0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega3250pa",     ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega3250PA__",      0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega328",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega328__",         0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega328p",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega328P__",        0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega328pb",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega328PB__",       0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega329",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega329__",         0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega329a",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega329A__",        0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega329p",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega329P__",        0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega329pa",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega329PA__",       0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega3290",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega3290__",        0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega3290a",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega3290A__",       0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega3290p",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega3290P__",       0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega3290pa",     ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega3290PA__",      0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega32c1",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega32C1__",        0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega32m1",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega32M1__",        0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega32u4",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega32U4__",        0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega32u6",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega32U6__",        0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega406",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega406__",         0x0100, 0x0, 0xa000)
+AVR_MCU ("atmega64",         ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega64__",          0x0100, 0x0, 0x10000)
+AVR_MCU ("atmega64a",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega64A__",         0x0100, 0x0, 0x10000)
+AVR_MCU ("atmega640",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega640__",         0x0200, 0x0, 0x10000)
+AVR_MCU ("atmega644",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega644__",         0x0100, 0x0, 0x10000)
+AVR_MCU ("atmega644a",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega644A__",        0x0100, 0x0, 0x10000)
+AVR_MCU ("atmega644p",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega644P__",        0x0100, 0x0, 0x10000)
+AVR_MCU ("atmega644pa",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega644PA__",       0x0100, 0x0, 0x10000)
+AVR_MCU ("atmega645",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega645__",         0x0100, 0x0, 0x10000)
+AVR_MCU ("atmega645a",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega645A__",        0x0100, 0x0, 0x10000)
+AVR_MCU ("atmega645p",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega645P__",        0x0100, 0x0, 0x10000)
+AVR_MCU ("atmega6450",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega6450__",        0x0100, 0x0, 0x10000)
+AVR_MCU ("atmega6450a",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega6450A__",       0x0100, 0x0, 0x10000)
+AVR_MCU ("atmega6450p",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega6450P__",       0x0100, 0x0, 0x10000)
+AVR_MCU ("atmega649",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega649__",         0x0100, 0x0, 0x10000)
+AVR_MCU ("atmega649a",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega649A__",        0x0100, 0x0, 0x10000)
+AVR_MCU ("atmega649p",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega649P__",        0x0100, 0x0, 0x10000)
+AVR_MCU ("atmega6490",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega6490__",        0x0100, 0x0, 0x10000)
+AVR_MCU ("atmega16hva",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega16HVA__",       0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega16hva2",     ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega16HVA2__",      0x0100, 0x0, 0x4000)
+AVR_MCU ("atmega32hvb",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega32HVB__",       0x0100, 0x0, 0x8000)
+AVR_MCU ("atmega6490a",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega6490A__",       0x0100, 0x0, 0x10000)
+AVR_MCU ("atmega6490p",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega6490P__",       0x0100, 0x0, 0x10000)
+AVR_MCU ("atmega64c1",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega64C1__",        0x0100, 0x0, 0x10000)
+AVR_MCU ("atmega64m1",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega64M1__",        0x0100, 0x0, 0x10000)
+AVR_MCU ("atmega64hve",      ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega64HVE__",       0x0100, 0x0, 0x10000)
+AVR_MCU ("atmega64hve2",     ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega64HVE2__",      0x0100, 0x0, 0x10000)
+AVR_MCU ("atmega64rfr2",     ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega64RFR2__",      0x0200, 0x0, 0x10000)
+AVR_MCU ("atmega644rfr2",    ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega644RFR2__",     0x0200, 0x0, 0x10000)
+AVR_MCU ("atmega32hvbrevb",  ARCH_AVR5, AVR_ISA_NONE, "__AVR_ATmega32HVBREVB__",   0x0100, 0x0, 0x8000)
+AVR_MCU ("at90can32",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_AT90CAN32__",         0x0100, 0x0, 0x8000)
+AVR_MCU ("at90can64",        ARCH_AVR5, AVR_ISA_NONE, "__AVR_AT90CAN64__",         0x0100, 0x0, 0x10000)
+AVR_MCU ("at90pwm161",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_AT90PWM161__",        0x0100, 0x0, 0x4000)
+AVR_MCU ("at90pwm216",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_AT90PWM216__",        0x0100, 0x0, 0x4000)
+AVR_MCU ("at90pwm316",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_AT90PWM316__",        0x0100, 0x0, 0x4000)
+AVR_MCU ("at90scr100",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_AT90SCR100__",        0x0100, 0x0, 0x10000)
+AVR_MCU ("at90usb646",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_AT90USB646__",        0x0100, 0x0, 0x10000)
+AVR_MCU ("at90usb647",       ARCH_AVR5, AVR_ISA_NONE, "__AVR_AT90USB647__",        0x0100, 0x0, 0x10000)
+AVR_MCU ("at94k",            ARCH_AVR5, AVR_ISA_NONE, "__AVR_AT94K__",             0x0060, 0x0, 0x8000)
+AVR_MCU ("m3000",            ARCH_AVR5, AVR_ISA_NONE, "__AVR_M3000__",             0x1000, 0x0, 0x10000)
 /* Enhanced, == 128K.  */
-AVR_MCU ("avr51",            ARCH_AVR51, AVR_ISA_NONE, NULL,                       0x0100, 0x0, 2, 0x20000)
-AVR_MCU ("atmega128",        ARCH_AVR51, AVR_ISA_NONE, "__AVR_ATmega128__",        0x0100, 0x0, 2, 0x20000)
-AVR_MCU ("atmega128a",       ARCH_AVR51, AVR_ISA_NONE, "__AVR_ATmega128A__",       0x0100, 0x0, 2, 0x20000)
-AVR_MCU ("atmega1280",       ARCH_AVR51, AVR_ISA_NONE, "__AVR_ATmega1280__",       0x0200, 0x0, 2, 0x20000)
-AVR_MCU ("atmega1281",       ARCH_AVR51, AVR_ISA_NONE, "__AVR_ATmega1281__",       0x0200, 0x0, 2, 0x20000)
-AVR_MCU ("atmega1284",       ARCH_AVR51, AVR_ISA_NONE, "__AVR_ATmega1284__",       0x0100, 0x0, 2, 0x20000)
-AVR_MCU ("atmega1284p",      ARCH_AVR51, AVR_ISA_NONE, "__AVR_ATmega1284P__",      0x0100, 0x0, 2, 0x20000)
-AVR_MCU ("atmega128rfa1",    ARCH_AVR51, AVR_ISA_NONE, "__AVR_ATmega128RFA1__",    0x0200, 0x0, 2, 0x20000)
-AVR_MCU ("atmega128rfr2",    ARCH_AVR51, AVR_ISA_NONE, "__AVR_ATmega128RFR2__",    0x0200, 0x0, 2, 0x20000)
-AVR_MCU ("atmega1284rfr2",   ARCH_AVR51, AVR_ISA_NONE, "__AVR_ATmega1284RFR2__",   0x0200, 0x0, 2, 0x20000)
-AVR_MCU ("at90can128",       ARCH_AVR51, AVR_ISA_NONE, "__AVR_AT90CAN128__",       0x0100, 0x0, 2, 0x20000)
-AVR_MCU ("at90usb1286",      ARCH_AVR51, AVR_ISA_NONE, "__AVR_AT90USB1286__",      0x0100, 0x0, 2, 0x20000)
-AVR_MCU ("at90usb1287",      ARCH_AVR51, AVR_ISA_NONE, "__AVR_AT90USB1287__",      0x0100, 0x0, 2, 0x20000)
+AVR_MCU ("avr51",            ARCH_AVR51, AVR_ISA_NONE, NULL,                       0x0100, 0x0, 0x20000)
+AVR_MCU ("atmega128",        ARCH_AVR51, AVR_ISA_NONE, "__AVR_ATmega128__",        0x0100, 0x0, 0x20000)
+AVR_MCU ("atmega128a",       ARCH_AVR51, AVR_ISA_NONE, "__AVR_ATmega128A__",       0x0100, 0x0, 0x20000)
+AVR_MCU ("atmega1280",       ARCH_AVR51, AVR_ISA_NONE, "__AVR_ATmega1280__",       0x0200, 0x0, 0x20000)
+AVR_MCU ("atmega1281",       ARCH_AVR51, AVR_ISA_NONE, "__AVR_ATmega1281__",       0x0200, 0x0, 0x20000)
+AVR_MCU ("atmega1284",       ARCH_AVR51, AVR_ISA_NONE, "__AVR_ATmega1284__",       0x0100, 0x0, 0x20000)
+AVR_MCU ("atmega1284p",      ARCH_AVR51, AVR_ISA_NONE, "__AVR_ATmega1284P__",      0x0100, 0x0, 0x20000)
+AVR_MCU ("atmega128rfa1",    ARCH_AVR51, AVR_ISA_NONE, "__AVR_ATmega128RFA1__",    0x0200, 0x0, 0x20000)
+AVR_MCU ("atmega128rfr2",    ARCH_AVR51, AVR_ISA_NONE, "__AVR_ATmega128RFR2__",    0x0200, 0x0, 0x20000)
+AVR_MCU ("atmega1284rfr2",   ARCH_AVR51, AVR_ISA_NONE, "__AVR_ATmega1284RFR2__",   0x0200, 0x0, 0x20000)
+AVR_MCU ("at90can128",       ARCH_AVR51, AVR_ISA_NONE, "__AVR_AT90CAN128__",       0x0100, 0x0, 0x20000)
+AVR_MCU ("at90usb1286",      ARCH_AVR51, AVR_ISA_NONE, "__AVR_AT90USB1286__",      0x0100, 0x0, 0x20000)
+AVR_MCU ("at90usb1287",      ARCH_AVR51, AVR_ISA_NONE, "__AVR_AT90USB1287__",      0x0100, 0x0, 0x20000)
 /* 3-Byte PC.  */
-AVR_MCU ("avr6",             ARCH_AVR6, AVR_ISA_NONE, NULL,                        0x0200, 0x0, 4, 0x40000)
-AVR_MCU ("atmega2560",       ARCH_AVR6, AVR_ISA_NONE, "__AVR_ATmega2560__",        0x0200, 0x0, 4, 0x40000)
-AVR_MCU ("atmega2561",       ARCH_AVR6, AVR_ISA_NONE, "__AVR_ATmega2561__",        0x0200, 0x0, 4, 0x40000)
-AVR_MCU ("atmega256rfr2",    ARCH_AVR6, AVR_ISA_NONE, "__AVR_ATmega256RFR2__",     0x0200, 0x0, 4, 0x40000)
-AVR_MCU ("atmega2564rfr2",   ARCH_AVR6, AVR_ISA_NONE, "__AVR_ATmega2564RFR2__",    0x0200, 0x0, 4, 0x40000)
+AVR_MCU ("avr6",             ARCH_AVR6, AVR_ISA_NONE, NULL,                        0x0200, 0x0, 0x40000)
+AVR_MCU ("atmega2560",       ARCH_AVR6, AVR_ISA_NONE, "__AVR_ATmega2560__",        0x0200, 0x0, 0x40000)
+AVR_MCU ("atmega2561",       ARCH_AVR6, AVR_ISA_NONE, "__AVR_ATmega2561__",        0x0200, 0x0, 0x40000)
+AVR_MCU ("atmega256rfr2",    ARCH_AVR6, AVR_ISA_NONE, "__AVR_ATmega256RFR2__",     0x0200, 0x0, 0x40000)
+AVR_MCU ("atmega2564rfr2",   ARCH_AVR6, AVR_ISA_NONE, "__AVR_ATmega2564RFR2__",    0x0200, 0x0, 0x40000)
 /* Xmega, 16K <= Flash < 64K, RAM <= 64K */
-AVR_MCU ("avrxmega2",        ARCH_AVRXMEGA2, AVR_ISA_NONE, NULL,                   0x2000, 0x0, 1, 0x9000)
-AVR_MCU ("atxmega8e5",       ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_ATxmega8E5__",   0x2000, 0x0, 1, 0x2800)
-AVR_MCU ("atxmega16a4",      ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_ATxmega16A4__",  0x2000, 0x0, 1, 0x5000)
-AVR_MCU ("atxmega16d4",      ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_ATxmega16D4__",  0x2000, 0x0, 1, 0x5000)
-AVR_MCU ("atxmega16e5",      ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_ATxmega16E5__",  0x2000, 0x0, 1, 0x5000)
-AVR_MCU ("atxmega32a4",      ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_ATxmega32A4__",  0x2000, 0x0, 1, 0x9000)
-AVR_MCU ("atxmega32c3",      ARCH_AVRXMEGA2, AVR_ISA_RMW,  "__AVR_ATxmega32C3__",  0x2000, 0x0, 1, 0x9000)
-AVR_MCU ("atxmega32d3",      ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_ATxmega32D3__",  0x2000, 0x0, 1, 0x9000)
-AVR_MCU ("atxmega32d4",      ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_ATxmega32D4__",  0x2000, 0x0, 1, 0x9000)
-AVR_MCU ("atxmega16a4u",     ARCH_AVRXMEGA2, AVR_ISA_RMW,  "__AVR_ATxmega16A4U__", 0x2000, 0x0, 1, 0x5000)
-AVR_MCU ("atxmega16c4",      ARCH_AVRXMEGA2, AVR_ISA_RMW,  "__AVR_ATxmega16C4__",  0x2000, 0x0, 1, 0x5000)
-AVR_MCU ("atxmega32a4u",     ARCH_AVRXMEGA2, AVR_ISA_RMW,  "__AVR_ATxmega32A4U__", 0x2000, 0x0, 1, 0x9000)
-AVR_MCU ("atxmega32c4",      ARCH_AVRXMEGA2, AVR_ISA_RMW,  "__AVR_ATxmega32C4__",  0x2000, 0x0, 1, 0x9000)
-AVR_MCU ("atxmega32e5",      ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_ATxmega32E5__",  0x2000, 0x0, 1, 0x9000)
+AVR_MCU ("avrxmega2",        ARCH_AVRXMEGA2, AVR_ISA_NONE, NULL,                   0x2000, 0x0, 0x9000)
+AVR_MCU ("atxmega8e5",       ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_ATxmega8E5__",   0x2000, 0x0, 0x2800)
+AVR_MCU ("atxmega16a4",      ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_ATxmega16A4__",  0x2000, 0x0, 0x5000)
+AVR_MCU ("atxmega16d4",      ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_ATxmega16D4__",  0x2000, 0x0, 0x5000)
+AVR_MCU ("atxmega16e5",      ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_ATxmega16E5__",  0x2000, 0x0, 0x5000)
+AVR_MCU ("atxmega32a4",      ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_ATxmega32A4__",  0x2000, 0x0, 0x9000)
+AVR_MCU ("atxmega32c3",      ARCH_AVRXMEGA2, AVR_ISA_RMW,  "__AVR_ATxmega32C3__",  0x2000, 0x0, 0x9000)
+AVR_MCU ("atxmega32d3",      ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_ATxmega32D3__",  0x2000, 0x0, 0x9000)
+AVR_MCU ("atxmega32d4",      ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_ATxmega32D4__",  0x2000, 0x0, 0x9000)
+AVR_MCU ("atxmega16a4u",     ARCH_AVRXMEGA2, AVR_ISA_RMW,  "__AVR_ATxmega16A4U__", 0x2000, 0x0, 0x5000)
+AVR_MCU ("atxmega16c4",      ARCH_AVRXMEGA2, AVR_ISA_RMW,  "__AVR_ATxmega16C4__",  0x2000, 0x0, 0x5000)
+AVR_MCU ("atxmega32a4u",     ARCH_AVRXMEGA2, AVR_ISA_RMW,  "__AVR_ATxmega32A4U__", 0x2000, 0x0, 0x9000)
+AVR_MCU ("atxmega32c4",      ARCH_AVRXMEGA2, AVR_ISA_RMW,  "__AVR_ATxmega32C4__",  0x2000, 0x0, 0x9000)
+AVR_MCU ("atxmega32e5",      ARCH_AVRXMEGA2, AVR_ISA_NONE, "__AVR_ATxmega32E5__",  0x2000, 0x0, 0x9000)
 /* Xmega, 64K < Flash <= 128K, RAM <= 64K */
-AVR_MCU ("avrxmega4",        ARCH_AVRXMEGA4, AVR_ISA_NONE, NULL,                   0x2000, 0x0, 2, 0x11000)
-AVR_MCU ("atxmega64a3",      ARCH_AVRXMEGA4, AVR_ISA_NONE, "__AVR_ATxmega64A3__",  0x2000, 0x0, 2, 0x11000)
-AVR_MCU ("atxmega64d3",      ARCH_AVRXMEGA4, AVR_ISA_NONE, "__AVR_ATxmega64D3__",  0x2000, 0x0, 2, 0x11000)
-AVR_MCU ("atxmega64a3u",     ARCH_AVRXMEGA4, AVR_ISA_RMW,  "__AVR_ATxmega64A3U__", 0x2000, 0x0, 2, 0x11000)
-AVR_MCU ("atxmega64a4u",     ARCH_AVRXMEGA4, AVR_ISA_RMW,  "__AVR_ATxmega64A4U__", 0x2000, 0x0, 2, 0x11000)
-AVR_MCU ("atxmega64b1",      ARCH_AVRXMEGA4, AVR_ISA_RMW,  "__AVR_ATxmega64B1__",  0x2000, 0x0, 2, 0x11000)
-AVR_MCU ("atxmega64b3",      ARCH_AVRXMEGA4, AVR_ISA_RMW,  "__AVR_ATxmega64B3__",  0x2000, 0x0, 2, 0x11000)
-AVR_MCU ("atxmega64c3",      ARCH_AVRXMEGA4, AVR_ISA_RMW,  "__AVR_ATxmega64C3__",  0x2000, 0x0, 2, 0x11000)
-AVR_MCU ("atxmega64d4",      ARCH_AVRXMEGA4, AVR_ISA_NONE, "__AVR_ATxmega64D4__",  0x2000, 0x0, 2, 0x11000)
+AVR_MCU ("avrxmega4",        ARCH_AVRXMEGA4, AVR_ISA_NONE, NULL,                   0x2000, 0x0, 0x11000)
+AVR_MCU ("atxmega64a3",      ARCH_AVRXMEGA4, AVR_ISA_NONE, "__AVR_ATxmega64A3__",  0x2000, 0x0, 0x11000)
+AVR_MCU ("atxmega64d3",      ARCH_AVRXMEGA4, AVR_ISA_NONE, "__AVR_ATxmega64D3__",  0x2000, 0x0, 0x11000)
+AVR_MCU ("atxmega64a3u",     ARCH_AVRXMEGA4, AVR_ISA_RMW,  "__AVR_ATxmega64A3U__", 0x2000, 0x0, 0x11000)
+AVR_MCU ("atxmega64a4u",     ARCH_AVRXMEGA4, AVR_ISA_RMW,  "__AVR_ATxmega64A4U__", 0x2000, 0x0, 0x11000)
+AVR_MCU ("atxmega64b1",      ARCH_AVRXMEGA4, AVR_ISA_RMW,  "__AVR_ATxmega64B1__",  0x2000, 0x0, 0x11000)
+AVR_MCU ("atxmega64b3",      ARCH_AVRXMEGA4, AVR_ISA_RMW,  "__AVR_ATxmega64B3__",  0x2000, 0x0, 0x11000)
+AVR_MCU ("atxmega64c3",      ARCH_AVRXMEGA4, AVR_ISA_RMW,  "__AVR_ATxmega64C3__",  0x2000, 0x0, 0x11000)
+AVR_MCU ("atxmega64d4",      ARCH_AVRXMEGA4, AVR_ISA_NONE, "__AVR_ATxmega64D4__",  0x2000, 0x0, 0x11000)
 /* Xmega, 64K < Flash <= 128K, RAM > 64K */
-AVR_MCU ("avrxmega5",        ARCH_AVRXMEGA5, AVR_ISA_NONE, NULL,                   0x2000, 0x0, 2, 0x11000)
-AVR_MCU ("atxmega64a1",      ARCH_AVRXMEGA5, AVR_ISA_NONE, "__AVR_ATxmega64A1__",  0x2000, 0x0, 2, 0x11000)
-AVR_MCU ("atxmega64a1u",     ARCH_AVRXMEGA5, AVR_ISA_RMW,  "__AVR_ATxmega64A1U__", 0x2000, 0x0, 2, 0x11000)
+AVR_MCU ("avrxmega5",        ARCH_AVRXMEGA5, AVR_ISA_NONE, NULL,                   0x2000, 0x0, 0x11000)
+AVR_MCU ("atxmega64a1",      ARCH_AVRXMEGA5, AVR_ISA_NONE, "__AVR_ATxmega64A1__",  0x2000, 0x0, 0x11000)
+AVR_MCU ("atxmega64a1u",     ARCH_AVRXMEGA5, AVR_ISA_RMW,  "__AVR_ATxmega64A1U__", 0x2000, 0x0, 0x11000)
 /* Xmega, 128K < Flash, RAM <= 64K */
-AVR_MCU ("avrxmega6",        ARCH_AVRXMEGA6, AVR_ISA_NONE, NULL,                       0x2000, 0x0, 6, 0x60000)
-AVR_MCU ("atxmega128a3",     ARCH_AVRXMEGA6, AVR_ISA_NONE, "__AVR_ATxmega128A3__",     0x2000, 0x0, 3, 0x22000)
-AVR_MCU ("atxmega128d3",     ARCH_AVRXMEGA6, AVR_ISA_NONE, "__AVR_ATxmega128D3__",     0x2000, 0x0, 3, 0x22000)
-AVR_MCU ("atxmega192a3",     ARCH_AVRXMEGA6, AVR_ISA_NONE, "__AVR_ATxmega192A3__",     0x2000, 0x0, 4, 0x32000)
-AVR_MCU ("atxmega192d3",     ARCH_AVRXMEGA6, AVR_ISA_NONE, "__AVR_ATxmega192D3__",     0x2000, 0x0, 4, 0x32000)
-AVR_MCU ("atxmega256a3",     ARCH_AVRXMEGA6, AVR_ISA_NONE, "__AVR_ATxmega256A3__",     0x2000, 0x0, 5, 0x42000)
-AVR_MCU ("atxmega256a3b",    ARCH_AVRXMEGA6, AVR_ISA_NONE, "__AVR_ATxmega256A3B__",    0x2000, 0x0, 5, 0x42000)
-AVR_MCU ("atxmega256a3bu",   ARCH_AVRXMEGA6, AVR_ISA_NONE, "__AVR_ATxmega256A3BU__",   0x2000, 0x0, 5, 0x42000)
-AVR_MCU ("atxmega256d3",     ARCH_AVRXMEGA6, AVR_ISA_NONE, "__AVR_ATxmega256D3__",     0x2000, 0x0, 5, 0x42000)
-AVR_MCU ("atxmega128a3u",    ARCH_AVRXMEGA6, AVR_ISA_RMW,  "__AVR_ATxmega128A3U__",    0x2000, 0x0, 3, 0x22000)
-AVR_MCU ("atxmega128b1",     ARCH_AVRXMEGA6, AVR_ISA_RMW,  "__AVR_ATxmega128B1__",     0x2000, 0x0, 3, 0x22000)
-AVR_MCU ("atxmega128b3",     ARCH_AVRXMEGA6, AVR_ISA_RMW,  "__AVR_ATxmega128B3__",     0x2000, 0x0, 3, 0x22000)
-AVR_MCU ("atxmega128c3",     ARCH_AVRXMEGA6, AVR_ISA_RMW,  "__AVR_ATxmega128C3__",     0x2000, 0x0, 3, 0x22000)
-AVR_MCU ("atxmega128d4",     ARCH_AVRXMEGA6, AVR_ISA_NONE, "__AVR_ATxmega128D4__",     0x2000, 0x0, 3, 0x22000)
-AVR_MCU ("atxmega192a3u",    ARCH_AVRXMEGA6, AVR_ISA_RMW,  "__AVR_ATxmega192A3U__",    0x2000, 0x0, 4, 0x32000)
-AVR_MCU ("atxmega192c3",     ARCH_AVRXMEGA6, AVR_ISA_RMW,  "__AVR_ATxmega192C3__",     0x2000, 0x0, 4, 0x32000)
-AVR_MCU ("atxmega256a3u",    ARCH_AVRXMEGA6, AVR_ISA_RMW,  "__AVR_ATxmega256A3U__",    0x2000, 0x0, 5, 0x42000)
-AVR_MCU ("atxmega256c3",     ARCH_AVRXMEGA6, AVR_ISA_RMW,  "__AVR_ATxmega256C3__",     0x2000, 0x0, 5, 0x42000)
-AVR_MCU ("atxmega384c3",     ARCH_AVRXMEGA6, AVR_ISA_RMW,  "__AVR_ATxmega384C3__",     0x2000, 0x0, 7, 0x62000)
-AVR_MCU ("atxmega384d3",     ARCH_AVRXMEGA6, AVR_ISA_NONE, "__AVR_ATxmega384D3__",     0x2000, 0x0, 7, 0x62000)
+AVR_MCU ("avrxmega6",        ARCH_AVRXMEGA6, AVR_ISA_NONE, NULL,                       0x2000, 0x0, 0x60000)
+AVR_MCU ("atxmega128a3",     ARCH_AVRXMEGA6, AVR_ISA_NONE, "__AVR_ATxmega128A3__",     0x2000, 0x0, 0x22000)
+AVR_MCU ("atxmega128d3",     ARCH_AVRXMEGA6, AVR_ISA_NONE, "__AVR_ATxmega128D3__",     0x2000, 0x0, 0x22000)
+AVR_MCU ("atxmega192a3",     ARCH_AVRXMEGA6, AVR_ISA_NONE, "__AVR_ATxmega192A3__",     0x2000, 0x0, 0x32000)
+AVR_MCU ("atxmega192d3",     ARCH_AVRXMEGA6, AVR_ISA_NONE, "__AVR_ATxmega192D3__",     0x2000, 0x0, 0x32000)
+AVR_MCU ("atxmega256a3",     ARCH_AVRXMEGA6, AVR_ISA_NONE, "__AVR_ATxmega256A3__",     0x2000, 0x0, 0x42000)
+AVR_MCU ("atxmega256a3b",    ARCH_AVRXMEGA6, AVR_ISA_NONE, "__AVR_ATxmega256A3B__",    0x2000, 0x0, 0x42000)
+AVR_MCU ("atxmega256a3bu",   ARCH_AVRXMEGA6, AVR_ISA_NONE, "__AVR_ATxmega256A3BU__",   0x2000, 0x0, 0x42000)
+AVR_MCU ("atxmega256d3",     ARCH_AVRXMEGA6, AVR_ISA_NONE, "__AVR_ATxmega256D3__",     0x2000, 0x0, 0x42000)
+AVR_MCU ("atxmega128a3u",    ARCH_AVRXMEGA6, AVR_ISA_RMW,  "__AVR_ATxmega128A3U__",    0x2000, 0x0, 0x22000)
+AVR_MCU ("atxmega128b1",     ARCH_AVRXMEGA6, AVR_ISA_RMW,  "__AVR_ATxmega128B1__",     0x2000, 0x0, 0x22000)
+AVR_MCU ("atxmega128b3",     ARCH_AVRXMEGA6, AVR_ISA_RMW,  "__AVR_ATxmega128B3__",     0x2000, 0x0, 0x22000)
+AVR_MCU ("atxmega128c3",     ARCH_AVRXMEGA6, AVR_ISA_RMW,  "__AVR_ATxmega128C3__",     0x2000, 0x0, 0x22000)
+AVR_MCU ("atxmega128d4",     ARCH_AVRXMEGA6, AVR_ISA_NONE, "__AVR_ATxmega128D4__",     0x2000, 0x0, 0x22000)
+AVR_MCU ("atxmega192a3u",    ARCH_AVRXMEGA6, AVR_ISA_RMW,  "__AVR_ATxmega192A3U__",    0x2000, 0x0, 0x32000)
+AVR_MCU ("atxmega192c3",     ARCH_AVRXMEGA6, AVR_ISA_RMW,  "__AVR_ATxmega192C3__",     0x2000, 0x0, 0x32000)
+AVR_MCU ("atxmega256a3u",    ARCH_AVRXMEGA6, AVR_ISA_RMW,  "__AVR_ATxmega256A3U__",    0x2000, 0x0, 0x42000)
+AVR_MCU ("atxmega256c3",     ARCH_AVRXMEGA6, AVR_ISA_RMW,  "__AVR_ATxmega256C3__",     0x2000, 0x0, 0x42000)
+AVR_MCU ("atxmega384c3",     ARCH_AVRXMEGA6, AVR_ISA_RMW,  "__AVR_ATxmega384C3__",     0x2000, 0x0, 0x62000)
+AVR_MCU ("atxmega384d3",     ARCH_AVRXMEGA6, AVR_ISA_NONE, "__AVR_ATxmega384D3__",     0x2000, 0x0, 0x62000)
 /* Xmega, 128K < Flash, RAM > 64K RAM.  */
-AVR_MCU ("avrxmega7",        ARCH_AVRXMEGA7, AVR_ISA_NONE, NULL,                       0x2000, 0x0, 3, 0x22000)
-AVR_MCU ("atxmega128a1",     ARCH_AVRXMEGA7, AVR_ISA_NONE, "__AVR_ATxmega128A1__",     0x2000, 0x0, 3, 0x22000)
-AVR_MCU ("atxmega128a1u",    ARCH_AVRXMEGA7, AVR_ISA_RMW,  "__AVR_ATxmega128A1U__",    0x2000, 0x0, 3, 0x22000)
-AVR_MCU ("atxmega128a4u",    ARCH_AVRXMEGA7, AVR_ISA_RMW,  "__AVR_ATxmega128A4U__",    0x2000, 0x0, 3, 0x22000)
+AVR_MCU ("avrxmega7",        ARCH_AVRXMEGA7, AVR_ISA_NONE, NULL,                       0x2000, 0x0, 0x22000)
+AVR_MCU ("atxmega128a1",     ARCH_AVRXMEGA7, AVR_ISA_NONE, "__AVR_ATxmega128A1__",     0x2000, 0x0, 0x22000)
+AVR_MCU ("atxmega128a1u",    ARCH_AVRXMEGA7, AVR_ISA_RMW,  "__AVR_ATxmega128A1U__",    0x2000, 0x0, 0x22000)
+AVR_MCU ("atxmega128a4u",    ARCH_AVRXMEGA7, AVR_ISA_RMW,  "__AVR_ATxmega128A4U__",    0x2000, 0x0, 0x22000)
 /* Tiny family */
-AVR_MCU ("avrtiny",          ARCH_AVRTINY, AVR_ISA_NONE, NULL,                     0x0040, 0x0, 1, 0x400)
-AVR_MCU ("attiny4",          ARCH_AVRTINY, AVR_ISA_LDS,  "__AVR_ATtiny4__",        0x0040, 0x0, 1, 0x200)
-AVR_MCU ("attiny5",          ARCH_AVRTINY, AVR_ISA_LDS,  "__AVR_ATtiny5__",        0x0040, 0x0, 1, 0x200)
-AVR_MCU ("attiny9",          ARCH_AVRTINY, AVR_ISA_LDS,  "__AVR_ATtiny9__",        0x0040, 0x0, 1, 0x400)
-AVR_MCU ("attiny10",         ARCH_AVRTINY, AVR_ISA_LDS,  "__AVR_ATtiny10__",       0x0040, 0x0, 1, 0x400)
-AVR_MCU ("attiny20",         ARCH_AVRTINY, AVR_ISA_LDS,  "__AVR_ATtiny20__",       0x0040, 0x0, 1, 0x800)
-AVR_MCU ("attiny40",         ARCH_AVRTINY, AVR_ISA_NONE, "__AVR_ATtiny40__",       0x0040, 0x0, 1, 0x1000)
+AVR_MCU ("avrtiny",          ARCH_AVRTINY, AVR_ISA_NONE, NULL,                     0x0040, 0x0, 0x400)
+AVR_MCU ("attiny4",          ARCH_AVRTINY, AVR_ISA_LDS,  "__AVR_ATtiny4__",        0x0040, 0x0, 0x200)
+AVR_MCU ("attiny5",          ARCH_AVRTINY, AVR_ISA_LDS,  "__AVR_ATtiny5__",        0x0040, 0x0, 0x200)
+AVR_MCU ("attiny9",          ARCH_AVRTINY, AVR_ISA_LDS,  "__AVR_ATtiny9__",        0x0040, 0x0, 0x400)
+AVR_MCU ("attiny10",         ARCH_AVRTINY, AVR_ISA_LDS,  "__AVR_ATtiny10__",       0x0040, 0x0, 0x400)
+AVR_MCU ("attiny20",         ARCH_AVRTINY, AVR_ISA_LDS,  "__AVR_ATtiny20__",       0x0040, 0x0, 0x800)
+AVR_MCU ("attiny40",         ARCH_AVRTINY, AVR_ISA_NONE, "__AVR_ATtiny40__",       0x0040, 0x0, 0x1000)
 /* Assembler only.  */
-AVR_MCU ("avr1",             ARCH_AVR1, AVR_ISA_NONE, NULL,                        0x0060, 0x0, 1, 0x400)
-AVR_MCU ("at90s1200",        ARCH_AVR1, AVR_ISA_NONE, "__AVR_AT90S1200__",         0x0060, 0x0, 1, 0x400)
-AVR_MCU ("attiny11",         ARCH_AVR1, AVR_ISA_NONE, "__AVR_ATtiny11__",          0x0060, 0x0, 1, 0x400)
-AVR_MCU ("attiny12",         ARCH_AVR1, AVR_ISA_NONE, "__AVR_ATtiny12__",          0x0060, 0x0, 1, 0x400)
-AVR_MCU ("attiny15",         ARCH_AVR1, AVR_ISA_NONE, "__AVR_ATtiny15__",          0x0060, 0x0, 1, 0x400)
-AVR_MCU ("attiny28",         ARCH_AVR1, AVR_ISA_NONE, "__AVR_ATtiny28__",          0x0060, 0x0, 1, 0x800)
+AVR_MCU ("avr1",             ARCH_AVR1, AVR_ISA_NONE, NULL,                        0x0060, 0x0, 0x400)
+AVR_MCU ("at90s1200",        ARCH_AVR1, AVR_ISA_NONE, "__AVR_AT90S1200__",         0x0060, 0x0, 0x400)
+AVR_MCU ("attiny11",         ARCH_AVR1, AVR_ISA_NONE, "__AVR_ATtiny11__",          0x0060, 0x0, 0x400)
+AVR_MCU ("attiny12",         ARCH_AVR1, AVR_ISA_NONE, "__AVR_ATtiny12__",          0x0060, 0x0, 0x400)
+AVR_MCU ("attiny15",         ARCH_AVR1, AVR_ISA_NONE, "__AVR_ATtiny15__",          0x0060, 0x0, 0x400)
+AVR_MCU ("attiny28",         ARCH_AVR1, AVR_ISA_NONE, "__AVR_ATtiny28__",          0x0060, 0x0, 0x800)
diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c
index b7fd8798..48f4788 100644
--- a/gcc/config/avr/avr.c
+++ b/gcc/config/avr/avr.c
@@ -707,7 +707,7 @@ avr_set_core_architecture (void)
         {
           avr_arch = &avr_arch_types[mcu->arch_id];
           if (avr_n_flash < 0)
-            avr_n_flash = mcu->n_flash;
+            avr_n_flash = 1 + (mcu->flash_size - 1) / 0x10000;
 
           return true;
         }
diff --git a/gcc/config/avr/gen-avr-mmcu-specs.c b/gcc/config/avr/gen-avr-mmcu-specs.c
index ee75b1e..48a749b 100644
--- a/gcc/config/avr/gen-avr-mmcu-specs.c
+++ b/gcc/config/avr/gen-avr-mmcu-specs.c
@@ -179,8 +179,10 @@ print_mcu (const avr_mcu_t *mcu)
 
   // avr-gcc specific specs for the compilation / the compiler proper.
 
+  int n_flash = 1 + (mcu->flash_size - 1) / 0x10000;
+
   fprintf (f, "*cc1_n_flash:\n"
-           "\t%%{!mn-flash=*:-mn-flash=%d}\n\n", mcu->n_flash);
+           "\t%%{!mn-flash=*:-mn-flash=%d}\n\n", n_flash);
 
   fprintf (f, "*cc1_rmw:\n%s\n\n", rmw
            ? "\t%{!mno-rmw: -mrmw}"
-- 
cgit v1.1


From efa68ffca9f9032e795ae84c5642038bd613c253 Mon Sep 17 00:00:00 2001
From: Andreas Krebbel <krebbel@linux.vnet.ibm.com>
Date: Fri, 2 Dec 2016 11:52:58 +0000
Subject: S/390: Fix RTL sharing when generating reg note.

gcc/ChangeLog:

2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>

	* config/s390/s390.c (s390_save_gprs_to_fprs): Fix RTL sharing
	problem.

From-SVN: r243173
---
 gcc/ChangeLog          | 5 +++++
 gcc/config/s390/s390.c | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a0cefa7..03387cf 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
+
+	* config/s390/s390.c (s390_save_gprs_to_fprs): Fix RTL sharing
+	problem.
+
 2016-12-02  Georg-Johann Lay  <avr@gjlay.de>
 
 	* config/avr/avr-arch.h (avr_mcu_t) [n_flash]: Remove field.
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 767666e..030e10d 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -10666,7 +10666,7 @@ s390_save_gprs_to_fprs (void)
 	  /* This prevents dwarf2cfi from interpreting the set.  Doing
 	     so it might emit def_cfa_register infos setting an FPR as
 	     new CFA.  */
-	  add_reg_note (insn, REG_CFA_REGISTER, PATTERN (insn));
+	  add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
 	}
     }
 }
-- 
cgit v1.1


From cc9037a6e93f50efae6494a849c09a7ffa4b253f Mon Sep 17 00:00:00 2001
From: Aldy Hernandez <aldyh@redhat.com>
Date: Fri, 2 Dec 2016 12:20:42 +0000
Subject: re PR middle-end/78328 (wrong wording for unbounded alloc case in
 -Walloca-larger-than note)

	PR middle-end/78328
	* gimple-ssa-warn-alloca.c (alloca_call_type): Handle
	VR_ANTI_RANGE.

From-SVN: r243174
---
 gcc/ChangeLog                     |  6 ++++++
 gcc/gimple-ssa-warn-alloca.c      |  2 ++
 gcc/testsuite/gcc.dg/Walloca-12.c | 11 +++++++++++
 3 files changed, 19 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/Walloca-12.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 03387cf..afac973 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-02  Aldy Hernandez  <aldyh@redhat.com>
+
+	PR middle-end/78328
+	* gimple-ssa-warn-alloca.c (alloca_call_type): Handle
+	VR_ANTI_RANGE.
+
 2016-12-02  Andreas Krebbel  <krebbel@linux.vnet.ibm.com>
 
 	* config/s390/s390.c (s390_save_gprs_to_fprs): Fix RTL sharing
diff --git a/gcc/gimple-ssa-warn-alloca.c b/gcc/gimple-ssa-warn-alloca.c
index e75f2fa..ae379f9 100644
--- a/gcc/gimple-ssa-warn-alloca.c
+++ b/gcc/gimple-ssa-warn-alloca.c
@@ -339,6 +339,8 @@ alloca_call_type (gimple *stmt, bool is_vla, tree *invalid_casted_type)
 		{
 		  // Fall through.
 		}
+	      else if (range_type == VR_ANTI_RANGE)
+		return alloca_type_and_limit (ALLOCA_UNBOUNDED);
 	      else if (range_type != VR_VARYING)
 		return
 		  alloca_type_and_limit (ALLOCA_BOUND_MAYBE_LARGE, max);
diff --git a/gcc/testsuite/gcc.dg/Walloca-12.c b/gcc/testsuite/gcc.dg/Walloca-12.c
new file mode 100644
index 0000000..5d71cda
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/Walloca-12.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-Walloca-larger-than=128 -O2" } */
+
+void f (void*);
+
+void g (unsigned int n)
+{
+  if (n == 7)
+    n = 11;
+  f (__builtin_alloca (n)); /* { dg-warning "unbounded use of 'alloca'" } */
+}
-- 
cgit v1.1


From d003d97f1d17b7ca89668fdf71d8a7f9c839cc2b Mon Sep 17 00:00:00 2001
From: Dominik Vogt <vogt@linux.vnet.ibm.com>
Date: Fri, 2 Dec 2016 12:32:16 +0000
Subject: S/390: Fix setmem-long test.

Adding a " in the scan-assembler pattern is necessary because of a
recent change in print-rtl.c.

gcc/testsuite/ChangeLog:

2016-12-02  Dominik Vogt  <vogt@linux.vnet.ibm.com>

	* gcc.target/s390/md/setmem_long-1.c: Fix test.

From-SVN: r243176
---
 gcc/testsuite/ChangeLog                          | 4 ++++
 gcc/testsuite/gcc.target/s390/md/setmem_long-1.c | 4 ++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 490d081..c1416d0 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2016-12-02  Dominik Vogt  <vogt@linux.vnet.ibm.com>
+
+	* gcc.target/s390/md/setmem_long-1.c: Fix test.
+
 2016-12-02  Jakub Jelinek  <jakub@redhat.com>
 
 	PR rtl-optimization/78547
diff --git a/gcc/testsuite/gcc.target/s390/md/setmem_long-1.c b/gcc/testsuite/gcc.target/s390/md/setmem_long-1.c
index 933a698..bd0c594 100644
--- a/gcc/testsuite/gcc.target/s390/md/setmem_long-1.c
+++ b/gcc/testsuite/gcc.target/s390/md/setmem_long-1.c
@@ -16,8 +16,8 @@ void test2(char *p, int c, int len)
 }
 
 /* Check that the right patterns are used.  */
-/* { dg-final { scan-assembler-times {c:9 .*{[*]setmem_long_?3?1?z?}} 1 } } */
-/* { dg-final { scan-assembler-times {c:15 .*{[*]setmem_long_and_?3?1?z?}} 1 { xfail *-*-* } } } */
+/* { dg-final { scan-assembler-times {c"?:9 .*{[*]setmem_long_?3?1?z?}} 1 } } */
+/* { dg-final { scan-assembler-times {c"?:15 .*{[*]setmem_long_and_?3?1?z?}} 1 { xfail *-*-* } } } */
 
 #define LEN 500
 char buf[LEN + 2];
-- 
cgit v1.1


From 03fd1ef632f41caaaa38a80712ed7eda3f97fdff Mon Sep 17 00:00:00 2001
From: Nathan Sidwell <nathan@acm.org>
Date: Fri, 2 Dec 2016 13:14:01 +0000
Subject: diagnostic.c (diagnostic_report_diagnostic): Remove extraneous
 braces.

	* diagnostic.c (diagnostic_report_diagnostic): Remove extraneous
	braces.

From-SVN: r243177
---
 gcc/ChangeLog    | 5 +++++
 gcc/diagnostic.c | 4 +---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index afac973..7da0aca 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-02  Nathan Sidwell  <nathan@acm.org>
+
+	* diagnostic.c (diagnostic_report_diagnostic): Remove extraneous
+	braces.
+
 2016-12-02  Aldy Hernandez  <aldyh@redhat.com>
 
 	PR middle-end/78328
diff --git a/gcc/diagnostic.c b/gcc/diagnostic.c
index 2304e14..4278a10 100644
--- a/gcc/diagnostic.c
+++ b/gcc/diagnostic.c
@@ -834,9 +834,7 @@ diagnostic_report_diagnostic (diagnostic_context *context,
      -Wno-error=*.  */
   if (context->warning_as_error_requested
       && diagnostic->kind == DK_WARNING)
-    {
-      diagnostic->kind = DK_ERROR;
-    }
+    diagnostic->kind = DK_ERROR;
 
   if (diagnostic->option_index
       && diagnostic->option_index != permissive_error_option (context))
-- 
cgit v1.1


From 310a7f96996f46f1565376f092e08daa1d44d1e7 Mon Sep 17 00:00:00 2001
From: Jason Merrill <jason@redhat.com>
Date: Fri, 2 Dec 2016 08:58:32 -0500
Subject: call.c (add_function_candidate): Also exclude inherited ctors that
 take a type reference-related to the derived...

	* call.c (add_function_candidate): Also exclude inherited ctors
	that take a type reference-related to the derived class.

From-SVN: r243178
---
 gcc/cp/ChangeLog |  3 +++
 gcc/cp/call.c    | 23 +++++------------------
 2 files changed, 8 insertions(+), 18 deletions(-)

diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog
index b407d17..4977ff2 100644
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,5 +1,8 @@
 2016-12-01  Jason Merrill  <jason@redhat.com>
 
+	* call.c (add_function_candidate): Also exclude inherited ctors
+	that take a type reference-related to the derived class.
+
 	* call.c (add_function_candidate): Exclude inherited copy/move
 	ctors.
 
diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index 561cc83..b7aa97c 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -2042,19 +2042,18 @@ add_function_candidate (struct z_candidate **candidates,
       reason = arity_rejection (first_arg, i + remaining, len);
     }
 
-  /* A constructor that is a direct member of a class C and has a first
+  /* An inherited constructor (12.6.3 [class.inhctor.init]) that has a first
      parameter of type "reference to cv C" (including such a constructor
      instantiated from a template) is excluded from the set of candidate
-     functions when used to construct an object of type derived from C (12.6.3
-     [class.inhctor.init]) with an argument list containing a single
-     argument.  */
+     functions when used to construct an object of type D with an argument list
+     containing a single argument if C is reference-related to D.  */
   if (viable && len == 1 && parmlist && DECL_CONSTRUCTOR_P (fn)
       && flag_new_inheriting_ctors
       && DECL_INHERITED_CTOR (fn))
     {
       tree ptype = non_reference (TREE_VALUE (parmlist));
-      tree ctype = DECL_INHERITED_CTOR_BASE (fn);
-      if (same_type_ignoring_top_level_qualifiers_p (ptype, ctype))
+      tree dtype = DECL_CONTEXT (fn);
+      if (reference_related_p (ptype, dtype))
 	{
 	  viable = false;
 	  reason = inherited_ctor_rejection ();
@@ -2161,18 +2160,6 @@ add_function_candidate (struct z_candidate **candidates,
 		}
 	    }
 
-	  /* Don't consider inherited constructors for initialization from an
-	     expression of the same or derived type.  */
-	  /* FIXME extend to operator=.  */
-	  if (i == 0 && len == 1
-	      && DECL_INHERITED_CTOR (fn)
-	      && reference_related_p (ctype, argtype))
-	    {
-	      viable = 0;
-	      reason = inherited_ctor_rejection ();
-	      goto out;
-	    }
-
 	  /* Core issue 899: When [copy-]initializing a temporary to be bound
 	     to the first parameter of a copy constructor (12.8) called with
 	     a single argument in the context of direct-initialization,
-- 
cgit v1.1


From d313d52cd5fff9374f01967fb7964b6633df219c Mon Sep 17 00:00:00 2001
From: Sebastian Huber <sebastian.huber@embedded-brains.de>
Date: Fri, 2 Dec 2016 14:10:33 +0000
Subject: [RTEMS] Fix libgomp for nthreads == 1

libgomp/

        * config/rtems/pool.h (gomp_get_thread_pool): Return proper
        thread pool in case nthreads == 1.

From-SVN: r243179
---
 libgomp/ChangeLog           |  5 +++++
 libgomp/config/rtems/pool.h | 26 +++++++++++---------------
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index 9e2a300..f072ce4 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-02  Sebastian Huber  <sebastian.huber@embedded-brains.de>
+
+	* config/rtems/pool.h (gomp_get_thread_pool): Return proper
+	thread pool in case nthreads == 1.
+
 2016-11-30  Alexander Monakov  <amonakov@ispras.ru>
 
 	* config/nvptx/env.c: Delete.
diff --git a/libgomp/config/rtems/pool.h b/libgomp/config/rtems/pool.h
index 7520c07..e69eca4 100644
--- a/libgomp/config/rtems/pool.h
+++ b/libgomp/config/rtems/pool.h
@@ -87,28 +87,24 @@ static inline struct gomp_thread_pool *
 gomp_get_thread_pool (struct gomp_thread *thr, unsigned nthreads)
 {
   struct gomp_thread_pool *pool;
+  struct gomp_thread_pool_reservoir *res;
 
   if (__builtin_expect (thr->thread_pool == NULL, 0))
     pthread_setspecific (gomp_thread_destructor, thr);
 
-  if (nthreads != 1)
+  res = gomp_get_thread_pool_reservoir ();
+  if (res != NULL)
     {
-      struct gomp_thread_pool_reservoir *res =
-	gomp_get_thread_pool_reservoir ();
-      if (res != NULL)
-	{
-	  gomp_sem_wait (&res->available);
-	  gomp_mutex_lock (&res->lock);
-	  pool = res->pools[--res->index];
-	  gomp_mutex_unlock (&res->lock);
-	  pool->threads_busy = nthreads;
-	  thr->thread_pool = pool;
-	}
-      else
-	pool = gomp_get_own_thread_pool (thr, nthreads);
+      gomp_sem_wait (&res->available);
+      gomp_mutex_lock (&res->lock);
+      pool = res->pools[--res->index];
+      gomp_mutex_unlock (&res->lock);
+      pool->threads_busy = nthreads;
+      thr->thread_pool = pool;
     }
   else
-    pool = NULL;
+    pool = gomp_get_own_thread_pool (thr, nthreads);
+
   return pool;
 }
 
-- 
cgit v1.1


From 714445ae04640bc096693623fb805bcf14148663 Mon Sep 17 00:00:00 2001
From: Bin Cheng <bin.cheng@arm.com>
Date: Fri, 2 Dec 2016 14:13:11 +0000
Subject: match.pd: Add new pattern: (cond (cmp (convert?

	* match.pd: Add new pattern:
	(cond (cmp (convert? x) c1) (op x c2) c3) -> (op (minmax x c1) c2).
	gcc/testsuite
	* gcc.dg/fold-bopcond-1.c: New test.
	* gcc.dg/fold-bopcond-2.c: New test.

From-SVN: r243180
---
 gcc/ChangeLog                         |   5 ++
 gcc/match.pd                          | 100 ++++++++++++++++++++++++++++++++++
 gcc/testsuite/ChangeLog               |   5 ++
 gcc/testsuite/gcc.dg/fold-bopcond-1.c |  48 ++++++++++++++++
 gcc/testsuite/gcc.dg/fold-bopcond-2.c |  48 ++++++++++++++++
 5 files changed, 206 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/fold-bopcond-1.c
 create mode 100644 gcc/testsuite/gcc.dg/fold-bopcond-2.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 7da0aca..85b4bdb 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-02  Bin Cheng  <bin.cheng@arm.com>
+
+	* match.pd: Add new pattern:
+	(cond (cmp (convert? x) c1) (op x c2) c3) -> (op (minmax x c1) c2).
+
 2016-12-02  Nathan Sidwell  <nathan@acm.org>
 
 	* diagnostic.c (diagnostic_report_diagnostic): Remove extraneous
diff --git a/gcc/match.pd b/gcc/match.pd
index bc8a5e7..dbb9103 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2038,6 +2038,106 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
       (convert (cond (eq @1 (convert @3))
 		     (convert:from_type @3) (convert:from_type @2)))))))))
 
+/* (cond (cmp (convert? x) c1) (op x c2) c3) -> (op (minmax x c1) c2) if:
+
+     1) OP is PLUS or MINUS.
+     2) CMP is LT, LE, GT or GE.
+     3) C3 == (C1 op C2), and computation doesn't have undefined behavior.
+
+   This pattern also handles special cases like:
+
+     A) Operand x is an unsigned to signed type conversion and c1 is
+	integer zero.  In this case,
+	  (signed type)x  < 0  <=>  x  > MAX_VAL(signed type)
+	  (signed type)x >= 0  <=>  x <= MAX_VAL(signed type)
+     B) Const c1 may not be equal to (C3 op' C2).  In this case we also
+	check equality for (c1+1) and (c1-1) by adjusting comparison
+	code.
+
+   TODO: Though signed type is handled by this pattern, it cannot be
+   simplified at the moment because C standard requires additional
+   type promotion.  In order to match&simplify it here, the IR needs
+   to be cleaned up by other optimizers, i.e, VRP.  */
+(for op (plus minus)
+ (for cmp (lt le gt ge)
+  (simplify
+   (cond (cmp (convert? @X) INTEGER_CST@1) (op @X INTEGER_CST@2) INTEGER_CST@3)
+   (with { tree from_type = TREE_TYPE (@X), to_type = TREE_TYPE (@1); }
+    (if (types_match (from_type, to_type)
+	 /* Check if it is special case A).  */
+	 || (TYPE_UNSIGNED (from_type)
+	     && !TYPE_UNSIGNED (to_type)
+	     && TYPE_PRECISION (from_type) == TYPE_PRECISION (to_type)
+	     && integer_zerop (@1)
+	     && (cmp == LT_EXPR || cmp == GE_EXPR)))
+     (with
+      {
+	bool overflow = false;
+	enum tree_code code, cmp_code = cmp;
+	wide_int real_c1, c1 = @1, c2 = @2, c3 = @3;
+	signop sgn = TYPE_SIGN (from_type);
+
+	/* Handle special case A), given x of unsigned type:
+	    ((signed type)x  < 0) <=> (x  > MAX_VAL(signed type))
+	    ((signed type)x >= 0) <=> (x <= MAX_VAL(signed type))  */
+	if (!types_match (from_type, to_type))
+	  {
+	    if (cmp_code == LT_EXPR)
+	      cmp_code = GT_EXPR;
+	    if (cmp_code == GE_EXPR)
+	      cmp_code = LE_EXPR;
+	    c1 = wi::max_value (to_type);
+	  }
+	/* To simplify this pattern, we require c3 = (c1 op c2).  Here we
+	   compute (c3 op' c2) and check if it equals c1 with op' being
+	   the inverse operator of op.  Make sure overflow doesn't happen
+	   if it is undefined.  */
+	if (op == PLUS_EXPR)
+	  real_c1 = wi::sub (c3, c2, sgn, &overflow);
+	else
+	  real_c1 = wi::add (c3, c2, sgn, &overflow);
+
+	code = cmp_code;
+	if (!overflow || !TYPE_OVERFLOW_UNDEFINED (from_type))
+	  {
+	    /* Check if c1 equals real_c1.  The boundary condition is handled
+	       by adjusting the comparison operation if necessary.  */
+	    if (!wi::cmp (wi::sub (real_c1, 1, sgn, &overflow), c1, sgn)
+		&& !overflow)
+	      {
+		/* X <= Y - 1 equals to X < Y.  */
+		if (cmp_code == LE_EXPR)
+		  code = LT_EXPR;
+		/* X > Y - 1 equals to X >= Y.  */
+		if (cmp_code == GT_EXPR)
+		  code = GE_EXPR;
+	      }
+	    if (!wi::cmp (wi::add (real_c1, 1, sgn, &overflow), c1, sgn)
+		&& !overflow)
+	      {
+		/* X < Y + 1 equals to X <= Y.  */
+		if (cmp_code == LT_EXPR)
+		  code = LE_EXPR;
+		/* X >= Y + 1 equals to X > Y.  */
+		if (cmp_code == GE_EXPR)
+		  code = GT_EXPR;
+	      }
+	    if (code != cmp_code || !wi::cmp (real_c1, c1, sgn))
+	      {
+		if (cmp_code == LT_EXPR || cmp_code == LE_EXPR)
+		  code = MIN_EXPR;
+		if (cmp_code == GT_EXPR || cmp_code == GE_EXPR)
+		  code = MAX_EXPR;
+	      }
+	  }
+      }
+      (if (code == MAX_EXPR)
+       (op (max @X { wide_int_to_tree (from_type, real_c1); })
+	   { wide_int_to_tree (from_type, c2); })
+       (if (code == MIN_EXPR)
+	(op (min @X { wide_int_to_tree (from_type, real_c1); })
+	    { wide_int_to_tree (from_type, c2); })))))))))
+
 (for cnd (cond vec_cond)
  /* A ? B : (A ? X : C) -> A ? B : C.  */
  (simplify
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c1416d0..143687d 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-02  Bin Cheng  <bin.cheng@arm.com>
+
+	* gcc.dg/fold-bopcond-1.c: New test.
+	* gcc.dg/fold-bopcond-2.c: New test.
+
 2016-12-02  Dominik Vogt  <vogt@linux.vnet.ibm.com>
 
 	* gcc.target/s390/md/setmem_long-1.c: Fix test.
diff --git a/gcc/testsuite/gcc.dg/fold-bopcond-1.c b/gcc/testsuite/gcc.dg/fold-bopcond-1.c
new file mode 100644
index 0000000..7324c16
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-bopcond-1.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-ifcvt" } */
+
+int foo1 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x <= 32768 ? x + 32768 : 0);
+    }
+  return x;
+}
+
+int foo2 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x < 32768 ? x + 32768 : 0);
+    }
+  return x;
+}
+
+int foo3 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x < 1000 ? x - 1000 : 0);
+    }
+  return x;
+}
+
+int foo4 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x <= 2 ? x + 999 : 1001);
+    }
+  return x;
+}
+
+/* { dg-final { scan-tree-dump-times "MIN_EXPR " 4 "ifcvt" } } */
diff --git a/gcc/testsuite/gcc.dg/fold-bopcond-2.c b/gcc/testsuite/gcc.dg/fold-bopcond-2.c
new file mode 100644
index 0000000..7a47449
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-bopcond-2.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-ifcvt" } */
+
+int foo1 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x >= 32768 ? x - 32768 : 0);
+    }
+  return x;
+}
+
+int foo2 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x > 32768 ? x - 32768 : 0);
+    }
+  return x;
+}
+
+int foo3 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x > 1000 ? x - 1000 : 0);
+    }
+  return x;
+}
+
+int foo4 (unsigned short a[], unsigned int x)
+{
+  unsigned int i;
+  for (i = 0; i < 1000; i++)
+    {
+      x = a[i];
+      a[i] = (unsigned short)(x >= 2 ? x - 32768 : 32770);
+    }
+  return x;
+}
+
+/* { dg-final { scan-tree-dump-times "MAX_EXPR " 4 "ifcvt" } } */
-- 
cgit v1.1


From e7a7f4bea8356e24ca5f2c25a75dc005c7492f89 Mon Sep 17 00:00:00 2001
From: Sebastian Huber <sebastian.huber@embedded-brains.de>
Date: Fri, 2 Dec 2016 14:13:12 +0000
Subject: [RTEMS] Use spin lock for pool management

libgomp/

	* config/rtems/pool.h (gomp_thread_pool_reservoir): Use
	pthread_spinlock_t instead of gomp_mutex_t lock.
	(gomp_get_thread_pool): Likewise.
	(gomp_release_thread_pool): Likewise.
	* config/rtems/proc.c (allocate_thread_pool_reservoir):
	Likewise.

From-SVN: r243181
---
 libgomp/ChangeLog           |  9 +++++++++
 libgomp/config/rtems/pool.h | 10 +++++-----
 libgomp/config/rtems/proc.c |  2 +-
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index f072ce4..469e896 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,5 +1,14 @@
 2016-12-02  Sebastian Huber  <sebastian.huber@embedded-brains.de>
 
+	* config/rtems/pool.h (gomp_thread_pool_reservoir): Use
+	pthread_spinlock_t instead of gomp_mutex_t lock.
+	(gomp_get_thread_pool): Likewise.
+	(gomp_release_thread_pool): Likewise.
+	* config/rtems/proc.c (allocate_thread_pool_reservoir):
+	Likewise.
+
+2016-12-02  Sebastian Huber  <sebastian.huber@embedded-brains.de>
+
 	* config/rtems/pool.h (gomp_get_thread_pool): Return proper
 	thread pool in case nthreads == 1.
 
diff --git a/libgomp/config/rtems/pool.h b/libgomp/config/rtems/pool.h
index e69eca4..83fddc8 100644
--- a/libgomp/config/rtems/pool.h
+++ b/libgomp/config/rtems/pool.h
@@ -39,7 +39,7 @@
    GOMP_RTEMS_THREAD_POOLS environment variable.  */
 struct gomp_thread_pool_reservoir {
   gomp_sem_t available;
-  gomp_mutex_t lock;
+  pthread_spinlock_t lock;
   size_t index;
   int priority;
   struct gomp_thread_pool *pools[];
@@ -96,9 +96,9 @@ gomp_get_thread_pool (struct gomp_thread *thr, unsigned nthreads)
   if (res != NULL)
     {
       gomp_sem_wait (&res->available);
-      gomp_mutex_lock (&res->lock);
+      pthread_spin_lock (&res->lock);
       pool = res->pools[--res->index];
-      gomp_mutex_unlock (&res->lock);
+      pthread_spin_unlock (&res->lock);
       pool->threads_busy = nthreads;
       thr->thread_pool = pool;
     }
@@ -115,9 +115,9 @@ gomp_release_thread_pool (struct gomp_thread_pool *pool)
     gomp_tls_rtems_data.thread_pool_reservoir;
   if (res != NULL)
     {
-      gomp_mutex_lock (&res->lock);
+      pthread_spin_lock (&res->lock);
       res->pools[res->index++] = pool;
-      gomp_mutex_unlock (&res->lock);
+      pthread_spin_unlock (&res->lock);
       gomp_sem_post (&res->available);
     }
 }
diff --git a/libgomp/config/rtems/proc.c b/libgomp/config/rtems/proc.c
index d4123d2..5e04b47 100644
--- a/libgomp/config/rtems/proc.c
+++ b/libgomp/config/rtems/proc.c
@@ -66,7 +66,7 @@ allocate_thread_pool_reservoir (unsigned long count, unsigned long priority,
   res->index = count;
   res->priority = priority;
   gomp_sem_init (&res->available, count);
-  gomp_mutex_init (&res->lock);
+  pthread_spin_init (&res->lock, PTHREAD_PROCESS_PRIVATE);
   for (i = 0; i < count; ++i)
     res->pools[i] = &pools[i];
   gomp_thread_pool_reservoirs[scheduler] = res;
-- 
cgit v1.1


From 474bbda1675d35ab6f78ffdf20ee091f8402d185 Mon Sep 17 00:00:00 2001
From: James Greenhalgh <james.greenhalgh@arm.com>
Date: Fri, 2 Dec 2016 14:29:35 +0000
Subject: [Patch 1/2 PR78561] Rename get_pool_size to get_pool_size_upper_bound

gcc/

	PR rtl-optimization/78561
	* config/rs6000/rs6000.c (rs6000_reg_live_or_pic_offset_p): Rename
	get_pool_size to get_pool_size_upper_bound.
	(rs6000_stack_info): Likewise.
	(rs6000_emit_prologue): Likewise.
	(rs6000_elf_declare_function_name): Likewise.
	(rs6000_set_up_by_prologue): Likewise.
	(rs6000_can_eliminate): Likewise, reformat spaces to tabs.
	* output.h (get_pool_size): Rename to...
	(get_pool_size_upper_bound): ...This.
	* varasm.c (get_pool_size): Rename to...
	(get_pool_size_upper_bound): ...This.

From-SVN: r243182
---
 gcc/ChangeLog              | 15 +++++++++++++++
 gcc/config/rs6000/rs6000.c | 23 +++++++++++++----------
 gcc/output.h               |  7 +++++--
 gcc/varasm.c               |  2 +-
 4 files changed, 34 insertions(+), 13 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 85b4bdb..7348684 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2016-12-02  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	PR rtl-optimization/78561
+	* config/rs6000/rs6000.c (rs6000_reg_live_or_pic_offset_p): Rename
+	get_pool_size to get_pool_size_upper_bound.
+	(rs6000_stack_info): Likewise.
+	(rs6000_emit_prologue): Likewise.
+	(rs6000_elf_declare_function_name): Likewise.
+	(rs6000_set_up_by_prologue): Likewise.
+	(rs6000_can_eliminate): Likewise, reformat spaces to tabs.
+	* output.h (get_pool_size): Rename to...
+	(get_pool_size_upper_bound): ...This.
+	* varasm.c (get_pool_size): Rename to...
+	(get_pool_size_upper_bound): ...This.
+
 2016-12-02  Bin Cheng  <bin.cheng@arm.com>
 
 	* match.pd: Add new pattern:
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index e572620..425a885 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -25460,7 +25460,7 @@ rs6000_reg_live_or_pic_offset_p (int reg)
       if (TARGET_TOC && TARGET_MINIMAL_TOC
 	  && (crtl->calls_eh_return
 	      || df_regs_ever_live_p (reg)
-	      || get_pool_size ()))
+	      || get_pool_size_upper_bound ()))
 	return true;
 
       if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
@@ -26266,7 +26266,7 @@ rs6000_stack_info (void)
 #ifdef TARGET_RELOCATABLE
       || (DEFAULT_ABI == ABI_V4
 	  && (TARGET_RELOCATABLE || flag_pic > 1)
-	  && get_pool_size () != 0)
+	  && get_pool_size_upper_bound () != 0)
 #endif
       || rs6000_ra_ever_killed ())
     info->lr_save_p = 1;
@@ -28044,7 +28044,8 @@ rs6000_emit_prologue (void)
       cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
 
       /* With -mminimal-toc we may generate an extra use of r2 below.  */
-      if (TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
+      if (TARGET_TOC && TARGET_MINIMAL_TOC
+	  && get_pool_size_upper_bound () != 0)
 	cfun->machine->r2_setup_needed = true;
     }
 
@@ -28899,7 +28900,8 @@ rs6000_emit_prologue (void)
 
   /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up.  */
   if (!TARGET_SINGLE_PIC_BASE
-      && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
+      && ((TARGET_TOC && TARGET_MINIMAL_TOC
+	   && get_pool_size_upper_bound () != 0)
 	  || (DEFAULT_ABI == ABI_V4
 	      && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
 	      && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
@@ -34966,7 +34968,7 @@ rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
   if (DEFAULT_ABI == ABI_V4
       && (TARGET_RELOCATABLE || flag_pic > 1)
       && !TARGET_SECURE_PLT
-      && (get_pool_size () != 0 || crtl->profile)
+      && (get_pool_size_upper_bound () != 0 || crtl->profile)
       && uses_TOC ())
     {
       char buf[256];
@@ -37449,10 +37451,11 @@ static bool
 rs6000_can_eliminate (const int from, const int to)
 {
   return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
-          ? ! frame_pointer_needed
-          : from == RS6000_PIC_OFFSET_TABLE_REGNUM
-            ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
-            : true);
+	  ? ! frame_pointer_needed
+	  : from == RS6000_PIC_OFFSET_TABLE_REGNUM
+	    ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC
+		|| get_pool_size_upper_bound () == 0
+	    : true);
 }
 
 /* Define the offset between two registers, FROM to be eliminated and its
@@ -38988,7 +38991,7 @@ rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
   if (!TARGET_SINGLE_PIC_BASE
       && TARGET_TOC
       && TARGET_MINIMAL_TOC
-      && get_pool_size () != 0)
+      && get_pool_size_upper_bound () != 0)
     add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
   if (cfun->machine->split_stack_argp_used)
     add_to_hard_reg_set (&set->set, Pmode, 12);
diff --git a/gcc/output.h b/gcc/output.h
index 0924499..7186dc1 100644
--- a/gcc/output.h
+++ b/gcc/output.h
@@ -287,8 +287,11 @@ extern void assemble_real (REAL_VALUE_TYPE, machine_mode, unsigned,
 /* Write the address of the entity given by SYMBOL to SEC.  */
 extern void assemble_addr_to_section (rtx, section *);
 
-/* Return the size of the constant pool.  */
-extern int get_pool_size (void);
+/* Return the maximum size of the constant pool.  This may be larger
+   than the final size of the constant pool, as entries may be added to
+   the constant pool which become unreferenced, or otherwise not need
+   output by the time we actually emit the pool.  */
+extern int get_pool_size_upper_bound (void);
 
 extern rtx_insn *peephole (rtx_insn *);
 
diff --git a/gcc/varasm.c b/gcc/varasm.c
index 1e7c2b5..f8af0c1 100644
--- a/gcc/varasm.c
+++ b/gcc/varasm.c
@@ -3811,7 +3811,7 @@ get_pool_mode (const_rtx addr)
 /* Return the size of the constant pool.  */
 
 int
-get_pool_size (void)
+get_pool_size_upper_bound (void)
 {
   return crtl->varasm.pool->offset;
 }
-- 
cgit v1.1


From 04c452f40ba95e15a76762e4bb5767d15cf8b322 Mon Sep 17 00:00:00 2001
From: James Greenhalgh <james.greenhalgh@arm.com>
Date: Fri, 2 Dec 2016 14:31:10 +0000
Subject: [Patch 2/2 PR78561] Recalculate constant pool size before emitting it

gcc/

	PR rtl-optimization/78561
	* varasm.c (recompute_pool_offsets): New.
	(output_constant_pool): Call it.

gcc/testsuite/

	PR rtl-optimization/78561
	* gcc.target/aarch64/pr78561.c: New.

From-SVN: r243183
---
 gcc/ChangeLog           |  6 ++++++
 gcc/testsuite/ChangeLog |  5 +++++
 gcc/varasm.c            | 28 ++++++++++++++++++++++++++++
 3 files changed, 39 insertions(+)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 7348684..92501fc 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,6 +1,12 @@
 2016-12-02  James Greenhalgh  <james.greenhalgh@arm.com>
 
 	PR rtl-optimization/78561
+	* varasm.c (recompute_pool_offsets): New.
+	(output_constant_pool): Call it.
+
+2016-12-02  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	PR rtl-optimization/78561
 	* config/rs6000/rs6000.c (rs6000_reg_live_or_pic_offset_p) Rename
 	get_pool_size to get_pool_size_upper_bound.
 	(rs6000_stack_info): Likewise.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 143687d..9970806 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-02  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	PR rtl-optimization/78561
+	* gcc.target/aarch64/pr78561.c: New.
+
 2016-12-02  Bin Cheng  <bin.cheng@arm.com>
 
 	* gcc.dg/fold-bopcond-1.c: New test.
diff --git a/gcc/varasm.c b/gcc/varasm.c
index f8af0c1..f3cd70a 100644
--- a/gcc/varasm.c
+++ b/gcc/varasm.c
@@ -3942,6 +3942,29 @@ output_constant_pool_1 (struct constant_descriptor_rtx *desc,
   return;
 }
 
+/* Recompute the offsets of entries in POOL, and the overall size of
+   POOL.  Do this after calling mark_constant_pool to ensure that we
+   are computing the offset values for the pool which we will actually
+   emit.  */
+
+static void
+recompute_pool_offsets (struct rtx_constant_pool *pool)
+{
+  struct constant_descriptor_rtx *desc;
+  pool->offset = 0;
+
+  for (desc = pool->first; desc ; desc = desc->next)
+    if (desc->mark)
+      {
+	/* Align the running offset, record it, then skip past DESC.  */
+	unsigned int align = desc->align;
+	pool->offset += (align / BITS_PER_UNIT) - 1;
+	pool->offset &= ~ ((align / BITS_PER_UNIT) - 1);
+	desc->offset = pool->offset;
+	pool->offset += GET_MODE_SIZE (desc->mode);
+      }
+}
+
 /* Mark all constants that are referenced by SYMBOL_REFs in X.
    Emit referenced deferred strings.  */
 
@@ -4060,6 +4083,11 @@ output_constant_pool (const char *fnname ATTRIBUTE_UNUSED,
      case we do not need to output the constant.  */
   mark_constant_pool ();
 
+  /* Having marked the constant pool entries we'll actually emit, we
+     now need to rebuild the offset information, which may have become
+     stale.  */
+  recompute_pool_offsets (pool);
+
 #ifdef ASM_OUTPUT_POOL_PROLOGUE
   ASM_OUTPUT_POOL_PROLOGUE (asm_out_file, fnname, fndecl, pool->offset);
 #endif
-- 
cgit v1.1


From 69a71a6d071a5eae8a06282351e53d8c383aba9a Mon Sep 17 00:00:00 2001
From: Martin Jambor <mjambor@suse.cz>
Date: Fri, 2 Dec 2016 15:42:15 +0100
Subject: [hsa] Exclude parallel outlines from hsa_callable_function_p

2016-12-09  Martin Jambor  <mjambor@suse.cz>

	* hsa.c (hsa_callable_function_p): Return false for artificial
	functions.

From-SVN: r243184
---
 gcc/ChangeLog | 5 +++++
 gcc/hsa.c     | 5 ++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 92501fc..0880c84 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-09  Martin Jambor  <mjambor@suse.cz>
+
+	* hsa.c (hsa_callable_function_p): Return false for artificial
+	functions.
+
 2016-12-02  James Greenhalgh  <james.greenhalgh@arm.com>
 
 	PR rtl-optimization/78561
diff --git a/gcc/hsa.c b/gcc/hsa.c
index f881e78..31e3252 100644
--- a/gcc/hsa.c
+++ b/gcc/hsa.c
@@ -90,7 +90,10 @@ bool
 hsa_callable_function_p (tree fndecl)
 {
   return (lookup_attribute ("omp declare target", DECL_ATTRIBUTES (fndecl))
-	  && !lookup_attribute ("oacc function", DECL_ATTRIBUTES (fndecl)));
+	  && !lookup_attribute ("oacc function", DECL_ATTRIBUTES (fndecl))
+	  /* At this point, this is enough to identify clones for
+	     parallel, which for HSA would need to be kernels anyway.  */
+	  && !DECL_ARTIFICIAL (fndecl));
 }
 
 /* Allocate HSA structures that are are used when dealing with different
-- 
cgit v1.1


From c5af52eb8cc208890ccb3bdce75b35bf5bbaa8bf Mon Sep 17 00:00:00 2001
From: Cesar Philippidis <cesar@codesourcery.com>
Date: Fri, 2 Dec 2016 06:54:39 -0800
Subject: c-parser.c (c_parser_pragma): Error when PRAGMA_OACC_{ENTER_DATA...

	gcc/c/
	* c-parser.c (c_parser_pragma): Error when PRAGMA_OACC_{ENTER_DATA,
	EXIT_DATA,WAIT} are not used in compound statements.
	(c_parser_oacc_enter_exit_data): Update diagnostics.

	gcc/cp/
	* parser.c (cp_parser_oacc_enter_exit_data): Update diagnostics.
	(cp_parser_pragma): Error when PRAGMA_OACC_{ENTER_DATA,
	EXIT_DATA,WAIT} are not used in compound statements.

	gcc/testsuite/
	* c-c++-common/goacc/data-2.c: Adjust test.
	* c-c++-common/goacc/executeables-1.c: New test.
	* g++.dg/goacc/data-1.C: Adjust test.


Co-Authored-By: James Norris <jnorris@codesourcery.com>

From-SVN: r243185
---
 gcc/c/ChangeLog                                   |  7 +++
 gcc/c/c-parser.c                                  | 42 ++++++++++---
 gcc/cp/ChangeLog                                  |  7 +++
 gcc/cp/parser.c                                   | 75 +++++++++++++++++------
 gcc/testsuite/ChangeLog                           |  7 +++
 gcc/testsuite/c-c++-common/goacc/data-2.c         | 12 ++--
 gcc/testsuite/c-c++-common/goacc/executeables-1.c | 74 ++++++++++++++++++++++
 gcc/testsuite/g++.dg/goacc/data-1.C               | 16 ++---
 8 files changed, 199 insertions(+), 41 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/goacc/executeables-1.c

diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog
index dec9c9d..f3626e2 100644
--- a/gcc/c/ChangeLog
+++ b/gcc/c/ChangeLog
@@ -1,3 +1,10 @@
+2016-12-02  Cesar Philippidis  <cesar@codesourcery.com>
+	    James Norris  <jnorris@codesourcery.com>
+
+	* c-parser.c (c_parser_pragma): Error when PRAGMA_OACC_{ENTER_DATA,
+	EXIT_DATA,WAIT} are not used in compound statements.
+	(c_parser_oacc_enter_exit_data): Update diagnostics.
+
 2016-11-21  Bernd Edlinger  <bernd.edlinger@hotmail.de>
 
 	PR c++/71973
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index 00fe731..f7bf9c4 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -10145,10 +10145,24 @@ c_parser_pragma (c_parser *parser, enum pragma_context context, bool *if_p)
       return false;
 
     case PRAGMA_OACC_ENTER_DATA:
+      if (context != pragma_compound)
+	{
+	  if (context == pragma_stmt)
+	    c_parser_error (parser, "%<#pragma acc enter data%> may only be "
+			    "used in compound statements");
+	  goto bad_stmt;
+	}
       c_parser_oacc_enter_exit_data (parser, true);
       return false;
 
     case PRAGMA_OACC_EXIT_DATA:
+      if (context != pragma_compound)
+	{
+	  if (context == pragma_stmt)
+	    c_parser_error (parser, "%<#pragma acc exit data%> may only be "
+			    "used in compound statements");
+	  goto bad_stmt;
+	}
       c_parser_oacc_enter_exit_data (parser, false);
       return false;
 
@@ -10305,6 +10319,16 @@ c_parser_pragma (c_parser *parser, enum pragma_context context, bool *if_p)
       c_parser_cilk_grainsize (parser, if_p);
       return false;
 
+    case PRAGMA_OACC_WAIT:
+      if (context != pragma_compound)
+	{
+	  if (context == pragma_stmt)
+	    c_parser_error (parser, "%<#pragma acc wait%> may only be "
+			    "used in compound statements");
+	  goto bad_stmt;
+	}
+      /* FALL THROUGH.  */
+
     default:
       if (id < PRAGMA_FIRST_EXTERNAL)
 	{
@@ -13871,28 +13895,26 @@ c_parser_oacc_enter_exit_data (c_parser *parser, bool enter)
 {
   location_t loc = c_parser_peek_token (parser)->location;
   tree clauses, stmt;
+  const char *p = "";
 
   c_parser_consume_pragma (parser);
 
-  if (!c_parser_next_token_is (parser, CPP_NAME))
+  if (c_parser_next_token_is (parser, CPP_NAME))
     {
-      c_parser_error (parser, enter
-		      ? "expected %<data%> in %<#pragma acc enter data%>"
-		      : "expected %<data%> in %<#pragma acc exit data%>");
-      c_parser_skip_to_pragma_eol (parser);
-      return;
+      p = IDENTIFIER_POINTER (c_parser_peek_token (parser)->value);
+      c_parser_consume_token (parser);
     }
 
-  const char *p = IDENTIFIER_POINTER (c_parser_peek_token (parser)->value);
   if (strcmp (p, "data") != 0)
     {
-      c_parser_error (parser, "invalid pragma");
+      error_at (loc, enter
+		? "expected %<data%> after %<#pragma acc enter%>"
+		: "expected %<data%> after %<#pragma acc exit%>");
+      parser->error = true;
       c_parser_skip_to_pragma_eol (parser);
       return;
     }
 
-  c_parser_consume_token (parser);
-
   if (enter)
     clauses = c_parser_oacc_all_clauses (parser, OACC_ENTER_DATA_CLAUSE_MASK,
 					 "#pragma acc enter data");
diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog
index 4977ff2..d39c222 100644
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,3 +1,10 @@
+2016-12-02  Cesar Philippidis  <cesar@codesourcery.com>
+	    James Norris  <jnorris@codesourcery.com>
+
+	* parser.c (cp_parser_oacc_enter_exit_data): Update diagnostics.
+	(cp_parser_pragma): Error when PRAGMA_OACC_{ENTER_DATA,
+	EXIT_DATA,WAIT} are not used in compound statements.
+
 2016-12-01  Jason Merrill  <jason@redhat.com>
 
 	* call.c (add_function_candidate): Also exclude inherited ctors
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 843cbe2..08f5f9e 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -36175,23 +36175,18 @@ static tree
 cp_parser_oacc_enter_exit_data (cp_parser *parser, cp_token *pragma_tok,
 				bool enter)
 {
+  location_t loc = pragma_tok->location;
   tree stmt, clauses;
+  const char *p = "";
 
-  if (cp_lexer_next_token_is (parser->lexer, CPP_PRAGMA_EOL)
-     || cp_lexer_next_token_is_not (parser->lexer, CPP_NAME))
-    {
-      cp_parser_error (parser, enter
-		       ? "expected %<data%> in %<#pragma acc enter data%>"
-		       : "expected %<data%> in %<#pragma acc exit data%>");
-      cp_parser_skip_to_pragma_eol (parser, pragma_tok);
-      return NULL_TREE;
-    }
+  if (cp_lexer_next_token_is (parser->lexer, CPP_NAME))
+    p = IDENTIFIER_POINTER (cp_lexer_peek_token (parser->lexer)->u.value);
 
-  const char *p =
-    IDENTIFIER_POINTER (cp_lexer_peek_token (parser->lexer)->u.value);
   if (strcmp (p, "data") != 0)
     {
-      cp_parser_error (parser, "invalid pragma");
+      error_at (loc, enter
+		? "expected %<data%> after %<#pragma acc enter%>"
+		: "expected %<data%> after %<#pragma acc exit%>");
       cp_parser_skip_to_pragma_eol (parser, pragma_tok);
       return NULL_TREE;
     }
@@ -36207,8 +36202,8 @@ cp_parser_oacc_enter_exit_data (cp_parser *parser, cp_token *pragma_tok,
 
   if (find_omp_clause (clauses, OMP_CLAUSE_MAP) == NULL_TREE)
     {
-      error_at (pragma_tok->location,
-		"%<#pragma acc enter data%> has no data movement clause");
+      error_at (loc, "%<#pragma acc %s data%> has no data movement clause",
+		enter ? "enter" : "exit");
       return NULL_TREE;
     }
 
@@ -38083,6 +38078,30 @@ cp_parser_pragma (cp_parser *parser, enum pragma_context context, bool *if_p)
       cp_parser_oacc_declare (parser, pragma_tok);
       return false;
 
+    case PRAGMA_OACC_ENTER_DATA:
+      if (context == pragma_stmt)
+	{
+	  cp_parser_error (parser, "%<#pragma acc enter data%> may only be "
+			   "used in compound statements");
+	  break;
+	}
+      else if (context != pragma_compound)
+	goto bad_stmt;
+      cp_parser_omp_construct (parser, pragma_tok, if_p);
+      return true;
+
+    case PRAGMA_OACC_EXIT_DATA:
+      if (context == pragma_stmt)
+	{
+	  cp_parser_error (parser, "%<#pragma acc exit data%> may only be "
+			   "used in compound statements");
+	  break;
+	}
+      else if (context != pragma_compound)
+	goto bad_stmt;
+      cp_parser_omp_construct (parser, pragma_tok, if_p);
+      return true;
+
     case PRAGMA_OACC_ROUTINE:
       if (context != pragma_external)
 	{
@@ -38093,17 +38112,37 @@ cp_parser_pragma (cp_parser *parser, enum pragma_context context, bool *if_p)
       cp_parser_oacc_routine (parser, pragma_tok, context);
       return false;
 
+    case PRAGMA_OACC_UPDATE:
+      if (context == pragma_stmt)
+	{
+	  cp_parser_error (parser, "%<#pragma acc update%> may only be "
+			   "used in compound statements");
+	  break;
+	}
+      else if (context != pragma_compound)
+	goto bad_stmt;
+      cp_parser_omp_construct (parser, pragma_tok, if_p);
+      return true;
+
+    case PRAGMA_OACC_WAIT:
+      if (context == pragma_stmt)
+	{
+	  cp_parser_error (parser, "%<#pragma acc wait%> may only be "
+			   "used in compound statements");
+	  break;
+	}
+      else if (context != pragma_compound)
+	goto bad_stmt;
+      cp_parser_omp_construct (parser, pragma_tok, if_p);
+      return true;
+
     case PRAGMA_OACC_ATOMIC:
     case PRAGMA_OACC_CACHE:
     case PRAGMA_OACC_DATA:
-    case PRAGMA_OACC_ENTER_DATA:
-    case PRAGMA_OACC_EXIT_DATA:
     case PRAGMA_OACC_HOST_DATA:
     case PRAGMA_OACC_KERNELS:
     case PRAGMA_OACC_PARALLEL:
     case PRAGMA_OACC_LOOP:
-    case PRAGMA_OACC_UPDATE:
-    case PRAGMA_OACC_WAIT:
     case PRAGMA_OMP_ATOMIC:
     case PRAGMA_OMP_CRITICAL:
     case PRAGMA_OMP_DISTRIBUTE:
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 9970806..f1b5c35 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2016-12-02  Cesar Philippidis  <cesar@codesourcery.com>
+	    James Norris  <jnorris@codesourcery.com>
+
+	* c-c++-common/goacc/data-2.c: Adjust test.
+	* c-c++-common/goacc/executeables-1.c: New test.
+	* g++.dg/goacc/data-1.C: Adjust test.
+
 2016-12-02  James Greenhalgh  <james.greenhalgh@arm.com>
 
 	PR rtl-optimization/78561
diff --git a/gcc/testsuite/c-c++-common/goacc/data-2.c b/gcc/testsuite/c-c++-common/goacc/data-2.c
index a67d8a4..1043bf8a 100644
--- a/gcc/testsuite/c-c++-common/goacc/data-2.c
+++ b/gcc/testsuite/c-c++-common/goacc/data-2.c
@@ -10,12 +10,14 @@ foo (void)
 #pragma acc exit data delete (a) if (0)
 #pragma acc exit data copyout (b) if (a)
 #pragma acc exit data delete (b)
-#pragma acc enter /* { dg-error "expected 'data' in" } */
-#pragma acc exit /* { dg-error "expected 'data' in" } */
+#pragma acc enter /* { dg-error "expected 'data' after" } */
+#pragma acc exit /* { dg-error "expected 'data' after" } */
 #pragma acc enter data /* { dg-error "has no data movement clause" } */
-#pragma acc exit data /* { dg-error "has no data movement clause" } */
-#pragma acc enter Data /* { dg-error "invalid pragma before" } */
-#pragma acc exit copyout (b) /* { dg-error "invalid pragma before" } */
+#pragma acc exit data /* { dg-error "no data movement clause" } */
+#pragma acc enter Data /* { dg-error "expected 'data' after" } */
+#pragma acc exit copyout (b) /* { dg-error "expected 'data' after" } */
+#pragma acc enter for /* { dg-error "expected 'data' after" } */
+#pragma acc enter data2 /* { dg-error "expected 'data' after" } */
 }
 
 /* { dg-error "has no data movement clause" "" { target *-*-* } 8 } */
diff --git a/gcc/testsuite/c-c++-common/goacc/executeables-1.c b/gcc/testsuite/c-c++-common/goacc/executeables-1.c
new file mode 100644
index 0000000..da89437
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/goacc/executeables-1.c
@@ -0,0 +1,74 @@
+/* { dg-do compile } */
+
+void
+foo (void)
+{
+  const int N = 32;
+  float x[N], y[N];
+  int flag = 0;
+
+  if (flag)
+#pragma acc update host (x[0:N]) /* { dg-error "may only be used in compound statements" } */
+  flag = 1;
+
+  while (flag)
+#pragma acc update host (x[0:N]) /* { dg-error "may only be used in compound statements" } */
+  flag = 2;
+
+#pragma acc enter data create (x[0:N])
+  {
+    if (flag)
+#pragma acc update host (x[0:N]) /* { dg-error "may only be used in compound statements" } */
+    flag = 1;
+  }
+
+  if (flag)
+  while (flag)
+#pragma acc update host (x[0:N]) /* { dg-error "may only be used in compound statements" } */
+  flag = 2;
+
+  if (flag)
+#pragma acc wait /* { dg-error "may only be used in compound statements" } */
+  flag = 1;
+
+  while (flag)
+#pragma acc wait /* { dg-error "may only be used in compound statements" } */
+  flag = 2;
+
+#pragma acc enter data create (x[0:N])
+  {
+    if (flag)
+#pragma acc wait /* { dg-error "may only be used in compound statements" } */
+    flag = 1;
+  }
+
+  if (flag)
+#pragma acc enter data create (x[0:N]) /* { dg-error "may only be used in compound statements" } */
+  flag = 1;
+
+  while (flag)
+#pragma acc enter data create (x[0:N]) /* { dg-error "may only be used in compound statements" } */
+  flag = 2;
+
+#pragma acc enter data create (x[0:N])
+  {
+    if (flag)
+#pragma acc enter data create (y[0:N]) /* { dg-error "may only be used in compound statements" } */
+    flag = 1;
+  }
+
+  if (flag)
+#pragma acc exit data delete (x[0:N]) /* { dg-error "may only be used in compound statements" } */
+  flag = 1;
+
+  while (flag)
+#pragma acc exit data delete (x[0:N]) /* { dg-error "may only be used in compound statements" } */
+  flag = 2;
+
+#pragma acc enter data create (x[0:N])
+  {
+    if (flag)
+#pragma acc exit data delete (x[0:N]) /* { dg-error "may only be used in compound statements" } */
+    flag = 1;
+  }
+}
diff --git a/gcc/testsuite/g++.dg/goacc/data-1.C b/gcc/testsuite/g++.dg/goacc/data-1.C
index 54676dc..2b210dc 100644
--- a/gcc/testsuite/g++.dg/goacc/data-1.C
+++ b/gcc/testsuite/g++.dg/goacc/data-1.C
@@ -8,12 +8,12 @@ foo (int &a, int (&b)[100], int &n)
 #pragma acc exit data delete (a) if (0)
 #pragma acc exit data copyout (b) if (a)
 #pragma acc exit data delete (b)
-#pragma acc enter /* { dg-error "expected 'data' in" } */
-#pragma acc exit /* { dg-error "expected 'data' in" } */
+#pragma acc enter /* { dg-error "expected 'data' after" } */
+#pragma acc exit /* { dg-error "expected 'data' after" } */
 #pragma acc enter data /* { dg-error "has no data movement clause" } */
 #pragma acc exit data /* { dg-error "has no data movement clause" } */
-#pragma acc enter Data /* { dg-error "invalid pragma before" } */
-#pragma acc exit copyout (b) /* { dg-error "invalid pragma before" } */
+#pragma acc enter Data /* { dg-error "expected 'data' after" } */
+#pragma acc exit copyout (b) /* { dg-error "expected 'data' after" } */
 }
 
 template<typename T>
@@ -27,12 +27,12 @@ foo (T &a, T (&b)[100], T &n)
 #pragma acc exit data delete (a) if (0)
 #pragma acc exit data copyout (b) if (a)
 #pragma acc exit data delete (b)
-#pragma acc enter /* { dg-error "expected 'data' in" } */
-#pragma acc exit /* { dg-error "expected 'data' in" } */
+#pragma acc enter /* { dg-error "expected 'data' after" } */
+#pragma acc exit /* { dg-error "expected 'data' after" } */
 #pragma acc enter data /* { dg-error "has no data movement clause" } */
 #pragma acc exit data /* { dg-error "has no data movement clause" } */
-#pragma acc enter Data /* { dg-error "invalid pragma before" } */
-#pragma acc exit copyout (b) /* { dg-error "invalid pragma before" } */
+#pragma acc enter Data /* { dg-error "expected 'data' after" } */
+#pragma acc exit copyout (b) /* { dg-error "expected 'data' after" } */
 }
 
 /* { dg-error "has no data movement clause" "" { target *-*-* } 6 } */
-- 
cgit v1.1


From f1bca06f624245fde8a485deb2a589ba5d752537 Mon Sep 17 00:00:00 2001
From: Georg-Johann Lay <gjl@gcc.gnu.org>
Date: Fri, 2 Dec 2016 15:08:27 +0000
Subject: avr.c: Fix coding rule glitches.

	* config/avr/avr.c: Fix coding rule glitches.

From-SVN: r243186
---
 gcc/ChangeLog        |   6 +-
 gcc/config/avr/avr.c | 361 +++++++++++++++++++++++++--------------------------
 2 files changed, 184 insertions(+), 183 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 0880c84..5849b0f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,4 +1,8 @@
-2016-12-09  Martin Jambor  <mjambor@suse.cz>
+2016-12-02  Georg-Johann Lay  <avr@gjlay.de>
+
+	* config/avr/avr.c: Fix coding rule glitches.
+
+2016-12-02  Martin Jambor  <mjambor@suse.cz>
 
 	* hsa.c (hsa_callable_function_p): Return false for artificial
 	functions.
diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c
index 48f4788..8ceeff4 100644
--- a/gcc/config/avr/avr.c
+++ b/gcc/config/avr/avr.c
@@ -747,8 +747,8 @@ avr_option_override (void)
      introduces additional code in LIM and increases reg pressure.  */
 
   maybe_set_param_value (PARAM_ALLOW_STORE_DATA_RACES, 1,
-      global_options.x_param_values,
-      global_options_set.x_param_values);
+                         global_options.x_param_values,
+                         global_options_set.x_param_values);
 
   /* Unwind tables currently require a frame pointer for correctness,
      see toplev.c:process_options().  */
@@ -1034,7 +1034,7 @@ avr_set_current_function (tree decl)
   if (cfun->machine->is_OS_task + cfun->machine->is_OS_main
       + (cfun->machine->is_signal || cfun->machine->is_interrupt) > 1)
     error_at (loc, "function attributes %qs, %qs and %qs are mutually"
-               " exclusive", "OS_task", "OS_main", isr);
+              " exclusive", "OS_task", "OS_main", isr);
 
   /* 'naked' will hide effects of 'OS_task' and 'OS_main'.  */
 
@@ -1299,7 +1299,7 @@ avr_return_addr_rtx (int count, rtx tem)
 
   /* Can only return this function's return address. Others not supported.  */
   if (count)
-     return NULL;
+    return NULL;
 
   if (AVR_3_BYTE_PC)
     {
@@ -1313,7 +1313,7 @@ avr_return_addr_rtx (int count, rtx tem)
   r = gen_rtx_PLUS (Pmode, tem, r);
   r = gen_frame_mem (Pmode, memory_address (Pmode, r));
   r = gen_rtx_ROTATE (HImode, r, GEN_INT (8));
-  return  r;
+  return r;
 }
 
 /* Return 1 if the function epilogue is just a single "ret".  */
@@ -2093,7 +2093,6 @@ avr_asm_function_begin_epilogue (FILE *file)
 static bool
 avr_cannot_modify_jumps_p (void)
 {
-
   /* Naked Functions must not have any instructions after
      their epilogue, see PR42240 */
 
@@ -2698,7 +2697,7 @@ avr_print_operand (FILE *file, rtx x, int code)
       else if (code == 'b')
         {
           if (GET_CODE (addr) != PLUS)
-               fatal_insn ("bad address, not (reg+disp):", addr);
+            fatal_insn ("bad address, not (reg+disp):", addr);
 
           avr_print_operand_address (file, VOIDmode, XEXP (addr, 0));
         }
@@ -2708,7 +2707,7 @@ avr_print_operand (FILE *file, rtx x, int code)
             fatal_insn ("bad address, not post_inc or pre_dec:", addr);
 
           if (code == 'p')
-	    /* X, Y, Z */
+            /* X, Y, Z */
             avr_print_operand_address (file, VOIDmode, XEXP (addr, 0));
           else
             avr_print_operand (file, XEXP (addr, 0), 0);  /* r26, r28, r30 */
@@ -3723,8 +3722,6 @@ output_movhi (rtx_insn *insn, rtx xop[], int *plen)
       return avr_out_lpm (insn, xop, plen);
     }
 
-  gcc_assert (2 == GET_MODE_SIZE (GET_MODE (dest)));
-
   if (REG_P (dest))
     {
       if (REG_P (src)) /* mov r,r */
@@ -3825,8 +3822,8 @@ out_movqi_r_mr (rtx_insn *insn, rtx op[], int *plen)
     }
 
   if (GET_CODE (x) == PLUS
-           && REG_P (XEXP (x, 0))
-           && CONST_INT_P (XEXP (x, 1)))
+      && REG_P (XEXP (x, 0))
+      && CONST_INT_P (XEXP (x, 1)))
     {
       /* memory access by reg+disp */
 
@@ -4016,7 +4013,7 @@ out_movhi_r_mr (rtx_insn *insn, rtx op[], int *plen)
                            "ldd %B0,Y+63"    CR_TAB
                            "sbiw r28,%o1-62", op, plen, -4)
 
-              : avr_asm_len ("subi r28,lo8(-%o1)" CR_TAB
+            : avr_asm_len ("subi r28,lo8(-%o1)" CR_TAB
                            "sbci r29,hi8(-%o1)" CR_TAB
                            "ld %A0,Y"           CR_TAB
                            "ldd %B0,Y+1"        CR_TAB
@@ -4385,7 +4382,7 @@ avr_out_movsi_mr_r_reg_no_disp_tiny (rtx_insn *insn, rtx op[], int *l)
 
   if (reg_base == reg_src)
     {
-	  /* "ld r26,-X" is undefined */
+      /* "ld r26,-X" is undefined */
       if (reg_unused_after (insn, base))
         {
           return *l = 7, ("mov __tmp_reg__, %B1"  CR_TAB
@@ -4672,6 +4669,7 @@ output_movsisf (rtx_insn *insn, rtx operands[], int *l)
     l = &dummy;
 
   gcc_assert (4 == GET_MODE_SIZE (GET_MODE (dest)));
+
   if (REG_P (dest))
     {
       if (REG_P (src)) /* mov r,r */
@@ -4717,7 +4715,7 @@ output_movsisf (rtx_insn *insn, rtx operands[], int *l)
       const char *templ;
 
       if (src == CONST0_RTX (GET_MODE (dest)))
-	  operands[1] = zero_reg_rtx;
+        operands[1] = zero_reg_rtx;
 
       templ = out_movsi_mr_r (insn, operands, real_l);
 
@@ -4785,7 +4783,7 @@ avr_out_load_psi_reg_disp_tiny (rtx_insn *insn, rtx *op, int *plen)
                           TINY_SBIW (%I1, %J1, 1)     CR_TAB
                           "ld %A0,%b1"                CR_TAB
                           "mov %B0,__tmp_reg__", op, plen, -8);
-   }
+    }
   else
     {
       avr_asm_len (TINY_ADIW (%I1, %J1, %o1)   CR_TAB
@@ -4914,9 +4912,9 @@ avr_out_load_psi (rtx_insn *insn, rtx *op, int *plen)
                             "ldd %A0,%A1" CR_TAB
                             "mov %B0,__tmp_reg__", op, plen, -4);
 
-        return avr_asm_len ("ldd %A0,%A1" CR_TAB
-                            "ldd %B0,%B1" CR_TAB
-                            "ldd %C0,%C1", op, plen, -3);
+      return avr_asm_len ("ldd %A0,%A1" CR_TAB
+                          "ldd %B0,%B1" CR_TAB
+                          "ldd %C0,%C1", op, plen, -3);
     }
   else if (GET_CODE (base) == PRE_DEC) /* (--R) */
     return avr_asm_len ("ld %C0,%1" CR_TAB
@@ -5191,14 +5189,14 @@ avr_out_movqi_mr_r_reg_disp_tiny (rtx_insn *insn, rtx op[], int *plen)
                    TINY_ADIW (%I0, %J0, %o0) CR_TAB
                    "st %b0,__tmp_reg__", op, plen, -4);
     }
-    else
+  else
     {
       avr_asm_len (TINY_ADIW (%I0, %J0, %o0) CR_TAB
                    "st %b0,%1", op, plen, -3);
     }
 
   if (!reg_unused_after (insn, XEXP (x, 0)))
-      avr_asm_len (TINY_SBIW (%I0, %J0, %o0), op, plen, 2);
+    avr_asm_len (TINY_SBIW (%I0, %J0, %o0), op, plen, 2);
 
   return "";
 }
@@ -5410,11 +5408,11 @@ avr_out_movhi_mr_r_reg_no_disp_tiny (rtx_insn *insn, rtx op[], int *plen)
     }
 
   return !mem_volatile_p && reg_unused_after (insn, base)
-      ? avr_asm_len ("st %0+,%A1" CR_TAB
-                     "st %0,%B1", op, plen, -2)
-      : avr_asm_len (TINY_ADIW (%E0, %F0, 1) CR_TAB
-                     "st %0,%B1"             CR_TAB
-                     "st -%0,%A1", op, plen, -4);
+    ? avr_asm_len ("st %0+,%A1" CR_TAB
+                   "st %0,%B1", op, plen, -2)
+    : avr_asm_len (TINY_ADIW (%E0, %F0, 1) CR_TAB
+                   "st %0,%B1"             CR_TAB
+                   "st -%0,%A1", op, plen, -4);
 }
 
 static const char*
@@ -5797,8 +5795,8 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
               && reg_unused_after (insn, xreg))
             {
               return AVR_TINY
-                  ? avr_asm_len (TINY_ADIW (%A0, %B0, %n1), xop, plen, 2)
-                  : avr_asm_len ("adiw %0,%n1", xop, plen, 1);
+                ? avr_asm_len (TINY_ADIW (%A0, %B0, %n1), xop, plen, 2)
+                : avr_asm_len ("adiw %0,%n1", xop, plen, 1);
             }
         }
 
@@ -5973,7 +5971,7 @@ out_shift_with_cnt (const char *templ, rtx_insn *insn, rtx operands[],
       int max_len = 10;  /* If larger than this, always use a loop.  */
 
       if (count <= 0)
-          return;
+        return;
 
       if (count < 8 && !scratch)
         use_zero_reg = true;
@@ -6044,7 +6042,7 @@ out_shift_with_cnt (const char *templ, rtx_insn *insn, rtx operands[],
     fatal_insn ("bad shift insn:", insn);
 
   if (second_label)
-      avr_asm_len ("rjmp 2f", op, plen, 1);
+    avr_asm_len ("rjmp 2f", op, plen, 1);
 
   avr_asm_len ("1:", op, plen, 0);
   avr_asm_len (templ, op, plen, t_len);
@@ -8774,9 +8772,9 @@ avr_out_fract (rtx_insn *insn, rtx operands[], bool intsigned, int *plen)
 	      xop[3] = all_regs_rtx[dest.regno_msb];
 	      avr_asm_len ("ldi %3,127", xop, plen, 1);
 	      avr_asm_len ((have_carry && lsb_in_tmp_reg ? "adc __tmp_reg__,%3"
-			   : have_carry ? "adc %2,%3"
-			   : lsb_in_tmp_reg ? "add __tmp_reg__,%3"
-			   : "add %2,%3"),
+			    : have_carry ? "adc %2,%3"
+			    : lsb_in_tmp_reg ? "add __tmp_reg__,%3"
+			    : "add %2,%3"),
 			   xop, plen, 1);
 	    }
 	  else
@@ -8860,7 +8858,7 @@ avr_out_fract (rtx_insn *insn, rtx operands[], bool intsigned, int *plen)
                      "lsl __tmp_reg__", &all_regs_rtx[s0], plen, 2);
 
       sign_in_carry = true;
-  }
+    }
 
   gcc_assert (sign_in_carry + msb_in_carry + lsb_in_carry <= 1);
 
@@ -8979,150 +8977,150 @@ avr_out_round (rtx_insn *insn ATTRIBUTE_UNUSED, rtx *xop, int *plen)
 
 
 /* Create RTL split patterns for byte sized rotate expressions.  This
-  produces a series of move instructions and considers overlap situations.
-  Overlapping non-HImode operands need a scratch register.  */
+   produces a series of move instructions and considers overlap situations.
+   Overlapping non-HImode operands need a scratch register.  */
 
 bool
 avr_rotate_bytes (rtx operands[])
 {
-    machine_mode mode = GET_MODE (operands[0]);
-    bool overlapped = reg_overlap_mentioned_p (operands[0], operands[1]);
-    bool same_reg = rtx_equal_p (operands[0], operands[1]);
-    int num = INTVAL (operands[2]);
-    rtx scratch = operands[3];
-    /* Work out if byte or word move is needed.  Odd byte rotates need QImode.
-       Word move if no scratch is needed, otherwise use size of scratch.  */
-    machine_mode move_mode = QImode;
-    int move_size, offset, size;
-
-    if (num & 0xf)
-      move_mode = QImode;
-    else if ((mode == SImode && !same_reg) || !overlapped)
-      move_mode = HImode;
-    else
-      move_mode = GET_MODE (scratch);
-
-    /* Force DI rotate to use QI moves since other DI moves are currently split
-       into QI moves so forward propagation works better.  */
-    if (mode == DImode)
-      move_mode = QImode;
-    /* Make scratch smaller if needed.  */
-    if (SCRATCH != GET_CODE (scratch)
-        && HImode == GET_MODE (scratch)
-        && QImode == move_mode)
-      scratch = simplify_gen_subreg (move_mode, scratch, HImode, 0);
-
-    move_size = GET_MODE_SIZE (move_mode);
-    /* Number of bytes/words to rotate.  */
-    offset = (num  >> 3) / move_size;
-    /* Number of moves needed.  */
-    size = GET_MODE_SIZE (mode) / move_size;
-    /* Himode byte swap is special case to avoid a scratch register.  */
-    if (mode == HImode && same_reg)
-      {
-	/* HImode byte swap, using xor.  This is as quick as using scratch.  */
-	rtx src, dst;
-	src = simplify_gen_subreg (move_mode, operands[1], mode, 0);
-	dst = simplify_gen_subreg (move_mode, operands[0], mode, 1);
-	if (!rtx_equal_p (dst, src))
-	  {
-	     emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src));
-	     emit_move_insn (src, gen_rtx_XOR (QImode, src, dst));
-	     emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src));
-	  }
-      }
-    else
-      {
+  machine_mode mode = GET_MODE (operands[0]);
+  bool overlapped = reg_overlap_mentioned_p (operands[0], operands[1]);
+  bool same_reg = rtx_equal_p (operands[0], operands[1]);
+  int num = INTVAL (operands[2]);
+  rtx scratch = operands[3];
+  /* Work out if byte or word move is needed.  Odd byte rotates need QImode.
+     Word move if no scratch is needed, otherwise use size of scratch.  */
+  machine_mode move_mode = QImode;
+  int move_size, offset, size;
+
+  if (num & 0xf)
+    move_mode = QImode;
+  else if ((mode == SImode && !same_reg) || !overlapped)
+    move_mode = HImode;
+  else
+    move_mode = GET_MODE (scratch);
+
+  /* Force DI rotate to use QI moves since other DI moves are currently split
+     into QI moves so forward propagation works better.  */
+  if (mode == DImode)
+    move_mode = QImode;
+  /* Make scratch smaller if needed.  */
+  if (SCRATCH != GET_CODE (scratch)
+      && HImode == GET_MODE (scratch)
+      && QImode == move_mode)
+    scratch = simplify_gen_subreg (move_mode, scratch, HImode, 0);
+
+  move_size = GET_MODE_SIZE (move_mode);
+  /* Number of bytes/words to rotate.  */
+  offset = (num  >> 3) / move_size;
+  /* Number of moves needed.  */
+  size = GET_MODE_SIZE (mode) / move_size;
+  /* Himode byte swap is special case to avoid a scratch register.  */
+  if (mode == HImode && same_reg)
+    {
+      /* HImode byte swap, using xor.  This is as quick as using scratch.  */
+      rtx src, dst;
+      src = simplify_gen_subreg (move_mode, operands[1], mode, 0);
+      dst = simplify_gen_subreg (move_mode, operands[0], mode, 1);
+      if (!rtx_equal_p (dst, src))
+        {
+          emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src));
+          emit_move_insn (src, gen_rtx_XOR (QImode, src, dst));
+          emit_move_insn (dst, gen_rtx_XOR (QImode, dst, src));
+        }
+    }
+  else
+    {
 #define MAX_SIZE 8 /* GET_MODE_SIZE (DImode) / GET_MODE_SIZE (QImode)  */
-	/* Create linked list of moves to determine move order.  */
-	struct {
-	  rtx src, dst;
-	  int links;
-	} move[MAX_SIZE + 8];
-	int blocked, moves;
-
-	gcc_assert (size <= MAX_SIZE);
-	/* Generate list of subreg moves.  */
-	for (int i = 0; i < size; i++)
-          {
-	    int from = i;
-	    int to = (from + offset) % size;
-	    move[i].src = simplify_gen_subreg (move_mode, operands[1],
-                                               mode, from * move_size);
-	    move[i].dst = simplify_gen_subreg (move_mode, operands[0],
-                                               mode, to * move_size);
-            move[i].links = -1;
-          }
-	/* Mark dependence where a dst of one move is the src of another move.
-	   The first move is a conflict as it must wait until second is
-	   performed.  We ignore moves to self - we catch this later.  */
-	if (overlapped)
-	  for (int i = 0; i < size; i++)
-	    if (reg_overlap_mentioned_p (move[i].dst, operands[1]))
-	      for (int j = 0; j < size; j++)
-		if (j != i && rtx_equal_p (move[j].src, move[i].dst))
-		  {
-		    /* The dst of move i is the src of move j.  */
-		    move[i].links = j;
-		    break;
-		  }
-
-	blocked = -1;
-	moves = 0;
-	/* Go through move list and perform non-conflicting moves.  As each
-	   non-overlapping move is made, it may remove other conflicts
-	   so the process is repeated until no conflicts remain.  */
-	do
-	  {
-	    blocked = -1;
-	    moves = 0;
-	    /* Emit move where dst is not also a src or we have used that
-	       src already.  */
-	    for (int i = 0; i < size; i++)
-	      if (move[i].src != NULL_RTX)
-		{
-		  if (move[i].links == -1
-		      || move[move[i].links].src == NULL_RTX)
-		    {
-		      moves++;
-		      /* Ignore NOP moves to self.  */
-		      if (!rtx_equal_p (move[i].dst, move[i].src))
-			emit_move_insn (move[i].dst, move[i].src);
-
-		      /* Remove  conflict from list.  */
-		      move[i].src = NULL_RTX;
-		    }
-		  else
-		    blocked = i;
-		}
+      /* Create linked list of moves to determine move order.  */
+      struct {
+        rtx src, dst;
+        int links;
+      } move[MAX_SIZE + 8];
+      int blocked, moves;
+
+      gcc_assert (size <= MAX_SIZE);
+      /* Generate list of subreg moves.  */
+      for (int i = 0; i < size; i++)
+        {
+          int from = i;
+          int to = (from + offset) % size;
+          move[i].src = simplify_gen_subreg (move_mode, operands[1],
+                                             mode, from * move_size);
+          move[i].dst = simplify_gen_subreg (move_mode, operands[0],
+                                             mode, to * move_size);
+          move[i].links = -1;
+        }
+      /* Mark dependence where a dst of one move is the src of another move.
+         The first move is a conflict as it must wait until second is
+         performed.  We ignore moves to self - we catch this later.  */
+      if (overlapped)
+        for (int i = 0; i < size; i++)
+          if (reg_overlap_mentioned_p (move[i].dst, operands[1]))
+            for (int j = 0; j < size; j++)
+              if (j != i && rtx_equal_p (move[j].src, move[i].dst))
+                {
+                  /* The dst of move i is the src of move j.  */
+                  move[i].links = j;
+                  break;
+                }
 
-	    /* Check for deadlock. This is when no moves occurred and we have
-	       at least one blocked move.  */
-	    if (moves == 0 && blocked != -1)
-	      {
-		/* Need to use scratch register to break deadlock.
-		   Add move to put dst of blocked move into scratch.
-		   When this move occurs, it will break chain deadlock.
-		   The scratch register is substituted for real move.  */
-
-		gcc_assert (SCRATCH != GET_CODE (scratch));
-
-		move[size].src = move[blocked].dst;
-		move[size].dst =  scratch;
-		/* Scratch move is never blocked.  */
-		move[size].links = -1;
-		/* Make sure we have valid link.  */
-		gcc_assert (move[blocked].links != -1);
-		/* Replace src of  blocking move with scratch reg.  */
-		move[move[blocked].links].src = scratch;
-		/* Make dependent on scratch move occurring.  */
-		move[blocked].links = size;
-		size=size+1;
-	      }
-	  }
-	while (blocked != -1);
-      }
-    return true;
+      blocked = -1;
+      moves = 0;
+      /* Go through move list and perform non-conflicting moves.  As each
+         non-overlapping move is made, it may remove other conflicts
+         so the process is repeated until no conflicts remain.  */
+      do
+        {
+          blocked = -1;
+          moves = 0;
+          /* Emit move where dst is not also a src or we have used that
+             src already.  */
+          for (int i = 0; i < size; i++)
+            if (move[i].src != NULL_RTX)
+              {
+                if (move[i].links == -1
+                    || move[move[i].links].src == NULL_RTX)
+                  {
+                    moves++;
+                    /* Ignore NOP moves to self.  */
+                    if (!rtx_equal_p (move[i].dst, move[i].src))
+                      emit_move_insn (move[i].dst, move[i].src);
+
+                    /* Remove  conflict from list.  */
+                    move[i].src = NULL_RTX;
+                  }
+                else
+                  blocked = i;
+              }
+
+          /* Check for deadlock. This is when no moves occurred and we have
+             at least one blocked move.  */
+          if (moves == 0 && blocked != -1)
+            {
+              /* Need to use scratch register to break deadlock.
+                 Add move to put dst of blocked move into scratch.
+                 When this move occurs, it will break chain deadlock.
+                 The scratch register is substituted for real move.  */
+
+              gcc_assert (SCRATCH != GET_CODE (scratch));
+
+              move[size].src = move[blocked].dst;
+              move[size].dst =  scratch;
+              /* Scratch move is never blocked.  */
+              move[size].links = -1;
+              /* Make sure we have valid link.  */
+              gcc_assert (move[blocked].links != -1);
+              /* Replace src of  blocking move with scratch reg.  */
+              move[move[blocked].links].src = scratch;
+              /* Make dependent on scratch move occurring.  */
+              move[blocked].links = size;
+              size=size+1;
+            }
+        }
+      while (blocked != -1);
+    }
+  return true;
 }
 
 
@@ -9900,7 +9898,6 @@ avr_asm_output_aligned_decl_common (FILE * stream,
       && SYMBOL_REF_P ((symbol = XEXP (mem, 0)))
       && (SYMBOL_REF_FLAGS (symbol) & (SYMBOL_FLAG_IO | SYMBOL_FLAG_ADDRESS)))
     {
-
       if (!local_p)
 	{
 	  fprintf (stream, "\t.globl\t");
@@ -10139,7 +10136,7 @@ avr_encode_section_info (tree decl, rtx rtl, int new_decl_p)
       && TREE_CODE (decl) != FUNCTION_DECL
       && MEM_P (rtl)
       && SYMBOL_REF_P (XEXP (rtl, 0)))
-   {
+    {
       rtx sym = XEXP (rtl, 0);
       tree type = TREE_TYPE (decl);
       tree attr = DECL_ATTRIBUTES (decl);
@@ -10345,7 +10342,7 @@ avr_adjust_reg_alloc_order (void)
       17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2,
       0, 1,
       32, 33, 34, 35
-  };
+    };
   static const int tiny_order_0[] = {
     20, 21,
     22, 23,
@@ -10366,7 +10363,7 @@ avr_adjust_reg_alloc_order (void)
       17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2,
       0, 1,
       32, 33, 34, 35
-  };
+    };
   static const int tiny_order_1[] = {
     22, 23,
     24, 25,
@@ -10386,7 +10383,7 @@ avr_adjust_reg_alloc_order (void)
       17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2,
       1, 0,
       32, 33, 34, 35
-  };
+    };
 
   /* Select specific register allocation order.
      Tiny Core (ATtiny4/5/9/10/20/40) devices have only 16 registers,
@@ -10397,7 +10394,7 @@ avr_adjust_reg_alloc_order (void)
                       : (AVR_TINY ? tiny_order_0 : order_0));
 
   for (size_t i = 0; i < ARRAY_SIZE (order_0); ++i)
-      reg_alloc_order[i] = order[i];
+    reg_alloc_order[i] = order[i];
 }
 
 
@@ -10767,10 +10764,10 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
                 *total = COSTS_N_INSNS (AVR_HAVE_JMP_CALL ? 5 : 4);
             }
 
-	   if (mode == DImode)
-	     *total *= 2;
+	  if (mode == DImode)
+	    *total *= 2;
 
-	   return true;
+	  return true;
 
 	default:
 	  return false;
@@ -13187,7 +13184,7 @@ avr_expand_delay_cycles (rtx operands0)
       emit_insn (gen_delay_cycles_1 (gen_int_mode (loop_count, QImode),
                                      avr_mem_clobber()));
       cycles -= cycles_used;
-      }
+    }
 
   while (cycles >= 2)
     {
-- 
cgit v1.1


From de7b57234525f55e5edfe8db77ca7ac2a943468f Mon Sep 17 00:00:00 2001
From: Andre Vieira <andre.simoesdiasvieira@arm.com>
Date: Fri, 2 Dec 2016 15:22:43 +0000
Subject: Add support for ARMv8-M's Secure Extensions flag and intrinsics

    gcc/ChangeLog:
    2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
	        Thomas Preud'homme  <thomas.preudhomme@arm.com>

	* config.gcc (extra_headers): Added arm_cmse.h.
	* config/arm/arm-arches.def (ARM_ARCH):
	(armv8-m): Add FL2_CMSE.
	(armv8-m.main): Likewise.
	(armv8-m.main+dsp): Likewise.
	* config/arm/arm-c.c
	(arm_cpu_builtins): Added __ARM_FEATURE_CMSE macro.
	* config/arm/arm-flags.h: Define FL2_CMSE.
	* config/arm/arm.c (arm_arch_cmse): New.
	(arm_option_override): New error for unsupported cmse target.
	* config/arm/arm.h (arm_arch_cmse): New.
	* config/arm/arm.opt (mcmse): New.
	* config/arm/arm_cmse.h: New file.
	* doc/invoke.texi (ARM Options): Add -mcmse.
	* doc/sourcebuild.texi (arm_cmse_ok): Add new effective target.
	* doc/extend.texi: Add ARMv8-M Security Extensions entry.

    gcc/testsuite/ChangeLog:
    2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
	        Thomas Preud'homme  <thomas.preudhomme@arm.com>

	* gcc.target/arm/cmse/cmse.exp: New.
	* gcc.target/arm/cmse/cmse-1.c: New.
	* gcc.target/arm/cmse/cmse-12.c: New.
	* lib/target-supports.exp
	(check_effective_target_arm_cmse_ok): New.

    libgcc/ChangeLog:
    2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
	    Thomas Preud'homme  <thomas.preudhomme@arm.com>

	* config/arm/t-arm (HAVE_CMSE): New.
	* config/arm/cmse.c: New.


Co-Authored-By: Thomas Preud'homme <thomas.preudhomme@arm.com>

From-SVN: r243187
---
 gcc/ChangeLog                               |  20 +++
 gcc/config.gcc                              |   2 +-
 gcc/config/arm/arm-arches.def               |   6 +-
 gcc/config/arm/arm-c.c                      |   8 ++
 gcc/config/arm/arm-flags.h                  |   1 +
 gcc/config/arm/arm.c                        |   7 +
 gcc/config/arm/arm.h                        |   3 +
 gcc/config/arm/arm.opt                      |   4 +
 gcc/config/arm/arm_cmse.h                   | 192 ++++++++++++++++++++++++++++
 gcc/doc/extend.texi                         |  26 ++++
 gcc/doc/invoke.texi                         |   8 +-
 gcc/doc/sourcebuild.texi                    |   4 +
 gcc/testsuite/ChangeLog                     |   9 ++
 gcc/testsuite/gcc.target/arm/cmse/cmse-1.c  |  67 ++++++++++
 gcc/testsuite/gcc.target/arm/cmse/cmse-12.c |  14 ++
 gcc/testsuite/gcc.target/arm/cmse/cmse.exp  |  50 ++++++++
 gcc/testsuite/lib/target-supports.exp       |  13 ++
 libgcc/ChangeLog                            |   6 +
 libgcc/config/arm/cmse.c                    | 108 ++++++++++++++++
 libgcc/config/arm/t-arm                     |  12 ++
 20 files changed, 555 insertions(+), 5 deletions(-)
 create mode 100644 gcc/config/arm/arm_cmse.h
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/cmse-1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/cmse-12.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/cmse.exp
 create mode 100644 libgcc/config/arm/cmse.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5849b0f..f24e99e 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,23 @@
+2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
+	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
+
+	* config.gcc (extra_headers): Added arm_cmse.h.
+	* config/arm/arm-arches.def (ARM_ARCH):
+	(armv8-m): Add FL2_CMSE.
+	(armv8-m.main): Likewise.
+	(armv8-m.main+dsp): Likewise.
+	* config/arm/arm-c.c
+	(arm_cpu_builtins): Added __ARM_FEATURE_CMSE macro.
+	* config/arm/arm-flags.h: Define FL2_CMSE.
+	* config/arm/arm.c (arm_arch_cmse): New.
+	(arm_option_override): New error for unsupported cmse target.
+	* config/arm/arm.h (arm_arch_cmse): New.
+	* config/arm/arm.opt (mcmse): New.
+	* config/arm/arm_cmse.h: New file.
+	* doc/invoke.texi (ARM Options): Add -mcmse.
+	* doc/sourcebuild.texi (arm_cmse_ok): Add new effective target.
+	* doc/extend.texi: Add ARMv8-M Security Extensions entry.
+
 2016-12-02  Georg-Johann Lay  <avr@gjlay.de>
 
 	* config/avr/avr.c: Fix coding rule glitches.
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 98267d8..1fa34ac 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -323,7 +323,7 @@ arc*-*-*)
 arm*-*-*)
 	cpu_type=arm
 	extra_objs="arm-builtins.o aarch-common.o"
-	extra_headers="mmintrin.h arm_neon.h arm_acle.h arm_fp16.h"
+	extra_headers="mmintrin.h arm_neon.h arm_acle.h arm_fp16.h arm_cmse.h"
 	target_type_format_char='%'
 	c_target_objs="arm-c.o"
 	cxx_target_objs="arm-c.o"
diff --git a/gcc/config/arm/arm-arches.def b/gcc/config/arm/arm-arches.def
index cd79bc5..71cabcc 100644
--- a/gcc/config/arm/arm-arches.def
+++ b/gcc/config/arm/arm-arches.def
@@ -70,10 +70,10 @@ ARM_ARCH ("armv8.2-a+fp16", cortexa53,  8A,
 	  ARM_FSET_MAKE (FL_CO_PROC | FL_CRC32 | FL_FOR_ARCH8A,
 			 FL2_FOR_ARCH8_2A | FL2_FP16INST))
 ARM_ARCH("armv8-m.base", cortexm23, 8M_BASE,
-	 ARM_FSET_MAKE_CPU1 (			      FL_FOR_ARCH8M_BASE))
+	  ARM_FSET_MAKE (FL_FOR_ARCH8M_BASE, FL2_CMSE))
 ARM_ARCH("armv8-m.main", cortexm7, 8M_MAIN,
-	 ARM_FSET_MAKE_CPU1(FL_CO_PROC |	      FL_FOR_ARCH8M_MAIN))
+	  ARM_FSET_MAKE (FL_CO_PROC | FL_FOR_ARCH8M_MAIN, FL2_CMSE))
 ARM_ARCH("armv8-m.main+dsp", cortexm33, 8M_MAIN,
-	 ARM_FSET_MAKE_CPU1(FL_CO_PROC | FL_ARCH7EM | FL_FOR_ARCH8M_MAIN))
+	  ARM_FSET_MAKE (FL_CO_PROC | FL_ARCH7EM | FL_FOR_ARCH8M_MAIN, FL2_CMSE))
 ARM_ARCH("iwmmxt",  iwmmxt,     5TE,	ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT))
 ARM_ARCH("iwmmxt2", iwmmxt2,    5TE,	ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT | FL_IWMMXT2))
diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c
index 74417a6..b592134 100644
--- a/gcc/config/arm/arm-c.c
+++ b/gcc/config/arm/arm-c.c
@@ -77,6 +77,14 @@ arm_cpu_builtins (struct cpp_reader* pfile)
 
   def_or_undef_macro (pfile, "__ARM_32BIT_STATE", TARGET_32BIT);
 
+  if (arm_arch8 && !arm_arch_notm)
+    {
+      if (arm_arch_cmse && use_cmse)
+	builtin_define_with_int_value ("__ARM_FEATURE_CMSE", 3);
+      else
+	builtin_define ("__ARM_FEATURE_CMSE");
+    }
+
   if (TARGET_ARM_FEATURE_LDREX)
     builtin_define_with_int_value ("__ARM_FEATURE_LDREX",
 				   TARGET_ARM_FEATURE_LDREX);
diff --git a/gcc/config/arm/arm-flags.h b/gcc/config/arm/arm-flags.h
index 7ce059b..fb49838 100644
--- a/gcc/config/arm/arm-flags.h
+++ b/gcc/config/arm/arm-flags.h
@@ -70,6 +70,7 @@
 #define FL2_ARCH8_2   (1U << 1)		/* Architecture 8.2.  */
 #define FL2_FP16INST  (1U << 2)		/* FP16 Instructions for ARMv8.2 and
 					   later.  */
+#define FL2_CMSE      (1U << 3)		/* ARMv8-M Security Extensions.  */
 
 /* Flags that only effect tuning, not available instructions.  */
 #define FL_TUNE		(FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 74cb64c..0d7d38a 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -909,6 +909,9 @@ int arm_condexec_masklen = 0;
 /* Nonzero if chip supports the ARMv8 CRC instructions.  */
 int arm_arch_crc = 0;
 
+/* Nonzero if chip supports the ARMv8-M security extensions.  */
+int arm_arch_cmse = 0;
+
 /* Nonzero if the core has a very small, high-latency, multiply unit.  */
 int arm_m_profile_small_mul = 0;
 
@@ -3227,6 +3230,7 @@ arm_option_override (void)
   arm_arch_no_volatile_ce = ARM_FSET_HAS_CPU1 (insn_flags, FL_NO_VOLATILE_CE);
   arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
   arm_arch_crc = ARM_FSET_HAS_CPU1 (insn_flags, FL_CRC32);
+  arm_arch_cmse = ARM_FSET_HAS_CPU2 (insn_flags, FL2_CMSE);
   arm_m_profile_small_mul = ARM_FSET_HAS_CPU1 (insn_flags, FL_SMALLMUL);
   arm_fp16_inst = ARM_FSET_HAS_CPU2 (insn_flags, FL2_FP16INST);
   if (arm_fp16_inst)
@@ -3494,6 +3498,9 @@ arm_option_override (void)
   if (target_slow_flash_data || target_pure_code)
     arm_disable_literal_pool = true;
 
+  if (use_cmse && !arm_arch_cmse)
+    error ("target CPU does not support ARMv8-M Security Extensions");
+
   /* Disable scheduling fusion by default if it's not armv7 processor
      or doesn't prefer ldrd/strd.  */
   if (flag_schedule_fusion == 2
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 464710b..3d62743 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -523,6 +523,9 @@ extern bool arm_disable_literal_pool;
 /* Nonzero if chip supports the ARMv8 CRC instructions.  */
 extern int arm_arch_crc;
 
+/* Nonzero if chip supports the ARMv8-M Security Extensions.  */
+extern int arm_arch_cmse;
+
 #ifndef TARGET_DEFAULT
 #define TARGET_DEFAULT  (MASK_APCS_FRAME)
 #endif
diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt
index 8856976..a37facc 100644
--- a/gcc/config/arm/arm.opt
+++ b/gcc/config/arm/arm.opt
@@ -105,6 +105,10 @@ mfloat-abi=
 Target RejectNegative Joined Enum(float_abi_type) Var(arm_float_abi) Init(TARGET_DEFAULT_FLOAT_ABI)
 Specify if floating point hardware should be used.
 
+mcmse
+Target RejectNegative Var(use_cmse)
+Specify that the compiler should target secure code as per ARMv8-M Security Extensions.
+
 Enum
 Name(float_abi_type) Type(enum float_abi_type)
 Known floating-point ABIs (for use with the -mfloat-abi= option):
diff --git a/gcc/config/arm/arm_cmse.h b/gcc/config/arm/arm_cmse.h
new file mode 100644
index 0000000..894343b
--- /dev/null
+++ b/gcc/config/arm/arm_cmse.h
@@ -0,0 +1,192 @@
+/* ARMv8-M Security Extensions intrinsics include file.
+
+   Copyright (C) 2015-2016 Free Software Foundation, Inc.
+   Contributed by ARM Ltd.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+
+#ifndef _GCC_ARM_CMSE_H
+#define _GCC_ARM_CMSE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if __ARM_FEATURE_CMSE & 1
+
+#include <stddef.h>
+#include <stdint.h>
+
+#ifdef __ARM_BIG_ENDIAN
+
+typedef union {
+  struct cmse_address_info {
+#if __ARM_FEATURE_CMSE & 2
+    unsigned idau_region:8;
+    unsigned idau_region_valid:1;
+    unsigned secure:1;
+    unsigned nonsecure_readwrite_ok:1;
+    unsigned nonsecure_read_ok:1;
+#else
+    unsigned :12;
+#endif
+    unsigned readwrite_ok:1;
+    unsigned read_ok:1;
+#if __ARM_FEATURE_CMSE & 2
+    unsigned sau_region_valid:1;
+#else
+    unsigned :1;
+#endif
+    unsigned mpu_region_valid:1;
+#if __ARM_FEATURE_CMSE & 2
+    unsigned sau_region:8;
+#else
+    unsigned :8;
+#endif
+    unsigned mpu_region:8;
+  } flags;
+  unsigned value;
+} cmse_address_info_t;
+
+#else
+
+typedef union {
+  struct cmse_address_info {
+    unsigned mpu_region:8;
+#if __ARM_FEATURE_CMSE & 2
+    unsigned sau_region:8;
+#else
+    unsigned :8;
+#endif
+    unsigned mpu_region_valid:1;
+#if __ARM_FEATURE_CMSE & 2
+    unsigned sau_region_valid:1;
+#else
+    unsigned :1;
+#endif
+    unsigned read_ok:1;
+    unsigned readwrite_ok:1;
+#if __ARM_FEATURE_CMSE & 2
+    unsigned nonsecure_read_ok:1;
+    unsigned nonsecure_readwrite_ok:1;
+    unsigned secure:1;
+    unsigned idau_region_valid:1;
+    unsigned idau_region:8;
+#else
+    unsigned :12;
+#endif
+  } flags;
+  unsigned value;
+} cmse_address_info_t;
+
+#endif /* __ARM_BIG_ENDIAN */
+
+#define cmse_TT_fptr(p) (__cmse_TT_fptr ((__cmse_fptr)(p)))
+
+typedef void (*__cmse_fptr)(void);
+
+#define __CMSE_TT_ASM(flags) \
+{ \
+  cmse_address_info_t __result; \
+   __asm__ ("tt" # flags " %0,%1" \
+	   : "=r"(__result) \
+	   : "r"(__p) \
+	   : "memory"); \
+  return __result; \
+}
+
+__extension__ static __inline __attribute__ ((__always_inline__))
+cmse_address_info_t
+__cmse_TT_fptr (__cmse_fptr __p)
+__CMSE_TT_ASM ()
+
+__extension__ static __inline __attribute__ ((__always_inline__))
+cmse_address_info_t
+cmse_TT (void *__p)
+__CMSE_TT_ASM ()
+
+#define cmse_TTT_fptr(p) (__cmse_TTT_fptr ((__cmse_fptr)(p)))
+
+__extension__ static __inline __attribute__ ((__always_inline__))
+cmse_address_info_t
+__cmse_TTT_fptr (__cmse_fptr __p)
+__CMSE_TT_ASM (t)
+
+__extension__ static __inline __attribute__ ((__always_inline__))
+cmse_address_info_t
+cmse_TTT (void *__p)
+__CMSE_TT_ASM (t)
+
+#if __ARM_FEATURE_CMSE & 2
+
+#define cmse_TTA_fptr(p) (__cmse_TTA_fptr ((__cmse_fptr)(p)))
+
+__extension__ static __inline __attribute__ ((__always_inline__))
+cmse_address_info_t
+__cmse_TTA_fptr (__cmse_fptr __p)
+__CMSE_TT_ASM (a)
+
+__extension__ static __inline __attribute__ ((__always_inline__))
+cmse_address_info_t
+cmse_TTA (void *__p)
+__CMSE_TT_ASM (a)
+
+#define cmse_TTAT_fptr(p) (__cmse_TTAT_fptr ((__cmse_fptr)(p)))
+
+__extension__ static __inline cmse_address_info_t
+__attribute__ ((__always_inline__))
+__cmse_TTAT_fptr (__cmse_fptr __p)
+__CMSE_TT_ASM (at)
+
+__extension__ static __inline cmse_address_info_t
+__attribute__ ((__always_inline__))
+cmse_TTAT (void *__p)
+__CMSE_TT_ASM (at)
+
+#define CMSE_AU_NONSECURE	2
+#define CMSE_MPU_NONSECURE	16
+#define CMSE_NONSECURE		18
+
+#define cmse_nsfptr_create(p) ((typeof ((p))) ((intptr_t) (p) & ~1))
+
+#define cmse_is_nsfptr(p) (!((intptr_t) (p) & 1))
+
+#endif /* __ARM_FEATURE_CMSE & 2 */
+
+#define CMSE_MPU_UNPRIV		4
+#define CMSE_MPU_READWRITE	1
+#define CMSE_MPU_READ		8
+
+__extension__ void *
+cmse_check_address_range (void *, size_t, int);
+
+#define cmse_check_pointed_object(p, f) \
+  ((typeof ((p))) cmse_check_address_range ((p), sizeof (*(p)), (f)))
+
+#endif /* __ARM_FEATURE_CMSE & 1 */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _GCC_ARM_CMSE_H */
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index c40e289..6c7fff2 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -11794,6 +11794,7 @@ instructions, but allow the compiler to schedule those calls.
 * ARM iWMMXt Built-in Functions::
 * ARM C Language Extensions (ACLE)::
 * ARM Floating Point Status and Control Intrinsics::
+* ARM ARMv8-M Security Extensions::
 * AVR Built-in Functions::
 * Blackfin Built-in Functions::
 * FR-V Built-in Functions::
@@ -12639,6 +12640,31 @@ unsigned int __builtin_arm_get_fpscr ()
 void __builtin_arm_set_fpscr (unsigned int)
 @end smallexample
 
+@node ARM ARMv8-M Security Extensions
+@subsection ARM ARMv8-M Security Extensions
+
+GCC implements the ARMv8-M Security Extensions as described in the ARMv8-M
+Security Extensions: Requirements on Development Tools Engineering
+Specification, which can be found at
+@uref{http://infocenter.arm.com/help/topic/com.arm.doc.ecm0359818/ECM0359818_armv8m_security_extensions_reqs_on_dev_tools_1_0.pdf}.
+
+As part of the Security Extensions GCC implements the intrinsics below.  FPTR
+is used here to mean any function pointer type.
+
+@smallexample
+cmse_address_info_t cmse_TT (void *)
+cmse_address_info_t cmse_TT_fptr (FPTR)
+cmse_address_info_t cmse_TTT (void *)
+cmse_address_info_t cmse_TTT_fptr (FPTR)
+cmse_address_info_t cmse_TTA (void *)
+cmse_address_info_t cmse_TTA_fptr (FPTR)
+cmse_address_info_t cmse_TTAT (void *)
+cmse_address_info_t cmse_TTAT_fptr (FPTR)
+void * cmse_check_address_range (void *, size_t, int)
+typeof(p) cmse_nsfptr_create (FPTR p)
+intptr_t cmse_is_nsfptr (FPTR)
+@end smallexample
+
 @node AVR Built-in Functions
 @subsection AVR Built-in Functions
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index fd549ec..034ae98 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -649,7 +649,8 @@ Objective-C and Objective-C++ Dialects}.
 -mslow-flash-data @gol
 -masm-syntax-unified @gol
 -mrestrict-it @gol
--mpure-code}
+-mpure-code @gol
+-mcmse}
 
 @emph{AVR Options}
 @gccoptlist{-mmcu=@var{mcu} -mabsdata -maccumulate-args @gol
@@ -15378,6 +15379,11 @@ Additionally, when compiling for ELF object format give all text sections the
 ELF processor-specific section attribute @code{SHF_ARM_PURECODE}.  This option
 is only available when generating non-pic code for ARMv7-M targets.
 
+@item -mcmse
+@opindex mcmse
+Generate secure code as per the "ARMv8-M Security Extensions: Requirements on
+Development Tools Engineering Specification", which can be found at
+@url{http://infocenter.arm.com/help/topic/com.arm.doc.ecm0359818/ECM0359818_armv8m_security_extensions_reqs_on_dev_tools_1_0.pdf}.
 @end table
 
 @node AVR Options
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index fdda301..e7fdd2d 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -1674,6 +1674,10 @@ ARM target generates Thumb-1 code for @code{-mthumb} with
 ARM target for which divmod transform is disabled, if it supports hardware
 div instruction.
 
+@item arm_cmse_ok
+ARM target supports ARMv8-M Security Extensions, enabled by the @code{-mcmse}
+option.
+
 @end table
 
 @subsubsection AArch64-specific attributes
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index f1b5c35..1e3d651 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,12 @@
+2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
+	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
+
+	* gcc.target/arm/cmse/cmse.exp: New.
+	* gcc.target/arm/cmse/cmse-1.c: New.
+	* gcc.target/arm/cmse/cmse-12.c: New.
+	* lib/target-supports.exp
+	(check_effective_target_arm_cmse_ok): New.
+
 2016-12-02  Cesar Philippidis  <cesar@codesourcery.com>
 	    James Norris  <jnorris@codesourcery.com>
 
diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-1.c b/gcc/testsuite/gcc.target/arm/cmse/cmse-1.c
new file mode 100644
index 0000000..d5b9a2d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-1.c
@@ -0,0 +1,67 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -mcmse -fdump-rtl-expand" }  */
+
+#include <arm_cmse.h>
+
+extern int a;
+extern int bar (void);
+
+int foo (char * p)
+{
+  cmse_address_info_t cait;
+
+  cait = cmse_TT (&a);
+  if (cait.flags.mpu_region)
+    a++;
+
+  cait = cmse_TT_fptr (&bar);
+  if (cait.flags.mpu_region)
+    a+= bar ();
+
+  cait = cmse_TTA (&a);
+  if (cait.flags.mpu_region)
+    a++;
+
+  cait = cmse_TTA_fptr (&bar);
+  if (cait.flags.mpu_region)
+    a+= bar ();
+
+  cait = cmse_TTT (&a);
+  if (cait.flags.mpu_region)
+    a++;
+
+  cait = cmse_TTT_fptr (&bar);
+  if (cait.flags.mpu_region)
+    a+= bar ();
+
+  cait = cmse_TTAT (&a);
+  if (cait.flags.mpu_region)
+    a++;
+
+  cait = cmse_TTAT_fptr (&bar);
+  if (cait.flags.mpu_region)
+    a+= bar ();
+
+  p = (char *) cmse_check_address_range ((void *) p, sizeof (char), 0);
+  p = (char *) cmse_check_address_range ((void *) p, sizeof (char),
+					 CMSE_MPU_UNPRIV);
+  p = (char *) cmse_check_address_range ((void *) p, sizeof (char),
+					 CMSE_MPU_READWRITE);
+  p = (char *) cmse_check_address_range ((void *) p, sizeof (char),
+					 CMSE_MPU_UNPRIV | CMSE_MPU_READ);
+  p = (char *) cmse_check_address_range ((void *) p, sizeof (char),
+					 CMSE_AU_NONSECURE
+					 | CMSE_MPU_NONSECURE);
+  p = (char *) cmse_check_address_range ((void *) p, sizeof (char),
+					 CMSE_NONSECURE | CMSE_MPU_UNPRIV);
+
+  p = (char *) cmse_check_pointed_object (p, CMSE_NONSECURE | CMSE_MPU_UNPRIV);
+
+  return a;
+}
+/* { dg-final { scan-assembler-times "\ttt " 2 } } */
+/* { dg-final { scan-assembler-times "ttt " 2 } } */
+/* { dg-final { scan-assembler-times "tta " 2 } } */
+/* { dg-final { scan-assembler-times "ttat " 2 } } */
+/* { dg-final { scan-assembler-times "bl.cmse_check_address_range" 7 } } */
+/* { dg-final { scan-assembler-not "cmse_check_pointed_object" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-12.c b/gcc/testsuite/gcc.target/arm/cmse/cmse-12.c
new file mode 100644
index 0000000..87a2f13
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-12.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" }  */
+#include <arm_cmse.h>
+
+char *
+foo (char * p)
+{
+  if (!cmse_is_nsfptr (p))
+    return cmse_nsfptr_create (p);
+}
+
+/* Check that neither intrinsic's name shows up in the generated assembly.  */
+/* { dg-final { scan-assembler-not "cmse_is_nsfptr" } } */
+/* { dg-final { scan-assembler-not "cmse_nsfptr_create" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse.exp b/gcc/testsuite/gcc.target/arm/cmse/cmse.exp
new file mode 100644
index 0000000..f797dba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse.exp
@@ -0,0 +1,50 @@
+#   Copyright (C) 1997-2016 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# GCC testsuite for ARMv8-M Security Extensions using the `dg.exp' driver.
+
+# Load support procs.
+load_lib gcc-dg.exp
+
+# Exit immediately if the target does not support -mcmse.
+if ![check_effective_target_arm_cmse_ok] then {
+    return
+}
+
+# If a testcase doesn't have special options, use these.
+global DEFAULT_CFLAGS
+if ![info exists DEFAULT_CFLAGS] then {
+    set DEFAULT_CFLAGS " -ansi -pedantic-errors"
+}
+
+# Initialize `dg'.
+dg-init
+
+set saved-dg-do-what-default ${dg-do-what-default}
+set dg-do-what-default "assemble"
+
+set saved-lto_torture_options ${LTO_TORTURE_OPTIONS}
+set LTO_TORTURE_OPTIONS ""
+
+# These are for both baseline and mainline.
+gcc-dg-runtest [lsort [glob $srcdir/$subdir/*.c]] \
+	"" $DEFAULT_CFLAGS
+
+set LTO_TORTURE_OPTIONS ${saved-lto_torture_options}
+set dg-do-what-default ${saved-dg-do-what-default}
+
+# All done.
+dg-finish
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 798cf6b..0fc0baf 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3916,6 +3916,19 @@ proc check_effective_target_arm_thumb1_cbz_ok {} {
     }
 }
 
+# Return 1 if this is an ARM target where ARMv8-M Security Extensions are
+# available, i.e. a function using "bxns" compiles cleanly with -mcmse.
+
+proc check_effective_target_arm_cmse_ok {} {
+    return [check_no_compiler_messages arm_cmse object {
+	int
+	foo (void)
+	{
+	  asm ("bxns r0");
+	}
+    } "-mcmse"];
+}
+
 # Return 1 if this compilation turns on string_ops_prefer_neon on.
 
 proc check_effective_target_arm_tune_string_ops_prefer_neon { } {
diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index 438021a..f323f43 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
+	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
+
+	* config/arm/t-arm (HAVE_CMSE): New.
+	* config/arm/cmse.c: New.
+
 2016-11-28  Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
 
 	PR gcc/74748
diff --git a/libgcc/config/arm/cmse.c b/libgcc/config/arm/cmse.c
new file mode 100644
index 0000000..fe3a229
--- /dev/null
+++ b/libgcc/config/arm/cmse.c
@@ -0,0 +1,108 @@
+/* ARMv8-M Security Extensions routines.
+   Copyright (C) 2015-2016 Free Software Foundation, Inc.
+   Contributed by ARM Ltd.
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+
+#if __ARM_FEATURE_CMSE & 1
+
+#include <arm_cmse.h>
+
+/* ARM intrinsic function to perform the permission check selected by FLAGS on
+   SIZE bytes at P; returns P or NULL.  See ACLE changes for ARMv8-M.  */
+
+void *
+cmse_check_address_range (void *p, size_t size, int flags)
+{
+  cmse_address_info_t permb, perme;
+  char *pb = (char *) p, *pe;
+
+  /* Check if the range wraps around.  */
+  if (UINTPTR_MAX - (uintptr_t) p < size)
+    return NULL;
+
+  /* Check if an unknown flag is present.  */
+  int known = CMSE_MPU_UNPRIV | CMSE_MPU_READWRITE | CMSE_MPU_READ;
+  int known_secure_level = CMSE_MPU_UNPRIV;
+#if __ARM_FEATURE_CMSE & 2
+  known |= CMSE_AU_NONSECURE | CMSE_MPU_NONSECURE;
+  known_secure_level |= CMSE_MPU_NONSECURE;
+#endif
+  if (flags & (~known))
+    return NULL;
+
+  /* Run the right TT variant on the first and (if needed) the last byte.  */
+  pe = pb + size - 1;
+  const int singleCheck = (((uintptr_t) pb ^ (uintptr_t) pe) < 32);
+  switch (flags & known_secure_level)
+    {
+    case 0:
+      permb = cmse_TT (pb);
+      perme = singleCheck ? permb : cmse_TT (pe);
+      break;
+    case CMSE_MPU_UNPRIV:
+      permb = cmse_TTT (pb);
+      perme = singleCheck ? permb : cmse_TTT (pe);
+      break;
+#if __ARM_FEATURE_CMSE & 2
+    case CMSE_MPU_NONSECURE:
+      permb = cmse_TTA (pb);
+      perme = singleCheck ? permb : cmse_TTA (pe);
+      break;
+    case CMSE_MPU_UNPRIV | CMSE_MPU_NONSECURE:
+      permb = cmse_TTAT (pb);
+      perme = singleCheck ? permb : cmse_TTAT (pe);
+      break;
+#endif
+    default:
+      /* Invalid flag, eg.  CMSE_MPU_NONSECURE specified but
+	 __ARM_FEATURE_CMSE & 2 == 0.  */
+      return NULL;
+    }
+
+  /* Check that the range does not cross MPU, SAU, or IDAU boundaries.  */
+  if (permb.value != perme.value)
+    return NULL;
+
+  /* Check the permissions on the range.  */
+  switch (flags & (~known_secure_level))
+    {
+#if __ARM_FEATURE_CMSE & 2
+    case CMSE_MPU_READ | CMSE_MPU_READWRITE | CMSE_AU_NONSECURE:
+    case		 CMSE_MPU_READWRITE | CMSE_AU_NONSECURE:
+      return permb.flags.nonsecure_readwrite_ok	? p : NULL;
+    case CMSE_MPU_READ | CMSE_AU_NONSECURE:
+      return permb.flags.nonsecure_read_ok	? p : NULL;
+    case CMSE_AU_NONSECURE:
+      return permb.flags.secure			? NULL : p;
+#endif
+    case CMSE_MPU_READ | CMSE_MPU_READWRITE:
+    case		 CMSE_MPU_READWRITE:
+      return permb.flags.readwrite_ok		? p : NULL;
+    case CMSE_MPU_READ:
+      return permb.flags.read_ok		? p : NULL;
+    default:
+      return NULL;
+    }
+}
+
+
+#endif /* __ARM_FEATURE_CMSE & 1.  */
diff --git a/libgcc/config/arm/t-arm b/libgcc/config/arm/t-arm
index 4e17e99..5618143 100644
--- a/libgcc/config/arm/t-arm
+++ b/libgcc/config/arm/t-arm
@@ -1,3 +1,15 @@
 LIB1ASMSRC = arm/lib1funcs.S
 LIB1ASMFUNCS = _thumb1_case_sqi _thumb1_case_uqi _thumb1_case_shi \
 	_thumb1_case_uhi _thumb1_case_si
+
+HAVE_CMSE:=$(findstring __ARM_FEATURE_CMSE,$(shell $(gcc_compile_bare) -dM -E - </dev/null))
+ifneq ($(shell $(gcc_compile_bare) -E -mcmse - </dev/null 2>/dev/null),)
+CMSE_OPTS:=-mcmse
+endif
+
+ifdef HAVE_CMSE
+libgcc-objects += cmse.o cmse_nonsecure_call.o
+
+cmse.o: $(srcdir)/config/arm/cmse.c
+	$(gcc_compile) -c $(CMSE_OPTS) $<
+endif
-- 
cgit v1.1


From 97b0656d67d2d39a79556bd200d3b6c41b2e5d6f Mon Sep 17 00:00:00 2001
From: Andre Vieira <andre.simoesdiasvieira@arm.com>
Date: Fri, 2 Dec 2016 15:24:40 +0000
Subject: Handling ARMv8-M Security Extension's cmse_nonsecure_entry attribute

    gcc/ChangeLog:
    2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
	    Thomas Preud'homme  <thomas.preudhomme@arm.com>

	* config/arm/arm.c (arm_handle_cmse_nonsecure_entry): New.
	(arm_attribute_table): Added cmse_nonsecure_entry
	(arm_compute_func_type): Handle cmse_nonsecure_entry.
	(cmse_func_args_or_return_in_stack): New.
	(arm_handle_cmse_nonsecure_entry): New.
	* config/arm/arm.h (ARM_FT_CMSE_ENTRY): New macro define.
	(IS_CMSE_ENTRY): Likewise.
	* doc/extend.texi (ARM ARMv8-M Security Extensions): New attribute.

    gcc/testsuite/ChangeLog:
    2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
	    Thomas Preud'homme  <thomas.preudhomme@arm.com>

	* gcc.target/arm/cmse/cmse-3.c: New.


Co-Authored-By: Thomas Preud'homme <thomas.preudhomme@arm.com>

From-SVN: r243188
---
 gcc/ChangeLog                              |  12 +++
 gcc/config/arm/arm.c                       | 114 +++++++++++++++++++++++++++++
 gcc/config/arm/arm.h                       |   2 +
 gcc/doc/extend.texi                        |   3 +
 gcc/testsuite/ChangeLog                    |   5 ++
 gcc/testsuite/gcc.target/arm/cmse/cmse-3.c |  37 ++++++++++
 6 files changed, 173 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/cmse-3.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index f24e99e..7eb56d2 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,6 +1,18 @@
 2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
 	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
+	* config/arm/arm.c (arm_handle_cmse_nonsecure_entry): New.
+	(arm_attribute_table): Added cmse_nonsecure_entry
+	(arm_compute_func_type): Handle cmse_nonsecure_entry.
+	(cmse_func_args_or_return_in_stack): New.
+	(arm_handle_cmse_nonsecure_entry): New.
+	* config/arm/arm.h (ARM_FT_CMSE_ENTRY): New macro define.
+	(IS_CMSE_ENTRY): Likewise.
+	* doc/extend.texi (ARM ARMv8-M Security Extensions): New attribute.
+
+2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
+	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
+
 	* config.gcc (extra_headers): Added arm_cmse.h.
 	* config/arm/arm-arches.def (ARM_ARCH):
 	(armv8-m): Add FL2_CMSE.
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 0d7d38a..7761564 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -136,6 +136,7 @@ static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
 #endif
+static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
 static int arm_comp_type_attributes (const_tree, const_tree);
@@ -344,6 +345,9 @@ static const struct attribute_spec arm_attribute_table[] =
   { "notshared",    0, 0, false, true, false, arm_handle_notshared_attribute,
     false },
 #endif
+  /* ARMv8-M Security Extensions support.  */
+  { "cmse_nonsecure_entry", 0, 0, true, false, false,
+    arm_handle_cmse_nonsecure_entry, false },
   { NULL,           0, 0, false, false, false, NULL, false }
 };
 
@@ -3633,6 +3637,9 @@ arm_compute_func_type (void)
   else
     type |= arm_isr_value (TREE_VALUE (a));
 
+  if (lookup_attribute ("cmse_nonsecure_entry", attr))
+    type |= ARM_FT_CMSE_ENTRY;
+
   return type;
 }
 
@@ -6634,6 +6641,113 @@ arm_handle_notshared_attribute (tree *node,
 }
 #endif
 
+/* This function returns true if a function with declaration FNDECL and type
+   FNTYPE uses the stack to pass arguments or return values, or is variadic,
+   and false otherwise.  This is used for functions with the attributes
+   'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
+   an error describing the first such problem found.  NAME is the name of the
+   attribute used.  */
+
+static bool
+cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
+{
+  function_args_iterator args_iter;
+  CUMULATIVE_ARGS args_so_far_v;
+  cumulative_args_t args_so_far;
+  bool first_param = true;
+  tree arg_type, prev_arg_type = NULL_TREE, ret_type;
+
+  /* Error out if any argument is passed on the stack.  */
+  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
+  args_so_far = pack_cumulative_args (&args_so_far_v);
+  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
+    {
+      rtx arg_rtx;
+      machine_mode arg_mode = TYPE_MODE (arg_type);
+
+      prev_arg_type = arg_type;
+      if (VOID_TYPE_P (arg_type))
+	continue;
+
+      if (!first_param)
+	arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
+      arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
+      if (!arg_rtx
+	  || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
+	{
+	  error ("%qE attribute not available to functions with arguments "
+		 "passed on the stack", name);
+	  return true;
+	}
+      first_param = false;
+    }
+
+  /* Error out for variadic functions since we cannot control how many
+     arguments will be passed and thus stack could be used.  stdarg_p () is not
+     used for the checking to avoid walking the argument list twice.  */
+  if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
+    {
+      error ("%qE attribute not available to functions with variable number "
+	     "of arguments", name);
+      return true;
+    }
+
+  /* Error out if return value is passed on the stack.  */
+  ret_type = TREE_TYPE (fntype);
+  if (arm_return_in_memory (ret_type, fntype))
+    {
+      error ("%qE attribute not available to functions that return value on "
+	     "the stack", name);
+      return true;
+    }
+  return false;
+}
+
+/* Handler for the cmse_nonsecure_entry attribute NAME on *NODE.  The attribute
+   is kept only with -mcmse, on a public FUNCTION_DECL that uses no stack for
+   arguments or return value; else a diagnostic is issued and it is dropped.  */
+
+static tree
+arm_handle_cmse_nonsecure_entry (tree *node, tree name,
+				 tree /* args */,
+				 int /* flags */,
+				 bool *no_add_attrs)
+{
+  tree fndecl;
+
+  if (!use_cmse)
+    {
+      *no_add_attrs = true;
+      warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
+	       name);
+      return NULL_TREE;
+    }
+
+  /* Ignore attribute on anything but a function declaration.  */
+  if (TREE_CODE (*node) != FUNCTION_DECL)
+    {
+      warning (OPT_Wattributes, "%qE attribute only applies to functions",
+	       name);
+      *no_add_attrs = true;
+      return NULL_TREE;
+    }
+
+  fndecl = *node;
+
+  /* Warn for static linkage functions.  */
+  if (!TREE_PUBLIC (fndecl))
+    {
+      warning (OPT_Wattributes, "%qE attribute has no effect on functions "
+	       "with static linkage", name);
+      *no_add_attrs = true;
+      return NULL_TREE;
+    }
+
+  *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
+						TREE_TYPE (fndecl));
+  return NULL_TREE;
+}
+
 /* Return 0 if the attributes for two types are incompatible, 1 if they
    are compatible, and 2 if they are nearly compatible (which causes a
    warning to be generated).  */
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 3d62743..928fad4 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -1385,6 +1385,7 @@ enum reg_class
 #define ARM_FT_VOLATILE		(1 << 4) /* Does not return.  */
 #define ARM_FT_NESTED		(1 << 5) /* Embedded inside another func.  */
 #define ARM_FT_STACKALIGN	(1 << 6) /* Called with misaligned stack.  */
+#define ARM_FT_CMSE_ENTRY	(1 << 7) /* ARMv8-M non-secure entry function.  */
 
 /* Some macros to test these flags.  */
 #define ARM_FUNC_TYPE(t)	(t & ARM_FT_TYPE_MASK)
@@ -1393,6 +1394,7 @@ enum reg_class
 #define IS_NAKED(t)        	(t & ARM_FT_NAKED)
 #define IS_NESTED(t)       	(t & ARM_FT_NESTED)
 #define IS_STACKALIGN(t)       	(t & ARM_FT_STACKALIGN)
+#define IS_CMSE_ENTRY(t)	(t & ARM_FT_CMSE_ENTRY)
 
 
 /* Structure used to hold the function stack frame layout.  Offsets are
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 6c7fff2..d72af3e 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -12648,6 +12648,9 @@ Security Extensions: Requiremenets on Development Tools Engineering
 Specification, which can be found at
 @uref{http://infocenter.arm.com/help/topic/com.arm.doc.ecm0359818/ECM0359818_armv8m_security_extensions_reqs_on_dev_tools_1_0.pdf}.
 
+As part of the Security Extensions GCC implements a new function attribute
+@code{cmse_nonsecure_entry}.
+
 As part of the Security Extensions GCC implements the intrinsics below.  FPTR
 is used here to mean any function pointer type.
 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 1e3d651..4e1240e 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,6 +1,11 @@
 2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
 	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
+	* gcc.target/arm/cmse/cmse-3.c: New.
+
+2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
+	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
+
 	* gcc.target/arm/cmse/cmse.exp: New.
 	* gcc.target/arm/cmse/cmse-1.c: New.
 	* gcc.target/arm/cmse/cmse-12.c: New.
diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-3.c b/gcc/testsuite/gcc.target/arm/cmse/cmse-3.c
new file mode 100644
index 0000000..2c2920e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-3.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" }  */
+
+struct span {
+  int a, b;
+};
+struct span2 {
+  float a, b, c, d;
+};
+
+union test_union
+{
+  long long a;
+  int b;
+  struct span2 c;
+} test_union;
+
+void __attribute__ ((cmse_nonsecure_entry))
+foo (long long a, int b, long long c) {} /* { dg-error "not available to functions with arguments passed on the stack" } */
+
+void __attribute__ ((cmse_nonsecure_entry))
+bar (long long a, int b, struct span c) {} /* { dg-error "not available to functions with arguments passed on the stack" } */
+
+void __attribute__ ((cmse_nonsecure_entry))
+baz (int a, ...) {} /* { dg-error "not available to functions with variable number of arguments" } */
+
+struct span __attribute__ ((cmse_nonsecure_entry))
+qux (void) { /* { dg-error "not available to functions that return value on the stack" } */
+  struct span ret = {0, 0};
+  return ret;
+}
+
+void __attribute__ ((cmse_nonsecure_entry))
+norf (struct span2 a) {}
+
+void __attribute__ ((cmse_nonsecure_entry))
+foo2 (long long a, int b, union test_union c) {} /* { dg-error "not available to functions with arguments passed on the stack" } */
-- 
cgit v1.1


From 9ad1f699b81ce32d1193301ee2c0c188abf64d28 Mon Sep 17 00:00:00 2001
From: Andre Vieira <andre.simoesdiasvieira@arm.com>
Date: Fri, 2 Dec 2016 15:27:03 +0000
Subject: ARMv8-M Security Extension's cmse_nonsecure_entry: __acle_se label
 and bxns return

    gcc/ChangeLog:
    2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
	    Thomas Preud'homme  <thomas.preudhomme@arm.com>

	* config/arm/arm.c (use_return_insn): Change to return with  bxns
	when cmse_nonsecure_entry.
	(output_return_instruction): Likewise.
	(arm_output_function_prologue): Likewise.
	(thumb_pop): Likewise.
	(thumb_exit): Likewise.
	(thumb2_expand_return): Assert that entry functions always have simple
	returns.
	(arm_expand_epilogue): Handle entry functions.
	(arm_function_ok_for_sibcall): Disable sibcall for entry functions.
	(arm_asm_declare_function_name): New.
	* config/arm/arm-protos.h (arm_asm_declare_function_name): New.
	* config/arm/elf.h (ASM_DECLARE_FUNCTION_NAME): Redefine to
	use arm_asm_declare_function_name.

    gcc/testsuite/ChangeLog:
    2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
		Thomas Preud'homme  <thomas.preudhomme@arm.com>

	* gcc.target/arm/cmse/cmse-4.c: New.
	* gcc.target/arm/cmse/cmse-9.c: New.
	* gcc.target/arm/cmse/cmse-10.c: New.


Co-Authored-By: Thomas Preud'homme <thomas.preudhomme@arm.com>

From-SVN: r243189
---
 gcc/ChangeLog                               | 18 +++++++
 gcc/config/arm/arm-protos.h                 |  1 +
 gcc/config/arm/arm.c                        | 83 +++++++++++++++++++++++++----
 gcc/config/arm/elf.h                        | 11 +---
 gcc/testsuite/ChangeLog                     |  7 +++
 gcc/testsuite/gcc.target/arm/cmse/cmse-10.c |  9 ++++
 gcc/testsuite/gcc.target/arm/cmse/cmse-4.c  | 27 ++++++++++
 gcc/testsuite/gcc.target/arm/cmse/cmse-9.c  | 12 +++++
 8 files changed, 148 insertions(+), 20 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/cmse-10.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/cmse-4.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/cmse-9.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 7eb56d2..eeb8c3c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,6 +1,24 @@
 2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
 	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
+	* config/arm/arm.c (use_return_insn): Change to return with  bxns
+	when cmse_nonsecure_entry.
+	(output_return_instruction): Likewise.
+	(arm_output_function_prologue): Likewise.
+	(thumb_pop): Likewise.
+	(thumb_exit): Likewise.
+	(thumb2_expand_return): Assert that entry functions always have simple
+	returns.
+	(arm_expand_epilogue): Handle entry functions.
+	(arm_function_ok_for_sibcall): Disable sibcall for entry functions.
+	(arm_asm_declare_function_name): New.
+	* config/arm/arm-protos.h (arm_asm_declare_function_name): New.
+	* config/arm/elf.h (ASM_DECLARE_FUNCTION_NAME): Redefine to
+	use arm_asm_declare_function_name.
+
+2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
+	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
+
 	* config/arm/arm.c (arm_handle_cmse_nonsecure_entry): New.
 	(arm_attribute_table): Added cmse_nonsecure_entry
 	(arm_compute_func_type): Handle cmse_nonsecure_entry.
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 68e9bea..634a5de 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -33,6 +33,7 @@ extern int arm_volatile_func (void);
 extern void arm_expand_prologue (void);
 extern void arm_expand_epilogue (bool);
 extern void arm_declare_function_name (FILE *, const char *, tree);
+extern void arm_asm_declare_function_name (FILE *, const char *, tree);
 extern void thumb2_expand_return (bool);
 extern const char *arm_strip_name_encoding (const char *);
 extern void arm_asm_output_labelref (FILE *, const char *);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 7761564..db7e0c8 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -3866,6 +3866,11 @@ use_return_insn (int iscond, rtx sibling)
 	return 0;
     }
 
+  /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
+     several instructions if anything needs to be popped.  */
+  if (saved_int_regs && IS_CMSE_ENTRY (func_type))
+    return 0;
+
   /* If there are saved registers but the LR isn't saved, then we need
      two instructions for the return.  */
   if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
@@ -6906,6 +6911,11 @@ arm_function_ok_for_sibcall (tree decl, tree exp)
   if (IS_INTERRUPT (func_type))
     return false;
 
+  /* ARMv8-M non-secure entry functions need to return with bxns which is only
+     generated for entry functions themselves.  */
+  if (IS_CMSE_ENTRY (arm_current_func_type ()))
+    return false;
+
   if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
     {
       /* Check that the return value locations are the same.  For
@@ -18568,6 +18578,7 @@ output_return_instruction (rtx operand, bool really_return, bool reverse,
 	 (e.g. interworking) then we can load the return address
 	 directly into the PC.  Otherwise we must load it into LR.  */
       if (really_return
+	  && !IS_CMSE_ENTRY (func_type)
 	  && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
 	return_reg = reg_names[PC_REGNUM];
       else
@@ -18708,8 +18719,10 @@ output_return_instruction (rtx operand, bool really_return, bool reverse,
 	  break;
 
 	default:
+	  if (IS_CMSE_ENTRY (func_type))
+	    snprintf (instr, sizeof (instr), "bxns%s\t%%|lr", conditional);
 	  /* Use bx if it's available.  */
-	  if (arm_arch5 || arm_arch4t)
+	  else if (arm_arch5 || arm_arch4t)
 	    sprintf (instr, "bx%s\t%%|lr", conditional);
 	  else
 	    sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
@@ -18722,6 +18735,44 @@ output_return_instruction (rtx operand, bool really_return, bool reverse,
   return "";
 }
 
+/* Output in FILE the asm statements declaring the function NAME defined by
+   DECL, plus its "__acle_se_" label for cmse_nonsecure_entry functions.  */
+
+void
+arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
+{
+  size_t cmse_name_len;
+  char *cmse_name = 0;
+  char cmse_prefix[] = "__acle_se_";
+
+  /* When compiling with ARMv8-M Security Extensions enabled, we should print an
+     extra function label for each function with the 'cmse_nonsecure_entry'
+     attribute.  This extra function label should be prefixed with
+     '__acle_se_', telling the linker that it needs to create secure gateway
+     veneers for this function.  */
+  if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
+				    DECL_ATTRIBUTES (decl)))
+    {
+      cmse_name_len = sizeof (cmse_prefix) + strlen (name);
+      cmse_name = XALLOCAVEC (char, cmse_name_len);
+      snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
+      targetm.asm_out.globalize_label (file, cmse_name);
+
+      ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
+      ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
+    }
+
+  ARM_DECLARE_FUNCTION_NAME (file, name, decl);
+  ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
+  ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
+  ASM_OUTPUT_LABEL (file, name);
+
+  if (cmse_name)
+    ASM_OUTPUT_LABEL (file, cmse_name);
+
+  ARM_OUTPUT_FN_UNWIND (file, TRUE);
+}
+
 /* Write the function name into the code section, directly preceding
    the function prologue.
 
@@ -18771,10 +18822,6 @@ arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
 {
   unsigned long func_type;
 
-  /* ??? Do we want to print some of the below anyway?  */
-  if (TARGET_THUMB1)
-    return;
-
   /* Sanity check.  */
   gcc_assert (!arm_ccfsm_state && !arm_target_insn);
 
@@ -18809,6 +18856,8 @@ arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
     asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
   if (IS_STACKALIGN (func_type))
     asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
+  if (IS_CMSE_ENTRY (func_type))
+    asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
 
   asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
 	       crtl->args.size,
@@ -22915,8 +22964,8 @@ thumb_pop (FILE *f, unsigned long mask)
   if (mask & (1 << PC_REGNUM))
     {
       /* Catch popping the PC.  */
-      if (TARGET_INTERWORK || TARGET_BACKTRACE
-	  || crtl->calls_eh_return)
+      if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
+	  || IS_CMSE_ENTRY (arm_current_func_type ()))
 	{
 	  /* The PC is never poped directly, instead
 	     it is popped into r3 and then BX is used.  */
@@ -22977,7 +23026,10 @@ thumb_exit (FILE *f, int reg_containing_return_addr)
       if (crtl->calls_eh_return)
 	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
 
-      asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
+      if (IS_CMSE_ENTRY (arm_current_func_type ()))
+	asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
+      else
+	asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
       return;
     }
   /* Otherwise if we are not supporting interworking and we have not created
@@ -22986,7 +23038,8 @@ thumb_exit (FILE *f, int reg_containing_return_addr)
   else if (!TARGET_INTERWORK
 	   && !TARGET_BACKTRACE
 	   && !is_called_in_ARM_mode (current_function_decl)
-	   && !crtl->calls_eh_return)
+	   && !crtl->calls_eh_return
+	   && !IS_CMSE_ENTRY (arm_current_func_type ()))
     {
       asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
       return;
@@ -23209,7 +23262,10 @@ thumb_exit (FILE *f, int reg_containing_return_addr)
     asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
 
   /* Return to caller.  */
-  asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
+  if (IS_CMSE_ENTRY (arm_current_func_type ()))
+    asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
+  else
+    asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
 }
 
 /* Scan INSN just before assembler is output for it.
@@ -24095,6 +24151,12 @@ thumb2_expand_return (bool simple_return)
 
   if (!simple_return && saved_regs_mask)
     {
+      /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
+	 functions or adapt code to handle according to ACLE.  This path should
+	 not be reachable for cmse_nonsecure_entry functions though we prefer
+	 to assert it for now to ensure that future code changes do not silently
+	 change this behavior.  */
+      gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
       if (num_regs == 1)
         {
           rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
@@ -24512,6 +24574,7 @@ arm_expand_epilogue (bool really_return)
 
       if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
           && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
+	  && !IS_CMSE_ENTRY (func_type)
           && !IS_STACKALIGN (func_type)
           && really_return
           && crtl->args.pretend_args_size == 0
diff --git a/gcc/config/arm/elf.h b/gcc/config/arm/elf.h
index bc4eb86..03931ee 100644
--- a/gcc/config/arm/elf.h
+++ b/gcc/config/arm/elf.h
@@ -75,16 +75,7 @@
 
 /* We might need a ARM specific header to function declarations.  */
 #undef  ASM_DECLARE_FUNCTION_NAME
-#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL)		\
-  do								\
-    {								\
-      ARM_DECLARE_FUNCTION_NAME (FILE, NAME, DECL);		\
-      ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "function");	\
-      ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL));		\
-      ASM_OUTPUT_LABEL(FILE, NAME);				\
-      ARM_OUTPUT_FN_UNWIND (FILE, TRUE);			\
-    }								\
-  while (0)
+#define ASM_DECLARE_FUNCTION_NAME arm_asm_declare_function_name
 
 /* We might need an ARM specific trailer for function declarations.  */
 #undef  ASM_DECLARE_FUNCTION_SIZE
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 4e1240e..e31a1b3 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,6 +1,13 @@
 2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
 	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
+	* gcc.target/arm/cmse/cmse-4.c: New.
+	* gcc.target/arm/cmse/cmse-9.c: New.
+	* gcc.target/arm/cmse/cmse-10.c: New.
+
+2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
+	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
+
 	* gcc.target/arm/cmse/cmse-3.c: New.
 
 2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-10.c b/gcc/testsuite/gcc.target/arm/cmse/cmse-10.c
new file mode 100644
index 0000000..1a91ac3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-10.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" }  */
+
+void
+foo (void) {}
+
+/* { dg-final { scan-assembler-not "bxns" } } */
+/* { dg-final { scan-assembler "foo:" } } */
+/* { dg-final { scan-assembler-not "__acle_se_foo:" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-4.c b/gcc/testsuite/gcc.target/arm/cmse/cmse-4.c
new file mode 100644
index 0000000..6f930ab
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-4.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" }  */
+
+struct span {
+  int a, b;
+};
+
+extern int qux (void);
+
+void __attribute__ ((cmse_nonsecure_entry))
+foo (void) {}
+
+static void __attribute__ ((cmse_nonsecure_entry))
+bar (void) {} /* { dg-warning "has no effect on functions with static linkage" } */
+
+int __attribute__ ((cmse_nonsecure_entry))
+baz (void)
+{
+  return qux ();
+}
+
+/* { dg-final { scan-assembler-times "bxns" 2 } } */
+/* { dg-final { scan-assembler "foo:" } } */
+/* { dg-final { scan-assembler "__acle_se_foo:" } } */
+/* { dg-final { scan-assembler-not "__acle_se_bar:" } } */
+/* { dg-final { scan-assembler "baz:" } } */
+/* { dg-final { scan-assembler "__acle_se_baz:" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-9.c b/gcc/testsuite/gcc.target/arm/cmse/cmse-9.c
new file mode 100644
index 0000000..1d97f0e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-9.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-skip-if "Testing exclusion of -mcmse" { arm-*-* } { "-mcmse" } { "" } }  */
+
+
+int __attribute__ ((cmse_nonsecure_entry))
+foo (int a)
+{ /* { dg-warning "attribute ignored without -mcmse option" } */
+  return a + 1;
+}
+
+/* { dg-final { scan-assembler "foo:" } } */
+/* { dg-final { scan-assembler-not "__acle_se_foo:" } } */
-- 
cgit v1.1


From de954d6a5fb7dbb2c4c0a646a5e59727b06847c1 Mon Sep 17 00:00:00 2001
From: Andre Vieira <andre.simoesdiasvieira@arm.com>
Date: Fri, 2 Dec 2016 15:29:03 +0000
Subject: ARMv8-M Security Extension's cmse_nonsecure_entry: clear registers

    gcc/ChangeLog:
    2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
		Thomas Preud'homme  <thomas.preudhomme@arm.com>

	* config/arm/arm.c (output_return_instruction): Clear
	registers.
	(thumb2_expand_return): Likewise.
	(thumb1_expand_epilogue): Likewise.
	(thumb_exit): Likewise.
	(arm_expand_epilogue): Likewise.
	(cmse_nonsecure_entry_clear_before_return): New.
	(comp_not_to_clear_mask_str_un): New.
	(compute_not_to_clear_mask): New.
	* config/arm/thumb1.md (*epilogue_insns): Change length attribute.
	* config/arm/thumb2.md (*thumb2_return): Disable for
	cmse_nonsecure_entry functions.
	(*thumb2_cmse_entry_return): Duplicate thumb2_return pattern for
	cmse_nonsecure_entry functions.

    gcc/testsuite/ChangeLog:
    2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
		Thomas Preud'homme  <thomas.preudhomme@arm.com>

	* gcc.target/arm/cmse/cmse.exp: Test different multilibs separate.
	* gcc.target/arm/cmse/struct-1.c: New.
	* gcc.target/arm/cmse/bitfield-1.c: New.
	* gcc.target/arm/cmse/bitfield-2.c: New.
	* gcc.target/arm/cmse/bitfield-3.c: New.
	* gcc.target/arm/cmse/baseline/cmse-2.c: New.
	* gcc.target/arm/cmse/baseline/softfp.c: New.
	* gcc.target/arm/cmse/mainline/soft/cmse-5.c: New.
	* gcc.target/arm/cmse/mainline/hard/cmse-5.c: New.
	* gcc.target/arm/cmse/mainline/hard-sp/cmse-5.c: New.
	* gcc.target/arm/cmse/mainline/softfp/cmse-5.c: New.
	* gcc.target/arm/cmse/mainline/softfp-sp/cmse-5.c: New.


Co-Authored-By: Thomas Preud'homme <thomas.preudhomme@arm.com>

From-SVN: r243190
---
 gcc/ChangeLog                                      |  18 +
 gcc/config/arm/arm.c                               | 486 ++++++++++++++++++++-
 gcc/config/arm/thumb1.md                           |   9 +-
 gcc/config/arm/thumb2.md                           |  21 +-
 gcc/testsuite/ChangeLog                            |  16 +
 .../gcc.target/arm/cmse/baseline/cmse-2.c          |  19 +
 .../gcc.target/arm/cmse/baseline/softfp.c          |  29 ++
 gcc/testsuite/gcc.target/arm/cmse/bitfield-1.c     |  39 ++
 gcc/testsuite/gcc.target/arm/cmse/bitfield-2.c     |  36 ++
 gcc/testsuite/gcc.target/arm/cmse/bitfield-3.c     |  37 ++
 gcc/testsuite/gcc.target/arm/cmse/cmse.exp         |  20 +
 .../gcc.target/arm/cmse/mainline/hard-sp/cmse-5.c  |  45 ++
 .../gcc.target/arm/cmse/mainline/hard/cmse-5.c     |  38 ++
 .../gcc.target/arm/cmse/mainline/soft/cmse-5.c     |  24 +
 .../arm/cmse/mainline/softfp-sp/cmse-5.c           |  46 ++
 .../gcc.target/arm/cmse/mainline/softfp/cmse-5.c   |  38 ++
 gcc/testsuite/gcc.target/arm/cmse/struct-1.c       |  33 ++
 17 files changed, 948 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-2.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/baseline/softfp.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/bitfield-1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/bitfield-2.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/bitfield-3.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-5.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-5.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-5.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-5.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-5.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/struct-1.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index eeb8c3c..bb1e524 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,6 +1,24 @@
 2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
 	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
+	* config/arm/arm.c (output_return_instruction): Clear
+	registers.
+	(thumb2_expand_return): Likewise.
+	(thumb1_expand_epilogue): Likewise.
+	(thumb_exit): Likewise.
+	(arm_expand_epilogue): Likewise.
+	(cmse_nonsecure_entry_clear_before_return): New.
+	(comp_not_to_clear_mask_str_un): New.
+	(compute_not_to_clear_mask): New.
+	* config/arm/thumb1.md (*epilogue_insns): Change length attribute.
+	* config/arm/thumb2.md (*thumb2_return): Disable for
+	cmse_nonsecure_entry functions.
+	(*thumb2_cmse_entry_return): Duplicate thumb2_return pattern for
+	cmse_nonsecure_entry functions.
+
+2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
+	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
+
 	* config/arm/arm.c (use_return_insn): Change to return with  bxns
 	when cmse_nonsecure_entry.
 	(output_return_instruction): Likewise.
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index db7e0c8..6a9db85 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -16297,6 +16297,279 @@ note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
   return;
 }
 
+/* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
+   and unions in the context of ARMv8-M Security Extensions.  It is used as a
+   helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
+   functions.  The PADDING_BITS_TO_CLEAR pointer can be the base to either one
+   or four masks, depending on whether it is being computed for a
+   'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
+   respectively.  The tree for the type of the argument or a field within an
+   argument is passed in ARG_TYPE, the current register this argument or field
+   starts in is kept in the pointer REGNO and updated accordingly, the bit this
+   argument or field starts at is passed in STARTING_BIT and the last used bit
+   is kept in LAST_USED_BIT which is also updated accordingly.  */
+
+static unsigned HOST_WIDE_INT
+comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
+			       uint32_t * padding_bits_to_clear,
+			       unsigned starting_bit, int * last_used_bit)
+
+{
+  unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
+
+  if (TREE_CODE (arg_type) == RECORD_TYPE)
+    {
+      unsigned current_bit = starting_bit;
+      tree field;
+      long int offset, size;
+
+
+      field = TYPE_FIELDS (arg_type);
+      while (field)
+	{
+	  /* The offset within a structure is always an offset from
+	     the start of that structure.  Take STARTING_BIT into account so
+	     that OFFSET is the bit position within a 32-bit register.  */
+	  offset = starting_bit;
+	  offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
+	  offset %= 32;
+
+	  /* This is the actual size of the field, for bitfields this is the
+	     bitfield width and not the container size.  */
+	  size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
+
+	  if (*last_used_bit != offset)
+	    {
+	      if (offset < *last_used_bit)
+		{
+		  /* This field's offset is before the 'last_used_bit', that
+		     means this field goes on the next register.  So we need to
+		     pad the rest of the current register and increase the
+		     register number.  */
+		  uint32_t mask;
+		  mask  = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
+		  mask++;
+
+		  padding_bits_to_clear[*regno] |= mask;
+		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
+		  (*regno)++;
+		}
+	      else
+		{
+		  /* Otherwise we pad the bits between the last field's end and
+		     the start of the new field.  */
+		  uint32_t mask;
+
+		  mask = ((uint32_t)-1) >> (32 - offset);
+		  mask -= ((uint32_t) 1 << *last_used_bit) - 1;
+		  padding_bits_to_clear[*regno] |= mask;
+		}
+	      current_bit = offset;
+	    }
+
+	  /* Calculate further padding bits for inner structs/unions too.  */
+	  if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
+	    {
+	      *last_used_bit = current_bit;
+	      not_to_clear_reg_mask
+		|= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
+						  padding_bits_to_clear, offset,
+						  last_used_bit);
+	    }
+	  else
+	    {
+	      /* Update 'current_bit' with this field's size.  If the
+		 'current_bit' lies in a subsequent register, update 'regno' and
+		 reset 'current_bit' to point to the current bit in that new
+		 register.  */
+	      current_bit += size;
+	      while (current_bit >= 32)
+		{
+		  current_bit-=32;
+		  not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
+		  (*regno)++;
+		}
+	      *last_used_bit = current_bit;
+	    }
+
+	  field = TREE_CHAIN (field);
+	}
+      not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
+    }
+  else if (TREE_CODE (arg_type) == UNION_TYPE)
+    {
+      tree field, field_t;
+      int i, regno_t, field_size;
+      int max_reg = -1;
+      int max_bit = -1;
+      uint32_t mask;
+      uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
+	= {-1, -1, -1, -1};
+
+      /* To compute the padding bits in a union we only consider bits as
+	 padding bits if they are always either a padding bit or fall outside a
+	 field's size for all fields in the union.  */
+      field = TYPE_FIELDS (arg_type);
+      while (field)
+	{
+	  uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
+	    = {0U, 0U, 0U, 0U};
+	  int last_used_bit_t = *last_used_bit;
+	  regno_t = *regno;
+	  field_t = TREE_TYPE (field);
+
+	  /* If the field's type is either a record or a union make sure to
+	     compute their padding bits too.  */
+	  if (RECORD_OR_UNION_TYPE_P (field_t))
+	    not_to_clear_reg_mask
+	      |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
+						&padding_bits_to_clear_t[0],
+						starting_bit, &last_used_bit_t);
+	  else
+	    {
+	      field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
+	      regno_t = (field_size / 32) + *regno;
+	      last_used_bit_t = (starting_bit + field_size) % 32;
+	    }
+
+	  for (i = *regno; i < regno_t; i++)
+	    {
+	      /* For all but the last register used by this field only keep the
+		 padding bits that were padding bits in this field.  */
+	      padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
+	    }
+
+	    /* For the last register, keep all padding bits that were padding
+	       bits in this field and any padding bits that are still valid
+	       as padding bits but fall outside of this field's size.  */
+	    mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
+	    padding_bits_to_clear_res[regno_t]
+	      &= padding_bits_to_clear_t[regno_t] | mask;
+
+	  /* Update the maximum size of the fields in terms of registers used
+	     ('max_reg') and the 'last_used_bit' in said register.  */
+	  if (max_reg < regno_t)
+	    {
+	      max_reg = regno_t;
+	      max_bit = last_used_bit_t;
+	    }
+	  else if (max_reg == regno_t && max_bit < last_used_bit_t)
+	    max_bit = last_used_bit_t;
+
+	  field = TREE_CHAIN (field);
+	}
+
+      /* Update the current padding_bits_to_clear using the intersection of the
+	 padding bits of all the fields.  */
+      for (i=*regno; i < max_reg; i++)
+	padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
+
+      /* Do not keep trailing padding bits, we do not know yet whether this
+	 is the end of the argument.  */
+      mask = ((uint32_t) 1 << max_bit) - 1;
+      padding_bits_to_clear[max_reg]
+	|= padding_bits_to_clear_res[max_reg] & mask;
+
+      *regno = max_reg;
+      *last_used_bit = max_bit;
+    }
+  else
+    /* This function should only be used for structs and unions.  */
+    gcc_unreachable ();
+
+  return not_to_clear_reg_mask;
+}
+
+/* In the context of ARMv8-M Security Extensions, this function is used for both
+   'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
+   registers are used when returning or passing arguments, which is then
+   returned as a mask.  It will also compute a mask to indicate padding/unused
+   bits for each of these registers, and passes this through the
+   PADDING_BITS_TO_CLEAR pointer.  The tree of the argument type is passed in
+   ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
+   the starting register used to pass this argument or return value is passed
+   in REGNO.  It makes use of 'comp_not_to_clear_mask_str_un' to compute these
+   for struct and union types.  */
+
+static unsigned HOST_WIDE_INT
+compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
+			     uint32_t * padding_bits_to_clear)
+
+{
+  int last_used_bit = 0;
+  unsigned HOST_WIDE_INT not_to_clear_mask;
+
+  if (RECORD_OR_UNION_TYPE_P (arg_type))
+    {
+      not_to_clear_mask
+	= comp_not_to_clear_mask_str_un (arg_type, &regno,
+					 padding_bits_to_clear, 0,
+					 &last_used_bit);
+
+
+      /* If the 'last_used_bit' is not zero, that means we are still using a
+	 part of the last 'regno'.  In such cases we must clear the trailing
+	 bits.  Otherwise we are not using regno and we should mark it as to
+	 clear.  */
+      if (last_used_bit != 0)
+	padding_bits_to_clear[regno]
+	  |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
+      else
+	not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
+    }
+  else
+    {
+      not_to_clear_mask = 0;
+      /* We are dealing with neither a struct nor a union, so the argument may
+	 be passed in floating point registers too.  In some cases a BLKmode is
+	 used when returning or passing arguments in multiple VFP registers.  */
+      if (GET_MODE (arg_rtx) == BLKmode)
+	{
+	  int i, arg_regs;
+	  rtx reg;
+
+	  /* This should really only occur when dealing with the hard-float
+	     ABI.  */
+	  gcc_assert (TARGET_HARD_FLOAT_ABI);
+
+	  for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
+	    {
+	      reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
+	      gcc_assert (REG_P (reg));
+
+	      not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
+
+	      /* If the mode spans several registers (e.g. DFmode), make sure
+		 we don't clear any of the registers it occupies.  */
+	      arg_regs = ARM_NUM_REGS (GET_MODE (reg));
+	      if (arg_regs > 1)
+		{
+		  unsigned HOST_WIDE_INT mask;
+		  mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
+		  mask -= HOST_WIDE_INT_1U << REGNO (reg);
+		  not_to_clear_mask |= mask;
+		}
+	    }
+	}
+      else
+	{
+	  /* Otherwise we can rely on the MODE to determine how many registers
+	     are being used by this argument.  */
+	  int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
+	  not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
+	  if (arg_regs > 1)
+	    {
+	      unsigned HOST_WIDE_INT
+	      mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
+	      mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
+	      not_to_clear_mask |= mask;
+	    }
+	}
+    }
+
+  return not_to_clear_mask;
+}
+
 /* Rewrite move insn into subtract of 0 if the condition codes will
    be useful in next conditional jump insn.  */
 
@@ -18720,7 +18993,42 @@ output_return_instruction (rtx operand, bool really_return, bool reverse,
 
 	default:
 	  if (IS_CMSE_ENTRY (func_type))
-	    snprintf (instr, sizeof (instr), "bxns%s\t%%|lr", conditional);
+	    {
+	      /* Check if we have to clear the 'GE bits' which is only used if
+		 parallel add and subtraction instructions are available.  */
+	      if (TARGET_INT_SIMD)
+		snprintf (instr, sizeof (instr),
+			  "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
+	      else
+		snprintf (instr, sizeof (instr),
+			  "msr%s\tAPSR_nzcvq, %%|lr", conditional);
+
+	      output_asm_insn (instr, & operand);
+	      if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
+		{
+		  /* Clear the cumulative exception-status bits (0-4,7) and the
+		     condition code bits (28-31) of the FPSCR.  We need to
+		     remember to clear the first scratch register used (IP) and
+		     save and restore the second (r4).  */
+		  snprintf (instr, sizeof (instr), "push\t{%%|r4}");
+		  output_asm_insn (instr, & operand);
+		  snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
+		  output_asm_insn (instr, & operand);
+		  snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
+		  output_asm_insn (instr, & operand);
+		  snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
+		  output_asm_insn (instr, & operand);
+		  snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
+		  output_asm_insn (instr, & operand);
+		  snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
+		  output_asm_insn (instr, & operand);
+		  snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
+		  output_asm_insn (instr, & operand);
+		  snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
+		  output_asm_insn (instr, & operand);
+		}
+	      snprintf (instr, sizeof (instr), "bxns\t%%|lr");
+	    }
 	  /* Use bx if it's available.  */
 	  else if (arm_arch5 || arm_arch4t)
 	    sprintf (instr, "bx%s\t%%|lr", conditional);
@@ -23027,7 +23335,11 @@ thumb_exit (FILE *f, int reg_containing_return_addr)
 	asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
 
       if (IS_CMSE_ENTRY (arm_current_func_type ()))
-	asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
+	{
+	  asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
+		       reg_containing_return_addr);
+	  asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
+	}
       else
 	asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
       return;
@@ -23263,7 +23575,18 @@ thumb_exit (FILE *f, int reg_containing_return_addr)
 
   /* Return to caller.  */
   if (IS_CMSE_ENTRY (arm_current_func_type ()))
-    asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
+    {
+      /* This is for the cases where LR is not being used to contain the return
+         address.  It may therefore contain information that we might not want
+	 to leak, hence it must be cleared.  The value in R0 will never be a
+	 secret at this point, so it is safe to use it, see the clearing code
+	 in 'cmse_nonsecure_entry_clear_before_return'.  */
+      if (reg_containing_return_addr != LR_REGNUM)
+	asm_fprintf (f, "\tmov\tlr, r0\n");
+
+      asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
+      asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
+    }
   else
     asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
 }
@@ -24130,6 +24453,149 @@ thumb1_expand_prologue (void)
     cfun->machine->lr_save_eliminated = 0;
 }
 
+/* Clear caller saved registers not used to pass return values and leaked
+   condition flags before exiting a cmse_nonsecure_entry function.  */
+
+void
+cmse_nonsecure_entry_clear_before_return (void)
+{
+  uint64_t to_clear_mask[2] = { 0, 0 };
+  uint32_t padding_bits_to_clear = 0;
+  uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
+  int regno, maxregno = IP_REGNUM;
+  tree result_type;
+  rtx result_rtl;
+
+  to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
+  to_clear_mask[0] |= (1ULL << IP_REGNUM);
+
+  /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
+     registers.  We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
+     to make sure the instructions used to clear them are present.  */
+  if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
+    {
+      uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
+      maxregno = LAST_VFP_REGNUM;
+
+      float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
+      to_clear_mask[0] |= float_mask;
+
+      float_mask = (1ULL << (maxregno - 63)) - 1;
+      to_clear_mask[1] = float_mask;
+
+      /* Make sure we don't clear the two scratch registers used to clear the
+	 relevant FPSCR bits in output_return_instruction.  */
+      emit_use (gen_rtx_REG (SImode, IP_REGNUM));
+      to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
+      emit_use (gen_rtx_REG (SImode, 4));
+      to_clear_mask[0] &= ~(1ULL << 4);
+    }
+
+  /* If the user has defined registers to be caller saved, these are no longer
+     restored by the function before returning and must thus be cleared for
+     security purposes.  Registers above MAXREGNO are never cleared.  */
+  for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
+    {
+      /* We do not touch registers that can be used to pass arguments as per
+	 the AAPCS, since these should never be made callee-saved by user
+	 options.  */
+      if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
+	continue;
+      if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
+	continue;
+      if (call_used_regs[regno])
+	to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
+    }
+
+  /* Make sure we do not clear the registers used to return the result in.  */
+  result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
+  if (!VOID_TYPE_P (result_type))
+    {
+      result_rtl = arm_function_value (result_type, current_function_decl, 0);
+
+      /* No need to check that we return in registers, because we don't
+	 support returning on stack yet.  */
+      to_clear_mask[0]
+	&= ~compute_not_to_clear_mask (result_type, result_rtl, 0,
+				       padding_bits_to_clear_ptr);
+    }
+
+  if (padding_bits_to_clear != 0)
+    {
+      rtx reg_rtx;
+      /* Padding bits to clear is not 0 so we know we are dealing with
+	 returning a composite type, which only uses r0.  Let's make sure that
+	 r1-r3 is cleared too, we will use r1 as a scratch register.  */
+      gcc_assert ((to_clear_mask[0] & 0xe) == 0xe);
+
+      reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
+
+      /* Fill the lower half of the negated padding_bits_to_clear.  */
+      emit_move_insn (reg_rtx,
+		      GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
+
+      /* Also fill the top half of the negated padding_bits_to_clear.  */
+      if (((~padding_bits_to_clear) >> 16) > 0)
+	emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
+						      GEN_INT (16),
+						      GEN_INT (16)),
+				GEN_INT ((~padding_bits_to_clear) >> 16)));
+
+      emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
+			   gen_rtx_REG (SImode, R0_REGNUM),
+			   reg_rtx));
+    }
+
+  for (regno = R0_REGNUM; regno <= maxregno; regno++)
+    {
+      if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64))))
+	continue;
+
+      if (IS_VFP_REGNUM (regno))
+	{
+	  /* If regno is an even vfp register and its successor is also to
+	     be cleared, use vmov.  */
+	  if (TARGET_VFP_DOUBLE
+	      && VFP_REGNO_OK_FOR_DOUBLE (regno)
+	      && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1)))
+	    {
+	      emit_move_insn (gen_rtx_REG (DFmode, regno),
+			      CONST1_RTX (DFmode));
+	      emit_use (gen_rtx_REG (DFmode, regno));
+	      regno++;
+	    }
+	  else
+	    {
+	      emit_move_insn (gen_rtx_REG (SFmode, regno),
+			      CONST1_RTX (SFmode));
+	      emit_use (gen_rtx_REG (SFmode, regno));
+	    }
+	}
+      else
+	{
+	  if (TARGET_THUMB1)
+	    {
+	      if (regno == R0_REGNUM)
+		emit_move_insn (gen_rtx_REG (SImode, regno),
+				const0_rtx);
+	      else
+		/* R0 has either been cleared before, see code above, or it
+		   holds a return value, either way it is not secret
+		   information.  */
+		emit_move_insn (gen_rtx_REG (SImode, regno),
+				gen_rtx_REG (SImode, R0_REGNUM));
+	      emit_use (gen_rtx_REG (SImode, regno));
+	    }
+	  else
+	    {
+	      emit_move_insn (gen_rtx_REG (SImode, regno),
+			      gen_rtx_REG (SImode, LR_REGNUM));
+	      emit_use (gen_rtx_REG (SImode, regno));
+	    }
+	}
+    }
+}
+
 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
    POP instruction can be generated.  LR should be replaced by PC.  All
    the checks required are already done by  USE_RETURN_INSN ().  Hence,
@@ -24179,6 +24645,8 @@ thumb2_expand_return (bool simple_return)
     }
   else
     {
+      if (IS_CMSE_ENTRY (arm_current_func_type ()))
+	cmse_nonsecure_entry_clear_before_return ();
       emit_jump_insn (simple_return_rtx);
     }
 }
@@ -24237,6 +24705,10 @@ thumb1_expand_epilogue (void)
 
   if (! df_regs_ever_live_p (LR_REGNUM))
     emit_use (gen_rtx_REG (SImode, LR_REGNUM));
+
+  /* Clear all caller-saved regs that are not used to return.  */
+  if (IS_CMSE_ENTRY (arm_current_func_type ()))
+    cmse_nonsecure_entry_clear_before_return ();
 }
 
 /* Epilogue code for APCS frame.  */
@@ -24671,6 +25143,14 @@ arm_expand_epilogue (bool really_return)
 				   stack_pointer_rtx, stack_pointer_rtx);
     }
 
+    /* Clear all caller-saved regs that are not used to return.  */
+    if (IS_CMSE_ENTRY (arm_current_func_type ()))
+      {
+	/* CMSE_ENTRY always returns.  */
+	gcc_assert (really_return);
+	cmse_nonsecure_entry_clear_before_return ();
+      }
+
   if (!really_return)
     return;
 
diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
index 5f0dffb..73a7381 100644
--- a/gcc/config/arm/thumb1.md
+++ b/gcc/config/arm/thumb1.md
@@ -1869,8 +1869,13 @@
   "*
     return thumb1_unexpanded_epilogue ();
   "
-  ; Length is absolute worst case
-  [(set_attr "length" "44")
+  ; Length is absolute worst case, when using CMSE and if this is an entry
+  ; function an extra 4 (MSR) bytes will be added.
+  [(set (attr "length")
+	(if_then_else
+	 (match_test "IS_CMSE_ENTRY (arm_current_func_type ())")
+	 (const_int 48)
+	 (const_int 44)))
    (set_attr "type" "block")
    ;; We don't clobber the conditions, but the potential length of this
    ;; operation is sufficient to make conditionalizing the sequence
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index affcd83..9029a2f 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -1114,12 +1114,31 @@
 
 (define_insn "*thumb2_return"
   [(simple_return)]
-  "TARGET_THUMB2"
+  "TARGET_THUMB2 && !IS_CMSE_ENTRY (arm_current_func_type ())"
   "* return output_return_instruction (const_true_rtx, true, false, true);"
   [(set_attr "type" "branch")
    (set_attr "length" "4")]
 )
 
+(define_insn "*thumb2_cmse_entry_return"
+  [(simple_return)]
+  "TARGET_THUMB2 && IS_CMSE_ENTRY (arm_current_func_type ())"
+  "* return output_return_instruction (const_true_rtx, true, false, true);"
+  [(set_attr "type" "branch")
+   ; A return from a cmse_nonsecure_entry function also clears the APSR and,
+   ; when VFP is available, the FPSCR (push, vmrs, movw, movt, and, vmsr, pop,
+   ; mov) before the bxns, so the length is the worst case of that sequence.
+   (set (attr "length")
+     (if_then_else (match_test "TARGET_HARD_FLOAT")
+      (const_int 34)
+      (const_int 8)))
+   ; We do not support predicate execution of returns from cmse_nonsecure_entry
+   ; functions because we need to clear the APSR.  Since predicable has to be
+   ; a constant, we had to duplicate the thumb2_return pattern for CMSE entry
+   ; functions.
+   (set_attr "predicable" "no")]
+)
+
 (define_insn_and_split "thumb2_eh_return"
   [(unspec_volatile [(match_operand:SI 0 "s_register_operand" "r")]
 		    VUNSPEC_EH_RETURN)
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index e31a1b3..7cb1616 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,6 +1,22 @@
 2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
 	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
+	* gcc.target/arm/cmse/cmse.exp: Test different multilibs separate.
+	* gcc.target/arm/cmse/struct-1.c: New.
+	* gcc.target/arm/cmse/bitfield-1.c: New.
+	* gcc.target/arm/cmse/bitfield-2.c: New.
+	* gcc.target/arm/cmse/bitfield-3.c: New.
+	* gcc.target/arm/cmse/baseline/cmse-2.c: New.
+	* gcc.target/arm/cmse/baseline/softfp.c: New.
+	* gcc.target/arm/cmse/mainline/soft/cmse-5.c: New.
+	* gcc.target/arm/cmse/mainline/hard/cmse-5.c: New.
+	* gcc.target/arm/cmse/mainline/hard-sp/cmse-5.c: New.
+	* gcc.target/arm/cmse/mainline/softfp/cmse-5.c: New.
+	* gcc.target/arm/cmse/mainline/softfp-sp/cmse-5.c: New.
+
+2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
+	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
+
 	* gcc.target/arm/cmse/cmse-4.c: New.
 	* gcc.target/arm/cmse/cmse-9.c: New.
 	* gcc.target/arm/cmse/cmse-10.c: New.
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-2.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-2.c
new file mode 100644
index 0000000..814502d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-2.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_base_ok } */
+/* { dg-add-options arm_arch_v8m_base } */
+/* { dg-options "-mcmse" }  */
+
+extern float bar (void);
+
+float __attribute__ ((cmse_nonsecure_entry))
+foo (void)
+{
+  return bar ();
+}
+/* { dg-final { scan-assembler "movs\tr1, r0" } } */
+/* { dg-final { scan-assembler "movs\tr2, r0" } } */
+/* { dg-final { scan-assembler "movs\tr3, r0" } } */
+/* { dg-final { scan-assembler "mov\tip, r0" } } */
+/* { dg-final { scan-assembler "mov\tlr, r0" } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq," } } */
+/* { dg-final { scan-assembler "bxns" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/softfp.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/softfp.c
new file mode 100644
index 0000000..0069fcd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/softfp.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_base_ok } */
+/* { dg-add-options arm_arch_v8m_base } */
+/* { dg-options "-mcmse -mfloat-abi=softfp" } */
+
+double __attribute__ ((cmse_nonsecure_call)) (*bar) (float, double);
+
+double
+foo (double a)
+{
+  return bar (1.0f, 2.0) + a;
+}
+
+float __attribute__ ((cmse_nonsecure_entry))
+baz (float a, double b)
+{
+  return (float) bar (a, b);
+}
+
+/* Make sure we are not using FP instructions, since ARMv8-M Baseline does not
+   support such instructions.  */
+/* { dg-final { scan-assembler-not "vmov" } } */
+/* { dg-final { scan-assembler-not "vmsr" } } */
+/* { dg-final { scan-assembler-not "vmrs" } } */
+
+/* Just double checking that we are still doing cmse though.  */
+/* { dg-final { scan-assembler-not "vmrs" } } */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/bitfield-1.c b/gcc/testsuite/gcc.target/arm/cmse/bitfield-1.c
new file mode 100644
index 0000000..fccc51d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/bitfield-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run } */
+/* { dg-options "--save-temps -mcmse -Wl,--section-start,.gnu.sgstubs=0x20400000" } */
+
+typedef struct
+{
+  unsigned short  a : 6;
+  unsigned char	  b : 3;
+  unsigned char	  c;
+  unsigned short  d : 8;
+} test_st;
+
+test_st __attribute__ ((cmse_nonsecure_entry)) foo (void)
+{
+  test_st t;
+  t.a = 63u;
+  t.b = 7u;
+  t.c = 255u;
+  t.d = 255u;
+  return t;
+}
+
+int
+main (void)
+{
+  test_st t;
+  t = foo ();
+  if (t.a != 63u
+      || t.b != 7u
+      || t.c != 255u
+      || t.d != 255u)
+    __builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler "movw\tr1, #1855" } } */
+/* { dg-final { scan-assembler "movt\tr1, 65535" } } */
+/* { dg-final { scan-assembler "ands\tr0(, r0)?, r1" } } */
+/* { dg-final { scan-assembler "bxns" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/bitfield-2.c b/gcc/testsuite/gcc.target/arm/cmse/bitfield-2.c
new file mode 100644
index 0000000..e6aee3c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/bitfield-2.c
@@ -0,0 +1,36 @@
+/* { dg-do run } */
+/* { dg-options "--save-temps -mcmse -Wl,--section-start,.gnu.sgstubs=0x20400000" } */
+
+typedef struct
+{
+  short	      a : 7;
+  signed char b : 3;
+  short	      c : 11;
+} test_st;
+
+test_st __attribute__ ((cmse_nonsecure_entry)) foo (void)
+{
+  test_st t;
+  t.a = -64;
+  t.b = -4 ;
+  t.c = -1024;
+  return t;
+}
+
+int
+main (void)
+{
+  test_st t;
+  t = foo ();
+  if (t.a != -64
+      || t.b != -4
+      || t.c != -1024)
+    __builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler "movw\tr1, #1919" } } */
+/* { dg-final { scan-assembler "movt\tr1, 2047" } } */
+/* { dg-final { scan-assembler "ands\tr0(, r0)?, r1" } } */
+/* { dg-final { scan-assembler "bxns" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/bitfield-3.c b/gcc/testsuite/gcc.target/arm/cmse/bitfield-3.c
new file mode 100644
index 0000000..285a2b9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/bitfield-3.c
@@ -0,0 +1,37 @@
+/* { dg-do run } */
+/* { dg-options "--save-temps -mcmse -Wl,--section-start,.gnu.sgstubs=0x20400000" } */
+
+typedef struct
+{
+  short	      a;
+  signed char b : 2;
+  short		: 1;
+  signed char c : 3;
+} test_st;
+
+test_st __attribute__ ((cmse_nonsecure_entry)) foo (void)
+{
+  test_st t;
+  t.a = -32768;
+  t.b = -2;
+  t.c = -4;
+  return t;
+}
+
+int
+main (void)
+{
+  test_st t;
+  t = foo ();
+  if (t.a != -32768
+      || t.b != -2
+      || t.c != -4)
+    __builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler "movw\tr1, #65535" } } */
+/* { dg-final { scan-assembler "movt\tr1, 63" } } */
+/* { dg-final { scan-assembler "ands\tr0(, r0)?, r1" } } */
+/* { dg-final { scan-assembler "bxns" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse.exp b/gcc/testsuite/gcc.target/arm/cmse/cmse.exp
index f797dba..38f1841 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/cmse.exp
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse.exp
@@ -43,6 +43,26 @@ set LTO_TORTURE_OPTIONS ""
 gcc-dg-runtest [lsort [glob $srcdir/$subdir/*.c]] \
 	"" $DEFAULT_CFLAGS
 
+if {[check_effective_target_arm_arch_v8m_base_ok]} then {
+    # Baseline only
+    gcc-dg-runtest [lsort [glob $srcdir/$subdir/baseline/*.c]] \
+	    "" $DEFAULT_CFLAGS
+}
+
+if {[check_effective_target_arm_arch_v8m_main_ok]} then {
+    # Mainline -mfloat-abi=soft
+    gcc-dg-runtest [lsort [glob $srcdir/$subdir/mainline/soft/*.c]] \
+	    "-mfloat-abi=soft" $DEFAULT_CFLAGS
+    gcc-dg-runtest [lsort [glob $srcdir/$subdir/mainline/softfp/*.c]] \
+	    "" $DEFAULT_CFLAGS
+    gcc-dg-runtest [lsort [glob $srcdir/$subdir/mainline/softfp-sp/*.c]] \
+	    "" $DEFAULT_CFLAGS
+    gcc-dg-runtest [lsort [glob $srcdir/$subdir/mainline/hard/*.c]] \
+	    "" $DEFAULT_CFLAGS
+    gcc-dg-runtest [lsort [glob $srcdir/$subdir/mainline/hard-sp/*.c]] \
+	    "" $DEFAULT_CFLAGS
+}
+
 set LTO_TORTURE_OPTIONS ${saved-lto_torture_options}
 set dg-do-what-default ${saved-dg-do-what-default}
 
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-5.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-5.c
new file mode 100644
index 0000000..88dec27
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-5.c
@@ -0,0 +1,45 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */
+/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */
+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-sp-d16" }  */
+
+extern float bar (void);
+
+float __attribute__ ((cmse_nonsecure_entry))
+foo (void)
+{
+  return bar ();
+}
+/* { dg-final { scan-assembler "mov\tr0, lr" } } */
+/* { dg-final { scan-assembler "mov\tr1, lr" } } */
+/* { dg-final { scan-assembler "mov\tr2, lr" } } */
+/* { dg-final { scan-assembler "mov\tr3, lr" } } */
+/* { dg-final { scan-assembler-not "vmov\.f32\ts0, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts1, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts2, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts3, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts4, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts5, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts6, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts7, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts8, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts9, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts10, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts11, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts12, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts13, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts14, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts15, #1\.0" } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { arm_arch_v8m_main_ok && { ! arm_dsp } } } } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target { arm_arch_v8m_main_ok && arm_dsp } } } } */
+/* { dg-final { scan-assembler "push\t{r4}" } } */
+/* { dg-final { scan-assembler "vmrs\tip, fpscr" } } */
+/* { dg-final { scan-assembler "movw\tr4, #65376" } } */
+/* { dg-final { scan-assembler "movt\tr4, #4095" } } */
+/* { dg-final { scan-assembler "and\tip, r4" } } */
+/* { dg-final { scan-assembler "vmsr\tfpscr, ip" } } */
+/* { dg-final { scan-assembler "pop\t{r4}" } } */
+/* { dg-final { scan-assembler "mov\tip, lr" } } */
+/* { dg-final { scan-assembler "bxns" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-5.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-5.c
new file mode 100644
index 0000000..29f60ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-5.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */
+/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */
+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-d16" }  */
+
+extern float bar (void);
+
+float __attribute__ ((cmse_nonsecure_entry))
+foo (void)
+{
+  return bar ();
+}
+/* { dg-final { scan-assembler "mov\tr0, lr" } } */
+/* { dg-final { scan-assembler "mov\tr1, lr" } } */
+/* { dg-final { scan-assembler "mov\tr2, lr" } } */
+/* { dg-final { scan-assembler "mov\tr3, lr" } } */
+/* { dg-final { scan-assembler-not "vmov\.f32\ts0, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts1, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td1, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td2, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td3, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td4, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td5, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td6, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td7, #1\.0" } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { arm_arch_v8m_main_ok && { ! arm_dsp } } } } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target { arm_arch_v8m_main_ok && arm_dsp } } } } */
+/* { dg-final { scan-assembler "push\t{r4}" } } */
+/* { dg-final { scan-assembler "vmrs\tip, fpscr" } } */
+/* { dg-final { scan-assembler "movw\tr4, #65376" } } */
+/* { dg-final { scan-assembler "movt\tr4, #4095" } } */
+/* { dg-final { scan-assembler "and\tip, r4" } } */
+/* { dg-final { scan-assembler "vmsr\tfpscr, ip" } } */
+/* { dg-final { scan-assembler "pop\t{r4}" } } */
+/* { dg-final { scan-assembler "mov\tip, lr" } } */
+/* { dg-final { scan-assembler "bxns" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-5.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-5.c
new file mode 100644
index 0000000..a7229ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-5.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=hard" -mfloat-abi=softfp } {""} } */
+/* { dg-options "-mcmse -mfloat-abi=soft" }  */
+
+extern float bar (void);
+
+float __attribute__ ((cmse_nonsecure_entry))
+foo (void)
+{
+  return bar ();
+}
+
+/* { dg-final { scan-assembler "mov\tr1, lr" } } */
+/* { dg-final { scan-assembler "mov\tr2, lr" } } */
+/* { dg-final { scan-assembler "mov\tr3, lr" } } */
+/* { dg-final { scan-assembler "mov\tip, lr" } } */
+/* { dg-final { scan-assembler-not "vmov" } } */
+/* { dg-final { scan-assembler-not "vmsr" } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { arm_arch_v8m_main_ok && { ! arm_dsp } } } } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target { arm_arch_v8m_main_ok && arm_dsp } } } } */
+/* { dg-final { scan-assembler "bxns" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-5.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-5.c
new file mode 100644
index 0000000..7734d77
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-5.c
@@ -0,0 +1,46 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=hard } {""} } */
+/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */
+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-sp-d16" }  */
+
+extern float bar (void);
+
+float __attribute__ ((cmse_nonsecure_entry))
+foo (void)
+{
+  return bar ();
+}
+/* { dg-final { scan-assembler "__acle_se_foo:" } } */
+/* { dg-final { scan-assembler-not "mov\tr0, lr" } } */
+/* { dg-final { scan-assembler "mov\tr1, lr" } } */
+/* { dg-final { scan-assembler "mov\tr2, lr" } } */
+/* { dg-final { scan-assembler "mov\tr3, lr" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts0, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts1, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts2, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts3, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts4, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts5, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts6, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts7, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts8, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts9, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts10, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts11, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts12, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts13, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts14, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f32\ts15, #1\.0" } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { arm_arch_v8m_main_ok && { ! arm_dsp } } } } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target { arm_arch_v8m_main_ok && arm_dsp } } } } */
+/* { dg-final { scan-assembler "push\t{r4}" } } */
+/* { dg-final { scan-assembler "vmrs\tip, fpscr" } } */
+/* { dg-final { scan-assembler "movw\tr4, #65376" } } */
+/* { dg-final { scan-assembler "movt\tr4, #4095" } } */
+/* { dg-final { scan-assembler "and\tip, r4" } } */
+/* { dg-final { scan-assembler "vmsr\tfpscr, ip" } } */
+/* { dg-final { scan-assembler "pop\t{r4}" } } */
+/* { dg-final { scan-assembler "mov\tip, lr" } } */
+/* { dg-final { scan-assembler "bxns" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-5.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-5.c
new file mode 100644
index 0000000..6addaa1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-5.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=hard } {""} } */
+/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */
+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-d16" }  */
+
+extern float bar (void);
+
+float __attribute__ ((cmse_nonsecure_entry))
+foo (void)
+{
+  return bar ();
+}
+/* { dg-final { scan-assembler "__acle_se_foo:" } } */
+/* { dg-final { scan-assembler-not "mov\tr0, lr" } } */
+/* { dg-final { scan-assembler "mov\tr1, lr" } } */
+/* { dg-final { scan-assembler "mov\tr2, lr" } } */
+/* { dg-final { scan-assembler "mov\tr3, lr" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td0, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td1, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td2, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td3, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td4, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td5, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td6, #1\.0" } } */
+/* { dg-final { scan-assembler "vmov\.f64\td7, #1\.0" } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq, lr" { target { arm_arch_v8m_main_ok && { ! arm_dsp } } } } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvqg, lr" { target { arm_arch_v8m_main_ok && arm_dsp } } } } */
+/* { dg-final { scan-assembler "push\t{r4}" } } */
+/* { dg-final { scan-assembler "vmrs\tip, fpscr" } } */
+/* { dg-final { scan-assembler "movw\tr4, #65376" } } */
+/* { dg-final { scan-assembler "movt\tr4, #4095" } } */
+/* { dg-final { scan-assembler "and\tip, r4" } } */
+/* { dg-final { scan-assembler "vmsr\tfpscr, ip" } } */
+/* { dg-final { scan-assembler "pop\t{r4}" } } */
+/* { dg-final { scan-assembler "mov\tip, lr" } } */
+/* { dg-final { scan-assembler "bxns" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/struct-1.c b/gcc/testsuite/gcc.target/arm/cmse/struct-1.c
new file mode 100644
index 0000000..2d366a9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/struct-1.c
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-options "--save-temps -mcmse -Wl,--section-start,.gnu.sgstubs=0x20400000" } */
+
+typedef struct
+{
+  unsigned char	  a;
+  unsigned short  b;
+} test_st;
+
+test_st __attribute__ ((cmse_nonsecure_entry)) foo (void)
+{
+  test_st t;
+  t.a = 255u;
+  t.b = 32767u;
+  return t;
+}
+
+int
+main (void)
+{
+  test_st t;
+  t = foo ();
+  if (t.a != 255u || t.b != 32767u)
+    __builtin_abort ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler "movs\tr1, #255" } } */
+/* { dg-final { scan-assembler "movt\tr1, 65535" } } */
+/* { dg-final { scan-assembler "ands\tr0(, r0)?, r1" } } */
+/* { dg-final { scan-assembler "bxns" } } */
+
+
-- 
cgit v1.1


From 32ce1e4f244830404328e5a45d062c2f5bee662d Mon Sep 17 00:00:00 2001
From: Andre Vieira <andre.simoesdiasvieira@arm.com>
Date: Fri, 2 Dec 2016 15:30:37 +0000
Subject: Handling ARMv8-M Security Extension's cmse_nonsecure_call attribute

    gcc/ChangeLog:
    2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
	        Thomas Preud'homme  <thomas.preudhomme@arm.com>

	* config/arm/arm.c (gimplify.h): New include.
	(arm_handle_cmse_nonsecure_call): New.
	(arm_attribute_table): Added cmse_nonsecure_call.
	(arm_comp_type_attributes): Deny compatibility of function types
	with and without the cmse_nonsecure_call attribute.
	* doc/extend.texi (ARM ARMv8-M Security Extensions): New attribute.

    gcc/testsuite/ChangeLog:
    2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
	        Thomas Preud'homme  <thomas.preudhomme@arm.com>

	* gcc.target/arm/cmse/cmse-3.c: Add tests.
	* gcc.target/arm/cmse/cmse-4.c: Add tests.
	* gcc.target/arm/cmse/cmse-15.c: New.


Co-Authored-By: Thomas Preud'homme <thomas.preudhomme@arm.com>

From-SVN: r243191
---
 gcc/ChangeLog                               | 10 ++++
 gcc/config/arm/arm.c                        | 84 +++++++++++++++++++++++++++++
 gcc/doc/extend.texi                         |  4 +-
 gcc/testsuite/ChangeLog                     |  7 +++
 gcc/testsuite/gcc.target/arm/cmse/cmse-15.c | 72 +++++++++++++++++++++++++
 gcc/testsuite/gcc.target/arm/cmse/cmse-3.c  |  8 +++
 gcc/testsuite/gcc.target/arm/cmse/cmse-4.c  |  7 +++
 7 files changed, 190 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/cmse-15.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index bb1e524..ce79fdd 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,6 +1,16 @@
 2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
 	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
+	* config/arm/arm.c (gimplify.h): New include.
+	(arm_handle_cmse_nonsecure_call): New.
+	(arm_attribute_table): Added cmse_nonsecure_call.
+	(arm_comp_type_attributes): Deny compatibility of function types
+	with without the cmse_nonsecure_call attribute.
+	* doc/extend.texi (ARM ARMv8-M Security Extensions): New attribute.
+
+2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
+	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
+
 	* config/arm/arm.c (output_return_instruction): Clear
 	registers.
 	(thumb2_expand_return): Likewise.
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 6a9db85..a6b07b2 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -63,6 +63,7 @@
 #include "tm-constrs.h"
 #include "rtl-iter.h"
 #include "optabs-libfuncs.h"
+#include "gimplify.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -137,6 +138,7 @@ static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
 #endif
 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
+static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
 static int arm_comp_type_attributes (const_tree, const_tree);
@@ -348,6 +350,8 @@ static const struct attribute_spec arm_attribute_table[] =
   /* ARMv8-M Security Extensions support.  */
   { "cmse_nonsecure_entry", 0, 0, true, false, false,
     arm_handle_cmse_nonsecure_entry, false },
+  { "cmse_nonsecure_call", 0, 0, true, false, false,
+    arm_handle_cmse_nonsecure_call, true },
   { NULL,           0, 0, false, false, false, NULL, false }
 };
 
@@ -6753,6 +6757,78 @@ arm_handle_cmse_nonsecure_entry (tree *node, tree name,
   return NULL_TREE;
 }
 
+
+/* Called upon detection of the use of the cmse_nonsecure_call attribute, this
+   function will check whether the attribute is allowed here and will add the
+   attribute to the function type tree or otherwise issue a diagnostic.  The
+   reason we check this at declaration time is to only allow the use of the
+   attribute with declarations of function pointers and not function
+   declarations.  This function checks NODE is of the expected type and issues
+   diagnostics otherwise using NAME.  If it is not of the expected type
+   *NO_ADD_ATTRS will be set to true.  */
+
+static tree
+arm_handle_cmse_nonsecure_call (tree *node, tree name,
+				 tree /* args */,
+				 int /* flags */,
+				 bool *no_add_attrs)
+{
+  tree decl = NULL_TREE, fntype = NULL_TREE;
+  tree main_variant, type;
+
+  if (!use_cmse)
+    {
+      *no_add_attrs = true;
+      warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
+	       name);
+      return NULL_TREE;
+    }
+
+  if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
+    {
+      decl = *node;
+      fntype = TREE_TYPE (decl);
+    }
+
+  while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
+    fntype = TREE_TYPE (fntype);
+
+  if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
+    {
+	warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
+		 "function pointer", name);
+	*no_add_attrs = true;
+	return NULL_TREE;
+    }
+
+  *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
+
+  if (*no_add_attrs)
+    return NULL_TREE;
+
+  /* Prevent trees being shared among function types with and without
+     cmse_nonsecure_call attribute.  */
+  type = TREE_TYPE (decl);
+
+  type = build_distinct_type_copy (type);
+  TREE_TYPE (decl) = type;
+  fntype = type;
+
+  while (TREE_CODE (fntype) != FUNCTION_TYPE)
+    {
+      type = fntype;
+      fntype = TREE_TYPE (fntype);
+      fntype = build_distinct_type_copy (fntype);
+      TREE_TYPE (type) = fntype;
+    }
+
+  /* Construct a type attribute and add it to the function type.  */
+  tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
+			  TYPE_ATTRIBUTES (fntype));
+  TYPE_ATTRIBUTES (fntype) = attrs;
+  return NULL_TREE;
+}
+
 /* Return 0 if the attributes for two types are incompatible, 1 if they
    are compatible, and 2 if they are nearly compatible (which causes a
    warning to be generated).  */
@@ -6793,6 +6869,14 @@ arm_comp_type_attributes (const_tree type1, const_tree type2)
   if (l1 != l2)
     return 0;
 
+  l1 = lookup_attribute ("cmse_nonsecure_call",
+			 TYPE_ATTRIBUTES (type1)) != NULL;
+  l2 = lookup_attribute ("cmse_nonsecure_call",
+			 TYPE_ATTRIBUTES (type2)) != NULL;
+
+  if (l1 != l2)
+    return 0;
+
   return 1;
 }
 
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index d72af3e..0fa59ff 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -12648,8 +12648,8 @@ Security Extensions: Requiremenets on Development Tools Engineering
 Specification, which can be found at
 @uref{http://infocenter.arm.com/help/topic/com.arm.doc.ecm0359818/ECM0359818_armv8m_security_extensions_reqs_on_dev_tools_1_0.pdf}.
 
-As part of the Security Extensions GCC implements a new function attribute
-@code{cmse_nonsecure_entry}.
+As part of the Security Extensions GCC implements two new function attributes:
+@code{cmse_nonsecure_entry} and @code{cmse_nonsecure_call}.
 
 As part of the Security Extensions GCC implements the intrinsics below.  FPTR
 is used here to mean any function pointer type.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 7cb1616..ca431e4 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,6 +1,13 @@
 2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
 	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
+	* gcc.target/arm/cmse/cmse-3.c: Add tests.
+	* gcc.target/arm/cmse/cmse-4.c: Add tests.
+	* gcc.target/arm/cmse/cmse-15.c: New.
+
+2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
+	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
+
 	* gcc.target/arm/cmse/cmse.exp: Test different multilibs separate.
 	* gcc.target/arm/cmse/struct-1.c: New.
 	* gcc.target/arm/cmse/bitfield-1.c: New.
diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c b/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c
new file mode 100644
index 0000000..4e9ace1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-15.c
@@ -0,0 +1,72 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
+
+int __attribute__ ((cmse_nonsecure_call)) (*ns_foo) (void);
+int (*s_bar) (void);
+int __attribute__ ((cmse_nonsecure_call)) (**ns_foo2) (void);
+int (**s_bar2) (void);
+
+typedef int __attribute__ ((cmse_nonsecure_call)) ns_foo_t (void);
+typedef int s_bar_t (void);
+typedef int __attribute__ ((cmse_nonsecure_call)) (* ns_foo_ptr) (void);
+typedef int (*s_bar_ptr) (void);
+
+int nonsecure0 (ns_foo_t * ns_foo_p)
+{
+  return ns_foo_p ();
+}
+
+int nonsecure1 (ns_foo_t ** ns_foo_p)
+{
+  return (*ns_foo_p) ();
+}
+
+int nonsecure2 (ns_foo_ptr ns_foo_p)
+{
+  return ns_foo_p ();
+}
+int nonsecure3 (ns_foo_ptr * ns_foo_p)
+{
+  return (*ns_foo_p) ();
+}
+
+int secure0 (s_bar_t * s_bar_p)
+{
+  return s_bar_p ();
+}
+
+int secure1 (s_bar_t ** s_bar_p)
+{
+  return (*s_bar_p) ();
+}
+
+int secure2 (s_bar_ptr s_bar_p)
+{
+  return s_bar_p ();
+}
+
+int secure3 (s_bar_ptr * s_bar_p)
+{
+  return (*s_bar_p) ();
+}
+
+int nonsecure4 (void)
+{
+  return ns_foo ();
+}
+
+int nonsecure5 (void)
+{
+  return (*ns_foo2) ();
+}
+
+int secure4 (void)
+{
+  return s_bar ();
+}
+
+int secure5 (void)
+{
+  return (*s_bar2) ();
+}
+/* { dg-final { scan-assembler-times "bl\\s+__gnu_cmse_nonsecure_call" 6 } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-3.c b/gcc/testsuite/gcc.target/arm/cmse/cmse-3.c
index 2c2920e..7f92a4c 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/cmse-3.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-3.c
@@ -35,3 +35,11 @@ norf (struct span2 a) {}
 
 void __attribute__ ((cmse_nonsecure_entry))
 foo2 (long long a, int b, union test_union c) {} /* { dg-error "not available to functions with arguments passed on the stack" } */
+
+typedef void __attribute__ ((cmse_nonsecure_call)) bar2 (long long a, int b, long long c); /* { dg-error "not available to functions with arguments passed on the stack" } */
+
+typedef void __attribute__ ((cmse_nonsecure_call)) baz2 (long long a, int b, struct span c); /* { dg-error "not available to functions with arguments passed on the stack" } */
+
+typedef struct span __attribute__ ((cmse_nonsecure_call)) qux2 (void); /* { dg-error "not available to functions that return value on the stack" } */
+
+typedef void __attribute__ ((cmse_nonsecure_call)) norf2 (int a, ...); /* { dg-error "not available to functions with variable number of arguments" } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-4.c b/gcc/testsuite/gcc.target/arm/cmse/cmse-4.c
index 6f930ab..d0999a4 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/cmse-4.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-4.c
@@ -19,9 +19,16 @@ baz (void)
   return qux ();
 }
 
+void __attribute__ ((cmse_nonsecure_call))
+quux (void) {} /* { dg-warning "attribute only applies to base type of a function pointer" } */
+
+int __attribute__ ((cmse_nonsecure_call)) norf; /* { dg-warning "attribute only applies to base type of a function pointer" } */
+
 /* { dg-final { scan-assembler-times "bxns" 2 } } */
 /* { dg-final { scan-assembler "foo:" } } */
 /* { dg-final { scan-assembler "__acle_se_foo:" } } */
 /* { dg-final { scan-assembler-not "__acle_se_bar:" } } */
 /* { dg-final { scan-assembler "baz:" } } */
 /* { dg-final { scan-assembler "__acle_se_baz:" } } */
+/* { dg-final { scan-assembler-not "__acle_se_quux:" } } */
+/* { dg-final { scan-assembler-not "__acle_se_norf:" } } */
-- 
cgit v1.1


From c92e08e3d766baf88c7507cd5224d4d241ff8d39 Mon Sep 17 00:00:00 2001
From: Andre Vieira <andre.simoesdiasvieira@arm.com>
Date: Fri, 2 Dec 2016 15:33:26 +0000
Subject: ARMv8-M Security Extension's cmse_nonsecure_call: use
 __gnu_cmse_nonsecure_call

    gcc/ChangeLog:
    2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
		Thomas Preud'homme  <thomas.preudhomme@arm.com>

	* config/arm/arm.c (detect_cmse_nonsecure_call): New.
	(cmse_nonsecure_call_clear_caller_saved): New.
	(arm_reorg): Use cmse_nonsecure_call_clear_caller_saved.
	(arm_function_ok_for_sibcall): Disable sibcalls for
	cmse_nonsecure_call.
	* config/arm/arm-protos.h (detect_cmse_nonsecure_call): New.
	* config/arm/arm.md (call): Handle cmse_nonsecure_call.
	(call_value): Likewise.
	(nonsecure_call_internal): New.
	(nonsecure_call_value_internal): New.
	* config/arm/thumb1.md (*nonsecure_call_reg_thumb1_v5): New.
	(*nonsecure_call_value_reg_thumb1_v5): New.
	* config/arm/thumb2.md (*nonsecure_call_reg_thumb2): New.
	(*nonsecure_call_value_reg_thumb2): New.
	* config/arm/unspecs.md (UNSPEC_NONSECURE_MEM): New.

    libgcc/ChangeLog:
    2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
	       Thomas Preud'homme  <thomas.preudhomme@arm.com>

	* config/arm/cmse_nonsecure_call.S: New.
	* config/arm/t-arm: Compile cmse_nonsecure_call.S

    gcc/testsuite/ChangeLog:
    2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
		Thomas Preud'homme  <thomas.preudhomme@arm.com>

	* gcc.target/arm/cmse/cmse.exp: Run tests in mainline dir.
	* gcc.target/arm/cmse/cmse-9.c: Added some extra tests.
	* gcc.target/arm/cmse/cmse-14.c: New.
	* gcc.target/arm/cmse/baseline/bitfield-4.c: New.
	* gcc.target/arm/cmse/baseline/bitfield-5.c: New.
	* gcc.target/arm/cmse/baseline/bitfield-6.c: New.
	* gcc.target/arm/cmse/baseline/bitfield-7.c: New.
	* gcc.target/arm/cmse/baseline/bitfield-8.c: New.
	* gcc.target/arm/cmse/baseline/bitfield-9.c: New.
	* gcc.target/arm/cmse/baseline/bitfield-and-union-1.c: New.
	* gcc.target/arm/cmse/baseline/cmse-11.c: New.
	* gcc.target/arm/cmse/baseline/cmse-13.c: New.
	* gcc.target/arm/cmse/baseline/cmse-6.c: New.
	* gcc.target/arm/cmse/baseline/union-1.c: New.
	* gcc.target/arm/cmse/baseline/union-2.c: New.
	* gcc.target/arm/cmse/mainline/bitfield-4.c: New.
	* gcc.target/arm/cmse/mainline/bitfield-5.c: New.
	* gcc.target/arm/cmse/mainline/bitfield-6.c: New.
	* gcc.target/arm/cmse/mainline/bitfield-7.c: New.
	* gcc.target/arm/cmse/mainline/bitfield-8.c: New.
	* gcc.target/arm/cmse/mainline/bitfield-9.c: New.
	* gcc.target/arm/cmse/mainline/bitfield-and-union-1.c: New.
	* gcc.target/arm/cmse/mainline/union-1.c: New.
	* gcc.target/arm/cmse/mainline/union-2.c: New.
	* gcc.target/arm/cmse/mainline/hard-sp/cmse-13.c: New.
	* gcc.target/arm/cmse/mainline/hard-sp/cmse-7.c: New.
	* gcc.target/arm/cmse/mainline/hard-sp/cmse-8.c: New.
	* gcc.target/arm/cmse/mainline/hard/cmse-13.c: New.
	* gcc.target/arm/cmse/mainline/hard/cmse-7.c: New.
	* gcc.target/arm/cmse/mainline/hard/cmse-8.c: New.
	* gcc.target/arm/cmse/mainline/soft/cmse-13.c: New.
	* gcc.target/arm/cmse/mainline/soft/cmse-7.c: New.
	* gcc.target/arm/cmse/mainline/soft/cmse-8.c: New.
	* gcc.target/arm/cmse/mainline/softfp-sp/cmse-7.c: New.
	* gcc.target/arm/cmse/mainline/softfp-sp/cmse-8.c: New.
	* gcc.target/arm/cmse/mainline/softfp/cmse-13.c: New.
	* gcc.target/arm/cmse/mainline/softfp/cmse-7.c: New.
	* gcc.target/arm/cmse/mainline/softfp/cmse-8.c: New.


Co-Authored-By: Thomas Preud'homme <thomas.preudhomme@arm.com>

From-SVN: r243192
---
 gcc/ChangeLog                                      |  19 ++
 gcc/config/arm/arm-protos.h                        |   1 +
 gcc/config/arm/arm.c                               | 219 +++++++++++++++++++++
 gcc/config/arm/arm.md                              |  67 ++++++-
 gcc/config/arm/thumb1.md                           |  28 +++
 gcc/config/arm/thumb2.md                           |  28 +++
 gcc/config/arm/unspecs.md                          |   2 +
 gcc/testsuite/ChangeLog                            |  42 ++++
 .../gcc.target/arm/cmse/baseline/bitfield-4.c      |  57 ++++++
 .../gcc.target/arm/cmse/baseline/bitfield-5.c      |  53 +++++
 .../gcc.target/arm/cmse/baseline/bitfield-6.c      |  63 ++++++
 .../gcc.target/arm/cmse/baseline/bitfield-7.c      |  54 +++++
 .../gcc.target/arm/cmse/baseline/bitfield-8.c      |  57 ++++++
 .../gcc.target/arm/cmse/baseline/bitfield-9.c      |  56 ++++++
 .../arm/cmse/baseline/bitfield-and-union-1.c       |  96 +++++++++
 .../gcc.target/arm/cmse/baseline/cmse-11.c         |  22 +++
 .../gcc.target/arm/cmse/baseline/cmse-13.c         |  25 +++
 .../gcc.target/arm/cmse/baseline/cmse-6.c          |  21 ++
 .../gcc.target/arm/cmse/baseline/union-1.c         |  71 +++++++
 .../gcc.target/arm/cmse/baseline/union-2.c         |  86 ++++++++
 gcc/testsuite/gcc.target/arm/cmse/cmse-14.c        |  13 ++
 gcc/testsuite/gcc.target/arm/cmse/cmse-9.c         |  10 +-
 gcc/testsuite/gcc.target/arm/cmse/cmse.exp         |   2 +
 .../gcc.target/arm/cmse/mainline/bitfield-4.c      |  55 ++++++
 .../gcc.target/arm/cmse/mainline/bitfield-5.c      |  51 +++++
 .../gcc.target/arm/cmse/mainline/bitfield-6.c      |  61 ++++++
 .../gcc.target/arm/cmse/mainline/bitfield-7.c      |  52 +++++
 .../gcc.target/arm/cmse/mainline/bitfield-8.c      |  55 ++++++
 .../gcc.target/arm/cmse/mainline/bitfield-9.c      |  54 +++++
 .../arm/cmse/mainline/bitfield-and-union-1.c       |  94 +++++++++
 .../gcc.target/arm/cmse/mainline/hard-sp/cmse-13.c |  43 ++++
 .../gcc.target/arm/cmse/mainline/hard-sp/cmse-7.c  |  42 ++++
 .../gcc.target/arm/cmse/mainline/hard-sp/cmse-8.c  |  41 ++++
 .../gcc.target/arm/cmse/mainline/hard/cmse-13.c    |  38 ++++
 .../gcc.target/arm/cmse/mainline/hard/cmse-7.c     |  34 ++++
 .../gcc.target/arm/cmse/mainline/hard/cmse-8.c     |  33 ++++
 .../gcc.target/arm/cmse/mainline/soft/cmse-13.c    |  27 +++
 .../gcc.target/arm/cmse/mainline/soft/cmse-7.c     |  27 +++
 .../gcc.target/arm/cmse/mainline/soft/cmse-8.c     |  26 +++
 .../arm/cmse/mainline/softfp-sp/cmse-7.c           |  26 +++
 .../arm/cmse/mainline/softfp-sp/cmse-8.c           |  25 +++
 .../gcc.target/arm/cmse/mainline/softfp/cmse-13.c  |  25 +++
 .../gcc.target/arm/cmse/mainline/softfp/cmse-7.c   |  26 +++
 .../gcc.target/arm/cmse/mainline/softfp/cmse-8.c   |  25 +++
 .../gcc.target/arm/cmse/mainline/union-1.c         |  69 +++++++
 .../gcc.target/arm/cmse/mainline/union-2.c         |  84 ++++++++
 libgcc/ChangeLog                                   |   6 +
 libgcc/config/arm/cmse_nonsecure_call.S            | 131 ++++++++++++
 libgcc/config/arm/t-arm                            |   2 +
 49 files changed, 2238 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-4.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-5.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-6.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-7.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-8.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-9.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-and-union-1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-11.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-13.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-6.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/baseline/union-1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/baseline/union-2.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/cmse-14.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-4.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-5.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-6.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-7.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-8.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-9.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-and-union-1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-13.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-7.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-8.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-13.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-7.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-8.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-13.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-7.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-8.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-7.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-8.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-13.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-7.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-8.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/union-1.c
 create mode 100644 gcc/testsuite/gcc.target/arm/cmse/mainline/union-2.c
 create mode 100644 libgcc/config/arm/cmse_nonsecure_call.S

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ce79fdd..807d406 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,6 +1,25 @@
 2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
 	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
+	* config/arm/arm.c (detect_cmse_nonsecure_call): New.
+	(cmse_nonsecure_call_clear_caller_saved): New.
+	(arm_reorg): Use cmse_nonsecure_call_clear_caller_saved.
+	(arm_function_ok_for_sibcall): Disable sibcalls for
+	cmse_nonsecure_call.
+	* config/arm/arm-protos.h (detect_cmse_nonsecure_call): New.
+	* config/arm/arm.md (call): Handle cmse_nonsecure_call.
+	(call_value): Likewise.
+	(nonsecure_call_internal): New.
+	(nonsecure_call_value_internal): New.
+	* config/arm/thumb1.md (*nonsecure_call_reg_thumb1_v5): New.
+	(*nonsecure_call_value_reg_thumb1_v5): New.
+	* config/arm/thumb2.md (*nonsecure_call_reg_thumb2): New.
+	(*nonsecure_call_value_reg_thumb2): New.
+	* config/arm/unspecs.md (UNSPEC_NONSECURE_MEM): New.
+
+2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
+	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
+
 	* config/arm/arm.c (gimplify.h): New include.
 	(arm_handle_cmse_nonsecure_call): New.
 	(arm_attribute_table): Added cmse_nonsecure_call.
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 634a5de..05d73ab 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -137,6 +137,7 @@ extern int arm_const_double_inline_cost (rtx);
 extern bool arm_const_double_by_parts (rtx);
 extern bool arm_const_double_by_immediates (rtx);
 extern void arm_emit_call_insn (rtx, rtx, bool);
+bool detect_cmse_nonsecure_call (tree);
 extern const char *output_call (rtx *);
 void arm_emit_movpair (rtx, rtx);
 extern const char *output_mov_long_double_arm_from_arm (rtx *);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index a6b07b2..f1df3a0 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -7000,6 +7000,15 @@ arm_function_ok_for_sibcall (tree decl, tree exp)
   if (IS_CMSE_ENTRY (arm_current_func_type ()))
     return false;
 
+  /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
+     as this would complicate matters for later code generation.  */
+  if (TREE_CODE (exp) == CALL_EXPR)
+    {
+      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
+      if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
+	return false;
+    }
+
   if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
     {
       /* Check that the return value locations are the same.  For
@@ -16654,6 +16663,197 @@ compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
   return not_to_clear_mask;
 }
 
+/* Clears caller saved registers not used to pass arguments before a
+   cmse_nonsecure_call.  Saving, clearing and restoring of callee saved
+   registers is done in the __gnu_cmse_nonsecure_call libcall.  */
+
+static void
+cmse_nonsecure_call_clear_caller_saved (void)
+{
+  basic_block bb;
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      rtx_insn *insn;
+
+      FOR_BB_INSNS (bb, insn)
+	{
+	  uint64_t to_clear_mask, float_mask;
+	  rtx_insn *seq;
+	  rtx pat, call, unspec, reg, cleared_reg, tmp;
+	  unsigned int regno, maxregno;
+	  rtx address;
+	  CUMULATIVE_ARGS args_so_far_v;
+	  cumulative_args_t args_so_far;
+	  tree arg_type, fntype;
+	  bool using_r4, first_param = true;
+	  function_args_iterator args_iter;
+	  uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
+	  uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
+
+	  if (!NONDEBUG_INSN_P (insn))
+	    continue;
+
+	  if (!CALL_P (insn))
+	    continue;
+
+	  pat = PATTERN (insn);
+	  gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
+	  call = XVECEXP (pat, 0, 0);
+
+	  /* Get the real call RTX if the insn sets a value, i.e. returns.  */
+	  if (GET_CODE (call) == SET)
+	      call = SET_SRC (call);
+
+	  /* Check if it is a cmse_nonsecure_call.  */
+	  unspec = XEXP (call, 0);
+	  if (GET_CODE (unspec) != UNSPEC
+	      || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
+	    continue;
+
+	  /* Determine the caller-saved registers we need to clear.  */
+	  to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
+	  maxregno = NUM_ARG_REGS - 1;
+	  /* Only look at the caller-saved floating point registers in case of
+	     -mfloat-abi=hard.  For -mfloat-abi=softfp we will be using the
+	     lazy store and loads which clear both caller- and callee-saved
+	     registers.  */
+	  if (TARGET_HARD_FLOAT_ABI)
+	    {
+	      float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
+	      float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
+	      to_clear_mask |= float_mask;
+	      maxregno = D7_VFP_REGNUM;
+	    }
+
+	  /* Make sure the register used to hold the function address is not
+	     cleared.  */
+	  address = RTVEC_ELT (XVEC (unspec, 0), 0);
+	  gcc_assert (MEM_P (address));
+	  gcc_assert (REG_P (XEXP (address, 0)));
+	  to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
+
+	  /* Set basic block of call insn so that df rescan is performed on
+	     insns inserted here.  */
+	  set_block_for_insn (insn, bb);
+	  df_set_flags (DF_DEFER_INSN_RESCAN);
+	  start_sequence ();
+
+	  /* Make sure the scheduler doesn't schedule other insns beyond
+	     here.  */
+	  emit_insn (gen_blockage ());
+
+	  /* Walk through all arguments and clear registers appropriately.
+	  */
+	  fntype = TREE_TYPE (MEM_EXPR (address));
+	  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
+				    NULL_TREE);
+	  args_so_far = pack_cumulative_args (&args_so_far_v);
+	  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
+	    {
+	      rtx arg_rtx;
+	      machine_mode arg_mode = TYPE_MODE (arg_type);
+
+	      if (VOID_TYPE_P (arg_type))
+		continue;
+
+	      if (!first_param)
+		arm_function_arg_advance (args_so_far, arg_mode, arg_type,
+					  true);
+
+	      arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
+					  true);
+	      gcc_assert (REG_P (arg_rtx));
+	      to_clear_mask
+		&= ~compute_not_to_clear_mask (arg_type, arg_rtx,
+					       REGNO (arg_rtx),
+					       padding_bits_to_clear_ptr);
+
+	      first_param = false;
+	    }
+
+	  /* Clear padding bits where needed.  */
+	  cleared_reg = XEXP (address, 0);
+	  reg = gen_rtx_REG (SImode, IP_REGNUM);
+	  using_r4 = false;
+	  for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
+	    {
+	      if (padding_bits_to_clear[regno] == 0)
+		continue;
+
+	      /* If this is a Thumb-1 target copy the address of the function
+		 we are calling from 'r4' into 'ip' such that we can use r4 to
+		 clear the unused bits in the arguments.  */
+	      if (TARGET_THUMB1 && !using_r4)
+		{
+		  using_r4 =  true;
+		  reg = cleared_reg;
+		  emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
+					  reg);
+		}
+
+	      tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
+	      emit_move_insn (reg, tmp);
+	      /* Also fill the top half of the negated
+		 padding_bits_to_clear.  */
+	      if (((~padding_bits_to_clear[regno]) >> 16) > 0)
+		{
+		  tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
+		  emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
+								GEN_INT (16),
+								GEN_INT (16)),
+					  tmp));
+		}
+
+	      emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
+				     gen_rtx_REG (SImode, regno),
+				     reg));
+
+	    }
+	  if (using_r4)
+	    emit_move_insn (cleared_reg,
+			    gen_rtx_REG (SImode, IP_REGNUM));
+
+	  /* We use right shift and left shift to clear the LSB of the address
+	     we jump to instead of using bic, to avoid having to use an extra
+	     register on Thumb-1.  */
+	  tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
+	  emit_insn (gen_rtx_SET (cleared_reg, tmp));
+	  tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
+	  emit_insn (gen_rtx_SET (cleared_reg, tmp));
+
+	  /* Clearing all registers that leak before doing a non-secure
+	     call.  */
+	  for (regno = R0_REGNUM; regno <= maxregno; regno++)
+	    {
+	      if (!(to_clear_mask & (1LL << regno)))
+		continue;
+
+	      /* If regno is an even vfp register and its successor is also to
+		 be cleared, use vmov.  */
+	      if (IS_VFP_REGNUM (regno))
+		{
+		  if (TARGET_VFP_DOUBLE
+		      && VFP_REGNO_OK_FOR_DOUBLE (regno)
+		      && to_clear_mask & (1LL << (regno + 1)))
+		    emit_move_insn (gen_rtx_REG (DFmode, regno++),
+				    CONST0_RTX (DFmode));
+		  else
+		    emit_move_insn (gen_rtx_REG (SFmode, regno),
+				    CONST0_RTX (SFmode));
+		}
+	      else
+		emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
+	    }
+
+	  seq = get_insns ();
+	  end_sequence ();
+	  emit_insn_before (seq, insn);
+
+	}
+    }
+}
+
 /* Rewrite move insn into subtract of 0 if the condition codes will
    be useful in next conditional jump insn.  */
 
@@ -16954,6 +17154,8 @@ arm_reorg (void)
   HOST_WIDE_INT address = 0;
   Mfix * fix;
 
+  if (use_cmse)
+    cmse_nonsecure_call_clear_caller_saved ();
   if (TARGET_THUMB1)
     thumb1_reorg ();
   else if (TARGET_THUMB2)
@@ -17326,6 +17528,23 @@ vfp_emit_fstmd (int base_reg, int count)
   return count * 8;
 }
 
+/* Returns true if -mcmse has been passed and the function pointed to by 'addr'
+   has the cmse_nonsecure_call attribute and returns false otherwise.  */
+
+bool
+detect_cmse_nonsecure_call (tree addr)
+{
+  if (!addr)
+    return FALSE;
+
+  tree fntype = TREE_TYPE (addr);
+  if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
+				    TYPE_ATTRIBUTES (fntype)))
+    return TRUE;
+  return FALSE;
+}
+
+
 /* Emit a call instruction with pattern PAT.  ADDR is the address of
    the call target.  */
 
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 5523baf..d561a4b 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -8052,6 +8052,7 @@
   "
   {
     rtx callee, pat;
+    tree addr = MEM_EXPR (operands[0]);
     
     /* In an untyped call, we can get NULL for operand 2.  */
     if (operands[2] == NULL_RTX)
@@ -8066,8 +8067,17 @@
 	: !REG_P (callee))
       XEXP (operands[0], 0) = force_reg (Pmode, callee);
 
-    pat = gen_call_internal (operands[0], operands[1], operands[2]);
-    arm_emit_call_insn (pat, XEXP (operands[0], 0), false);
+    if (detect_cmse_nonsecure_call (addr))
+      {
+	pat = gen_nonsecure_call_internal (operands[0], operands[1],
+					   operands[2]);
+	emit_call_insn (pat);
+      }
+    else
+      {
+	pat = gen_call_internal (operands[0], operands[1], operands[2]);
+	arm_emit_call_insn (pat, XEXP (operands[0], 0), false);
+      }
     DONE;
   }"
 )
@@ -8078,6 +8088,24 @@
 	      (use (match_operand 2 "" ""))
 	      (clobber (reg:SI LR_REGNUM))])])
 
+(define_expand "nonsecure_call_internal"
+  [(parallel [(call (unspec:SI [(match_operand 0 "memory_operand" "")]
+			       UNSPEC_NONSECURE_MEM)
+		    (match_operand 1 "general_operand" ""))
+	      (use (match_operand 2 "" ""))
+	      (clobber (reg:SI LR_REGNUM))
+	      (clobber (reg:SI 4))])]
+  "use_cmse"
+  "
+  {
+    rtx tmp;
+    tmp = copy_to_suggested_reg (XEXP (operands[0], 0),
+				 gen_rtx_REG (SImode, 4),
+				 SImode);
+
+    operands[0] = replace_equiv_address (operands[0], tmp);
+  }")
+
 (define_insn "*call_reg_armv5"
   [(call (mem:SI (match_operand:SI 0 "s_register_operand" "r"))
          (match_operand 1 "" ""))
@@ -8113,6 +8141,7 @@
   "
   {
     rtx pat, callee;
+    tree addr = MEM_EXPR (operands[1]);
     
     /* In an untyped call, we can get NULL for operand 2.  */
     if (operands[3] == 0)
@@ -8127,9 +8156,18 @@
 	: !REG_P (callee))
       XEXP (operands[1], 0) = force_reg (Pmode, callee);
 
-    pat = gen_call_value_internal (operands[0], operands[1],
-				   operands[2], operands[3]);
-    arm_emit_call_insn (pat, XEXP (operands[1], 0), false);
+    if (detect_cmse_nonsecure_call (addr))
+      {
+	pat = gen_nonsecure_call_value_internal (operands[0], operands[1],
+						 operands[2], operands[3]);
+	emit_call_insn (pat);
+      }
+    else
+      {
+	pat = gen_call_value_internal (operands[0], operands[1],
+				       operands[2], operands[3]);
+	arm_emit_call_insn (pat, XEXP (operands[1], 0), false);
+      }
     DONE;
   }"
 )
@@ -8141,6 +8179,25 @@
 	      (use (match_operand 3 "" ""))
 	      (clobber (reg:SI LR_REGNUM))])])
 
+(define_expand "nonsecure_call_value_internal"
+  [(parallel [(set (match_operand       0 "" "")
+		   (call (unspec:SI [(match_operand 1 "memory_operand" "")]
+				    UNSPEC_NONSECURE_MEM)
+			 (match_operand 2 "general_operand" "")))
+	      (use (match_operand 3 "" ""))
+	      (clobber (reg:SI LR_REGNUM))
+	      (clobber (reg:SI 4))])]
+  "use_cmse"
+  "
+  {
+    rtx tmp;
+    tmp = copy_to_suggested_reg (XEXP (operands[1], 0),
+				 gen_rtx_REG (SImode, 4),
+				 SImode);
+
+    operands[1] = replace_equiv_address (operands[1], tmp);
+  }")
+
 (define_insn "*call_value_reg_armv5"
   [(set (match_operand 0 "" "")
         (call (mem:SI (match_operand:SI 1 "s_register_operand" "r"))
diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
index 73a7381..f9e934f 100644
--- a/gcc/config/arm/thumb1.md
+++ b/gcc/config/arm/thumb1.md
@@ -1731,6 +1731,19 @@
    (set_attr "type" "call")]
 )
 
+(define_insn "*nonsecure_call_reg_thumb1_v5"
+  [(call (unspec:SI [(mem:SI (match_operand:SI 0 "register_operand" "l*r"))]
+		    UNSPEC_NONSECURE_MEM)
+	 (match_operand 1 "" ""))
+   (use (match_operand 2 "" ""))
+   (clobber (reg:SI LR_REGNUM))
+   (clobber (match_dup 0))]
+  "TARGET_THUMB1 && use_cmse && !SIBLING_CALL_P (insn)"
+  "bl\\t__gnu_cmse_nonsecure_call"
+  [(set_attr "length" "4")
+   (set_attr "type" "call")]
+)
+
 (define_insn "*call_reg_thumb1"
   [(call (mem:SI (match_operand:SI 0 "register_operand" "l*r"))
 	 (match_operand 1 "" ""))
@@ -1763,6 +1776,21 @@
    (set_attr "type" "call")]
 )
 
+(define_insn "*nonsecure_call_value_reg_thumb1_v5"
+  [(set (match_operand 0 "" "")
+	(call (unspec:SI
+	       [(mem:SI (match_operand:SI 1 "register_operand" "l*r"))]
+	       UNSPEC_NONSECURE_MEM)
+	      (match_operand 2 "" "")))
+   (use (match_operand 3 "" ""))
+   (clobber (reg:SI LR_REGNUM))
+   (clobber (match_dup 1))]
+  "TARGET_THUMB1 && use_cmse"
+  "bl\\t__gnu_cmse_nonsecure_call"
+  [(set_attr "length" "4")
+   (set_attr "type" "call")]
+)
+
 (define_insn "*call_value_reg_thumb1"
   [(set (match_operand 0 "" "")
 	(call (mem:SI (match_operand:SI 1 "register_operand" "l*r"))
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index 9029a2f..9b078a5 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -580,6 +580,19 @@
   [(set_attr "type" "call")]
 )
 
+(define_insn "*nonsecure_call_reg_thumb2"
+  [(call (unspec:SI [(mem:SI (match_operand:SI 0 "s_register_operand" "r"))]
+		    UNSPEC_NONSECURE_MEM)
+	 (match_operand 1 "" ""))
+   (use (match_operand 2 "" ""))
+   (clobber (reg:SI LR_REGNUM))
+   (clobber (match_dup 0))]
+  "TARGET_THUMB2 && use_cmse"
+  "bl\\t__gnu_cmse_nonsecure_call"
+  [(set_attr "length" "4")
+   (set_attr "type" "call")]
+)
+
 (define_insn "*call_value_reg_thumb2"
   [(set (match_operand 0 "" "")
 	(call (mem:SI (match_operand:SI 1 "register_operand" "l*r"))
@@ -591,6 +604,21 @@
   [(set_attr "type" "call")]
 )
 
+(define_insn "*nonsecure_call_value_reg_thumb2"
+  [(set (match_operand 0 "" "")
+	(call
+	 (unspec:SI [(mem:SI (match_operand:SI 1 "register_operand" "l*r"))]
+		    UNSPEC_NONSECURE_MEM)
+	 (match_operand 2 "" "")))
+   (use (match_operand 3 "" ""))
+   (clobber (reg:SI LR_REGNUM))
+   (clobber (match_dup 1))]
+  "TARGET_THUMB2 && use_cmse"
+  "bl\t__gnu_cmse_nonsecure_call"
+  [(set_attr "length" "4")
+   (set_attr "type" "call")]
+)
+
 (define_insn "*thumb2_indirect_jump"
   [(set (pc)
 	(match_operand:SI 0 "register_operand" "l*r"))]
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index bee8795..1aa39e8 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -84,6 +84,8 @@
   UNSPEC_VRINTA         ; Represent a float to integral float rounding
                         ; towards nearest, ties away from zero.
   UNSPEC_PROBE_STACK    ; Probe stack memory reference
+  UNSPEC_NONSECURE_MEM	; Represent non-secure memory in ARMv8-M with
+			; security extension
 ])
 
 (define_c_enum "unspec" [
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index ca431e4..e9786b7 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,6 +1,48 @@
 2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
 	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
+	* gcc.target/arm/cmse/cmse.exp: Run tests in mainline dir.
+	* gcc.target/arm/cmse/cmse-9.c: Added some extra tests.
+	* gcc.target/arm/cmse/cmse-14.c: New.
+	* gcc.target/arm/cmse/baseline/bitfield-4.c: New.
+	* gcc.target/arm/cmse/baseline/bitfield-5.c: New.
+	* gcc.target/arm/cmse/baseline/bitfield-6.c: New.
+	* gcc.target/arm/cmse/baseline/bitfield-7.c: New.
+	* gcc.target/arm/cmse/baseline/bitfield-8.c: New.
+	* gcc.target/arm/cmse/baseline/bitfield-9.c: New.
+	* gcc.target/arm/cmse/baseline/bitfield-and-union-1.c: New.
+	* gcc.target/arm/cmse/baseline/cmse-11.c: New.
+	* gcc.target/arm/cmse/baseline/cmse-13.c: New.
+	* gcc.target/arm/cmse/baseline/cmse-6.c: New.
+	* gcc.target/arm/cmse/baseline/union-1.c: New.
+	* gcc.target/arm/cmse/baseline/union-2.c: New.
+	* gcc.target/arm/cmse/mainline/bitfield-4.c: New.
+	* gcc.target/arm/cmse/mainline/bitfield-5.c: New.
+	* gcc.target/arm/cmse/mainline/bitfield-6.c: New.
+	* gcc.target/arm/cmse/mainline/bitfield-7.c: New.
+	* gcc.target/arm/cmse/mainline/bitfield-8.c: New.
+	* gcc.target/arm/cmse/mainline/bitfield-9.c: New.
+	* gcc.target/arm/cmse/mainline/bitfield-and-union-1.c: New.
+	* gcc.target/arm/cmse/mainline/union-1.c: New.
+	* gcc.target/arm/cmse/mainline/union-2.c: New.
+	* gcc.target/arm/cmse/mainline/hard-sp/cmse-13.c: New.
+	* gcc.target/arm/cmse/mainline/hard-sp/cmse-7.c: New.
+	* gcc.target/arm/cmse/mainline/hard-sp/cmse-8.c: New.
+	* gcc.target/arm/cmse/mainline/hard/cmse-13.c: New.
+	* gcc.target/arm/cmse/mainline/hard/cmse-7.c: New.
+	* gcc.target/arm/cmse/mainline/hard/cmse-8.c: New.
+	* gcc.target/arm/cmse/mainline/soft/cmse-13.c: New.
+	* gcc.target/arm/cmse/mainline/soft/cmse-7.c: New.
+	* gcc.target/arm/cmse/mainline/soft/cmse-8.c: New.
+	* gcc.target/arm/cmse/mainline/softfp-sp/cmse-7.c: New.
+	* gcc.target/arm/cmse/mainline/softfp-sp/cmse-8.c: New.
+	* gcc.target/arm/cmse/mainline/softfp/cmse-13.c: New.
+	* gcc.target/arm/cmse/mainline/softfp/cmse-7.c: New.
+	* gcc.target/arm/cmse/mainline/softfp/cmse-8.c: New.
+
+2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
+	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
+
 	* gcc.target/arm/cmse/cmse-3.c: Add tests.
 	* gcc.target/arm/cmse/cmse-4.c: Add tests.
 	* gcc.target/arm/cmse/cmse-15.c: New.
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-4.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-4.c
new file mode 100644
index 0000000..a6c1386
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-4.c
@@ -0,0 +1,57 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
+
+typedef struct
+{
+  unsigned char a;
+  unsigned int b:5;
+  unsigned int c:11, :0, d:8;
+  struct { unsigned int ee:2; } e;
+} test_st;
+
+typedef union
+{
+  test_st st;
+  struct
+    {
+      unsigned int v1;
+      unsigned int v2;
+      unsigned int v3;
+      unsigned int v4;
+    }values;
+} read_st;
+
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
+
+extern void foo (test_st st);
+
+int
+main (void)
+{
+  read_st r;
+  foo_ns f;
+
+  f = (foo_ns) 0x200000;
+  r.values.v1 = 0xFFFFFFFF;
+  r.values.v2 = 0xFFFFFFFF;
+  r.values.v3 = 0xFFFFFFFF;
+  r.values.v4 = 0xFFFFFFFF;
+
+  f (r.st);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "mov\tip, r4" } } */
+/* { dg-final { scan-assembler "movw\tr4, #65535" } } */
+/* { dg-final { scan-assembler "movt\tr4, 255" } } */
+/* { dg-final { scan-assembler "ands\tr0, r4" } } */
+/* { dg-final { scan-assembler "movs\tr4, #255" } } */
+/* { dg-final { scan-assembler "ands\tr1, r4" } } */
+/* { dg-final { scan-assembler "movs\tr4, #3" } } */
+/* { dg-final { scan-assembler "ands\tr2, r4" } } */
+/* { dg-final { scan-assembler "mov\tr4, ip" } } */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "movs\tr3, r4" } } */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-5.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-5.c
new file mode 100644
index 0000000..d51ce2d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-5.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
+
+typedef struct
+{
+  unsigned char	  a;
+  unsigned short  b :5;
+  unsigned char	  c;
+  unsigned short  d :11;
+} test_st;
+
+typedef union
+{
+  test_st st;
+  struct
+    {
+      unsigned int v1;
+      unsigned int v2;
+      unsigned int v3;
+      unsigned int v4;
+    }values;
+} read_st;
+
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
+
+int
+main (void)
+{
+  read_st r;
+  foo_ns f;
+
+  f = (foo_ns) 0x200000;
+  r.values.v1 = 0xFFFFFFFF;
+  r.values.v2 = 0xFFFFFFFF;
+
+  f (r.st);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "mov\tip, r4" } } */
+/* { dg-final { scan-assembler "movw\tr4, #8191" } } */
+/* { dg-final { scan-assembler "movt\tr4, 255" } } */
+/* { dg-final { scan-assembler "ands\tr0, r4" } } */
+/* { dg-final { scan-assembler "movw\tr4, #2047" } } */
+/* { dg-final { scan-assembler "ands\tr1, r4" } } */
+/* { dg-final { scan-assembler "mov\tr4, ip" } } */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "movs\tr2, r4" } } */
+/* { dg-final { scan-assembler "movs\tr3, r4" } } */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-6.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-6.c
new file mode 100644
index 0000000..77e9104
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-6.c
@@ -0,0 +1,63 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
+
+typedef struct
+{
+  unsigned char a;
+  unsigned int	b : 3;
+  unsigned int	c : 14;
+  unsigned int	d : 1;
+  struct {
+      unsigned int    ee  : 2;
+      unsigned short  ff  : 15;
+  } e;
+  unsigned char	g : 1;
+  unsigned char	  : 4;
+  unsigned char	h : 3;
+} test_st;
+
+typedef union
+{
+  test_st st;
+  struct
+    {
+      unsigned int v1;
+      unsigned int v2;
+      unsigned int v3;
+      unsigned int v4;
+    }values;
+} read_st;
+
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
+
+int
+main (void)
+{
+  read_st r;
+  foo_ns f;
+
+  f = (foo_ns) 0x200000;
+  r.values.v1 = 0xFFFFFFFF;
+  r.values.v2 = 0xFFFFFFFF;
+  r.values.v3 = 0xFFFFFFFF;
+  r.values.v4 = 0xFFFFFFFF;
+
+  f (r.st);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "mov\tip, r4" } } */
+/* { dg-final { scan-assembler "movw\tr4, #65535" } } */
+/* { dg-final { scan-assembler "movt\tr4, 1023" } } */
+/* { dg-final { scan-assembler "ands\tr0, r4" } } */
+/* { dg-final { scan-assembler "movs\tr4, #3" } } */
+/* { dg-final { scan-assembler "movt\tr4, 32767" } } */
+/* { dg-final { scan-assembler "ands\tr1, r4" } } */
+/* { dg-final { scan-assembler "movs\tr4, #255" } } */
+/* { dg-final { scan-assembler "ands\tr2, r4" } } */
+/* { dg-final { scan-assembler "mov\tr4, ip" } } */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "movs\tr3, r4" } } */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-7.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-7.c
new file mode 100644
index 0000000..3d8941b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-7.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
+
+typedef struct
+{
+  unsigned char	  a;
+  unsigned short  b :5;
+  unsigned char	  c;
+  unsigned short  d :11;
+} test_st;
+
+typedef union
+{
+  test_st st;
+  struct
+    {
+      unsigned int v1;
+      unsigned int v2;
+      unsigned int v3;
+      unsigned int v4;
+    }values;
+} read_st;
+
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
+
+int
+main (void)
+{
+  read_st r;
+  foo_ns f;
+
+  f = (foo_ns) 0x200000;
+  r.values.v1 = 0xFFFFFFFF;
+  r.values.v2 = 0xFFFFFFFF;
+
+  f (r.st);
+  return 0;
+}
+
+
+/* { dg-final { scan-assembler "mov\tip, r4" } } */
+/* { dg-final { scan-assembler "movw\tr4, #8191" } } */
+/* { dg-final { scan-assembler "movt\tr4, 255" } } */
+/* { dg-final { scan-assembler "ands\tr0, r4" } } */
+/* { dg-final { scan-assembler "movw\tr4, #2047" } } */
+/* { dg-final { scan-assembler "ands\tr1, r4" } } */
+/* { dg-final { scan-assembler "mov\tr4, ip" } } */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "movs\tr2, r4" } } */
+/* { dg-final { scan-assembler "movs\tr3, r4" } } */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-8.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-8.c
new file mode 100644
index 0000000..9ffbb71
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-8.c
@@ -0,0 +1,57 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
+
+typedef struct
+{
+  unsigned char	  a;
+  unsigned int	    :0;
+  unsigned int	  b :1;
+  unsigned short    :0;
+  unsigned short  c;
+  unsigned int	    :0;
+  unsigned int	  d :21;
+} test_st;
+
+typedef union
+{
+  test_st st;
+  struct
+    {
+      unsigned int v1;
+      unsigned int v2;
+      unsigned int v3;
+      unsigned int v4;
+    }values;
+} read_st;
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
+
+int
+main (void)
+{
+  read_st r;
+  foo_ns f;
+
+  f = (foo_ns) 0x200000;
+  r.values.v1 = 0xFFFFFFFF;
+  r.values.v2 = 0xFFFFFFFF;
+  r.values.v3 = 0xFFFFFFFF;
+
+  f (r.st);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "mov\tip, r4" } } */
+/* { dg-final { scan-assembler "movs\tr4, #255" } } */
+/* { dg-final { scan-assembler "ands\tr0, r4" } } */
+/* { dg-final { scan-assembler "movs\tr4, #1" } } */
+/* { dg-final { scan-assembler "movt\tr4, 65535" } } */
+/* { dg-final { scan-assembler "ands\tr1, r4" } } */
+/* { dg-final { scan-assembler "movw\tr4, #65535" } } */
+/* { dg-final { scan-assembler "movt\tr4, 31" } } */
+/* { dg-final { scan-assembler "ands\tr2, r4" } } */
+/* { dg-final { scan-assembler "mov\tr4, ip" } } */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "movs\tr3, r4" } } */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-9.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-9.c
new file mode 100644
index 0000000..8a61418
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-9.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
+
+typedef struct
+{
+  char a:3;
+} test_st3;
+
+typedef struct
+{
+  char a:3;
+} test_st2;
+
+typedef struct
+{
+  test_st2 st2;
+  test_st3 st3;
+} test_st;
+
+typedef union
+{
+  test_st st;
+  struct
+    {
+      unsigned int v1;
+      unsigned int v2;
+      unsigned int v3;
+      unsigned int v4;
+    }values;
+} read_st;
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
+
+int
+main (void)
+{
+  read_st r;
+  foo_ns f;
+
+  f = (foo_ns) 0x200000;
+  r.values.v1 = 0xFFFFFFFF;
+
+  f (r.st);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "mov\tip, r4" } } */
+/* { dg-final { scan-assembler "movw\tr4, #1799" } } */
+/* { dg-final { scan-assembler "ands\tr0, r4" } } */
+/* { dg-final { scan-assembler "mov\tr4, ip" } } */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "movs\tr1, r4" } } */
+/* { dg-final { scan-assembler "movs\tr2, r4" } } */
+/* { dg-final { scan-assembler "movs\tr3, r4" } } */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-and-union-1.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-and-union-1.c
new file mode 100644
index 0000000..642f4e0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/bitfield-and-union-1.c
@@ -0,0 +1,96 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
+
+typedef struct
+{
+  unsigned short a :11;
+} test_st_4;
+
+typedef union
+{
+  char	      a;
+  test_st_4 st4;
+}test_un_2;
+
+typedef struct
+{
+  unsigned char	  a;
+  unsigned int	    :0;
+  unsigned int	  b :1;
+  unsigned short    :0;
+  unsigned short  c;
+  unsigned int	    :0;
+  unsigned int	  d :21;
+} test_st_3;
+
+typedef struct
+{
+  unsigned char	  a :3;
+  unsigned int	  b :13;
+  test_un_2	  un2;
+} test_st_2;
+
+typedef union
+{
+  test_st_2 st2;
+  test_st_3 st3;
+}test_un_1;
+
+typedef struct
+{
+  unsigned char	  a :2;
+  unsigned char	    :0;
+  unsigned short  b :5;
+  unsigned char	    :0;
+  unsigned char	  c :4;
+  test_un_1	  un1;
+} test_st_1;
+
+typedef union
+{
+  test_st_1 st1;
+  struct
+    {
+      unsigned int v1;
+      unsigned int v2;
+      unsigned int v3;
+      unsigned int v4;
+    }values;
+} read_st_1;
+
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st_1);
+
+int
+main (void)
+{
+  read_st_1 r;
+  foo_ns f;
+
+  f = (foo_ns) 0x200000;
+  r.values.v1 = 0xFFFFFFFF;
+  r.values.v2 = 0xFFFFFFFF;
+  r.values.v3 = 0xFFFFFFFF;
+  r.values.v4 = 0xFFFFFFFF;
+
+  f (r.st1);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "mov\tip, r4" } } */
+/* { dg-final { scan-assembler "movw\tr4, #7939" } } */
+/* { dg-final { scan-assembler "movt\tr4, 15" } } */
+/* { dg-final { scan-assembler "ands\tr0, r4" } } */
+/* { dg-final { scan-assembler "movw\tr4, #65535" } } */
+/* { dg-final { scan-assembler "movt\tr4, 2047" } } */
+/* { dg-final { scan-assembler "ands\tr1, r4" } } */
+/* { dg-final { scan-assembler "movs\tr4, #1" } } */
+/* { dg-final { scan-assembler "movt\tr4, 65535" } } */
+/* { dg-final { scan-assembler "ands\tr2, r4" } } */
+/* { dg-final { scan-assembler "movw\tr4, #65535" } } */
+/* { dg-final { scan-assembler "movt\tr4, 31" } } */
+/* { dg-final { scan-assembler "ands\tr3, r4" } } */
+/* { dg-final { scan-assembler "mov\tr4, ip" } } */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-11.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-11.c
new file mode 100644
index 0000000..3007409
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-11.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_base_ok } */
+/* { dg-add-options arm_arch_v8m_base } */
+/* { dg-options "-mcmse" }  */
+
+int __attribute__ ((cmse_nonsecure_call)) (*bar) (int);
+
+int
+foo (int a)
+{
+  return bar (bar (a + 1));
+}
+
+/* Checks for saving and clearing prior to function call.  */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "movs\tr1, r4" } } */
+/* { dg-final { scan-assembler "movs\tr2, r4" } } */
+/* { dg-final { scan-assembler "movs\tr3, r4" } } */
+
+/* Now we check that we use the correct intrinsic to call.  */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-13.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-13.c
new file mode 100644
index 0000000..f2b931b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-13.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_base_ok } */
+/* { dg-add-options arm_arch_v8m_base } */
+/* { dg-options "-mcmse" } */
+
+int __attribute__ ((cmse_nonsecure_call)) (*bar) (float, double);
+
+int
+foo (int a)
+{
+  return bar (1.0f, 2.0) + a + 1;
+}
+
+/* Checks for saving and clearing prior to function call.  */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler-not "movs\tr0, r4" } } */
+/* { dg-final { scan-assembler "\n\tmovs\tr1, r4" } } */
+/* { dg-final { scan-assembler-not "\n\tmovs\tr2, r4\n\tmovs\tr3, r4" } } */
+/* { dg-final { scan-assembler-not "vmov" } } */
+/* { dg-final { scan-assembler-not "vmsr" } } */
+
+/* Now we check that we use the correct intrinsic to call.  */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-6.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-6.c
new file mode 100644
index 0000000..95da045
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/cmse-6.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_base_ok } */
+/* { dg-add-options arm_arch_v8m_base } */
+/* { dg-options "-mcmse" }  */
+
+int __attribute__ ((cmse_nonsecure_call)) (*bar) (double);
+
+int
+foo (int a)
+{
+  return bar (2.0) + a + 1;
+}
+
+/* Remember not to clear r0 and r1, because we are passing the double
+ * parameter for bar in them.  */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "movs\tr2, r4" } } */
+
+/* Now we check that we use the correct intrinsic to call.  */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/union-1.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/union-1.c
new file mode 100644
index 0000000..ff18e83
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/union-1.c
@@ -0,0 +1,71 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
+
+typedef struct
+{
+  unsigned char	  a :2;
+  unsigned char	    :0;
+  unsigned short  b :5;
+  unsigned char	    :0;
+  unsigned short  c :3;
+  unsigned char	    :0;
+  unsigned int	  d :9;
+} test_st_1;
+
+typedef struct
+{
+  unsigned short  a :7;
+  unsigned char	    :0;
+  unsigned char	  b :1;
+  unsigned char	    :0;
+  unsigned short  c :6;
+} test_st_2;
+
+typedef union
+{
+  test_st_1 st_1;
+  test_st_2 st_2;
+}test_un;
+
+typedef union
+{
+  test_un un;
+  struct
+    {
+      unsigned int v1;
+      unsigned int v2;
+      unsigned int v3;
+      unsigned int v4;
+    }values;
+} read_un;
+
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_un);
+
+int
+main (void)
+{
+  read_un r;
+  foo_ns f;
+
+  f = (foo_ns) 0x200000;
+  r.values.v1 = 0xFFFFFFFF;
+  r.values.v2 = 0xFFFFFFFF;
+
+  f (r.un);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "mov\tip, r4" } } */
+/* { dg-final { scan-assembler "movw\tr4, #8063" } } */
+/* { dg-final { scan-assembler "movt\tr4, 63" } } */
+/* { dg-final { scan-assembler "ands\tr0, r4" } } */
+/* { dg-final { scan-assembler "movw\tr4, #511" } } */
+/* { dg-final { scan-assembler "ands\tr1, r4" } } */
+/* { dg-final { scan-assembler "mov\tr4, ip" } } */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "movs\tr2, r4" } } */
+/* { dg-final { scan-assembler "movs\tr3, r4" } } */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/baseline/union-2.c b/gcc/testsuite/gcc.target/arm/cmse/baseline/union-2.c
new file mode 100644
index 0000000..b2e024b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/baseline/union-2.c
@@ -0,0 +1,86 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
+
+typedef struct
+{
+  unsigned char	  a :2;
+  unsigned char	    :0;
+  unsigned short  b :5;
+  unsigned char	    :0;
+  unsigned short  c :3;
+  unsigned char	    :0;
+  unsigned int	  d :9;
+} test_st_1;
+
+typedef struct
+{
+  unsigned short  a :7;
+  unsigned char	    :0;
+  unsigned char	  b :1;
+  unsigned char	    :0;
+  unsigned short  c :6;
+} test_st_2;
+
+typedef struct
+{
+  unsigned char	  a;
+  unsigned int	    :0;
+  unsigned int	  b :1;
+  unsigned short    :0;
+  unsigned short  c;
+  unsigned int	    :0;
+  unsigned int	  d :21;
+} test_st_3;
+
+typedef union
+{
+  test_st_1 st_1;
+  test_st_2 st_2;
+  test_st_3 st_3;
+}test_un;
+
+typedef union
+{
+  test_un un;
+  struct
+    {
+      unsigned int v1;
+      unsigned int v2;
+      unsigned int v3;
+      unsigned int v4;
+    }values;
+} read_un;
+
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_un);
+
+int
+main (void)
+{
+  read_un r;
+  foo_ns f;
+
+  f = (foo_ns) 0x200000;
+  r.values.v1 = 0xFFFFFFFF;
+  r.values.v2 = 0xFFFFFFFF;
+  r.values.v3 = 0xFFFFFFFF;
+
+  f (r.un);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "mov\tip, r4" } } */
+/* { dg-final { scan-assembler "movw\tr4, #8191" } } */
+/* { dg-final { scan-assembler "movt\tr4, 63" } } */
+/* { dg-final { scan-assembler "ands\tr0, r4" } } */
+/* { dg-final { scan-assembler "movw\tr4, #511" } } */
+/* { dg-final { scan-assembler "movt\tr4, 65535" } } */
+/* { dg-final { scan-assembler "ands\tr1, r4" } } */
+/* { dg-final { scan-assembler "movw\tr4, #65535" } } */
+/* { dg-final { scan-assembler "movt\tr4, 31" } } */
+/* { dg-final { scan-assembler "ands\tr2, r4" } } */
+/* { dg-final { scan-assembler "mov\tr4, ip" } } */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "movs\tr3, r4" } } */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-14.c b/gcc/testsuite/gcc.target/arm/cmse/cmse-14.c
new file mode 100644
index 0000000..701e9ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-14.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
+
+
+int __attribute__ ((cmse_nonsecure_call)) (*bar) (void);
+
+int foo (void)
+{
+  return bar ();
+}
+
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
+/* { dg-final { scan-assembler-not "b\[^ y\n\]*\\s+bar" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-9.c b/gcc/testsuite/gcc.target/arm/cmse/cmse-9.c
index 1d97f0e..9e81e30 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/cmse-9.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-9.c
@@ -2,11 +2,19 @@
 /* { dg-skip-if "Testing exclusion of -mcmse" { arm-*-* } { "-mcmse" } { "" } }  */
 
 
+void __attribute__ ((cmse_nonsecure_call)) (*bar) (int); /* { dg-warning "attribute ignored without -mcmse option" } */
+typedef void __attribute__ ((cmse_nonsecure_call)) baz (int); /* { dg-warning "attribute ignored without -mcmse option" } */
+
 int __attribute__ ((cmse_nonsecure_entry))
-foo (int a)
+foo (int a, baz b)
 { /* { dg-warning "attribute ignored without -mcmse option" } */
+  bar (a);
+  b (a);
   return a + 1;
 }
 
+/* { dg-final { scan-assembler-not "bxns" } } */
+/* { dg-final { scan-assembler-not "blxns" } } */
+/* { dg-final { scan-assembler-not "bl\t__gnu_cmse_nonsecure_call" } } */
 /* { dg-final { scan-assembler "foo:" } } */
 /* { dg-final { scan-assembler-not "__acle_se_foo:" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse.exp b/gcc/testsuite/gcc.target/arm/cmse/cmse.exp
index 38f1841..66a8b7d 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/cmse.exp
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse.exp
@@ -50,6 +50,8 @@ if {[check_effective_target_arm_arch_v8m_base_ok]} then {
 }
 
 if {[check_effective_target_arm_arch_v8m_main_ok]} then {
+    gcc-dg-runtest [lsort [glob $srcdir/$subdir/mainline/*.c]] \
+	    "" $DEFAULT_CFLAGS
     # Mainline -mfloat-abi=soft
     gcc-dg-runtest [lsort [glob $srcdir/$subdir/mainline/soft/*.c]] \
 	    "-mfloat-abi=soft" $DEFAULT_CFLAGS
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-4.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-4.c
new file mode 100644
index 0000000..c3b1396
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-4.c
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
+
+typedef struct
+{
+  unsigned char a;
+  unsigned int b:5;
+  unsigned int c:11, :0, d:8;
+  struct { unsigned int ee:2; } e;
+} test_st;
+
+typedef union
+{
+  test_st st;
+  struct
+    {
+      unsigned int v1;
+      unsigned int v2;
+      unsigned int v3;
+      unsigned int v4;
+    }values;
+} read_st;
+
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
+
+extern void foo (test_st st);
+
+int
+main (void)
+{
+  read_st r;
+  foo_ns f;
+
+  f = (foo_ns) 0x200000;
+  r.values.v1 = 0xFFFFFFFF;
+  r.values.v2 = 0xFFFFFFFF;
+  r.values.v3 = 0xFFFFFFFF;
+  r.values.v4 = 0xFFFFFFFF;
+
+  f (r.st);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "movw\tip, #65535" } } */
+/* { dg-final { scan-assembler "movt\tip, 255" } } */
+/* { dg-final { scan-assembler "and\tr0, r0, ip" } } */
+/* { dg-final { scan-assembler "mov\tip, #255" } } */
+/* { dg-final { scan-assembler "and\tr1, r1, ip" } } */
+/* { dg-final { scan-assembler "mov\tip, #3" } } */
+/* { dg-final { scan-assembler "and\tr2, r2, ip" } } */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "mov\tr3, r4" } } */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-5.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-5.c
new file mode 100644
index 0000000..0d02904
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-5.c
@@ -0,0 +1,51 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
+
+typedef struct
+{
+  unsigned char	  a;
+  unsigned short  b :5;
+  unsigned char	  c;
+  unsigned short  d :11;
+} test_st;
+
+typedef union
+{
+  test_st st;
+  struct
+    {
+      unsigned int v1;
+      unsigned int v2;
+      unsigned int v3;
+      unsigned int v4;
+    }values;
+} read_st;
+
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
+
+int
+main (void)
+{
+  read_st r;
+  foo_ns f;
+
+  f = (foo_ns) 0x200000;
+  r.values.v1 = 0xFFFFFFFF;
+  r.values.v2 = 0xFFFFFFFF;
+
+  f (r.st);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "movw\tip, #8191" } } */
+/* { dg-final { scan-assembler "movt\tip, 255" } } */
+/* { dg-final { scan-assembler "and\tr0, r0, ip" } } */
+/* { dg-final { scan-assembler "movw\tip, #2047" } } */
+/* { dg-final { scan-assembler "and\tr1, r1, ip" } } */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "mov\tr2, r4" } } */
+/* { dg-final { scan-assembler "mov\tr3, r4" } } */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-6.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-6.c
new file mode 100644
index 0000000..005515a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-6.c
@@ -0,0 +1,61 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
+
+typedef struct
+{
+  unsigned char a;
+  unsigned int	b : 3;
+  unsigned int	c : 14;
+  unsigned int	d : 1;
+  struct {
+      unsigned int    ee  : 2;
+      unsigned short  ff  : 15;
+  } e;
+  unsigned char	g : 1;
+  unsigned char	  : 4;
+  unsigned char	h : 3;
+} test_st;
+
+typedef union
+{
+  test_st st;
+  struct
+    {
+      unsigned int v1;
+      unsigned int v2;
+      unsigned int v3;
+      unsigned int v4;
+    }values;
+} read_st;
+
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
+
+int
+main (void)
+{
+  read_st r;
+  foo_ns f;
+
+  f = (foo_ns) 0x200000;
+  r.values.v1 = 0xFFFFFFFF;
+  r.values.v2 = 0xFFFFFFFF;
+  r.values.v3 = 0xFFFFFFFF;
+  r.values.v4 = 0xFFFFFFFF;
+
+  f (r.st);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "movw\tip, #65535" } } */
+/* { dg-final { scan-assembler "movt\tip, 1023" } } */
+/* { dg-final { scan-assembler "and\tr0, r0, ip" } } */
+/* { dg-final { scan-assembler "mov\tip, #3" } } */
+/* { dg-final { scan-assembler "movt\tip, 32767" } } */
+/* { dg-final { scan-assembler "and\tr1, r1, ip" } } */
+/* { dg-final { scan-assembler "mov\tip, #255" } } */
+/* { dg-final { scan-assembler "and\tr2, r2, ip" } } */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "mov\tr3, r4" } } */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-7.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-7.c
new file mode 100644
index 0000000..6dd218e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-7.c
@@ -0,0 +1,52 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
+
+typedef struct
+{
+  unsigned char	  a;
+  unsigned short  b :5;
+  unsigned char	  c;
+  unsigned short  d :11;
+} test_st;
+
+typedef union
+{
+  test_st st;
+  struct
+    {
+      unsigned int v1;
+      unsigned int v2;
+      unsigned int v3;
+      unsigned int v4;
+    }values;
+} read_st;
+
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
+
+int
+main (void)
+{
+  read_st r;
+  foo_ns f;
+
+  f = (foo_ns) 0x200000;
+  r.values.v1 = 0xFFFFFFFF;
+  r.values.v2 = 0xFFFFFFFF;
+
+  f (r.st);
+  return 0;
+}
+
+
+/* { dg-final { scan-assembler "movw\tip, #8191" } } */
+/* { dg-final { scan-assembler "movt\tip, 255" } } */
+/* { dg-final { scan-assembler "and\tr0, r0, ip" } } */
+/* { dg-final { scan-assembler "movw\tip, #2047" } } */
+/* { dg-final { scan-assembler "and\tr1, r1, ip" } } */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "mov\tr2, r4" } } */
+/* { dg-final { scan-assembler "mov\tr3, r4" } } */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-8.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-8.c
new file mode 100644
index 0000000..c833bcb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-8.c
@@ -0,0 +1,55 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
+
+typedef struct
+{
+  unsigned char	  a;
+  unsigned int	    :0;
+  unsigned int	  b :1;
+  unsigned short    :0;
+  unsigned short  c;
+  unsigned int	    :0;
+  unsigned int	  d :21;
+} test_st;
+
+typedef union
+{
+  test_st st;
+  struct
+    {
+      unsigned int v1;
+      unsigned int v2;
+      unsigned int v3;
+      unsigned int v4;
+    }values;
+} read_st;
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
+
+int
+main (void)
+{
+  read_st r;
+  foo_ns f;
+
+  f = (foo_ns) 0x200000;
+  r.values.v1 = 0xFFFFFFFF;
+  r.values.v2 = 0xFFFFFFFF;
+  r.values.v3 = 0xFFFFFFFF;
+
+  f (r.st);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "mov\tip, #255" } } */
+/* { dg-final { scan-assembler "and\tr0, r0, ip" } } */
+/* { dg-final { scan-assembler "mov\tip, #1" } } */
+/* { dg-final { scan-assembler "movt\tip, 65535" } } */
+/* { dg-final { scan-assembler "and\tr1, r1, ip" } } */
+/* { dg-final { scan-assembler "movw\tip, #65535" } } */
+/* { dg-final { scan-assembler "movt\tip, 31" } } */
+/* { dg-final { scan-assembler "and\tr2, r2, ip" } } */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "mov\tr3, r4" } } */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-9.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-9.c
new file mode 100644
index 0000000..d6e4cdb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-9.c
@@ -0,0 +1,54 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
+
+typedef struct
+{
+  char a:3;
+} test_st3;
+
+typedef struct
+{
+  char a:3;
+} test_st2;
+
+typedef struct
+{
+  test_st2 st2;
+  test_st3 st3;
+} test_st;
+
+typedef union
+{
+  test_st st;
+  struct
+    {
+      unsigned int v1;
+      unsigned int v2;
+      unsigned int v3;
+      unsigned int v4;
+    }values;
+} read_st;
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st);
+
+int
+main (void)
+{
+  read_st r;
+  foo_ns f;
+
+  f = (foo_ns) 0x200000;
+  r.values.v1 = 0xFFFFFFFF;
+
+  f (r.st);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "movw\tip, #1799" } } */
+/* { dg-final { scan-assembler "and\tr0, r0, ip" } } */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "mov\tr1, r4" } } */
+/* { dg-final { scan-assembler "mov\tr2, r4" } } */
+/* { dg-final { scan-assembler "mov\tr3, r4" } } */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-and-union-1.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-and-union-1.c
new file mode 100644
index 0000000..e139ba6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/bitfield-and-union-1.c
@@ -0,0 +1,94 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
+
+typedef struct
+{
+  unsigned short a :11;
+} test_st_4;
+
+typedef union
+{
+  char	      a;
+  test_st_4 st4;
+}test_un_2;
+
+typedef struct
+{
+  unsigned char	  a;
+  unsigned int	    :0;
+  unsigned int	  b :1;
+  unsigned short    :0;
+  unsigned short  c;
+  unsigned int	    :0;
+  unsigned int	  d :21;
+} test_st_3;
+
+typedef struct
+{
+  unsigned char	  a :3;
+  unsigned int	  b :13;
+  test_un_2	  un2;
+} test_st_2;
+
+typedef union
+{
+  test_st_2 st2;
+  test_st_3 st3;
+}test_un_1;
+
+typedef struct
+{
+  unsigned char	  a :2;
+  unsigned char	    :0;
+  unsigned short  b :5;
+  unsigned char	    :0;
+  unsigned char	  c :4;
+  test_un_1	  un1;
+} test_st_1;
+
+typedef union
+{
+  test_st_1 st1;
+  struct
+    {
+      unsigned int v1;
+      unsigned int v2;
+      unsigned int v3;
+      unsigned int v4;
+    }values;
+} read_st_1;
+
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_st_1);
+
+int
+main (void)
+{
+  read_st_1 r;
+  foo_ns f;
+
+  f = (foo_ns) 0x200000;
+  r.values.v1 = 0xFFFFFFFF;
+  r.values.v2 = 0xFFFFFFFF;
+  r.values.v3 = 0xFFFFFFFF;
+  r.values.v4 = 0xFFFFFFFF;
+
+  f (r.st1);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "movw\tip, #7939" } } */
+/* { dg-final { scan-assembler "movt\tip, 15" } } */
+/* { dg-final { scan-assembler "and\tr0, r0, ip" } } */
+/* { dg-final { scan-assembler "movw\tip, #65535" } } */
+/* { dg-final { scan-assembler "movt\tip, 2047" } } */
+/* { dg-final { scan-assembler "and\tr1, r1, ip" } } */
+/* { dg-final { scan-assembler "mov\tip, #1" } } */
+/* { dg-final { scan-assembler "movt\tip, 65535" } } */
+/* { dg-final { scan-assembler "and\tr2, r2, ip" } } */
+/* { dg-final { scan-assembler "movw\tip, #65535" } } */
+/* { dg-final { scan-assembler "movt\tip, 31" } } */
+/* { dg-final { scan-assembler "and\tr3, r3, ip" } } */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-13.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-13.c
new file mode 100644
index 0000000..d90ad81
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-13.c
@@ -0,0 +1,43 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */
+/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */
+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-sp-d16" }  */
+
+
+int __attribute__ ((cmse_nonsecure_call)) (*bar) (float, double);
+
+int
+foo (int a)
+{
+  return bar (3.0f, 2.0) + a + 1;
+}
+
+/* Checks for saving and clearing prior to function call.  */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "mov\tr0, r4" } } */
+/* { dg-final { scan-assembler "mov\tr1, r4" } } */
+/* { dg-final { scan-assembler "mov\tr2, r4" } } */
+/* { dg-final { scan-assembler "mov\tr3, r4" } } */
+/* { dg-final { scan-assembler-not "vldr\.32\ts0, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts1, .L" } } */
+/* { dg-final { scan-assembler-not "vldr\.32\ts2, .L" } } */
+/* { dg-final { scan-assembler-not "vldr\.32\ts3, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts4, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts5, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts6, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts7, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts8, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts9, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts10, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts11, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts12, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts13, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts14, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts15, .L" } } */
+
+/* Now we check that we use the correct intrinsic to call.  */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-7.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-7.c
new file mode 100644
index 0000000..c047cd5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-7.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */
+/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */
+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-sp-d16" }  */
+
+int __attribute__ ((cmse_nonsecure_call)) (*bar) (void);
+
+int
+foo (int a)
+{
+  return bar () + a + 1;
+}
+
+/* Checks for saving and clearing prior to function call.  */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "mov\tr0, r4" } } */
+/* { dg-final { scan-assembler "mov\tr1, r4" } } */
+/* { dg-final { scan-assembler "mov\tr2, r4" } } */
+/* { dg-final { scan-assembler "mov\tr3, r4" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts0, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts1, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts2, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts3, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts4, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts5, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts6, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts7, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts8, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts9, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts10, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts11, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts12, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts13, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts14, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts15, .L" } } */
+
+/* Now we check that we use the correct intrinsic to call.  */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-8.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-8.c
new file mode 100644
index 0000000..20d2d4a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard-sp/cmse-8.c
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */
+/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */
+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-sp-d16" }  */
+
+int __attribute__ ((cmse_nonsecure_call)) (*bar) (double);
+
+int
+foo (int a)
+{
+  return bar (2.0) + a + 1;
+}
+
+/* Checks for saving and clearing prior to function call.  */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "mov\tr0, r4" } } */
+/* { dg-final { scan-assembler "mov\tr1, r4" } } */
+/* { dg-final { scan-assembler "mov\tr2, r4" } } */
+/* { dg-final { scan-assembler "mov\tr3, r4" } } */
+/* { dg-final { scan-assembler-not "vldr\.32\ts0, .L" } } */
+/* { dg-final { scan-assembler-not "vldr\.32\ts1, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts2, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts3, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts4, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts5, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts6, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts7, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts8, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts9, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts10, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts11, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts12, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts13, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts14, .L" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts15, .L" } } */
+
+/* Now we check that we use the correct intrinsic to call.  */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-13.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-13.c
new file mode 100644
index 0000000..0af586a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-13.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */
+/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */
+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-d16" }  */
+
+
+int __attribute__ ((cmse_nonsecure_call)) (*bar) (float, double);
+
+int
+foo (int a)
+{
+  return bar (3.0f, 2.0) + a + 1;
+}
+
+/* Checks for saving and clearing prior to function call.  */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "mov\tr0, r4" } } */
+/* { dg-final { scan-assembler "mov\tr1, r4" } } */
+/* { dg-final { scan-assembler "mov\tr2, r4" } } */
+/* { dg-final { scan-assembler "mov\tr3, r4" } } */
+/* { dg-final { scan-assembler "vldr\.32\ts1, .L" } } */
+/* { dg-final { scan-assembler-not "vldr\.64\td0, .L" } } */
+/* { dg-final { scan-assembler-not "vldr\.32\ts0, .L" } } */
+/* { dg-final { scan-assembler-not "vldr\.64\td1, .L" } } */
+/* { dg-final { scan-assembler-not "vldr\.32\ts2, .L" } } */
+/* { dg-final { scan-assembler-not "vldr\.32\ts3, .L" } } */
+/* { dg-final { scan-assembler "vldr\.64\td2, .L" } } */
+/* { dg-final { scan-assembler "vldr\.64\td3, .L" } } */
+/* { dg-final { scan-assembler "vldr\.64\td4, .L" } } */
+/* { dg-final { scan-assembler "vldr\.64\td5, .L" } } */
+/* { dg-final { scan-assembler "vldr\.64\td6, .L" } } */
+/* { dg-final { scan-assembler "vldr\.64\td7, .L" } } */
+
+/* Now we check that we use the correct intrinsic to call.  */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-7.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-7.c
new file mode 100644
index 0000000..a5c64fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-7.c
@@ -0,0 +1,34 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */
+/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */
+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-d16" }  */
+
+int __attribute__ ((cmse_nonsecure_call)) (*bar) (void);
+
+int
+foo (int a)
+{
+  return bar () + a + 1;
+}
+
+/* Checks for saving and clearing prior to function call.  */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "mov\tr0, r4" } } */
+/* { dg-final { scan-assembler "mov\tr1, r4" } } */
+/* { dg-final { scan-assembler "mov\tr2, r4" } } */
+/* { dg-final { scan-assembler "mov\tr3, r4" } } */
+/* { dg-final { scan-assembler "vldr\.64\td0, .L" } } */
+/* { dg-final { scan-assembler "vldr\.64\td1, .L" } } */
+/* { dg-final { scan-assembler "vldr\.64\td2, .L" } } */
+/* { dg-final { scan-assembler "vldr\.64\td3, .L" } } */
+/* { dg-final { scan-assembler "vldr\.64\td4, .L" } } */
+/* { dg-final { scan-assembler "vldr\.64\td5, .L" } } */
+/* { dg-final { scan-assembler "vldr\.64\td6, .L" } } */
+/* { dg-final { scan-assembler "vldr\.64\td7, .L" } } */
+
+/* Now we check that we use the correct intrinsic to call.  */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-8.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-8.c
new file mode 100644
index 0000000..5e041b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/hard/cmse-8.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=softfp } {""} } */
+/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */
+/* { dg-options "-mcmse -mfloat-abi=hard -mfpu=fpv5-d16" }  */
+
+int __attribute__ ((cmse_nonsecure_call)) (*bar) (double);
+
+int
+foo (int a)
+{
+  return bar (2.0) + a + 1;
+}
+
+/* Checks for saving and clearing prior to function call.  */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "mov\tr0, r4" } } */
+/* { dg-final { scan-assembler "mov\tr1, r4" } } */
+/* { dg-final { scan-assembler "mov\tr2, r4" } } */
+/* { dg-final { scan-assembler "mov\tr3, r4" } } */
+/* { dg-final { scan-assembler-not "vldr\.64\td0, .L" } } */
+/* { dg-final { scan-assembler "vldr\.64\td1, .L" } } */
+/* { dg-final { scan-assembler "vldr\.64\td2, .L" } } */
+/* { dg-final { scan-assembler "vldr\.64\td3, .L" } } */
+/* { dg-final { scan-assembler "vldr\.64\td4, .L" } } */
+/* { dg-final { scan-assembler "vldr\.64\td5, .L" } } */
+/* { dg-final { scan-assembler "vldr\.64\td6, .L" } } */
+/* { dg-final { scan-assembler "vldr\.64\td7, .L" } } */
+
+/* Now we check that we use the correct intrinsic to call.  */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-13.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-13.c
new file mode 100644
index 0000000..dbbd262
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-13.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=hard" -mfloat-abi=softfp } {""} } */
+/* { dg-options "-mcmse -mfloat-abi=soft" }  */
+
+int __attribute__ ((cmse_nonsecure_call)) (*bar) (float, double);
+
+int
+foo (int a)
+{
+  return bar (1.0f, 2.0) + a + 1;
+}
+
+/* Checks for saving and clearing prior to function call.  */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler-not "mov\tr0, r4" } } */
+/* { dg-final { scan-assembler "mov\tr1, r4" } } */
+/* { dg-final { scan-assembler-not "mov\tr2, r4" } } */
+/* { dg-final { scan-assembler-not "mov\tr3, r4" } } */
+/* { dg-final { scan-assembler-not "vmov" } } */
+/* { dg-final { scan-assembler-not "vmsr" } } */
+
+/* Now we check that we use the correct intrinsic to call.  */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-7.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-7.c
new file mode 100644
index 0000000..e335684
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-7.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=hard" -mfloat-abi=softfp } {""} } */
+/* { dg-options "-mcmse -mfloat-abi=soft" }  */
+
+int __attribute__ ((cmse_nonsecure_call)) (*bar) (void);
+
+int
+foo (int a)
+{
+  return bar () + a + 1;
+}
+
+/* Checks for saving and clearing prior to function call.  */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "mov\tr0, r4" } } */
+/* { dg-final { scan-assembler "mov\tr1, r4" } } */
+/* { dg-final { scan-assembler "mov\tr2, r4" } } */
+/* { dg-final { scan-assembler "mov\tr3, r4" } } */
+/* { dg-final { scan-assembler-not "vmov" } } */
+/* { dg-final { scan-assembler-not "vmsr" } } */
+
+/* Now we check that we use the correct intrinsic to call.  */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-8.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-8.c
new file mode 100644
index 0000000..024a12e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/soft/cmse-8.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=hard" -mfloat-abi=softfp } {""} } */
+/* { dg-options "-mcmse -mfloat-abi=soft" }  */
+
+int __attribute__ ((cmse_nonsecure_call)) (*bar) (double);
+
+int
+foo (int a)
+{
+  return bar (2.0) + a + 1;
+}
+
+/* Checks for saving and clearing prior to function call.  */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler-not "mov\tr0, r4" } } */
+/* { dg-final { scan-assembler-not "mov\tr1, r4" } } */
+/* { dg-final { scan-assembler "mov\tr2, r4" } } */
+/* { dg-final { scan-assembler "mov\tr3, r4" } } */
+/* { dg-final { scan-assembler-not "vmov" } } */
+/* { dg-final { scan-assembler-not "vmsr" } } */
+
+/* Now we check that we use the correct intrinsic to call.  */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-7.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-7.c
new file mode 100644
index 0000000..fb195eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-7.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=hard } {""} } */
+/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */
+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-sp-d16" }  */
+
+int __attribute__ ((cmse_nonsecure_call)) (*bar) (void);
+
+int
+foo (int a)
+{
+  return bar () + a + 1;
+}
+
+/* Checks for saving and clearing prior to function call.  */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "mov\tr0, r4" } } */
+/* { dg-final { scan-assembler "mov\tr1, r4" } } */
+/* { dg-final { scan-assembler "mov\tr2, r4" } } */
+/* { dg-final { scan-assembler "mov\tr3, r4" } } */
+
+/* Now we check that we use the correct intrinsic to call.  */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-8.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-8.c
new file mode 100644
index 0000000..22ed3f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp-sp/cmse-8.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=hard } {""} } */
+/* { dg-skip-if "Skip these if testing double precision" {*-*-*} {"-mfpu=fpv[4-5]-d16"} {""} } */
+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-sp-d16" }  */
+
+int __attribute__ ((cmse_nonsecure_call)) (*bar) (double);
+
+int
+foo (int a)
+{
+  return bar (2.0) + a + 1;
+}
+
+/* Checks for saving and clearing prior to function call.  */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler-not "mov\tr0, r4" } } */
+/* { dg-final { scan-assembler-not "mov\tr1, r4" } } */
+/* { dg-final { scan-assembler "mov\tr2, r4" } } */
+/* { dg-final { scan-assembler "mov\tr3, r4" } } */
+
+/* Now we check that we use the correct intrinsic to call.  */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-13.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-13.c
new file mode 100644
index 0000000..9634065
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-13.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=hard } {""} } */
+/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */
+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-d16" }  */
+
+int __attribute__ ((cmse_nonsecure_call)) (*bar) (float, double);
+
+int
+foo (int a)
+{
+  return bar (1.0f, 2.0) + a + 1;
+}
+
+/* Checks for saving and clearing prior to function call.  */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler-not "mov\tr0, r4" } } */
+/* { dg-final { scan-assembler "\n\tmov\tr1, r4" } } */
+/* { dg-final { scan-assembler-not "\n\tmov\tr2, r4\n\tmov\tr3, r4" } } */
+
+/* Now we check that we use the correct intrinsic to call.  */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-7.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-7.c
new file mode 100644
index 0000000..04f8466
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-7.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=hard } {""} } */
+/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */
+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-d16" }  */
+
+int __attribute__ ((cmse_nonsecure_call)) (*bar) (void);
+
+int
+foo (int a)
+{
+  return bar () + a + 1;
+}
+
+/* Checks for saving and clearing prior to function call.  */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "mov\tr0, r4" } } */
+/* { dg-final { scan-assembler "mov\tr1, r4" } } */
+/* { dg-final { scan-assembler "mov\tr2, r4" } } */
+/* { dg-final { scan-assembler "mov\tr3, r4" } } */
+
+/* Now we check that we use the correct intrinsic to call.  */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-8.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-8.c
new file mode 100644
index 0000000..ffe94de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/softfp/cmse-8.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_arch_v8m_main_ok } */
+/* { dg-add-options arm_arch_v8m_main } */
+/* { dg-skip-if "Do not combine float-abi= hard | soft | softfp" {*-*-*} {"-mfloat-abi=soft" -mfloat-abi=hard } {""} } */
+/* { dg-skip-if "Skip these if testing single precision" {*-*-*} {"-mfpu=*-sp-*"} {""} } */
+/* { dg-options "-mcmse -mfloat-abi=softfp -mfpu=fpv5-d16" }  */
+
+int __attribute__ ((cmse_nonsecure_call)) (*bar) (double);
+
+int
+foo (int a)
+{
+  return bar (2.0) + a + 1;
+}
+
+/* Checks for saving and clearing prior to function call.  */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler-not "mov\tr0, r4" } } */
+/* { dg-final { scan-assembler-not "mov\tr1, r4" } } */
+/* { dg-final { scan-assembler "mov\tr2, r4" } } */
+/* { dg-final { scan-assembler "mov\tr3, r4" } } */
+
+/* Now we check that we use the correct intrinsic to call.  */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/union-1.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/union-1.c
new file mode 100644
index 0000000..1fc846c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/union-1.c
@@ -0,0 +1,69 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
+
+typedef struct
+{
+  unsigned char	  a :2;
+  unsigned char	    :0;
+  unsigned short  b :5;
+  unsigned char	    :0;
+  unsigned short  c :3;
+  unsigned char	    :0;
+  unsigned int	  d :9;
+} test_st_1;
+
+typedef struct
+{
+  unsigned short  a :7;
+  unsigned char	    :0;
+  unsigned char	  b :1;
+  unsigned char	    :0;
+  unsigned short  c :6;
+} test_st_2;
+
+typedef union
+{
+  test_st_1 st_1;
+  test_st_2 st_2;
+}test_un;
+
+typedef union
+{
+  test_un un;
+  struct
+    {
+      unsigned int v1;
+      unsigned int v2;
+      unsigned int v3;
+      unsigned int v4;
+    }values;
+} read_un;
+
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_un);
+
+int
+main (void)
+{
+  read_un r;
+  foo_ns f;
+
+  f = (foo_ns) 0x200000;
+  r.values.v1 = 0xFFFFFFFF;
+  r.values.v2 = 0xFFFFFFFF;
+
+  f (r.un);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "movw\tip, #8063" } } */
+/* { dg-final { scan-assembler "movt\tip, 63" } } */
+/* { dg-final { scan-assembler "and\tr0, r0, ip" } } */
+/* { dg-final { scan-assembler "movw\tip, #511" } } */
+/* { dg-final { scan-assembler "and\tr1, r1, ip" } } */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "mov\tr2, r4" } } */
+/* { dg-final { scan-assembler "mov\tr3, r4" } } */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
+
diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/union-2.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/union-2.c
new file mode 100644
index 0000000..420d0f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/union-2.c
@@ -0,0 +1,84 @@
+/* { dg-do compile } */
+/* { dg-options "-mcmse" } */
+
+typedef struct
+{
+  unsigned char	  a :2;
+  unsigned char	    :0;
+  unsigned short  b :5;
+  unsigned char	    :0;
+  unsigned short  c :3;
+  unsigned char	    :0;
+  unsigned int	  d :9;
+} test_st_1;
+
+typedef struct
+{
+  unsigned short  a :7;
+  unsigned char	    :0;
+  unsigned char	  b :1;
+  unsigned char	    :0;
+  unsigned short  c :6;
+} test_st_2;
+
+typedef struct
+{
+  unsigned char	  a;
+  unsigned int	    :0;
+  unsigned int	  b :1;
+  unsigned short    :0;
+  unsigned short  c;
+  unsigned int	    :0;
+  unsigned int	  d :21;
+} test_st_3;
+
+typedef union
+{
+  test_st_1 st_1;
+  test_st_2 st_2;
+  test_st_3 st_3;
+}test_un;
+
+typedef union
+{
+  test_un un;
+  struct
+    {
+      unsigned int v1;
+      unsigned int v2;
+      unsigned int v3;
+      unsigned int v4;
+    }values;
+} read_un;
+
+
+typedef void __attribute__ ((cmse_nonsecure_call)) (*foo_ns) (test_un);
+
+int
+main (void)
+{
+  read_un r;
+  foo_ns f;
+
+  f = (foo_ns) 0x200000;
+  r.values.v1 = 0xFFFFFFFF;
+  r.values.v2 = 0xFFFFFFFF;
+  r.values.v3 = 0xFFFFFFFF;
+
+  f (r.un);
+  return 0;
+}
+
+/* { dg-final { scan-assembler "movw\tip, #8191" } } */
+/* { dg-final { scan-assembler "movt\tip, 63" } } */
+/* { dg-final { scan-assembler "and\tr0, r0, ip" } } */
+/* { dg-final { scan-assembler "movw\tip, #511" } } */
+/* { dg-final { scan-assembler "movt\tip, 65535" } } */
+/* { dg-final { scan-assembler "and\tr1, r1, ip" } } */
+/* { dg-final { scan-assembler "movw\tip, #65535" } } */
+/* { dg-final { scan-assembler "movt\tip, 31" } } */
+/* { dg-final { scan-assembler "and\tr2, r2, ip" } } */
+/* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */
+/* { dg-final { scan-assembler "mov\tr3, r4" } } */
+/* { dg-final { scan-assembler "bl\t__gnu_cmse_nonsecure_call" } } */
diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index f323f43..6627d26 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,6 +1,12 @@
 2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
 	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
+	* config/arm/cmse_nonsecure_call.S: New.
+	* config/arm/t-arm: Compile cmse_nonsecure_call.S.
+
+2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
+	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
+
 	* config/arm/t-arm (HAVE_CMSE): New.
 	* config/arm/cmse.c: New.
 
diff --git a/libgcc/config/arm/cmse_nonsecure_call.S b/libgcc/config/arm/cmse_nonsecure_call.S
new file mode 100644
index 0000000..68b6a1c
--- /dev/null
+++ b/libgcc/config/arm/cmse_nonsecure_call.S
@@ -0,0 +1,131 @@
+/* CMSE wrapper function used to save, clear and restore callee saved registers
+   for cmse_nonsecure_call's.
+
+   Copyright (C) 2016 Free Software Foundation, Inc.
+   Contributed by ARM Ltd.
+
+   This file is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by the
+   Free Software Foundation; either version 3, or (at your option) any
+   later version.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+.syntax unified
+.thumb
+.global __gnu_cmse_nonsecure_call
+__gnu_cmse_nonsecure_call:
+#if defined(__ARM_ARCH_8M_MAIN__)
+push	    {r5-r11,lr}
+mov	    r7, r4
+mov	    r8, r4
+mov	    r9, r4
+mov	    r10, r4
+mov	    r11, r4
+mov	    ip, r4
+
+/* Save and clear callee-saved registers only if we are dealing with hard float
+   ABI.  The unused caller-saved registers have already been cleared by GCC
+   generated code.  */
+#ifdef __ARM_PCS_VFP
+vpush.f64   {d8-d15}
+mov	    r5, #0
+vmov	    d8, r5, r5
+#if __ARM_FP & 0x04
+vmov	    s18, s19, r5, r5
+vmov	    s20, s21, r5, r5
+vmov	    s22, s23, r5, r5
+vmov	    s24, s25, r5, r5
+vmov	    s26, s27, r5, r5
+vmov	    s28, s29, r5, r5
+vmov	    s30, s31, r5, r5
+#elif __ARM_FP & 0x08
+vmov.f64    d9, d8
+vmov.f64    d10, d8
+vmov.f64    d11, d8
+vmov.f64    d12, d8
+vmov.f64    d13, d8
+vmov.f64    d14, d8
+vmov.f64    d15, d8
+#else
+#error "Half precision implementation not supported."
+#endif
+/* Clear the cumulative exception-status bits (0-4,7) and the
+   condition code bits (28-31) of the FPSCR.  */
+vmrs	    r5, fpscr
+movw	    r6, #65376
+movt	    r6, #4095
+ands	    r5, r6
+vmsr	    fpscr, r5
+
+/* We are not dealing with hard float ABI, so we can safely use the vlstm and
+   vlldm instructions without needing to preserve the registers used for
+   argument passing.  */
+#else
+sub	    sp, sp, #0x88 /* Reserve stack space to save all floating point
+			     registers, including FPSCR.  */
+vlstm	    sp		  /* Lazy store and clearance of d0-d15 and FPSCR.  */
+#endif /* __ARM_PCS_VFP */
+
+/* Make sure to clear the 'GE' bits of the APSR register if 32-bit SIMD
+   instructions are available.  */
+#if defined(__ARM_FEATURE_SIMD32)
+msr	    APSR_nzcvqg, r4
+#else
+msr	    APSR_nzcvq, r4
+#endif
+
+mov	    r5, r4
+mov	    r6, r4
+blxns	    r4
+
+#ifdef __ARM_PCS_VFP
+vpop.f64    {d8-d15}
+#else
+vlldm	    sp		  /* Lazy restore of d0-d15 and FPSCR.  */
+add	    sp, sp, #0x88 /* Free space used to save floating point registers.  */
+#endif /* __ARM_PCS_VFP */
+
+pop	    {r5-r11, pc}
+
+#elif defined (__ARM_ARCH_8M_BASE__)
+push	    {r5-r7, lr}
+mov	    r5, r8
+mov	    r6, r9
+mov	    r7, r10
+push	    {r5-r7}
+mov	    r5, r11
+push	    {r5}
+mov	    r5, r4
+mov	    r6, r4
+mov	    r7, r4
+mov	    r8, r4
+mov	    r9, r4
+mov	    r10, r4
+mov	    r11, r4
+mov	    ip, r4
+msr	    APSR_nzcvq, r4
+blxns	    r4
+pop	    {r5}
+mov	    r11, r5
+pop	    {r5-r7}
+mov	    r10, r7
+mov	    r9, r6
+mov	    r8, r5
+pop	    {r5-r7, pc}
+
+#else
+#error "This should only be used for armv8-m base- and mainline."
+#endif
diff --git a/libgcc/config/arm/t-arm b/libgcc/config/arm/t-arm
index 5618143..9e85ac0 100644
--- a/libgcc/config/arm/t-arm
+++ b/libgcc/config/arm/t-arm
@@ -12,4 +12,6 @@ libgcc-objects += cmse.o cmse_nonsecure_call.o
 
 cmse.o: $(srcdir)/config/arm/cmse.c
 	$(gcc_compile) -c $(CMSE_OPTS) $<
+cmse_nonsecure_call.o: $(srcdir)/config/arm/cmse_nonsecure_call.S
+		       $(gcc_compile) -c $<
 endif
-- 
cgit v1.1


From 8261e476cb64cee8891fa676202d1f42decdcd14 Mon Sep 17 00:00:00 2001
From: Andre Vieira <andre.simoesdiasvieira@arm.com>
Date: Fri, 2 Dec 2016 15:34:36 +0000
Subject: Added support for ARMV8-M Security Extension cmse_nonsecure_caller
 intrinsic

    gcc/ChangeLog:
    2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
		Thomas Preud'homme  <thomas.preudhomme@arm.com>

	* config/arm/arm-builtins.c (arm_builtins): Define
	ARM_BUILTIN_CMSE_NONSECURE_CALLER.
	(bdesc_2arg): Add line for cmse_nonsecure_caller.
	(arm_init_builtins): Handle cmse_nonsecure_caller.
	(arm_expand_builtin): Likewise.
	* config/arm/arm_cmse.h (cmse_nonsecure_caller): New.

    gcc/testsuite/ChangeLog:
    2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
		Thomas Preud'homme  <thomas.preudhomme@arm.com>

	* gcc.target/arm/cmse/cmse-1.c: Add test for
	cmse_nonsecure_caller.


Co-Authored-By: Thomas Preud'homme <thomas.preudhomme@arm.com>

From-SVN: r243193
---
 gcc/ChangeLog                              | 10 ++++++++
 gcc/config/arm/arm-builtins.c              | 19 +++++++++++++++
 gcc/config/arm/arm_cmse.h                  |  7 ++++++
 gcc/doc/extend.texi                        |  1 +
 gcc/testsuite/ChangeLog                    |  6 +++++
 gcc/testsuite/gcc.target/arm/cmse/cmse-1.c | 39 ++++++++++++++++++++++++++++++
 6 files changed, 82 insertions(+)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 807d406..65443a1 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,6 +1,16 @@
 2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
 	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
+	* config/arm/arm-builtins.c (arm_builtins): Define
+	ARM_BUILTIN_CMSE_NONSECURE_CALLER.
+	(bdesc_2arg): Add line for cmse_nonsecure_caller.
+	(arm_init_builtins): Handle cmse_nonsecure_caller.
+	(arm_expand_builtin): Likewise.
+	* config/arm/arm_cmse.h (cmse_nonsecure_caller): New.
+
+2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
+	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
+
 	* config/arm/arm.c (detect_cmse_nonsecure_call): New.
 	(cmse_nonsecure_call_clear_caller_saved): New.
 	(arm_reorg): Use cmse_nonsecure_call_clear_caller_saved.
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 5ed38d1..1444420 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -528,6 +528,8 @@ enum arm_builtins
   ARM_BUILTIN_GET_FPSCR,
   ARM_BUILTIN_SET_FPSCR,
 
+  ARM_BUILTIN_CMSE_NONSECURE_CALLER,
+
 #undef CRYPTO1
 #undef CRYPTO2
 #undef CRYPTO3
@@ -1833,6 +1835,17 @@ arm_init_builtins (void)
 	= add_builtin_function ("__builtin_arm_stfscr", ftype_set_fpscr,
 				ARM_BUILTIN_SET_FPSCR, BUILT_IN_MD, NULL, NULL_TREE);
     }
+
+  if (use_cmse)
+    {
+      tree ftype_cmse_nonsecure_caller
+	= build_function_type_list (unsigned_type_node, NULL);
+      arm_builtin_decls[ARM_BUILTIN_CMSE_NONSECURE_CALLER]
+	= add_builtin_function ("__builtin_arm_cmse_nonsecure_caller",
+				ftype_cmse_nonsecure_caller,
+				ARM_BUILTIN_CMSE_NONSECURE_CALLER, BUILT_IN_MD,
+				NULL, NULL_TREE);
+    }
 }
 
 /* Return the ARM builtin for CODE.  */
@@ -2453,6 +2466,12 @@ arm_expand_builtin (tree exp,
       emit_insn (pat);
       return target;
 
+    case ARM_BUILTIN_CMSE_NONSECURE_CALLER:
+      target = gen_reg_rtx (SImode);
+      op0 = arm_return_addr (0, NULL_RTX);
+      emit_insn (gen_addsi3 (target, op0, const1_rtx));
+      return target;
+
     case ARM_BUILTIN_TEXTRMSB:
     case ARM_BUILTIN_TEXTRMUB:
     case ARM_BUILTIN_TEXTRMSH:
diff --git a/gcc/config/arm/arm_cmse.h b/gcc/config/arm/arm_cmse.h
index 894343b..82b58b1 100644
--- a/gcc/config/arm/arm_cmse.h
+++ b/gcc/config/arm/arm_cmse.h
@@ -163,6 +163,13 @@ __attribute__ ((__always_inline__))
 cmse_TTAT (void *__p)
 __CMSE_TT_ASM (at)
 
+/* FIXME: diagnose use outside cmse_nonsecure_entry functions.  */
+__extension__ static __inline int __attribute__ ((__always_inline__))
+cmse_nonsecure_caller (void)
+{
+  return __builtin_arm_cmse_nonsecure_caller ();
+}
+
 #define CMSE_AU_NONSECURE	2
 #define CMSE_MPU_NONSECURE	16
 #define CMSE_NONSECURE		18
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 0fa59ff..a8402e1 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -12666,6 +12666,7 @@ cmse_address_info_t cmse_TTAT_fptr (FPTR)
 void * cmse_check_address_range (void *, size_t, int)
 typeof(p) cmse_nsfptr_create (FPTR p)
 intptr_t cmse_is_nsfptr (FPTR)
+int cmse_nonsecure_caller (void)
 @end smallexample
 
 @node AVR Built-in Functions
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index e9786b7..7cb66d9 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,6 +1,12 @@
 2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
 	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
+	* gcc.target/arm/cmse/cmse-1.c: Add test for
+	cmse_nonsecure_caller.
+
+2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
+	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
+
 	* gcc.target/arm/cmse/cmse.exp: Run tests in mainline dir.
 	* gcc.target/arm/cmse/cmse-9.c: Added some extra tests.
 	* gcc.target/arm/cmse/cmse-14.c: New.
diff --git a/gcc/testsuite/gcc.target/arm/cmse/cmse-1.c b/gcc/testsuite/gcc.target/arm/cmse/cmse-1.c
index d5b9a2d..c13272e 100644
--- a/gcc/testsuite/gcc.target/arm/cmse/cmse-1.c
+++ b/gcc/testsuite/gcc.target/arm/cmse/cmse-1.c
@@ -65,3 +65,42 @@ int foo (char * p)
 /* { dg-final { scan-assembler-times "ttat " 2 } } */
 /* { dg-final { scan-assembler-times "bl.cmse_check_address_range" 7 } } */
 /* { dg-final { scan-assembler-not "cmse_check_pointed_object" } } */
+
+int __attribute__ ((cmse_nonsecure_entry))
+baz (void)
+{
+  return cmse_nonsecure_caller ();
+}
+
+typedef int __attribute__ ((cmse_nonsecure_call)) (int_nsfunc_t) (void);
+
+int default_callback (void)
+{
+  return 0;
+}
+
+int_nsfunc_t * fp = (int_nsfunc_t *) default_callback;
+
+void __attribute__ ((cmse_nonsecure_entry))
+qux (int_nsfunc_t * callback)
+{
+  fp = cmse_nsfptr_create (callback);
+}
+
+int call_callback (void)
+{
+  if (cmse_is_nsfptr (fp))
+      return fp ();
+  else
+    return default_callback ();
+}
+/* { dg-final { scan-assembler "baz:" } } */
+/* { dg-final { scan-assembler "__acle_se_baz:" } } */
+/* { dg-final { scan-assembler "qux:" } } */
+/* { dg-final { scan-assembler "__acle_se_qux:" } } */
+/* { dg-final { scan-assembler-not "\tcmse_nonsecure_caller" } } */
+/* { dg-final { scan-rtl-dump "and.*reg.*const_int 1" expand } } */
+/* { dg-final { scan-assembler "bic" } } */
+/* { dg-final { scan-assembler "push\t\{r4, r5, r6" } } */
+/* { dg-final { scan-assembler "msr\tAPSR_nzcvq" } } */
+/* { dg-final { scan-assembler-times "bl\\s+__gnu_cmse_nonsecure_call" 1 } } */
-- 
cgit v1.1


From eb61d07edaf05f36151bfe4382777eaa79bce4d9 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Fri, 2 Dec 2016 16:42:04 +0100
Subject: re PR target/78614 (ICE error: invalid rtl sharing found in the insn
 (verify_rtx_sharing) gcc/emit-rtl.c:2743)

	PR target/78614
	* rtl.c (copy_rtx): Don't clear used flag here.
	(shallow_copy_rtx_stat): Clear used flag here unless the rtx code
	is shareable.
	* simplify-rtx.c (simplify_replace_fn_rtx): When copying rtx with
	'E' in format, copy all vectors.
	* emit-rtl.c (copy_insn_1): Don't clear used flag here.
	* valtrack.c (cleanup_auto_inc_dec): Likewise.
	* config/rs6000/rs6000.c (rs6000_frame_related): Likewise.

From-SVN: r243194
---
 gcc/ChangeLog              | 12 ++++++++++++
 gcc/config/rs6000/rs6000.c |  1 -
 gcc/emit-rtl.c             |  4 ----
 gcc/rtl.c                  | 28 +++++++++++++++++++++++-----
 gcc/valtrack.c             |  4 ----
 5 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 65443a1..68d3588 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2016-12-02  Jakub Jelinek  <jakub@redhat.com>
+
+	PR target/78614
+	* rtl.c (copy_rtx): Don't clear used flag here.
+	(shallow_copy_rtx_stat): Clear used flag here unless code the rtx
+	is shareable.
+	* simplify-rtx.c (simplify_replace_fn_rtx): When copying rtx with
+	'E' in format, copy all vectors.
+	* emit-rtl.c (copy_insn_1): Don't clear used flag here.
+	* valtrack.c (cleanup_auto_inc_dec): Likewise.
+	* config/rs6000/rs6000.c (rs6000_frame_related): Likewise.
+
 2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
 	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 425a885..59bd3fe 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -27186,7 +27186,6 @@ rs6000_frame_related (rtx_insn *insn, rtx reg, HOST_WIDE_INT val,
     {
       pat = shallow_copy_rtx (pat);
       XVEC (pat, 0) = shallow_copy_rtvec (XVEC (pat, 0));
-      RTX_FLAG (pat, used) = 0;
 
       for (int i = 0; i < XVECLEN (pat, 0); i++)
 	if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
index d2ac88b..4650540 100644
--- a/gcc/emit-rtl.c
+++ b/gcc/emit-rtl.c
@@ -5552,10 +5552,6 @@ copy_insn_1 (rtx orig)
      us to explicitly document why we are *not* copying a flag.  */
   copy = shallow_copy_rtx (orig);
 
-  /* We do not copy the USED flag, which is used as a mark bit during
-     walks over the RTL.  */
-  RTX_FLAG (copy, used) = 0;
-
   /* We do not copy JUMP, CALL, or FRAME_RELATED for INSNs.  */
   if (INSN_P (orig))
     {
diff --git a/gcc/rtl.c b/gcc/rtl.c
index 3fac1931..0410f01 100644
--- a/gcc/rtl.c
+++ b/gcc/rtl.c
@@ -318,10 +318,6 @@ copy_rtx (rtx orig)
      us to explicitly document why we are *not* copying a flag.  */
   copy = shallow_copy_rtx (orig);
 
-  /* We do not copy the USED flag, which is used as a mark bit during
-     walks over the RTL.  */
-  RTX_FLAG (copy, used) = 0;
-
   format_ptr = GET_RTX_FORMAT (GET_CODE (copy));
 
   for (i = 0; i < GET_RTX_LENGTH (GET_CODE (copy)); i++)
@@ -367,7 +363,29 @@ shallow_copy_rtx_stat (const_rtx orig MEM_STAT_DECL)
 {
   const unsigned int size = rtx_size (orig);
   rtx const copy = ggc_alloc_rtx_def_stat (size PASS_MEM_STAT);
-  return (rtx) memcpy (copy, orig, size);
+  memcpy (copy, orig, size);
+  switch (GET_CODE (orig))
+    {
+      /* For RTX codes that copy_rtx_if_shared_1 considers shareable,
+	 the used flag is often used for other purposes.  */
+    case REG:
+    case DEBUG_EXPR:
+    case VALUE:
+    CASE_CONST_ANY:
+    case SYMBOL_REF:
+    case CODE_LABEL:
+    case PC:
+    case CC0:
+    case RETURN:
+    case SIMPLE_RETURN:
+    case SCRATCH:
+      break;
+    default:
+      /* For all other RTXes clear the used flag on the copy.  */
+      RTX_FLAG (copy, used) = 0;
+      break;
+    }
+  return copy;
 }
 
 /* Nonzero when we are generating CONCATs.  */
diff --git a/gcc/valtrack.c b/gcc/valtrack.c
index 9a1ae2d..002f49f 100644
--- a/gcc/valtrack.c
+++ b/gcc/valtrack.c
@@ -119,10 +119,6 @@ cleanup_auto_inc_dec (rtx src, machine_mode mem_mode ATTRIBUTE_UNUSED)
      us to explicitly document why we are *not* copying a flag.  */
   x = shallow_copy_rtx (x);
 
-  /* We do not copy the USED flag, which is used as a mark bit during
-     walks over the RTL.  */
-  RTX_FLAG (x, used) = 0;
-
   /* We do not copy FRAME_RELATED for INSNs.  */
   if (INSN_P (x))
     RTX_FLAG (x, frame_related) = 0;
-- 
cgit v1.1


From 17c69eff82d20174099bad6bbd67dbf5e76c39a5 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Fri, 2 Dec 2016 17:28:41 +0100
Subject: re PR target/70322 (STV doesn't optimize andn)

	PR target/70322
	* config/i386/i386.c (dimode_scalar_to_vector_candidate_p): Handle
	NOT.
	(dimode_scalar_chain::compute_convert_gain): Likewise.
	(dimode_scalar_chain::convert_insn): Likewise.
	* config/i386/i386.md (*one_cmpldi2_doubleword): New
	define_insn_and_split.
	(one_cmpl<mode>2): Use SWIM1248x iterator instead of SWIM.

	* gcc.target/i386/pr70322-1.c: New test.
	* gcc.target/i386/pr70322-2.c: New test.
	* gcc.target/i386/pr70322-3.c: New test.

From-SVN: r243195
---
 gcc/ChangeLog                             |  9 +++++++++
 gcc/config/i386/i386.c                    | 16 +++++++++++++++-
 gcc/config/i386/i386.md                   | 17 +++++++++++++++--
 gcc/testsuite/ChangeLog                   |  7 +++++++
 gcc/testsuite/gcc.target/i386/pr70322-1.c | 12 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr70322-2.c | 12 ++++++++++++
 gcc/testsuite/gcc.target/i386/pr70322-3.c | 13 +++++++++++++
 7 files changed, 83 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr70322-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr70322-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr70322-3.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 68d3588..33fd2eb 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,14 @@
 2016-12-02  Jakub Jelinek  <jakub@redhat.com>
 
+	PR target/70322
+	* config/i386/i386.c (dimode_scalar_to_vector_candidate_p): Handle
+	NOT.
+	(dimode_scalar_chain::compute_convert_gain): Likewise.
+	(dimode_scalar_chain::convert_insn): Likewise.
+	* config/i386/i386.md (*one_cmpldi2_doubleword): New
+	define_insn_and_split.
+	(one_cmpl<mode>2): Use SWIM1248x iterator instead of SWIM.
+
 	PR target/78614
 	* rtl.c (copy_rtx): Don't clear used flag here.
 	(shallow_copy_rtx_stat): Clear used flag here unless code the rtx
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 5678fa2..0bee09b 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2826,6 +2826,9 @@ dimode_scalar_to_vector_candidate_p (rtx_insn *insn)
 	return false;
       break;
 
+    case NOT:
+      break;
+
     case REG:
       return true;
 
@@ -2848,7 +2851,8 @@ dimode_scalar_to_vector_candidate_p (rtx_insn *insn)
 
   if ((GET_MODE (XEXP (src, 0)) != DImode
        && !CONST_INT_P (XEXP (src, 0)))
-      || (GET_MODE (XEXP (src, 1)) != DImode
+      || (GET_CODE (src) != NOT
+	  && GET_MODE (XEXP (src, 1)) != DImode
 	  && !CONST_INT_P (XEXP (src, 1))))
     return false;
 
@@ -3415,6 +3419,8 @@ dimode_scalar_chain::compute_convert_gain ()
 	  if (CONST_INT_P (XEXP (src, 1)))
 	    gain -= vector_const_cost (XEXP (src, 1));
 	}
+      else if (GET_CODE (src) == NOT)
+	gain += ix86_cost->add - COSTS_N_INSNS (1);
       else if (GET_CODE (src) == COMPARE)
 	{
 	  /* Assume comparison cost is the same.  */
@@ -3770,6 +3776,14 @@ dimode_scalar_chain::convert_insn (rtx_insn *insn)
       PUT_MODE (src, V2DImode);
       break;
 
+    case NOT:
+      src = XEXP (src, 0);
+      convert_op (&src, insn);
+      subreg = gen_reg_rtx (V2DImode);
+      emit_insn_before (gen_move_insn (subreg, CONSTM1_RTX (V2DImode)), insn);
+      src = gen_rtx_XOR (V2DImode, src, subreg);
+      break;
+
     case MEM:
       if (!REG_P (dst))
 	convert_op (&src, insn);
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 583d2bb..da7cb07 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -9312,9 +9312,22 @@
 
 ;; One complement instructions
 
+(define_insn_and_split "*one_cmpldi2_doubleword"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
+	(not:DI (match_operand:DI 1 "nonimmediate_operand" "0")))]
+  "!TARGET_64BIT && TARGET_STV && TARGET_SSE2
+   && ix86_unary_operator_ok (NOT, DImode, operands)"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(not:SI (match_dup 1)))
+   (set (match_dup 2)
+	(not:SI (match_dup 3)))]
+  "split_double_mode (DImode, &operands[0], 2, &operands[0], &operands[2]);")
+
 (define_expand "one_cmpl<mode>2"
-  [(set (match_operand:SWIM 0 "nonimmediate_operand")
-	(not:SWIM (match_operand:SWIM 1 "nonimmediate_operand")))]
+  [(set (match_operand:SWIM1248x 0 "nonimmediate_operand")
+	(not:SWIM1248x (match_operand:SWIM1248x 1 "nonimmediate_operand")))]
   ""
   "ix86_expand_unary_operator (NOT, <MODE>mode, operands); DONE;")
 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 7cb66d9..3a478b9 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2016-12-02  Jakub Jelinek  <jakub@redhat.com>
+
+	PR target/70322
+	* gcc.target/i386/pr70322-1.c: New test.
+	* gcc.target/i386/pr70322-2.c: New test.
+	* gcc.target/i386/pr70322-3.c: New test.
+
 2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
 	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
diff --git a/gcc/testsuite/gcc.target/i386/pr70322-1.c b/gcc/testsuite/gcc.target/i386/pr70322-1.c
new file mode 100644
index 0000000..bc10675
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70322-1.c
@@ -0,0 +1,12 @@
+/* PR target/70322 */
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -msse2 -mstv -mbmi" } */
+/* { dg-final { scan-assembler "pandn" } } */
+
+extern long long z;
+
+void
+foo (long long x, long long y)
+{
+  z = ~x & y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr70322-2.c b/gcc/testsuite/gcc.target/i386/pr70322-2.c
new file mode 100644
index 0000000..7c5d0be
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70322-2.c
@@ -0,0 +1,12 @@
+/* PR target/70322 */
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -msse2 -mstv -mno-bmi" } */
+/* { dg-final { scan-assembler "pandn" { xfail *-*-* } } } */
+
+extern long long z;
+
+void
+foo (long long x, long long y)
+{
+  z = ~x & y;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr70322-3.c b/gcc/testsuite/gcc.target/i386/pr70322-3.c
new file mode 100644
index 0000000..89a8da3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70322-3.c
@@ -0,0 +1,13 @@
+/* PR target/70322 */
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -msse2 -mstv" } */
+/* { dg-final { scan-assembler "pxor" } } */
+/* { dg-final { scan-assembler "por" } } */
+
+extern long long z;
+
+void
+foo (long long x, long long y)
+{
+  z = ~x | y;
+}
-- 
cgit v1.1


From d2c82d98de3100a10731d982bf3fe571dba03e13 Mon Sep 17 00:00:00 2001
From: Tadek Kijkowski <tkijkowski@gmail.com>
Date: Fri, 2 Dec 2016 16:34:28 +0000
Subject: Makefile.in (PREPROCESSOR_DEFINES): Add a level of indirection for
 several include directories that may be relative...

	* Makefile.in (PREPROCESSOR_DEFINES): Add a level of indirection
	for several include directories that may be relative to sysroot.
	* config/i386/x-mingw32 (gplus_includedir): Define.
	(gplus_tool_includedir, gplus_backward_include_dir): Likewise.
	(native_system_includedir): Likewise.
	* config/i386/mingw32.h (STANDARD_STARTFILE_PREFIX_1): Do not
	override if TARGET_SYSTEM_ROOT is defined.
	(NATIVE_SYSTEM_HEADER_DIR): Likewise.

From-SVN: r243196
---
 gcc/ChangeLog             | 11 +++++++++++
 gcc/Makefile.in           | 17 +++++++++++++----
 gcc/config/i386/mingw32.h |  4 ++++
 gcc/config/i386/x-mingw32 | 34 ++++++++++++++++++++++++++++++++++
 4 files changed, 62 insertions(+), 4 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 33fd2eb..c984483 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2016-12-02  Tadek Kijkowski  <tkijkowski@gmail.com>
+
+	* Makefile.in (PREPROCESSOR_DEFINES): Add a level of indirection
+	for several include directories that may be relative to sysroot.
+	* config/i386/x-mingw32 (gplus_includedir): Define.
+	(gplus_tool_includedir, gplus_backward_include_dir): Likewise.
+	(native_system_includedir): Likewise.
+	* config/i386/mingw32.h (STANDARD_STARTFILE_PREFIX_1): Do not
+	override if TARGET_SYSTEM_ROOT is defined.
+	(NATIVE_SYSTEM_HEADER_DIR): Likewise.
+
 2016-12-02  Jakub Jelinek  <jakub@redhat.com>
 
 	PR target/70322
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index df4f64f..c7b1eaf 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -675,6 +675,15 @@ objext = .o
 exeext = @host_exeext@
 build_exeext = @build_exeext@
 
+
+# This allows overriding include paths in host specific Makefile
+# (config/i386/x-mingw32 overrides those variables and local_includedir).
+gplusplus_includedir = $(gcc_gxx_include_dir)
+gplusplus_tool_includedir = $(gcc_gxx_include_dir)/$(target_noncanonical)
+gplusplus_backward_includedir = $(gcc_gxx_include_dir)/backward
+native_system_includedir = $(NATIVE_SYSTEM_HEADER_DIR)
+
+
 # Directory in which to put man pages.
 mandir = @mandir@
 man1dir = $(mandir)/man1
@@ -2768,14 +2777,14 @@ CFLAGS-intl.o += -DLOCALEDIR=\"$(localedir)\"
 PREPROCESSOR_DEFINES = \
   -DGCC_INCLUDE_DIR=\"$(libsubdir)/include\" \
   -DFIXED_INCLUDE_DIR=\"$(libsubdir)/include-fixed\" \
-  -DGPLUSPLUS_INCLUDE_DIR=\"$(gcc_gxx_include_dir)\" \
+  -DGPLUSPLUS_INCLUDE_DIR=\"$(gplusplus_includedir)\" \
   -DGPLUSPLUS_INCLUDE_DIR_ADD_SYSROOT=$(gcc_gxx_include_dir_add_sysroot) \
-  -DGPLUSPLUS_TOOL_INCLUDE_DIR=\"$(gcc_gxx_include_dir)/$(target_noncanonical)\" \
-  -DGPLUSPLUS_BACKWARD_INCLUDE_DIR=\"$(gcc_gxx_include_dir)/backward\" \
+  -DGPLUSPLUS_TOOL_INCLUDE_DIR=\"$(gplusplus_tool_includedir)\" \
+  -DGPLUSPLUS_BACKWARD_INCLUDE_DIR=\"$(gplusplus_backward_includedir)\" \
   -DLOCAL_INCLUDE_DIR=\"$(local_includedir)\" \
   -DCROSS_INCLUDE_DIR=\"$(CROSS_SYSTEM_HEADER_DIR)\" \
   -DTOOL_INCLUDE_DIR=\"$(gcc_tooldir)/include\" \
-  -DNATIVE_SYSTEM_HEADER_DIR=\"$(NATIVE_SYSTEM_HEADER_DIR)\" \
+  -DNATIVE_SYSTEM_HEADER_DIR=\"$(native_system_includedir)\" \
   -DPREFIX=\"$(prefix)/\" \
   -DSTANDARD_EXEC_PREFIX=\"$(libdir)/gcc/\" \
   @TARGET_SYSTEM_ROOT_DEFINE@
diff --git a/gcc/config/i386/mingw32.h b/gcc/config/i386/mingw32.h
index ac4aa0d..258d3cd 100644
--- a/gcc/config/i386/mingw32.h
+++ b/gcc/config/i386/mingw32.h
@@ -161,6 +161,8 @@ along with GCC; see the file COPYING3.  If not see
     fvtable-verify=std:vtv_end.o%s} \
   crtend.o%s"
 
+#if !defined(TARGET_SYSTEM_ROOT)
+
 /* Override startfile prefix defaults.  */
 #ifndef STANDARD_STARTFILE_PREFIX_1
 #define STANDARD_STARTFILE_PREFIX_1 "/mingw/lib/"
@@ -174,6 +176,8 @@ along with GCC; see the file COPYING3.  If not see
 #undef NATIVE_SYSTEM_HEADER_DIR
 #define NATIVE_SYSTEM_HEADER_DIR "/mingw/include"
 
+#endif /* !defined(TARGET_SYSTEM_ROOT) */
+
 /* Output STRING, a string representing a filename, to FILE.
    We canonicalize it to be in Unix format (backslashes are replaced
    forward slashes.  */
diff --git a/gcc/config/i386/x-mingw32 b/gcc/config/i386/x-mingw32
index 1d28a70..6eb6167 100644
--- a/gcc/config/i386/x-mingw32
+++ b/gcc/config/i386/x-mingw32
@@ -16,10 +16,44 @@
 # along with GCC; see the file COPYING3.  If not see
 # <http://www.gnu.org/licenses/>.
 #
+
+# MSYS will zealously translate all paths to Windows form, so /usr/include becomes c:/msysX/usr/include.
+# This is undesirable when TARGET_SYSTEM_ROOT is specified, so this function converts /usr/include to //usr\include,
+# which will become /usr/include again when passed to gcc.
+
+# This function takes two parameters: the first parameter is the include directory path, the second parameter tells
+# whether the path is relative to TARGET_SYSTEM_ROOT.
+# If TARGET_SYSTEM_ROOT is not configured,
+#   this function always expands to the unmodified first parameter;
+# if TARGET_SYSTEM_ROOT is configured, but the second parameter is not 1,
+#   this function again expands to the unmodified first parameter;
+# otherwise,
+#   it expands to a shell expression which will transform the first parameter as described above.
+ifneq ($(TARGET_SYSTEM_ROOT),)
+sysroot_relative_path = $(if $(filter 1,$(2)),`echo "$(1)" | tr '/' '\\\\' | sed 's|^\\\\|//|'`,$(1))
+else
+sysroot_relative_path = $(1)
+endif
+
+ifneq ($(TARGET_SYSTEM_ROOT),)
+#
+# Make sure that the relative path is not converted to an absolute DOS style path
+#
+local_includedir = $(call sysroot_relative_path,$(local_prefix)/include,1)
+else
 #
 # Make local_includedir relative to EXEC_PREFIX 
 #
 local_includedir=$(libsubdir)/$(unlibsubdir)/..`echo $(exec_prefix) | sed -e 's|^$(prefix)||' -e 's|/[^/]*|/..|g'`/include
+endif
+
+#
+# Make sure that relative paths are not converted to absolute DOS style paths
+#
+gplusplus_includedir = $(call sysroot_relative_path,$(gcc_gxx_include_dir),$(gcc_gxx_include_dir_add_sysroot))
+gplusplus_tool_includedir = $(call sysroot_relative_path,$(gcc_gxx_include_dir)/$(target_noncanonical),$(gcc_gxx_include_dir_add_sysroot))
+gplusplus_backward_includedir = $(call sysroot_relative_path,$(gcc_gxx_include_dir)/backward,$(gcc_gxx_include_dir_add_sysroot))
+native_system_includedir = $(call sysroot_relative_path,$(NATIVE_SYSTEM_HEADER_DIR),1)
 
 # On MinGW, we use "%IA64d" to print 64-bit integers, and the format-checking
 # code does not handle that, so we have to disable checking here.
-- 
cgit v1.1


From 2e3af7e2f4b217104b1706994d9f384083a6d98c Mon Sep 17 00:00:00 2001
From: Uros Bizjak <uros@gcc.gnu.org>
Date: Fri, 2 Dec 2016 17:53:23 +0100
Subject: alpha.md (exception_receiver): Copy alpha_gp_save_rtx return value.

	* config/alpha/alpha.md (exception_receiver): Copy
	alpha_gp_save_rtx return value.

From-SVN: r243197
---
 gcc/ChangeLog             | 10 +++++++---
 gcc/config/alpha/alpha.md |  2 +-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c984483..29e40ae 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-02  Uros Bizjak  <ubizjak@gmail.com>
+
+	* config/alpha/alpha.md (exception_receiver): Copy
+	alpha_gp_ave_rtx return value.
+
 2016-12-02  Tadek Kijkowski  <tkijkowski@gmail.com>
 
 	* Makefile.in (PREPROCESSOR_DEFINES): Add a level of indirection
@@ -12,8 +17,7 @@
 2016-12-02  Jakub Jelinek  <jakub@redhat.com>
 
 	PR target/70322
-	* config/i386/i386.c (dimode_scalar_to_vector_candidate_p): Handle
-	NOT.
+	* config/i386/i386.c (dimode_scalar_to_vector_candidate_p): Handle NOT.
 	(dimode_scalar_chain::compute_convert_gain): Likewise.
 	(dimode_scalar_chain::convert_insn): Likewise.
 	* config/i386/i386.md (*one_cmpldi2_doubleword): New
@@ -341,7 +345,7 @@
 	early_dwarf_finished.
 
 2016-12-01  Eric Botcazou  <ebotcazou@adacore.com>
-            David S. Miller  <davem@davemloft.net>
+	    David S. Miller  <davem@davemloft.net>
 
 	* config/sparc/sparc.opt (mlra): New target option.
 	* config/sparc/sparc.c (TARGET_LRA_P): Define to...
diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md
index 3e4594b..0ed29de 100644
--- a/gcc/config/alpha/alpha.md
+++ b/gcc/config/alpha/alpha.md
@@ -5142,7 +5142,7 @@
   "TARGET_ABI_OSF"
 {
   if (flag_reorder_blocks_and_partition)
-    operands[0] = alpha_gp_save_rtx ();
+    operands[0] = copy_rtx (alpha_gp_save_rtx ());
   else
     operands[0] = const0_rtx;
 })
-- 
cgit v1.1


From 8a87dced20fc6cc3951038df3bdc30e453af5fb9 Mon Sep 17 00:00:00 2001
From: Martin Jambor <mjambor@suse.cz>
Date: Fri, 2 Dec 2016 18:05:10 +0100
Subject: Move rebuild_cgraph_edges to the end of build_ssa_passes

2016-12-02  Martin Jambor  <mjambor@suse.cz>

	* passes.def: Move pass_rebuild_cgraph_edges to the end of
	pass_build_ssa_passes.

From-SVN: r243199
---
 gcc/ChangeLog  | 5 +++++
 gcc/passes.def | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 29e40ae..47b3f84 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-02  Martin Jambor  <mjambor@suse.cz>
+
+	* passes.def: Move pass_rebuild_cgraph_edges to the end of
+	pass_build_ssa_passes.
+
 2016-12-02  Uros Bizjak  <ubizjak@gmail.com>
 
 	* config/alpha/alpha.md (exception_receiver): Copy
diff --git a/gcc/passes.def b/gcc/passes.def
index 1117b8b..7b12a41 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -56,12 +56,12 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_build_ssa_passes);
   PUSH_INSERT_PASSES_WITHIN (pass_build_ssa_passes)
       NEXT_PASS (pass_fixup_cfg);
-      NEXT_PASS (pass_rebuild_cgraph_edges);
       NEXT_PASS (pass_build_ssa);
       NEXT_PASS (pass_warn_nonnull_compare);
       NEXT_PASS (pass_ubsan);
       NEXT_PASS (pass_early_warn_uninitialized);
       NEXT_PASS (pass_nothrow);
+      NEXT_PASS (pass_rebuild_cgraph_edges);
   POP_INSERT_PASSES ()
 
   NEXT_PASS (pass_chkp_instrumentation_passes);
-- 
cgit v1.1


From 827ab47ab1f9f9b9b108a252b7a43c3c7bc828b7 Mon Sep 17 00:00:00 2001
From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Fri, 2 Dec 2016 17:13:08 +0000
Subject: [AArch64] Separate shrink wrapping hooks implementation

	* config/aarch64/aarch64.h (machine_function): Add
	reg_is_wrapped_separately field.
	* config/aarch64/aarch64.md (LAST_SAVED_REGNUM): Define new constant.
	* config/aarch64/aarch64.c (emit_set_insn): Change return type to
	rtx_insn *.
	(aarch64_save_callee_saves): Don't save registers that are wrapped
	separately.
	(aarch64_restore_callee_saves): Don't restore registers that are
	wrapped separately.
	(offset_9bit_signed_unscaled_p, offset_12bit_unsigned_scaled_p,
	aarch64_offset_7bit_signed_scaled_p): Move earlier in the file.
	(aarch64_get_separate_components): New function.
	(aarch64_get_next_set_bit): Likewise.
	(aarch64_components_for_bb): Likewise.
	(aarch64_disqualify_components): Likewise.
	(aarch64_emit_prologue_components): Likewise.
	(aarch64_emit_epilogue_components): Likewise.
	(aarch64_set_handled_components): Likewise.
	(aarch64_process_components): Likewise.
	(TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS,
	TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB,
	TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS,
	TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS,
	TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS,
	TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS): Define.

From-SVN: r243200
---
 gcc/ChangeLog                 |  28 ++++
 gcc/config/aarch64/aarch64.c  | 296 ++++++++++++++++++++++++++++++++++++++----
 gcc/config/aarch64/aarch64.h  |   2 +
 gcc/config/aarch64/aarch64.md |   1 +
 4 files changed, 303 insertions(+), 24 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 47b3f84..7957649 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,31 @@
+2016-12-02  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/aarch64/aarch64.h (machine_function): Add
+	reg_is_wrapped_separately field.
+	* config/aarch64/aarch64.md (LAST_SAVED_REGNUM): Define new constant.
+	* config/aarch64/aarch64.c (emit_set_insn): Change return type to
+	rtx_insn *.
+	(aarch64_save_callee_saves): Don't save registers that are wrapped
+	separately.
+	(aarch64_restore_callee_saves): Don't restore registers that are
+	wrapped separately.
+	(offset_9bit_signed_unscaled_p, offset_12bit_unsigned_scaled_p,
+	aarch64_offset_7bit_signed_scaled_p): Move earlier in the file.
+	(aarch64_get_separate_components): New function.
+	(aarch64_get_next_set_bit): Likewise.
+	(aarch64_components_for_bb): Likewise.
+	(aarch64_disqualify_components): Likewise.
+	(aarch64_emit_prologue_components): Likewise.
+	(aarch64_emit_epilogue_components): Likewise.
+	(aarch64_set_handled_components): Likewise.
+	(aarch64_process_components): Likewise.
+	(TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS,
+	TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB,
+	TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS,
+	TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS,
+	TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS,
+	TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS): Define.
+
 2016-12-02  Martin Jambor  <mjambor@suse.cz>
 
 	* passes.def: Move pass_rebuild_cgraph_edges to the end of
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 68a3380..af3aa0b 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1137,7 +1137,7 @@ aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
 
 /* Emit an insn that's a simple single-set.  Both the operands must be
    known to be valid.  */
-inline static rtx
+inline static rtx_insn *
 emit_set_insn (rtx x, rtx y)
 {
   return emit_insn (gen_rtx_SET (x, y));
@@ -3134,6 +3134,9 @@ aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
 	      || regno == cfun->machine->frame.wb_candidate2))
 	continue;
 
+      if (cfun->machine->reg_is_wrapped_separately[regno])
+       continue;
+
       reg = gen_rtx_REG (mode, regno);
       offset = start_offset + cfun->machine->frame.reg_offset[regno];
       mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
@@ -3142,6 +3145,7 @@ aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
       regno2 = aarch64_next_callee_save (regno + 1, limit);
 
       if (regno2 <= limit
+	  && !cfun->machine->reg_is_wrapped_separately[regno2]
 	  && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
 	      == cfun->machine->frame.reg_offset[regno2]))
 
@@ -3190,6 +3194,9 @@ aarch64_restore_callee_saves (machine_mode mode,
        regno <= limit;
        regno = aarch64_next_callee_save (regno + 1, limit))
     {
+      if (cfun->machine->reg_is_wrapped_separately[regno])
+       continue;
+
       rtx reg, mem;
 
       if (skip_wb
@@ -3204,6 +3211,7 @@ aarch64_restore_callee_saves (machine_mode mode,
       regno2 = aarch64_next_callee_save (regno + 1, limit);
 
       if (regno2 <= limit
+	  && !cfun->machine->reg_is_wrapped_separately[regno2]
 	  && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
 	      == cfun->machine->frame.reg_offset[regno2]))
 	{
@@ -3223,6 +3231,245 @@ aarch64_restore_callee_saves (machine_mode mode,
     }
 }
 
+static inline bool
+offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
+			       HOST_WIDE_INT offset)
+{
+  return offset >= -256 && offset < 256;
+}
+
+static inline bool
+offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
+{
+  return (offset >= 0
+	  && offset < 4096 * GET_MODE_SIZE (mode)
+	  && offset % GET_MODE_SIZE (mode) == 0);
+}
+
+bool
+aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
+{
+  return (offset >= -64 * GET_MODE_SIZE (mode)
+	  && offset < 64 * GET_MODE_SIZE (mode)
+	  && offset % GET_MODE_SIZE (mode) == 0);
+}
+
+/* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS.  */
+
+static sbitmap
+aarch64_get_separate_components (void)
+{
+  aarch64_layout_frame ();
+
+  sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
+  bitmap_clear (components);
+
+  /* The registers we need saved to the frame.  */
+  for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
+    if (aarch64_register_saved_on_entry (regno))
+      {
+	HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno];
+	if (!frame_pointer_needed)
+	  offset += cfun->machine->frame.frame_size
+		    - cfun->machine->frame.hard_fp_offset;
+	/* Check that we can access the stack slot of the register with one
+	   direct load with no adjustments needed.  */
+	if (offset_12bit_unsigned_scaled_p (DImode, offset))
+	  bitmap_set_bit (components, regno);
+      }
+
+  /* Don't mess with the hard frame pointer.  */
+  if (frame_pointer_needed)
+    bitmap_clear_bit (components, HARD_FRAME_POINTER_REGNUM);
+
+  unsigned reg1 = cfun->machine->frame.wb_candidate1;
+  unsigned reg2 = cfun->machine->frame.wb_candidate2;
+  /* If aarch64_layout_frame has chosen registers to store/restore with
+     writeback don't interfere with them to avoid having to output explicit
+     stack adjustment instructions.  */
+  if (reg2 != INVALID_REGNUM)
+    bitmap_clear_bit (components, reg2);
+  if (reg1 != INVALID_REGNUM)
+    bitmap_clear_bit (components, reg1);
+
+  bitmap_clear_bit (components, LR_REGNUM);
+  bitmap_clear_bit (components, SP_REGNUM);
+
+  return components;
+}
+
+/* Implement TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB.  */
+
+static sbitmap
+aarch64_components_for_bb (basic_block bb)
+{
+  bitmap in = DF_LIVE_IN (bb);
+  bitmap gen = &DF_LIVE_BB_INFO (bb)->gen;
+  bitmap kill = &DF_LIVE_BB_INFO (bb)->kill;
+
+  sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
+  bitmap_clear (components);
+
+  /* Registers are used in a bb if they are in the IN, GEN, or KILL sets.  */
+  for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
+    if ((!call_used_regs[regno])
+       && (bitmap_bit_p (in, regno)
+	   || bitmap_bit_p (gen, regno)
+	   || bitmap_bit_p (kill, regno)))
+	  bitmap_set_bit (components, regno);
+
+  return components;
+}
+
+/* Implement TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS.
+   Nothing to do for aarch64.  */
+
+static void
+aarch64_disqualify_components (sbitmap, edge, sbitmap, bool)
+{
+}
+
+/* Return the next set bit in BMP from START onwards.  Return the total number
+   of bits in BMP if no set bit is found at or after START.  */
+
+static unsigned int
+aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
+{
+  unsigned int nbits = SBITMAP_SIZE (bmp);
+  if (start == nbits)
+    return start;
+
+  gcc_assert (start < nbits);
+  for (unsigned int i = start; i < nbits; i++)
+    if (bitmap_bit_p (bmp, i))
+      return i;
+
+  return nbits;
+}
+
+/* Do the work for aarch64_emit_prologue_components and
+   aarch64_emit_epilogue_components.  COMPONENTS is the bitmap of registers
+   to save/restore, PROLOGUE_P indicates whether to emit the prologue sequence
+   for these components or the epilogue sequence.  That is, it determines
+   whether we should emit stores or loads and what kind of CFA notes to attach
+   to the insns.  Otherwise the logic for the two sequences is very
+   similar.  */
+
+static void
+aarch64_process_components (sbitmap components, bool prologue_p)
+{
+  rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
+			     ? HARD_FRAME_POINTER_REGNUM
+			     : STACK_POINTER_REGNUM);
+
+  unsigned last_regno = SBITMAP_SIZE (components);
+  unsigned regno = aarch64_get_next_set_bit (components, R0_REGNUM);
+  rtx_insn *insn = NULL;
+
+  while (regno != last_regno)
+    {
+      /* AAPCS64 section 5.1.2 requires only the bottom 64 bits to be saved
+	 so DFmode for the vector registers is enough.  */
+      machine_mode mode = GP_REGNUM_P (regno) ? DImode : DFmode;
+      rtx reg = gen_rtx_REG (mode, regno);
+      HOST_WIDE_INT offset = cfun->machine->frame.reg_offset[regno];
+      if (!frame_pointer_needed)
+	offset += cfun->machine->frame.frame_size
+		  - cfun->machine->frame.hard_fp_offset;
+      rtx addr = plus_constant (Pmode, ptr_reg, offset);
+      rtx mem = gen_frame_mem (mode, addr);
+
+      rtx set = prologue_p ? gen_rtx_SET (mem, reg) : gen_rtx_SET (reg, mem);
+      unsigned regno2 = aarch64_get_next_set_bit (components, regno + 1);
+      /* No more registers to handle after REGNO.
+	 Emit a single save/restore and exit.  */
+      if (regno2 == last_regno)
+	{
+	  insn = emit_insn (set);
+	  RTX_FRAME_RELATED_P (insn) = 1;
+	  if (prologue_p)
+	    add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
+	  else
+	    add_reg_note (insn, REG_CFA_RESTORE, reg);
+	  break;
+	}
+
+      HOST_WIDE_INT offset2 = cfun->machine->frame.reg_offset[regno2];
+      /* The next register is not of the same class or its offset is not
+	 mergeable with the current one into a pair.  */
+      if (!satisfies_constraint_Ump (mem)
+	  || GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
+	  || (offset2 - cfun->machine->frame.reg_offset[regno])
+		!= GET_MODE_SIZE (mode))
+	{
+	  insn = emit_insn (set);
+	  RTX_FRAME_RELATED_P (insn) = 1;
+	  if (prologue_p)
+	    add_reg_note (insn, REG_CFA_OFFSET, copy_rtx (set));
+	  else
+	    add_reg_note (insn, REG_CFA_RESTORE, reg);
+
+	  regno = regno2;
+	  continue;
+	}
+
+      /* REGNO2 can be saved/restored in a pair with REGNO.  */
+      rtx reg2 = gen_rtx_REG (mode, regno2);
+      if (!frame_pointer_needed)
+	offset2 += cfun->machine->frame.frame_size
+		  - cfun->machine->frame.hard_fp_offset;
+      rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
+      rtx mem2 = gen_frame_mem (mode, addr2);
+      rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
+			     : gen_rtx_SET (reg2, mem2);
+
+      if (prologue_p)
+	insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, reg2));
+      else
+	insn = emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
+
+      RTX_FRAME_RELATED_P (insn) = 1;
+      if (prologue_p)
+	{
+	  add_reg_note (insn, REG_CFA_OFFSET, set);
+	  add_reg_note (insn, REG_CFA_OFFSET, set2);
+	}
+      else
+	{
+	  add_reg_note (insn, REG_CFA_RESTORE, reg);
+	  add_reg_note (insn, REG_CFA_RESTORE, reg2);
+	}
+
+      regno = aarch64_get_next_set_bit (components, regno2 + 1);
+    }
+}
+
+/* Implement TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS.  */
+
+static void
+aarch64_emit_prologue_components (sbitmap components)
+{
+  aarch64_process_components (components, true);
+}
+
+/* Implement TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS.  */
+
+static void
+aarch64_emit_epilogue_components (sbitmap components)
+{
+  aarch64_process_components (components, false);
+}
+
+/* Implement TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS.  */
+
+static void
+aarch64_set_handled_components (sbitmap components)
+{
+  for (unsigned regno = 0; regno <= LAST_SAVED_REGNUM; regno++)
+    if (bitmap_bit_p (components, regno))
+      cfun->machine->reg_is_wrapped_separately[regno] = true;
+}
+
 /* AArch64 stack frames generated by this compiler look like:
 
 	+-------------------------------+
@@ -3981,29 +4228,6 @@ aarch64_classify_index (struct aarch64_address_info *info, rtx x,
   return false;
 }
 
-bool
-aarch64_offset_7bit_signed_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
-{
-  return (offset >= -64 * GET_MODE_SIZE (mode)
-	  && offset < 64 * GET_MODE_SIZE (mode)
-	  && offset % GET_MODE_SIZE (mode) == 0);
-}
-
-static inline bool
-offset_9bit_signed_unscaled_p (machine_mode mode ATTRIBUTE_UNUSED,
-			       HOST_WIDE_INT offset)
-{
-  return offset >= -256 && offset < 256;
-}
-
-static inline bool
-offset_12bit_unsigned_scaled_p (machine_mode mode, HOST_WIDE_INT offset)
-{
-  return (offset >= 0
-	  && offset < 4096 * GET_MODE_SIZE (mode)
-	  && offset % GET_MODE_SIZE (mode) == 0);
-}
-
 /* Return true if MODE is one of the modes for which we
    support LDP/STP operations.  */
 
@@ -14567,6 +14791,30 @@ aarch64_libgcc_floating_mode_supported_p
 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
   aarch64_first_cycle_multipass_dfa_lookahead_guard
 
+#undef TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS
+#define TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS \
+  aarch64_get_separate_components
+
+#undef TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB
+#define TARGET_SHRINK_WRAP_COMPONENTS_FOR_BB \
+  aarch64_components_for_bb
+
+#undef TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS
+#define TARGET_SHRINK_WRAP_DISQUALIFY_COMPONENTS \
+  aarch64_disqualify_components
+
+#undef TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS
+#define TARGET_SHRINK_WRAP_EMIT_PROLOGUE_COMPONENTS \
+  aarch64_emit_prologue_components
+
+#undef TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS
+#define TARGET_SHRINK_WRAP_EMIT_EPILOGUE_COMPONENTS \
+  aarch64_emit_epilogue_components
+
+#undef TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS
+#define TARGET_SHRINK_WRAP_SET_HANDLED_COMPONENTS \
+  aarch64_set_handled_components
+
 #undef TARGET_TRAMPOLINE_INIT
 #define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init
 
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 584ff5c..c417569 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -591,6 +591,8 @@ struct GTY (()) aarch64_frame
 typedef struct GTY (()) machine_function
 {
   struct aarch64_frame frame;
+  /* One entry for each hard register in [0, LAST_SAVED_REGNUM].  */
+  bool reg_is_wrapped_separately[LAST_SAVED_REGNUM + 1];
 } machine_function;
 #endif
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index bc6d8a2..1e6b6f5 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -59,6 +59,7 @@
     (V0_REGNUM		32)
     (V15_REGNUM		47)
     (V31_REGNUM		63)
+    (LAST_SAVED_REGNUM	63)
     (SFP_REGNUM		64)
     (AP_REGNUM		65)
     (CC_REGNUM		66)
-- 
cgit v1.1


From 6ed022af2addc38c0d7c2b321d279a48f73f11c4 Mon Sep 17 00:00:00 2001
From: Janus Weil <janus@gcc.gnu.org>
Date: Fri, 2 Dec 2016 19:38:24 +0100
Subject: [multiple changes]

2016-12-02  Janus Weil  <janus@gcc.gnu.org>
	    Steven G. Kargl  <kargl@gcc.gnu.org>

	PR fortran/78618
	* check.c (gfc_check_rank): Remove ATTRIBUTE_UNUSED.
	* expr.c (gfc_check_assign): Fix error propagation.

2016-12-02  Steven G. Kargl  <kargl@gcc.gnu.org>

	PR fortran/78618
	* gfortran.dg/char_conversion.f90: New test.

From-SVN: r243201
---
 gcc/fortran/ChangeLog                         |  7 +++++++
 gcc/fortran/check.c                           |  2 +-
 gcc/fortran/expr.c                            |  6 +++---
 gcc/testsuite/ChangeLog                       |  5 +++++
 gcc/testsuite/gfortran.dg/char_conversion.f90 | 10 ++++++++++
 5 files changed, 26 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/char_conversion.f90

diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index 20a9f2e..b11a999 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,3 +1,10 @@
+2016-12-02  Janus Weil  <janus@gcc.gnu.org>
+	    Steven G. Kargl  <kargl@gcc.gnu.org>
+
+	PR fortran/78618
+	* check.c (gfc_check_rank): Remove ATTRIBUTE_UNUSED.
+	* expr.c (gfc_check_assign): Fix error propagation.
+
 2016-12-01  Elizebeth Punnoose  <elizebeth.punnoose@hpe.com>
 
 	PR fortran/77505
diff --git a/gcc/fortran/check.c b/gcc/fortran/check.c
index 3b80156..3ea8391 100644
--- a/gcc/fortran/check.c
+++ b/gcc/fortran/check.c
@@ -3667,7 +3667,7 @@ gfc_check_range (gfc_expr *x)
 
 
 bool
-gfc_check_rank (gfc_expr *a ATTRIBUTE_UNUSED)
+gfc_check_rank (gfc_expr *a)
 {
   /* Any data object is allowed; a "data object" is a "constant (4.1.3),
      variable (6), or subobject of a constant (2.4.3.2.3)" (F2008, 1.3.45).  */
diff --git a/gcc/fortran/expr.c b/gcc/fortran/expr.c
index 60f6080..c01418d 100644
--- a/gcc/fortran/expr.c
+++ b/gcc/fortran/expr.c
@@ -3314,9 +3314,9 @@ gfc_check_assign (gfc_expr *lvalue, gfc_expr *rvalue, int conform,
   if (lvalue->ts.type == BT_CHARACTER && rvalue->ts.type == BT_CHARACTER)
     {
       if (lvalue->ts.kind != rvalue->ts.kind && allow_convert)
-	gfc_convert_chartype (rvalue, &lvalue->ts);
-
-      return true;
+	return gfc_convert_chartype (rvalue, &lvalue->ts);
+      else
+	return true;
     }
 
   if (!allow_convert)
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 3a478b9..d7b90d6 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-02  Steven G. Kargl  <kargl@gcc.gnu.org>
+
+	PR fortran/78618
+	* gfortran.dg/char_conversion.f90: New test.
+
 2016-12-02  Jakub Jelinek  <jakub@redhat.com>
 
 	PR target/70322
diff --git a/gcc/testsuite/gfortran.dg/char_conversion.f90 b/gcc/testsuite/gfortran.dg/char_conversion.f90
new file mode 100644
index 0000000..7120a1c
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/char_conversion.f90
@@ -0,0 +1,10 @@
+! { dg-do compile }
+!
+! PR 78618: ICE in gfc_check_rank, at fortran/check.c:3670
+!
+! Contributed by Gerhard Steinmetz <gerhard.steinmetz.fortran@t-online.de>
+
+program p
+   character, parameter :: c = char(256,4) ! { dg-error "cannot be converted" }
+   if (rank(c) /= 0) call abort
+end
-- 
cgit v1.1


From 81cff75ff9178e9344e1f4c8935ffd451f66632f Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Fri, 2 Dec 2016 19:48:35 +0100
Subject: re PR target/70322 (STV doesn't optimize andn)

	PR target/70322
	* config/i386/i386.md (*andndi3_doubleword): Add non-BMI alternative
	and corresponding post-reload splitter.

testsuite/ChangeLog:

	PR target/70322
	* gcc.target/i386/pr70322-2.c (dg-final): Remove xfail.

From-SVN: r243202
---
 gcc/ChangeLog                             |  6 ++++
 gcc/config/i386/i386.md                   | 51 +++++++++++++++++++++++--------
 gcc/testsuite/ChangeLog                   |  5 +++
 gcc/testsuite/gcc.target/i386/pr70322-2.c |  2 +-
 4 files changed, 51 insertions(+), 13 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 7957649..ee603c4 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-02  Uros Bizjak  <ubizjak@gmail.com>
+
+	PR target/70322
+	* config/i386/i386.md (*andndi3_doubleword): Add non-BMI alternative
+	and corresponding post-reload splitter.
+
 2016-12-02  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
 
 	* config/aarch64/aarch64.h (machine_function): Add
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index da7cb07..773f29b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -8534,15 +8534,24 @@
   operands[2] = gen_lowpart (QImode, operands[2]);
 })
 
-(define_insn_and_split "*andndi3_doubleword"
-  [(set (match_operand:DI 0 "register_operand" "=r")
+(define_insn "*andndi3_doubleword"
+  [(set (match_operand:DI 0 "register_operand" "=r,&r")
 	(and:DI
-	  (not:DI (match_operand:DI 1 "register_operand" "r"))
-	  (match_operand:DI 2 "nonimmediate_operand" "rm")))
+	  (not:DI (match_operand:DI 1 "register_operand" "r,0"))
+	  (match_operand:DI 2 "nonimmediate_operand" "rm,rm")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_BMI && !TARGET_64BIT && TARGET_STV && TARGET_SSE2"
+  "!TARGET_64BIT && TARGET_STV && TARGET_SSE2"
   "#"
-  "&& reload_completed"
+  [(set_attr "isa" "bmi,*")])
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+	(and:DI
+	  (not:DI (match_operand:DI 1 "register_operand"))
+	  (match_operand:DI 2 "nonimmediate_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && TARGET_BMI && TARGET_STV && TARGET_SSE2
+   && reload_completed"
   [(parallel [(set (match_dup 0)
 		   (and:SI (not:SI (match_dup 1)) (match_dup 2)))
 	      (clobber (reg:CC FLAGS_REG))])
@@ -8551,6 +8560,24 @@
 	      (clobber (reg:CC FLAGS_REG))])]
   "split_double_mode (DImode, &operands[0], 3, &operands[0], &operands[3]);")
 
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+	(and:DI
+	  (not:DI (match_dup 0))
+	  (match_operand:DI 1 "nonimmediate_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && !TARGET_BMI && TARGET_STV && TARGET_SSE2
+   && reload_completed"
+  [(set (match_dup 0) (not:SI (match_dup 0)))
+   (parallel [(set (match_dup 0)
+		   (and:SI (match_dup 0) (match_dup 1)))
+	      (clobber (reg:CC FLAGS_REG))])
+   (set (match_dup 2) (not:SI (match_dup 2)))
+   (parallel [(set (match_dup 2)
+		   (and:SI (match_dup 2) (match_dup 3)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "split_double_mode (DImode, &operands[0], 2, &operands[0], &operands[2]);")
+
 (define_insn "*andn<mode>_1"
   [(set (match_operand:SWI48 0 "register_operand" "=r,r")
 	(and:SWI48
@@ -9312,6 +9339,12 @@
 
 ;; One complement instructions
 
+(define_expand "one_cmpl<mode>2"
+  [(set (match_operand:SWIM1248x 0 "nonimmediate_operand")
+	(not:SWIM1248x (match_operand:SWIM1248x 1 "nonimmediate_operand")))]
+  ""
+  "ix86_expand_unary_operator (NOT, <MODE>mode, operands); DONE;")
+
 (define_insn_and_split "*one_cmpldi2_doubleword"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
 	(not:DI (match_operand:DI 1 "nonimmediate_operand" "0")))]
@@ -9325,12 +9358,6 @@
 	(not:SI (match_dup 3)))]
   "split_double_mode (DImode, &operands[0], 2, &operands[0], &operands[2]);")
 
-(define_expand "one_cmpl<mode>2"
-  [(set (match_operand:SWIM1248x 0 "nonimmediate_operand")
-	(not:SWIM1248x (match_operand:SWIM1248x 1 "nonimmediate_operand")))]
-  ""
-  "ix86_expand_unary_operator (NOT, <MODE>mode, operands); DONE;")
-
 (define_insn "*one_cmpl<mode>2_1"
   [(set (match_operand:SWI248 0 "nonimmediate_operand" "=rm")
 	(not:SWI248 (match_operand:SWI248 1 "nonimmediate_operand" "0")))]
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index d7b90d6..261731c 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-02  Uros Bizjak  <ubizjak@gmail.com>
+
+	PR target/70322
+	* gcc.target/i386/pr70322-2.c (dg-final): Remove xfail.
+
 2016-12-02  Steven G. Kargl  <kargl@gcc.gnu.org>
 
 	PR fortran/78618
diff --git a/gcc/testsuite/gcc.target/i386/pr70322-2.c b/gcc/testsuite/gcc.target/i386/pr70322-2.c
index 7c5d0be..a683b6d 100644
--- a/gcc/testsuite/gcc.target/i386/pr70322-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr70322-2.c
@@ -1,7 +1,7 @@
 /* PR target/70322 */
 /* { dg-do compile { target ia32 } } */
 /* { dg-options "-O2 -msse2 -mstv -mno-bmi" } */
-/* { dg-final { scan-assembler "pandn" { xfail *-*-* } } } */
+/* { dg-final { scan-assembler "pandn" } } */
 
 extern long long z;
 
-- 
cgit v1.1


From a3f7c896a55c33e369f9bbb210f4b43a345beabe Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Fri, 2 Dec 2016 22:23:22 +0100
Subject: =?UTF-8?q?re=20PR=20c++/78649=20(ICE=20on=20invalid=20C++=20code?=
 =?UTF-8?q?=20on=20x86=5F64-linux-gnu=20(internal=20compiler=20error:=20tr?=
 =?UTF-8?q?ee=20check:=20expected=20class=20=E2=80=98type=E2=80=99,=20have?=
 =?UTF-8?q?=20=E2=80=98exceptional=E2=80=99=20(error=5Fmark)=20in=20build?=
 =?UTF-8?q?=5Fvalue=5Finit=5Fnoctor,=20at=20cp/init.c:380))?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

	PR c++/78649
	* pt.c (tsubst_init): Don't call build_value_init if decl's type
	is error_mark_node.

	* g++.dg/cpp0x/pr78649.C: New test.

From-SVN: r243204
---
 gcc/cp/ChangeLog                     |  6 ++++++
 gcc/cp/pt.c                          |  2 +-
 gcc/testsuite/ChangeLog              |  5 +++++
 gcc/testsuite/g++.dg/cpp0x/pr78649.C | 16 ++++++++++++++++
 4 files changed, 28 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/pr78649.C

diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog
index d39c222..f16813d 100644
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-02  Jakub Jelinek  <jakub@redhat.com>
+
+	PR c++/78649
+	* pt.c (tsubst_init): Don't call build_value_init if decl's type
+	is error_mark_node.
+
 2016-12-02  Cesar Philippidis  <cesar@codesourcery.com>
 	    James Norris  <jnorris@codesourcery.com>
 
diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 8b0a21c..b51e580 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -14082,7 +14082,7 @@ tsubst_init (tree init, tree decl, tree args,
 
   init = tsubst_expr (init, args, complain, in_decl, false);
 
-  if (!init)
+  if (!init && TREE_TYPE (decl) != error_mark_node)
     {
       /* If we had an initializer but it
 	 instantiated to nothing,
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 261731c..103906b 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-02  Jakub Jelinek  <jakub@redhat.com>
+
+	PR c++/78649
+	* g++.dg/cpp0x/pr78649.C: New test.
+
 2016-12-02  Uros Bizjak  <ubizjak@gmail.com>
 
 	PR target/70322
diff --git a/gcc/testsuite/g++.dg/cpp0x/pr78649.C b/gcc/testsuite/g++.dg/cpp0x/pr78649.C
new file mode 100644
index 0000000..43bcb64
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/pr78649.C
@@ -0,0 +1,16 @@
+// PR c++/78649
+// { dg-do compile { target c++11 } }
+
+template <class> void foo ();
+template <class T, class... U>
+void
+test ()
+{
+  T t (foo<U>...);	// { dg-error "declared void" }
+}
+
+int
+main ()
+{
+  test<void> ();
+}
-- 
cgit v1.1


From 471092175dcb1ee2ecae398f897003310f1c2e24 Mon Sep 17 00:00:00 2001
From: "Steven G. Kargl" <kargl@gcc.gnu.org>
Date: Fri, 2 Dec 2016 22:09:13 +0000
Subject: simplify.c (gfc_convert_char_constant): Free result on error.

2016-12-02  Steven G. Kargl  <kargl@gcc.gnu.org>

	* simplify.c (gfc_convert_char_constant): Free result on error.

From-SVN: r243205
---
 gcc/fortran/ChangeLog  | 4 ++++
 gcc/fortran/simplify.c | 1 +
 2 files changed, 5 insertions(+)

diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index b11a999..68d0559 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,3 +1,7 @@
+2016-12-02  Steven G. Kargl  <kargl@gcc.gnu.org>
+
+	* simplify.c (gfc_convert_char_constant): Free result on error.
+
 2016-12-02  Janus Weil  <janus@gcc.gnu.org>
 	    Steven G. Kargl  <kargl@gcc.gnu.org>
 
diff --git a/gcc/fortran/simplify.c b/gcc/fortran/simplify.c
index 9047c63..a46fbc5 100644
--- a/gcc/fortran/simplify.c
+++ b/gcc/fortran/simplify.c
@@ -7152,6 +7152,7 @@ gfc_convert_char_constant (gfc_expr *e, bt type ATTRIBUTE_UNUSED, int kind)
 		       "into character kind %d",
 		       gfc_print_wide_char (result->value.character.string[i]),
 		       &e->where, kind);
+	    gfc_free_expr (result);
 	    return &gfc_bad_expr;
 	  }
 
-- 
cgit v1.1


From 6556f6516673c0c51167238d5f594d09cb2d435f Mon Sep 17 00:00:00 2001
From: Michael Meissner <meissner@linux.vnet.ibm.com>
Date: Fri, 2 Dec 2016 22:12:08 +0000
Subject: re PR target/78639 (Power9 bad code generation for cactusADM
 benchmark)

2016-12-02  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/78639
	* config/rs6000/rs6000.md (movdi_internal64): Fix typo in
	subversion id 242679 that causes the wrong store instruction to be
	generated if a DImode is in an Altivec register using REG+REG
	addressing.

From-SVN: r243206
---
 gcc/ChangeLog               | 8 ++++++++
 gcc/config/rs6000/rs6000.md | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ee603c4..19394b0 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2016-12-02  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	PR target/78639
+	* config/rs6000/rs6000.md (movdi_internal64): Fix typo in
+	subversion id 242679 that causes the wrong store instruction to be
+	generated if a DImode is in an Altivec register using REG+REG
+	addressing.
+
 2016-12-02  Uros Bizjak  <ubizjak@gmail.com>
 
 	PR target/70322
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index f1ba8d4..5a453a0 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -8239,7 +8239,7 @@
 (define_insn "*movdi_internal64"
   [(set (match_operand:DI 0 "nonimmediate_operand"
                "=Y,        r,         r,         r,         r,          r,
-                ^m,        ^d,        ^d,        ^Y,        $Z,         $wb,
+                ^m,        ^d,        ^d,        ^wY,       $Z,         $wb,
                 $wv,       ^wi,       *wo,       *wo,       *wv,        *wi,
                 *wi,       *wv,       *wv,       r,         *h,         *h,
                 ?*r,       ?*wg,      ?*r,       ?*wj")
-- 
cgit v1.1


From bf7f70ffb57c900d18fed28ad05fc2876d435b3d Mon Sep 17 00:00:00 2001
From: David Malcolm <dmalcolm@redhat.com>
Date: Fri, 2 Dec 2016 22:39:43 +0000
Subject: selftest.c: remove calls to strndup (PR bootstrap/78616)

gcc/ChangeLog:
	PR bootstrap/78616
	* selftest.c (selftest::assert_strndup_eq): Rename to...
	(selftest::assert_xstrndup_eq): ...this, and remove call to
	strndup.
	(selftest::test_strndup): Rename to...
	(selftest::test_xstrndup): ...this, updating for above renaming.
	(selftest::test_libiberty): Update for renaming.

From-SVN: r243207
---
 gcc/ChangeLog  | 10 ++++++++++
 gcc/selftest.c | 40 +++++++++++++++++-----------------------
 2 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 19394b0..18efa04 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2016-12-02  David Malcolm  <dmalcolm@redhat.com>
+
+	PR bootstrap/78616
+	* selftest.c (selftest::assert_strndup_eq): Rename to...
+	(selftest::assert_xstrndup_eq): ...this, and remove call to
+	strndup.
+	(selftest::test_strndup): Rename to...
+	(selftest::test_xstrndup): ...this, updating for above renaming.
+	(selftest::test_libiberty): Update for renaming.
+
 2016-12-02  Michael Meissner  <meissner@linux.vnet.ibm.com>
 
 	PR target/78639
diff --git a/gcc/selftest.c b/gcc/selftest.c
index 6df73c2..40c6cb5 100644
--- a/gcc/selftest.c
+++ b/gcc/selftest.c
@@ -200,41 +200,35 @@ read_file (const location &loc, const char *path)
 
 /* Selftests for libiberty.  */
 
-/* Verify that both strndup and xstrndup generate EXPECTED
-   when called on SRC and N.  */
+/* Verify that xstrndup generates EXPECTED when called on SRC and N.  */
 
 static void
-assert_strndup_eq (const char *expected, const char *src, size_t n)
+assert_xstrndup_eq (const char *expected, const char *src, size_t n)
 {
-  char *buf = strndup (src, n);
-  if (buf)
-    ASSERT_STREQ (expected, buf);
-  free (buf);
-
-  buf = xstrndup (src, n);
+  char *buf = xstrndup (src, n);
   ASSERT_STREQ (expected, buf);
   free (buf);
 }
 
-/* Verify that strndup and xstrndup work as expected.  */
+/* Verify that xstrndup works as expected.  */
 
 static void
-test_strndup ()
+test_xstrndup ()
 {
-  assert_strndup_eq ("", "test", 0);
-  assert_strndup_eq ("t", "test", 1);
-  assert_strndup_eq ("te", "test", 2);
-  assert_strndup_eq ("tes", "test", 3);
-  assert_strndup_eq ("test", "test", 4);
-  assert_strndup_eq ("test", "test", 5);
+  assert_xstrndup_eq ("", "test", 0);
+  assert_xstrndup_eq ("t", "test", 1);
+  assert_xstrndup_eq ("te", "test", 2);
+  assert_xstrndup_eq ("tes", "test", 3);
+  assert_xstrndup_eq ("test", "test", 4);
+  assert_xstrndup_eq ("test", "test", 5);
 
   /* Test on an string without zero termination.  */
   const char src[4] = {'t', 'e', 's', 't'};
-  assert_strndup_eq ("", src, 0);
-  assert_strndup_eq ("t", src, 1);
-  assert_strndup_eq ("te", src, 2);
-  assert_strndup_eq ("tes", src, 3);
-  assert_strndup_eq ("test", src, 4);
+  assert_xstrndup_eq ("", src, 0);
+  assert_xstrndup_eq ("t", src, 1);
+  assert_xstrndup_eq ("te", src, 2);
+  assert_xstrndup_eq ("tes", src, 3);
+  assert_xstrndup_eq ("test", src, 4);
 }
 
 /* Run selftests for libiberty.  */
@@ -242,7 +236,7 @@ test_strndup ()
 static void
 test_libiberty ()
 {
-  test_strndup ();
+  test_xstrndup ();
 }
 
 /* Selftests for the selftest system itself.  */
-- 
cgit v1.1


From 5bd12bafb5bebafa1a96f0f41c598b57c334576f Mon Sep 17 00:00:00 2001
From: Segher Boessenkool <segher@kernel.crashing.org>
Date: Sat, 3 Dec 2016 00:51:31 +0100
Subject: simplify-rtx: Fix the last fix (PR78638)

I managed to get the last obvious fix wrong: mode is M1, GET_MODE (op)
is M2.


	* simplify-rtx.c (simplify_truncation): M2 is not mode, it is
	GET_MODE (op).  Fix this.

From-SVN: r243210
---
 gcc/ChangeLog      | 5 +++++
 gcc/simplify-rtx.c | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 18efa04..0d57fe3 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-02  Segher Boessenkool  <segher@kernel.crashing.org>
+
+	* simplify-rtx.c (simplify_truncation): M2 is not mode, it is
+	GET_MODE (op).  Fix this.
+
 2016-12-02  David Malcolm  <dmalcolm@redhat.com>
 
 	PR bootstrap/78616
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index 7ed849f..165af23 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -752,7 +752,7 @@ simplify_truncation (machine_mode mode, rtx op,
      changing len.  */
   if ((GET_CODE (op) == ZERO_EXTRACT || GET_CODE (op) == SIGN_EXTRACT)
       && REG_P (XEXP (op, 0))
-      && GET_MODE (XEXP (op, 0)) == mode
+      && GET_MODE (XEXP (op, 0)) == GET_MODE (op)
       && CONST_INT_P (XEXP (op, 1))
       && CONST_INT_P (XEXP (op, 2)))
     {
-- 
cgit v1.1


From 9dd059b7c37796353782c330f73886f931d2b1bc Mon Sep 17 00:00:00 2001
From: GCC Administrator <gccadmin@gcc.gnu.org>
Date: Sat, 3 Dec 2016 00:16:15 +0000
Subject: Daily bump.

From-SVN: r243214
---
 gcc/DATESTAMP | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP
index b720a20..8caabe1 100644
--- a/gcc/DATESTAMP
+++ b/gcc/DATESTAMP
@@ -1 +1 @@
-20161202
+20161203
-- 
cgit v1.1


From 25207f51c498deae4bf57a7ea87fe429cefee711 Mon Sep 17 00:00:00 2001
From: Michael Meissner <meissner@linux.vnet.ibm.com>
Date: Sat, 3 Dec 2016 00:41:44 +0000
Subject: config.gcc (powerpc*-*-linux*): Set gnu-indirect-function by default
 on PowerPC linux systems.

2016-12-02  Michael Meissner  <meissner@linux.vnet.ibm.com>

	* config.gcc (powerpc*-*-linux*): Set gnu-indirect-function by
	default on PowerPC linux systems.

From-SVN: r243215
---
 gcc/ChangeLog  | 5 +++++
 gcc/config.gcc | 8 ++++++++
 2 files changed, 13 insertions(+)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 0d57fe3..d651cbd 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-02  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	* config.gcc (powerpc*-*-linux*): Set gnu-indirect-function by
+	default on PowerPC linux systems.
+
 2016-12-02  Segher Boessenkool  <segher@kernel.crashing.org>
 
 	* simplify-rtx.c (simplify_truncation): M2 is not mode, it is
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 1fa34ac..189073e 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -2443,6 +2443,14 @@ powerpc*-*-linux*)
 	if test x${enable_secureplt} = xyes; then
 		tm_file="rs6000/secureplt.h ${tm_file}"
 	fi
+	# Assume modern glibc if not targeting Android, uclibc or musl.
+	case ${target} in
+	    *-*-*android*|*-*-*uclibc*|*-*-*musl*)
+		    ;;
+	    *)
+		default_gnu_indirect_function=yes
+		    ;;
+	esac
 	;;
 powerpc-wrs-vxworks|powerpc-wrs-vxworksae|powerpc-wrs-vxworksmils)
 	tm_file="${tm_file} elfos.h freebsd-spec.h rs6000/sysv4.h"
-- 
cgit v1.1


From 57c9def7d08e686b794ec64d9df15cd9ccf8d174 Mon Sep 17 00:00:00 2001
From: Jeff Law <law@redhat.com>
Date: Fri, 2 Dec 2016 19:02:51 -0700
Subject: arm.c (arm_handle_cmse_nonsecure_call): Remove unused variable
 main_variant.

	* config/arm/arm.c (arm_handle_cmse_nonsecure_call): Remove unused
	variable main_variant.

From-SVN: r243216
---
 gcc/ChangeLog        | 5 +++++
 gcc/config/arm/arm.c | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d651cbd..96ae900 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-02  Jeff Law  <law@redhat.com>
+
+	* config/arm/arm.c (arm_handle_cmse_nonsecure_call): Remove unused
+	variable main_variant.
+
 2016-12-02  Michael Meissner  <meissner@linux.vnet.ibm.com>
 
 	* config.gcc (powerpc*-*-linux*): Set gnu-indirect-function by
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index f1df3a0..ec1f5fc 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -6774,7 +6774,7 @@ arm_handle_cmse_nonsecure_call (tree *node, tree name,
 				 bool *no_add_attrs)
 {
   tree decl = NULL_TREE, fntype = NULL_TREE;
-  tree main_variant, type;
+  tree type;
 
   if (!use_cmse)
     {
-- 
cgit v1.1


From 98ff2d6b12106fa5c7e7a3249089ace171075fe3 Mon Sep 17 00:00:00 2001
From: "Steven G. Kargl" <kargl@gcc.gnu.org>
Date: Sat, 3 Dec 2016 07:23:13 +0000
Subject: expr.c (gfc_build_conversion): Remove unneeded initialization.

2016-12-02   Steven G. Kargl  <kargl@gcc.gnu.org>

	* expr.c (gfc_build_conversion): Remove unneeded initialization.

From-SVN: r243217
---
 gcc/fortran/expr.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/gcc/fortran/expr.c b/gcc/fortran/expr.c
index c01418d..3464a20 100644
--- a/gcc/fortran/expr.c
+++ b/gcc/fortran/expr.c
@@ -795,8 +795,6 @@ gfc_build_conversion (gfc_expr *e)
   p = gfc_get_expr ();
   p->expr_type = EXPR_FUNCTION;
   p->symtree = NULL;
-  p->value.function.actual = NULL;
-
   p->value.function.actual = gfc_get_actual_arglist ();
   p->value.function.actual->expr = e;
 
-- 
cgit v1.1


From 802583a210c22cdbabb63d660633af09f0039a32 Mon Sep 17 00:00:00 2001
From: Janus Weil <janus@gcc.gnu.org>
Date: Sat, 3 Dec 2016 10:32:27 +0100
Subject: re PR fortran/58175 ([OOP] Incorrect warning message on scalar
 finalizer)

2016-12-03  Janus Weil  <janus@gcc.gnu.org>

	PR fortran/58175
	* resolve.c (gfc_resolve_finalizers): Prevent bogus warning.

2016-12-03  Janus Weil  <janus@gcc.gnu.org>

	PR fortran/58175
	* gfortran.dg/finalize_30.f90: Extend test case.

From-SVN: r243218
---
 gcc/fortran/ChangeLog                     | 5 +++++
 gcc/fortran/resolve.c                     | 2 +-
 gcc/testsuite/ChangeLog                   | 5 +++++
 gcc/testsuite/gfortran.dg/finalize_30.f90 | 2 ++
 4 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index 68d0559..7a007c3 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-03  Janus Weil  <janus@gcc.gnu.org>
+
+	PR fortran/58175
+	* resolve.c (gfc_resolve_finalizers): Prevent bogus warning.
+
 2016-12-02  Steven G. Kargl  <kargl@gcc.gnu.org>
 
 	* simplify.c (gfc_convert_char_constant): Free result on error.
diff --git a/gcc/fortran/resolve.c b/gcc/fortran/resolve.c
index 152678f..7bc9f5f 100644
--- a/gcc/fortran/resolve.c
+++ b/gcc/fortran/resolve.c
@@ -12517,7 +12517,7 @@ error:
   /* Warn if we haven't seen a scalar finalizer procedure (but we know there
      were nodes in the list, must have been for arrays.  It is surely a good
      idea to have a scalar version there if there's something to finalize.  */
-  if (warn_surprising && result && !seen_scalar)
+  if (warn_surprising && derived->f2k_derived->finalizers && !seen_scalar)
     gfc_warning (OPT_Wsurprising,
 		 "Only array FINAL procedures declared for derived type %qs"
 		 " defined at %L, suggest also scalar one",
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 103906b..39a5c59 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-03  Janus Weil  <janus@gcc.gnu.org>
+
+	PR fortran/58175
+	* gfortran.dg/finalize_30.f90: Extend test case.
+
 2016-12-02  Jakub Jelinek  <jakub@redhat.com>
 
 	PR c++/78649
diff --git a/gcc/testsuite/gfortran.dg/finalize_30.f90 b/gcc/testsuite/gfortran.dg/finalize_30.f90
index 281bfaa..b93a3d5 100644
--- a/gcc/testsuite/gfortran.dg/finalize_30.f90
+++ b/gcc/testsuite/gfortran.dg/finalize_30.f90
@@ -10,6 +10,8 @@ module ct
    contains
      final :: aD
   end type
+  type, extends(a) :: a1
+  end type
 contains
   subroutine aD(self)
     type(a), intent(inout) :: self
-- 
cgit v1.1


From 31cfd832864298ea34c52625312e2a0ed0478e3d Mon Sep 17 00:00:00 2001
From: Thomas Koenig <tkoenig@gcc.gnu.org>
Date: Sat, 3 Dec 2016 09:44:35 +0000
Subject: re PR libfortran/78379 (Processor-specific versions for matmul)

2016-12-03  Thomas Koenig  <tkoenig@gcc.gnu.org>

        PR fortran/78379
        * config/i386/cpuinfo.c:  Move enums for processor vendors,
        processor type, processor subtypes and declaration of
        struct __processor_model into
        * config/i386/cpuinfo.h:  New header file.
        * Makefile.am:  Add dependence on m4/matmul_internal.m4 to
        matmul files.
        * Makefile.in:  Regenerated.
        * acinclude.m4:  Check for AVX, AVX2 and AVX512F.
        * config.h.in:  Add HAVE_AVX, HAVE_AVX2 and HAVE_AVX512F.
        * configure:  Regenerated.
        * configure.ac:  Use checks for AVX, AVX2 and AVX_512F.
        * m4/matmul_internal.m4:  New file. working part of matmul.m4.
        * m4/matmul.m4:  Implement architecture-specific switching
        for AVX, AVX2 and AVX512F by including matmul_internal.m4
        multiple times.
        * generated/matmul_c10.c: Regenerated.
        * generated/matmul_c16.c: Regenerated.
        * generated/matmul_c4.c: Regenerated.
        * generated/matmul_c8.c: Regenerated.
        * generated/matmul_i1.c: Regenerated.
        * generated/matmul_i16.c: Regenerated.
        * generated/matmul_i2.c: Regenerated.
        * generated/matmul_i4.c: Regenerated.
        * generated/matmul_i8.c: Regenerated.
        * generated/matmul_r10.c: Regenerated.
        * generated/matmul_r16.c: Regenerated.
        * generated/matmul_r4.c: Regenerated.
        * generated/matmul_r8.c: Regenerated.

From-SVN: r243219
---
 libgcc/ChangeLog                   |    8 +
 libgcc/config/i386/cpuinfo.c       |   92 +-
 libgcc/config/i386/cpuinfo.h       |  116 ++
 libgfortran/ChangeLog              |   28 +
 libgfortran/Makefile.am            |    2 +-
 libgfortran/Makefile.in            |    2 +-
 libgfortran/acinclude.m4           |   51 +
 libgfortran/config.h.in            |    9 +
 libgfortran/configure              |   87 ++
 libgfortran/configure.ac           |    9 +
 libgfortran/generated/matmul_c10.c | 2233 +++++++++++++++++++++++++++++++++++
 libgfortran/generated/matmul_c16.c | 2233 +++++++++++++++++++++++++++++++++++
 libgfortran/generated/matmul_c4.c  | 2233 +++++++++++++++++++++++++++++++++++
 libgfortran/generated/matmul_c8.c  | 2233 +++++++++++++++++++++++++++++++++++
 libgfortran/generated/matmul_i1.c  | 2233 +++++++++++++++++++++++++++++++++++
 libgfortran/generated/matmul_i16.c | 2233 +++++++++++++++++++++++++++++++++++
 libgfortran/generated/matmul_i2.c  | 2233 +++++++++++++++++++++++++++++++++++
 libgfortran/generated/matmul_i4.c  | 2233 +++++++++++++++++++++++++++++++++++
 libgfortran/generated/matmul_i8.c  | 2233 +++++++++++++++++++++++++++++++++++
 libgfortran/generated/matmul_r10.c | 2237 ++++++++++++++++++++++++++++++++++++
 libgfortran/generated/matmul_r16.c | 2237 ++++++++++++++++++++++++++++++++++++
 libgfortran/generated/matmul_r4.c  | 2237 ++++++++++++++++++++++++++++++++++++
 libgfortran/generated/matmul_r8.c  | 2237 ++++++++++++++++++++++++++++++++++++
 libgfortran/m4/matmul.m4           |  596 ++--------
 libgfortran/m4/matmul_internal.m4  |  537 +++++++++
 25 files changed, 29976 insertions(+), 606 deletions(-)
 create mode 100644 libgcc/config/i386/cpuinfo.h
 create mode 100644 libgfortran/m4/matmul_internal.m4

diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index 6627d26..efadedf 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,11 @@
+2016-12-03  Thomas Koenig  <tkoenig@gcc.gnu.org>
+
+	PR fortran/78379
+	* config/i386/cpuinfo.c:  Move enums for processor vendors,
+	processor type, processor subtypes and declaration of
+	struct __processor_model into
+	* config/i386/cpuinfo.h:  New header file.
+
 2016-12-02  Andre Vieira  <andre.simoesdiasvieira@arm.com>
 	    Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
diff --git a/libgcc/config/i386/cpuinfo.c b/libgcc/config/i386/cpuinfo.c
index 4a0ad25..9f30cb8 100644
--- a/libgcc/config/i386/cpuinfo.c
+++ b/libgcc/config/i386/cpuinfo.c
@@ -26,6 +26,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #include "cpuid.h"
 #include "tsystem.h"
 #include "auto-target.h"
+#include "cpuinfo.h"
 
 #ifdef HAVE_INIT_PRIORITY
 #define CONSTRUCTOR_PRIORITY (101)
@@ -36,97 +37,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 int __cpu_indicator_init (void)
   __attribute__ ((constructor CONSTRUCTOR_PRIORITY));
 
-/* Processor Vendor and Models. */
 
-enum processor_vendor
-{
-  VENDOR_INTEL = 1,
-  VENDOR_AMD,
-  VENDOR_OTHER,
-  VENDOR_MAX
-};
-
-/* Any new types or subtypes have to be inserted at the end. */
-
-enum processor_types
-{
-  INTEL_BONNELL = 1,
-  INTEL_CORE2,
-  INTEL_COREI7,
-  AMDFAM10H,
-  AMDFAM15H,
-  INTEL_SILVERMONT,
-  INTEL_KNL,
-  AMD_BTVER1,
-  AMD_BTVER2,  
-  AMDFAM17H,
-  CPU_TYPE_MAX
-};
-
-enum processor_subtypes
-{
-  INTEL_COREI7_NEHALEM = 1,
-  INTEL_COREI7_WESTMERE,
-  INTEL_COREI7_SANDYBRIDGE,
-  AMDFAM10H_BARCELONA,
-  AMDFAM10H_SHANGHAI,
-  AMDFAM10H_ISTANBUL,
-  AMDFAM15H_BDVER1,
-  AMDFAM15H_BDVER2,
-  AMDFAM15H_BDVER3,
-  AMDFAM15H_BDVER4,
-  AMDFAM17H_ZNVER1,
-  INTEL_COREI7_IVYBRIDGE,
-  INTEL_COREI7_HASWELL,
-  INTEL_COREI7_BROADWELL,
-  INTEL_COREI7_SKYLAKE,
-  INTEL_COREI7_SKYLAKE_AVX512,
-  CPU_SUBTYPE_MAX
-};
-
-/* ISA Features supported. New features have to be inserted at the end.  */
-
-enum processor_features
-{
-  FEATURE_CMOV = 0,
-  FEATURE_MMX,
-  FEATURE_POPCNT,
-  FEATURE_SSE,
-  FEATURE_SSE2,
-  FEATURE_SSE3,
-  FEATURE_SSSE3,
-  FEATURE_SSE4_1,
-  FEATURE_SSE4_2,
-  FEATURE_AVX,
-  FEATURE_AVX2,
-  FEATURE_SSE4_A,
-  FEATURE_FMA4,
-  FEATURE_XOP,
-  FEATURE_FMA,
-  FEATURE_AVX512F,
-  FEATURE_BMI,
-  FEATURE_BMI2,
-  FEATURE_AES,
-  FEATURE_PCLMUL,
-  FEATURE_AVX512VL,
-  FEATURE_AVX512BW,
-  FEATURE_AVX512DQ,
-  FEATURE_AVX512CD,
-  FEATURE_AVX512ER,
-  FEATURE_AVX512PF,
-  FEATURE_AVX512VBMI,
-  FEATURE_AVX512IFMA,
-  FEATURE_AVX5124VNNIW,
-  FEATURE_AVX5124FMAPS
-};
-
-struct __processor_model
-{
-  unsigned int __cpu_vendor;
-  unsigned int __cpu_type;
-  unsigned int __cpu_subtype;
-  unsigned int __cpu_features[1];
-} __cpu_model = { };
+struct __processor_model __cpu_model = { };
 
 
 /* Get the specific type of AMD CPU.  */
diff --git a/libgcc/config/i386/cpuinfo.h b/libgcc/config/i386/cpuinfo.h
new file mode 100644
index 0000000..cf848e6
--- /dev/null
+++ b/libgcc/config/i386/cpuinfo.h
@@ -0,0 +1,116 @@
+/* Get CPU type and Features for x86 processors.
+   Copyright (C) 2012-2016 Free Software Foundation, Inc.
+   Contributed by Sriraman Tallam (tmsriram@google.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/* Processor Vendor and Models. */
+
+enum processor_vendor
+{
+  VENDOR_INTEL = 1,
+  VENDOR_AMD,
+  VENDOR_OTHER,
+  VENDOR_MAX
+};
+
+/* Any new types or subtypes have to be inserted at the end. */
+
+enum processor_types
+{
+  INTEL_BONNELL = 1,
+  INTEL_CORE2,
+  INTEL_COREI7,
+  AMDFAM10H,
+  AMDFAM15H,
+  INTEL_SILVERMONT,
+  INTEL_KNL,
+  AMD_BTVER1,
+  AMD_BTVER2,  
+  AMDFAM17H,
+  CPU_TYPE_MAX
+};
+
+enum processor_subtypes
+{
+  INTEL_COREI7_NEHALEM = 1,
+  INTEL_COREI7_WESTMERE,
+  INTEL_COREI7_SANDYBRIDGE,
+  AMDFAM10H_BARCELONA,
+  AMDFAM10H_SHANGHAI,
+  AMDFAM10H_ISTANBUL,
+  AMDFAM15H_BDVER1,
+  AMDFAM15H_BDVER2,
+  AMDFAM15H_BDVER3,
+  AMDFAM15H_BDVER4,
+  AMDFAM17H_ZNVER1,
+  INTEL_COREI7_IVYBRIDGE,
+  INTEL_COREI7_HASWELL,
+  INTEL_COREI7_BROADWELL,
+  INTEL_COREI7_SKYLAKE,
+  INTEL_COREI7_SKYLAKE_AVX512,
+  CPU_SUBTYPE_MAX
+};
+
+/* ISA Features supported. New features have to be inserted at the end.  */
+
+enum processor_features
+{
+  FEATURE_CMOV = 0,
+  FEATURE_MMX,
+  FEATURE_POPCNT,
+  FEATURE_SSE,
+  FEATURE_SSE2,
+  FEATURE_SSE3,
+  FEATURE_SSSE3,
+  FEATURE_SSE4_1,
+  FEATURE_SSE4_2,
+  FEATURE_AVX,
+  FEATURE_AVX2,
+  FEATURE_SSE4_A,
+  FEATURE_FMA4,
+  FEATURE_XOP,
+  FEATURE_FMA,
+  FEATURE_AVX512F,
+  FEATURE_BMI,
+  FEATURE_BMI2,
+  FEATURE_AES,
+  FEATURE_PCLMUL,
+  FEATURE_AVX512VL,
+  FEATURE_AVX512BW,
+  FEATURE_AVX512DQ,
+  FEATURE_AVX512CD,
+  FEATURE_AVX512ER,
+  FEATURE_AVX512PF,
+  FEATURE_AVX512VBMI,
+  FEATURE_AVX512IFMA,
+  FEATURE_AVX5124VNNIW,
+  FEATURE_AVX5124FMAPS
+};
+
+extern struct __processor_model
+{
+  unsigned int __cpu_vendor;
+  unsigned int __cpu_type;
+  unsigned int __cpu_subtype;
+  unsigned int __cpu_features[1];
+} __cpu_model;
diff --git a/libgfortran/ChangeLog b/libgfortran/ChangeLog
index d3966f5..03ff063 100644
--- a/libgfortran/ChangeLog
+++ b/libgfortran/ChangeLog
@@ -1,3 +1,31 @@
+2016-12-03  Thomas Koenig  <tkoenig@gcc.gnu.org>
+
+	PR fortran/78379
+	* Makefile.am:  Add dependence on m4/matmul_internal.m4 to
+	matmul files.
+	* Makefile.in:  Regenerated.
+	* acinclude.m4:  Check for AVX, AVX2 and AVX512F.
+	* config.h.in:  Add HAVE_AVX, HAVE_AVX2 and HAVE_AVX512F.
+	* configure:  Regenerated.
+	* configure.ac:  Use checks for AVX, AVX2 and AVX_512F.
+	* m4/matmul_internal.m4:  New file. working part of matmul.m4.
+	* m4/matmul.m4:  Implement architecture-specific switching
+	for AVX, AVX2 and AVX512F by including matmul_internal.m4
+	multiple times.
+	* generated/matmul_c10.c: Regenerated.
+        * generated/matmul_c16.c: Regenerated.
+        * generated/matmul_c4.c: Regenerated.
+        * generated/matmul_c8.c: Regenerated.
+        * generated/matmul_i1.c: Regenerated.
+        * generated/matmul_i16.c: Regenerated.
+        * generated/matmul_i2.c: Regenerated.
+        * generated/matmul_i4.c: Regenerated.
+        * generated/matmul_i8.c: Regenerated.
+        * generated/matmul_r10.c: Regenerated.
+        * generated/matmul_r16.c: Regenerated.
+        * generated/matmul_r4.c: Regenerated.
+        * generated/matmul_r8.c: Regenerated.
+
 2016-11-30  Andre Vehreschild  <vehre@gcc.gnu.org>
 
 	* caf/single.c (_gfortran_caf_get_by_ref): Prevent compile time
diff --git a/libgfortran/Makefile.am b/libgfortran/Makefile.am
index 3db52b8..6137d88 100644
--- a/libgfortran/Makefile.am
+++ b/libgfortran/Makefile.am
@@ -987,7 +987,7 @@ $(i_product_c): m4/product.m4 $(I_M4_DEPS1)
 $(i_sum_c): m4/sum.m4 $(I_M4_DEPS1)
 	$(M4) -Dfile=$@ -I$(srcdir)/m4 sum.m4 > $@
 
-$(i_matmul_c): m4/matmul.m4 $(I_M4_DEPS)
+$(i_matmul_c): m4/matmul.m4 m4/matmul_internal.m4 $(I_M4_DEPS)
 	$(M4) -Dfile=$@ -I$(srcdir)/m4 matmul.m4 > $@
 
 $(i_matmull_c): m4/matmull.m4 $(I_M4_DEPS)
diff --git a/libgfortran/Makefile.in b/libgfortran/Makefile.in
index f7b34b9..4d95723 100644
--- a/libgfortran/Makefile.in
+++ b/libgfortran/Makefile.in
@@ -6053,7 +6053,7 @@ fpu-target.inc: fpu-target.h $(srcdir)/libgfortran.h
 @MAINTAINER_MODE_TRUE@$(i_sum_c): m4/sum.m4 $(I_M4_DEPS1)
 @MAINTAINER_MODE_TRUE@	$(M4) -Dfile=$@ -I$(srcdir)/m4 sum.m4 > $@
 
-@MAINTAINER_MODE_TRUE@$(i_matmul_c): m4/matmul.m4 $(I_M4_DEPS)
+@MAINTAINER_MODE_TRUE@$(i_matmul_c): m4/matmul.m4 m4/matmul_internal.m4 $(I_M4_DEPS)
 @MAINTAINER_MODE_TRUE@	$(M4) -Dfile=$@ -I$(srcdir)/m4 matmul.m4 > $@
 
 @MAINTAINER_MODE_TRUE@$(i_matmull_c): m4/matmull.m4 $(I_M4_DEPS)
diff --git a/libgfortran/acinclude.m4 b/libgfortran/acinclude.m4
index 7280bc3..9a7f461 100644
--- a/libgfortran/acinclude.m4
+++ b/libgfortran/acinclude.m4
@@ -393,3 +393,54 @@ AC_DEFUN([LIBGFOR_CHECK_STRERROR_R], [
 		  [Define if strerror_r takes two arguments and is available in <string.h>.]),)
   CFLAGS="$ac_save_CFLAGS"
 ])
+
+dnl Check for AVX
+
+AC_DEFUN([LIBGFOR_CHECK_AVX], [
+  ac_save_CFLAGS="$CFLAGS"
+  CFLAGS="-O2 -mavx"
+  AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+  void _mm256_zeroall (void)
+        {
+           __builtin_ia32_vzeroall ();
+        }]], [[]])],
+	AC_DEFINE(HAVE_AVX, 1,
+	[Define if AVX instructions can be compiled.]),
+	[])
+  CFLAGS="$ac_save_CFLAGS"
+])
+
+dnl Check for AVX2
+
+AC_DEFUN([LIBGFOR_CHECK_AVX2], [
+  ac_save_CFLAGS="$CFLAGS"
+  CFLAGS="-O2 -mavx2"
+  AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+  typedef long long __v4di __attribute__ ((__vector_size__ (32)));
+	__v4di
+	mm256_is32_andnotsi256  (__v4di __X, __v4di __Y)
+        {
+	   return __builtin_ia32_andnotsi256 (__X, __Y);
+        }]], [[]])],
+	AC_DEFINE(HAVE_AVX2, 1,
+	[Define if AVX2 instructions can be compiled.]),
+	[])
+  CFLAGS="$ac_save_CFLAGS"
+])
+
+dnl Check for AVX512f
+
+AC_DEFUN([LIBGFOR_CHECK_AVX512F], [
+  ac_save_CFLAGS="$CFLAGS"
+  CFLAGS="-O2 -mavx512f"
+  AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
+	typedef double __m512d __attribute__ ((__vector_size__ (64)));
+	__m512d _mm512_add (__m512d a)
+	{
+	  return __builtin_ia32_addpd512_mask (a, a, a, 1, 4);
+        }]], [[]])],
+	AC_DEFINE(HAVE_AVX512F, 1,
+	[Define if AVX512f instructions can be compiled.]),
+	[])
+  CFLAGS="$ac_save_CFLAGS"
+])
diff --git a/libgfortran/config.h.in b/libgfortran/config.h.in
index 22449e6..b762d099 100644
--- a/libgfortran/config.h.in
+++ b/libgfortran/config.h.in
@@ -78,6 +78,15 @@
 /* Define to 1 if the target supports __attribute__((visibility(...))). */
 #undef HAVE_ATTRIBUTE_VISIBILITY
 
+/* Define if AVX instructions can be compiled. */
+#undef HAVE_AVX
+
+/* Define if AVX2 instructions can be compiled. */
+#undef HAVE_AVX2
+
+/* Define if AVX512f instructions can be compiled. */
+#undef HAVE_AVX512F
+
 /* Define to 1 if you have the `cabs' function. */
 #undef HAVE_CABS
 
diff --git a/libgfortran/configure b/libgfortran/configure
index c052027..45ef935 100755
--- a/libgfortran/configure
+++ b/libgfortran/configure
@@ -26174,6 +26174,93 @@ $as_echo "#define HAVE_CRLF 1" >>confdefs.h
 
 fi
 
+# Check whether we support AVX extensions
+
+  ac_save_CFLAGS="$CFLAGS"
+  CFLAGS="-O2 -mavx"
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+  void _mm256_zeroall (void)
+        {
+           __builtin_ia32_vzeroall ();
+        }
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+$as_echo "#define HAVE_AVX 1" >>confdefs.h
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+  CFLAGS="$ac_save_CFLAGS"
+
+
+# Check whether we support AVX2 extensions
+
+  ac_save_CFLAGS="$CFLAGS"
+  CFLAGS="-O2 -mavx2"
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+  typedef long long __v4di __attribute__ ((__vector_size__ (32)));
+	__v4di
+	mm256_is32_andnotsi256  (__v4di __X, __v4di __Y)
+        {
+	   return __builtin_ia32_andnotsi256 (__X, __Y);
+        }
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+$as_echo "#define HAVE_AVX2 1" >>confdefs.h
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+  CFLAGS="$ac_save_CFLAGS"
+
+
+# Check whether we support AVX512f extensions
+
+  ac_save_CFLAGS="$CFLAGS"
+  CFLAGS="-O2 -mavx512f"
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+	typedef double __m512d __attribute__ ((__vector_size__ (64)));
+	__m512d _mm512_add (__m512d a)
+	{
+	  return __builtin_ia32_addpd512_mask (a, a, a, 1, 4);
+        }
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+$as_echo "#define HAVE_AVX512F 1" >>confdefs.h
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+  CFLAGS="$ac_save_CFLAGS"
+
+
 cat >confcache <<\_ACEOF
 # This file is a shell script that caches the results of configure
 # tests run on this system so they can be shared between configure
diff --git a/libgfortran/configure.ac b/libgfortran/configure.ac
index 3de737d..bb84baf 100644
--- a/libgfortran/configure.ac
+++ b/libgfortran/configure.ac
@@ -609,6 +609,15 @@ LIBGFOR_CHECK_UNLINK_OPEN_FILE
 # Check whether line terminator is LF or CRLF
 LIBGFOR_CHECK_CRLF
 
+# Check whether we support AVX extensions
+LIBGFOR_CHECK_AVX
+
+# Check whether we support AVX2 extensions
+LIBGFOR_CHECK_AVX2
+
+# Check whether we support AVX512f extensions
+LIBGFOR_CHECK_AVX512F
+
 AC_CACHE_SAVE
 
 if test ${multilib} = yes; then
diff --git a/libgfortran/generated/matmul_c10.c b/libgfortran/generated/matmul_c10.c
index c784a26..bf40e37 100644
--- a/libgfortran/generated/matmul_c10.c
+++ b/libgfortran/generated/matmul_c10.c
@@ -75,6 +75,2233 @@ extern void matmul_c10 (gfc_array_c10 * const restrict retarray,
 	int blas_limit, blas_call gemm);
 export_proto(matmul_c10);
 
+
+
+
+/* Put exhaustive list of possible architectures here, ORed together.  */
+
+#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
+
+#ifdef HAVE_AVX
+static void
+matmul_c10_avx (gfc_array_c10 * const restrict retarray, 
+	gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
+static void
+matmul_c10_avx (gfc_array_c10 * const restrict retarray, 
+	gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_COMPLEX_10 * restrict abase;
+  const GFC_COMPLEX_10 * restrict bbase;
+  GFC_COMPLEX_10 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_10));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 in case it is we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_COMPLEX_10 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_COMPLEX_10 *a, *b;
+      GFC_COMPLEX_10 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_COMPLEX_10 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_COMPLEX_10 *restrict abase_x;
+	  const GFC_COMPLEX_10 *restrict bbase_y;
+	  GFC_COMPLEX_10 *restrict dest_y;
+	  GFC_COMPLEX_10 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_COMPLEX_10) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_COMPLEX_10 *restrict bbase_y;
+	  GFC_COMPLEX_10 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_COMPLEX_10) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_COMPLEX_10 *restrict bbase_y;
+      GFC_COMPLEX_10 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_COMPLEX_10) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_COMPLEX_10 *restrict abase_x;
+      const GFC_COMPLEX_10 *restrict bbase_y;
+      GFC_COMPLEX_10 *restrict dest_y;
+      GFC_COMPLEX_10 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_COMPLEX_10) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX */
+
+#ifdef HAVE_AVX2
+static void  /* Prototype carrying the "avx2" target attribute.  */
+matmul_c10_avx2 (gfc_array_c10 * const restrict retarray, 
+	gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+static void  /* MATMUL kernel for GFC_COMPLEX_10: retarray = a * b.  */
+matmul_c10_avx2 (gfc_array_c10 * const restrict retarray, 
+	gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{  /* try_blas, blas_limit and gemm govern the optional external gemm call.  */
+  const GFC_COMPLEX_10 * restrict abase;
+  const GFC_COMPLEX_10 * restrict bbase;
+  GFC_COMPLEX_10 * restrict dest;
+  /* Strides and extents below are used as element (not byte) indices.  */
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+  /* At least one operand must be rank 2.  */
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+  /* No result storage yet: derive its bounds from a and b and allocate.  */
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_10));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+      /* Result was supplied: check its extents against a and b.  */
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+  /* Strides of the result.  */
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+  /* Strides and extents of a.  */
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  /* Inner extents must match, unless neither of them is positive.  */
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+  /* Strides and extents of b.  */
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 in case it is we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+  /* Element pointers of the operands and of the result.  */
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* Optionally delegate large unit-stride problems to the supplied gemm.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_COMPLEX_10 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+      /* gemm is only called for positive leading dimensions and m, n, k > 1.  */
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+  /* Unit first-dimension strides everywhere: use the blocked kernel.  */
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_COMPLEX_10 *a, *b;
+      GFC_COMPLEX_10 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_COMPLEX_10 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: bias the pointers so indices start at 1.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
+
+      /* Start turning the crank: jj walks the columns of c, 512 at a time.  */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;  /* ll walks the inner dimension, 256 at a time.  */
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+	      /* ii walks the rows of c, 256 at a time.  */
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;  /* isec rounded down to even */
+		  i4 = ll + ulsec - 1;  /* t1[(i-ii)*256 + (l-ll)] = a(i,l) */
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)  /* odd last row of the block */
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)  /* odd last inner index of the block */
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* Accumulate t1 * b into c: 4x4 tiles first.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)  /* leftover rows of this block */
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)  /* leftover columns of this block */
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_COMPLEX_10 *restrict abase_x;
+	  const GFC_COMPLEX_10 *restrict bbase_y;
+	  GFC_COMPLEX_10 *restrict dest_y;
+	  GFC_COMPLEX_10 s;
+	  /* One dot product of length count per element of dest.  */
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_COMPLEX_10) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_COMPLEX_10 *restrict bbase_y;
+	  GFC_COMPLEX_10 s;
+	  /* Rank-1 a: one dot product per column of b.  */
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_COMPLEX_10) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {  /* Zero dest, then accumulate with x as the innermost loop.  */
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_COMPLEX_10 *restrict bbase_y;
+      GFC_COMPLEX_10 s;
+      /* Rank-1 a: one strided dot product per column of b.  */
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_COMPLEX_10) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_COMPLEX_10 *restrict abase_x;
+      const GFC_COMPLEX_10 *restrict bbase_y;
+      GFC_COMPLEX_10 *restrict dest_y;
+      GFC_COMPLEX_10 s;
+      /* Fully general strides: one dot product per element of dest.  */
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_COMPLEX_10) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX2 */
+
+#ifdef HAVE_AVX512F
+static void
+matmul_c10_avx512f (gfc_array_c10 * const restrict retarray, 
+	gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
+static void
+matmul_c10_avx512f (gfc_array_c10 * const restrict retarray, 
+	gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_COMPLEX_10 * restrict abase;
+  const GFC_COMPLEX_10 * restrict bbase;
+  GFC_COMPLEX_10 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_10));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 in case it is we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_COMPLEX_10 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_COMPLEX_10 *a, *b;
+      GFC_COMPLEX_10 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_COMPLEX_10 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_COMPLEX_10 *restrict abase_x;
+	  const GFC_COMPLEX_10 *restrict bbase_y;
+	  GFC_COMPLEX_10 *restrict dest_y;
+	  GFC_COMPLEX_10 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_COMPLEX_10) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_COMPLEX_10 *restrict bbase_y;
+	  GFC_COMPLEX_10 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_COMPLEX_10) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_COMPLEX_10 *restrict bbase_y;
+      GFC_COMPLEX_10 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_COMPLEX_10) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_COMPLEX_10 *restrict abase_x;
+      const GFC_COMPLEX_10 *restrict bbase_y;
+      GFC_COMPLEX_10 *restrict dest_y;
+      GFC_COMPLEX_10 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_COMPLEX_10) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif  /* HAVE_AVX512F */
+
+/* Fallback MATMUL kernel for GFC_COMPLEX_10: computes retarray = a * b when no processor-specific version is used.  */
+static void
+matmul_c10_vanilla (gfc_array_c10 * const restrict retarray, 
+	gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_COMPLEX_10 * restrict abase;
+  const GFC_COMPLEX_10 * restrict bbase;
+  GFC_COMPLEX_10 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)  /* Result not allocated yet: set its bounds and allocate it.  */
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_10));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))  /* Caller supplied the result: verify its extents.  */
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)  /* A mismatch is tolerated when neither extent is positive.  */
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for one-dimensional b.  If it
+	 is used anyway, the poison value is meant to cause a segfault
+	 rather than an incorrect result.  */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* Use the caller-supplied gemm when try_blas is set, the strides allow it, and xcount*ycount*count exceeds blas_limit cubed.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_COMPLEX_10 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)  /* Otherwise fall through to the code below.  */
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_COMPLEX_10 *a, *b;  /* These shadow the descriptor parameters a and b inside this block.  */
+      GFC_COMPLEX_10 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_COMPLEX_10 t1[65536], /* was [256][256]; NOTE(review): large automatic array, confirm stack use is acceptable */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: bias the pointers so that the 1-based indices used below address the arrays.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Zero c first; the blocks below accumulate into it.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_10)0;
+
+      /* Blocked loops: jj over columns of c in steps of 512, ll over the inner dimension and ii over rows in steps of 256.  */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;  /* jsec rounded down to a multiple of 4.  */
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;  /* lsec rounded down to a multiple of 2.  */
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;  /* isec rounded down to a multiple of 2.  */
+		  i4 = ll + ulsec - 1;  /* Copy the current block of a into t1 at index (i - ii) * 256 + (l - ll).  */
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)  /* Odd isec: copy the last row of the block.  */
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)  /* Odd lsec: copy the last column of the block.  */
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+
+		  uisec = isec - isec % 4;  /* From here on: isec rounded down to a multiple of 4.  */
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)  /* Four columns of c per iteration.  */
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)  /* 4x4 tile of c held in f11..f44.  */
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)  /* Leftover rows of the block, still four columns at a time.  */
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)  /* Leftover columns of the block, one at a time.  */
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)  /* Here a has unit stride along its second dimension.  */
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_COMPLEX_10 *restrict abase_x;
+	  const GFC_COMPLEX_10 *restrict bbase_y;
+	  GFC_COMPLEX_10 *restrict dest_y;
+	  GFC_COMPLEX_10 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_COMPLEX_10) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_COMPLEX_10 *restrict bbase_y;
+	  GFC_COMPLEX_10 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_COMPLEX_10) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_COMPLEX_10 *restrict bbase_y;
+      GFC_COMPLEX_10 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_COMPLEX_10) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else  /* General case: every access uses explicit strides.  */
+    {
+      const GFC_COMPLEX_10 *restrict abase_x;
+      const GFC_COMPLEX_10 *restrict bbase_y;
+      GFC_COMPLEX_10 *restrict dest_y;
+      GFC_COMPLEX_10 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_COMPLEX_10) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+
+/* Compiling main function, with selection code for the processor.  */
+
+/* Currently, this is i386 only.  Adjust for other architectures.  */
+
+#include <config/i386/cpuinfo.h>
+void matmul_c10 (gfc_array_c10 * const restrict retarray, 
+	gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  static void (*matmul_p) (gfc_array_c10 * const restrict retarray, 
+	gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) = NULL;  /* NOTE(review): read and written with no synchronization -- confirm this is intended.  */
+  /* Pick a kernel on the first call, cache it in matmul_p, then forward every call to it.  */
+  if (matmul_p == NULL)
+    {
+      matmul_p = matmul_c10_vanilla;  /* Default when no variant below is chosen.  */
+      if (__cpu_model.__cpu_vendor == VENDOR_INTEL)  /* Variants are considered on Intel CPUs only.  */
+	{
+          /* Try the compiled-in variants from most to least preferred (AVX512F, AVX2, AVX); the first feature bit found set in __cpu_features[0] wins.  */
+#ifdef HAVE_AVX512F
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+	    {
+	      matmul_p = matmul_c10_avx512f;
+	      goto tailcall;
+	    }
+
+#endif  /* HAVE_AVX512F */
+
+#ifdef HAVE_AVX2
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+	    {
+	      matmul_p = matmul_c10_avx2;
+	      goto tailcall;
+	    }
+
+#endif
+
+#ifdef HAVE_AVX
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ 	    {
+              matmul_p = matmul_c10_avx;
+	      goto tailcall;
+	    }
+#endif  /* HAVE_AVX */
+        }
+   }
+
+tailcall:  /* Also reached by fall-through when no goto above was taken.  */
+   (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm);
+}
+
+#else  /* Just the vanilla function.  */
+
 void
 matmul_c10 (gfc_array_c10 * const restrict retarray, 
 	gfc_array_c10 * const restrict a, gfc_array_c10 * const restrict b, int try_blas,
@@ -607,4 +2834,10 @@ matmul_c10 (gfc_array_c10 * const restrict retarray,
 	}
     }
 }
+#undef POW3
+#undef min
+#undef max
+
 #endif
+#endif
+
diff --git a/libgfortran/generated/matmul_c16.c b/libgfortran/generated/matmul_c16.c
index 47e1bea..6e4545d 100644
--- a/libgfortran/generated/matmul_c16.c
+++ b/libgfortran/generated/matmul_c16.c
@@ -75,6 +75,2233 @@ extern void matmul_c16 (gfc_array_c16 * const restrict retarray,
 	int blas_limit, blas_call gemm);
 export_proto(matmul_c16);
 
+
+
+
+/* Put exhaustive list of possible architectures here, ORed together.  */
+
+#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
+
+#ifdef HAVE_AVX
+static void
+matmul_c16_avx (gfc_array_c16 * const restrict retarray, 
+	gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx")));  /* Forward declaration carrying the "avx" target attribute for the definition that follows.  */
+static void
+matmul_c16_avx (gfc_array_c16 * const restrict retarray, 
+	gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_COMPLEX_16 * restrict abase;
+  const GFC_COMPLEX_16 * restrict bbase;
+  GFC_COMPLEX_16 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_16));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 in case it is we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_COMPLEX_16 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_COMPLEX_16 *a, *b;
+      GFC_COMPLEX_16 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_COMPLEX_16 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_COMPLEX_16 *restrict abase_x;
+	  const GFC_COMPLEX_16 *restrict bbase_y;
+	  GFC_COMPLEX_16 *restrict dest_y;
+	  GFC_COMPLEX_16 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_COMPLEX_16) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_COMPLEX_16 *restrict bbase_y;
+	  GFC_COMPLEX_16 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_COMPLEX_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_COMPLEX_16 *restrict bbase_y;
+      GFC_COMPLEX_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_COMPLEX_16) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_COMPLEX_16 *restrict abase_x;
+      const GFC_COMPLEX_16 *restrict bbase_y;
+      GFC_COMPLEX_16 *restrict dest_y;
+      GFC_COMPLEX_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_COMPLEX_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX */
+
+#ifdef HAVE_AVX2
+static void
+matmul_c16_avx2 (gfc_array_c16 * const restrict retarray, 
+	gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+static void
+matmul_c16_avx2 (gfc_array_c16 * const restrict retarray, 
+	gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_COMPLEX_16 * restrict abase;
+  const GFC_COMPLEX_16 * restrict bbase;
+  GFC_COMPLEX_16 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* Built for the "avx2" target.  Computes
+   C[xcount,ycount] = A[xcount, count] * B[count,ycount] into RETARRAY,
+   allocating it when base_addr is NULL.  TRY_BLAS, BLAS_LIMIT and GEMM
+   control the optional hand-off to an external GEMM routine.
+   Either A or B (but not both) can be rank 1:
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_16));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count].  */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1].  */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault rather than
+	 an incorrect result.  */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself: GEMM if eligible, else the tuned kernel or plain loops.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_COMPLEX_16 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_COMPLEX_16 *a, *b;
+      GFC_COMPLEX_16 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables: t1 caches a tile of a, fXY accumulate c.  */
+      GFC_COMPLEX_16 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: bias pointers for 1-based (i,j) indexing.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if any extent is zero; c is then left untouched.  */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Zero c first; the tiles below accumulate into it.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
+
+      /* Tile loops: jj over columns of c, ll over k, ii over rows of c.  */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* jsec: columns in this tile (<= 512); ujsec: multiple of 4.  */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* lsec: k-entries in this tile (<= 256); ulsec: even part.  */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* isec: rows in this tile (<= 256); uisec: even part.  */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)  /* Pack a's tile into t1.  */
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)  /* Odd lsec: pack the last l too.  */
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+
+		  uisec = isec - isec % 4;  /* Now a multiple of 4.  */
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)  /* 4x4 blocks of c.  */
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)  /* Leftover rows.  */
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)  /* Leftover columns.  */
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_COMPLEX_16 *restrict abase_x;
+	  const GFC_COMPLEX_16 *restrict bbase_y;
+	  GFC_COMPLEX_16 *restrict dest_y;
+	  GFC_COMPLEX_16 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_COMPLEX_16) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_COMPLEX_16 *restrict bbase_y;
+	  GFC_COMPLEX_16 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_COMPLEX_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_COMPLEX_16 *restrict bbase_y;
+      GFC_COMPLEX_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_COMPLEX_16) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_COMPLEX_16 *restrict abase_x;
+      const GFC_COMPLEX_16 *restrict bbase_y;
+      GFC_COMPLEX_16 *restrict dest_y;
+      GFC_COMPLEX_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_COMPLEX_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX2 */
+
+#ifdef HAVE_AVX512F
+static void
+matmul_c16_avx512f (gfc_array_c16 * const restrict retarray, 
+	gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
+static void
+matmul_c16_avx512f (gfc_array_c16 * const restrict retarray, 
+	gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_COMPLEX_16 * restrict abase;
+  const GFC_COMPLEX_16 * restrict bbase;
+  GFC_COMPLEX_16 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_16));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 in case it is we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_COMPLEX_16 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_COMPLEX_16 *a, *b;
+      GFC_COMPLEX_16 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_COMPLEX_16 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_COMPLEX_16 *restrict abase_x;
+	  const GFC_COMPLEX_16 *restrict bbase_y;
+	  GFC_COMPLEX_16 *restrict dest_y;
+	  GFC_COMPLEX_16 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_COMPLEX_16) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_COMPLEX_16 *restrict bbase_y;
+	  GFC_COMPLEX_16 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_COMPLEX_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_COMPLEX_16 *restrict bbase_y;
+      GFC_COMPLEX_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_COMPLEX_16) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_COMPLEX_16 *restrict abase_x;
+      const GFC_COMPLEX_16 *restrict bbase_y;
+      GFC_COMPLEX_16 *restrict dest_y;
+      GFC_COMPLEX_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_COMPLEX_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif  /* HAVE_AVX512F */
+
+/* Function to fall back to if there is no special processor-specific version.  */
+static void
+matmul_c16_vanilla (gfc_array_c16 * const restrict retarray, 
+	gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_COMPLEX_16 * restrict abase;
+  const GFC_COMPLEX_16 * restrict bbase;
+  GFC_COMPLEX_16 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+  /* RETARRAY receives A*B; TRY_BLAS, BLAS_LIMIT, GEMM control the BLAS path.  */
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+  /* No result storage yet: set up its bounds and allocate it.  */
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_16));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+  /* Fetch the result strides.  */
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+  /* Fetch the strides and extents of A.  */
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  /* The inner extents of A and B must agree, unless neither is positive.  */
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+  /* Fetch the strides and extents of B.  */
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault rather than
+	 an incorrect result.  */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* If requested, hand large, suitably strided problems to the external gemm.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_COMPLEX_16 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+  /* Otherwise pick one of the built-in kernels by stride pattern.  */
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_COMPLEX_16 *a, *b;
+      GFC_COMPLEX_16 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_COMPLEX_16 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_16)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+	      /* For each block of up to 256 rows, pack A into t1 first.  */
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* Update C in 4x4 tiles, then the leftover rows/columns.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_COMPLEX_16 *restrict abase_x;
+	  const GFC_COMPLEX_16 *restrict bbase_y;
+	  GFC_COMPLEX_16 *restrict dest_y;
+	  GFC_COMPLEX_16 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_COMPLEX_16) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_COMPLEX_16 *restrict bbase_y;
+	  GFC_COMPLEX_16 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_COMPLEX_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_COMPLEX_16 *restrict bbase_y;
+      GFC_COMPLEX_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_COMPLEX_16) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_COMPLEX_16 *restrict abase_x;
+      const GFC_COMPLEX_16 *restrict bbase_y;
+      GFC_COMPLEX_16 *restrict dest_y;
+      GFC_COMPLEX_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_COMPLEX_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+
+/* Compiling main function, with selection code for the processor.  */
+
+/* Currently, this is i386 only.  Adjust for other architectures.  */
+
+#include <config/i386/cpuinfo.h>
+/* Public entry point.  On the first call, pick the kernel to use (the
+   vanilla version unless an Intel CPU reports a compiled-in AVX, AVX2 or
+   AVX512F variant), cache it in matmul_p, and forward all arguments.  */
+void matmul_c16 (gfc_array_c16 * const restrict retarray, 
+	gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  /* Cached kernel; NULL until a first call has made its selection.  */
+  static void (*matmul_p) (gfc_array_c16 * const restrict retarray, 
+	gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) = NULL;
+  void (*matmul_fn) (gfc_array_c16 * const restrict retarray, 
+	gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm);
+
+  /* Select into a local and publish it with a single atomic store, so
+     concurrent first calls never race on the shared pointer.  */
+  matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
+  if (matmul_fn == NULL)
+    {
+      matmul_fn = matmul_c16_vanilla;
+      if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
+	{
+	  /* Test from least to most preferred; the last match wins.  */
+#ifdef HAVE_AVX
+	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+	    matmul_fn = matmul_c16_avx;
+#endif  /* HAVE_AVX */
+
+#ifdef HAVE_AVX2
+	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+	    matmul_fn = matmul_c16_avx2;
+#endif  /* HAVE_AVX2 */
+
+#ifdef HAVE_AVX512F
+	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+	    matmul_fn = matmul_c16_avx512f;
+#endif  /* HAVE_AVX512F */
+	}
+
+      __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
+    }
+
+  (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm);
+}
+
+#else  /* Just the vanilla function.  */
+
 void
 matmul_c16 (gfc_array_c16 * const restrict retarray, 
 	gfc_array_c16 * const restrict a, gfc_array_c16 * const restrict b, int try_blas,
@@ -607,4 +2834,10 @@ matmul_c16 (gfc_array_c16 * const restrict retarray,
 	}
     }
 }
+#undef POW3
+#undef min
+#undef max
+
 #endif
+#endif
+
diff --git a/libgfortran/generated/matmul_c4.c b/libgfortran/generated/matmul_c4.c
index 4eb1896..6f7d5c2 100644
--- a/libgfortran/generated/matmul_c4.c
+++ b/libgfortran/generated/matmul_c4.c
@@ -75,6 +75,2233 @@ extern void matmul_c4 (gfc_array_c4 * const restrict retarray,
 	int blas_limit, blas_call gemm);
 export_proto(matmul_c4);
 
+
+
+
+/* Put exhaustive list of possible architectures here, ORed together.  */
+
+#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
+
+#ifdef HAVE_AVX
+static void  /* Prototype carrying the __target__("avx") attribute for the definition below.  */
+matmul_c4_avx (gfc_array_c4 * const restrict retarray, 
+	gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
+static void
+matmul_c4_avx (gfc_array_c4 * const restrict retarray, 
+	gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_COMPLEX_4 * restrict abase;
+  const GFC_COMPLEX_4 * restrict bbase;
+  GFC_COMPLEX_4 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+   (C is retarray, A is a, B is b.)
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+  /* No result storage yet: set the bounds of retarray and allocate it.  */
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_4));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+  /* Fetch the strides of the result.  */
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+  /* Fetch the strides and extents of a.  */
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  /* A mismatch of the inner extents is tolerated only when neither of them is positive.  */
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+  /* Fetch the strides and extents of b.  */
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* If requested, hand products larger than blas_limit cubed to gemm when the strides allow it.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_COMPLEX_4 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+  /* Otherwise pick one of the internal loop nests below according to the strides.  */
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_COMPLEX_4 *a, *b;
+      GFC_COMPLEX_4 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_COMPLEX_4 t1[65536], /* was [256][256]; buffer for one tile of a */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: offset a, b and c so that the 1-based indices below work.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if any of the extents is zero.  */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Zero c first; the loops below accumulate into it.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
+
+      /* Blocked multiplication: jj steps over columns of c, ll over the inner dimension, ii over rows of c.  */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* jsec = min (512, columns left); ujsec is jsec rounded down to a multiple of 4.  */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* lsec = min (256, inner elements left); ulsec is lsec rounded down to a multiple of 2.  */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* isec = min (256, rows left); uisec is isec rounded down to a multiple of 2 (of 4 further down).  */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* The a tile is now in t1; update c in 4x4 blocks, then handle the leftover rows and columns.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_COMPLEX_4 *restrict abase_x;
+	  const GFC_COMPLEX_4 *restrict bbase_y;
+	  GFC_COMPLEX_4 *restrict dest_y;
+	  GFC_COMPLEX_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_COMPLEX_4) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_COMPLEX_4 *restrict bbase_y;
+	  GFC_COMPLEX_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_COMPLEX_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_COMPLEX_4 *restrict bbase_y;
+      GFC_COMPLEX_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_COMPLEX_4) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_COMPLEX_4 *restrict abase_x;
+      const GFC_COMPLEX_4 *restrict bbase_y;
+      GFC_COMPLEX_4 *restrict dest_y;
+      GFC_COMPLEX_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_COMPLEX_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX */
+
+#ifdef HAVE_AVX2
+static void  /* Prototype carrying the __target__("avx2") attribute for the definition below.  */
+matmul_c4_avx2 (gfc_array_c4 * const restrict retarray, 
+	gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+static void
+matmul_c4_avx2 (gfc_array_c4 * const restrict retarray, 
+	gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_COMPLEX_4 * restrict abase;
+  const GFC_COMPLEX_4 * restrict bbase;
+  GFC_COMPLEX_4 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+   (C is retarray, A is a, B is b.)
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+  /* No result storage yet: set the bounds of retarray and allocate it.  */
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_4));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+  /* Fetch the strides of the result.  */
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+  /* Fetch the strides and extents of a.  */
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  /* A mismatch of the inner extents is tolerated only when neither of them is positive.  */
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+  /* Fetch the strides and extents of b.  */
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* If requested, hand products larger than blas_limit cubed to gemm when the strides allow it.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_COMPLEX_4 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+  /* Otherwise pick one of the internal loop nests below according to the strides.  */
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_COMPLEX_4 *a, *b;
+      GFC_COMPLEX_4 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_COMPLEX_4 t1[65536], /* was [256][256]; buffer for one tile of a */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: offset a, b and c so that the 1-based indices below work.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if any of the extents is zero.  */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Zero c first; the loops below accumulate into it.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
+
+      /* Blocked multiplication: jj steps over columns of c, ll over the inner dimension, ii over rows of c.  */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* jsec = min (512, columns left); ujsec is jsec rounded down to a multiple of 4.  */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* lsec = min (256, inner elements left); ulsec is lsec rounded down to a multiple of 2.  */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* isec = min (256, rows left); uisec is isec rounded down to a multiple of 2 (of 4 further down).  */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* The a tile is now in t1; update c in 4x4 blocks, then handle the leftover rows and columns.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_COMPLEX_4 *restrict abase_x;
+	  const GFC_COMPLEX_4 *restrict bbase_y;
+	  GFC_COMPLEX_4 *restrict dest_y;
+	  GFC_COMPLEX_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_COMPLEX_4) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_COMPLEX_4 *restrict bbase_y;
+	  GFC_COMPLEX_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_COMPLEX_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_COMPLEX_4 *restrict bbase_y;
+      GFC_COMPLEX_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_COMPLEX_4) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_COMPLEX_4 *restrict abase_x;
+      const GFC_COMPLEX_4 *restrict bbase_y;
+      GFC_COMPLEX_4 *restrict dest_y;
+      GFC_COMPLEX_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_COMPLEX_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX2 */
+
+#ifdef HAVE_AVX512F
+static void
+matmul_c4_avx512f (gfc_array_c4 * const restrict retarray, 
+	gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
+static void
+matmul_c4_avx512f (gfc_array_c4 * const restrict retarray, 
+	gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_COMPLEX_4 * restrict abase;
+  const GFC_COMPLEX_4 * restrict bbase;
+  GFC_COMPLEX_4 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* Compute C[xcount,ycount] = A[xcount, count] * B[count,ycount] and store
+   it in RETARRAY, which is allocated here if its base_addr is NULL.
+
+   Either A or B (but not both) can be rank 1:
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+   GEMM is used when TRY_BLAS is set and the sizes/strides qualify.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_4));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+      /* Caller supplied the result: its extents must match A and B.  */
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  /* A's inner extent must equal B's first extent, unless neither is > 0.  */
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 in case it is we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* Hand large, suitably strided problems to the external GEMM.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_COMPLEX_4 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+      /* If any size is unsuitable, fall through to the loops below.  */
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_COMPLEX_4 *a, *b;
+      GFC_COMPLEX_4 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_COMPLEX_4 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: bias the pointers for 1-based (i,j) indexing.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
+
+      /* Start turning the crank: walk C in blocks of up to 512 columns.  */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN: number of columns in this block.  */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN: inner-dimension length of this block.  */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN: number of rows in this block.  */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* t1 holds this block of A, transposed; update C.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_COMPLEX_4 *restrict abase_x;
+	  const GFC_COMPLEX_4 *restrict bbase_y;
+	  GFC_COMPLEX_4 *restrict dest_y;
+	  GFC_COMPLEX_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_COMPLEX_4) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_COMPLEX_4 *restrict bbase_y;
+	  GFC_COMPLEX_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_COMPLEX_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_COMPLEX_4 *restrict bbase_y;
+      GFC_COMPLEX_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_COMPLEX_4) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_COMPLEX_4 *restrict abase_x;
+      const GFC_COMPLEX_4 *restrict bbase_y;
+      GFC_COMPLEX_4 *restrict dest_y;
+      GFC_COMPLEX_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_COMPLEX_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif  /* HAVE_AVX512F */
+
+/* Function to fall back to if there is no special processor-specific version.  */
+static void
+matmul_c4_vanilla (gfc_array_c4 * const restrict retarray, 
+	gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_COMPLEX_4 * restrict abase;
+  const GFC_COMPLEX_4 * restrict bbase;
+  GFC_COMPLEX_4 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* Compute C[xcount,ycount] = A[xcount, count] * B[count,ycount] and store
+   it in RETARRAY, which is allocated here if its base_addr is NULL.
+
+   Either A or B (but not both) can be rank 1:
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+   GEMM is used when TRY_BLAS is set and the sizes/strides qualify.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_4));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+      /* Caller supplied the result: its extents must match A and B.  */
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  /* A's inner extent must equal B's first extent, unless neither is > 0.  */
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 in case it is we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* Hand large, suitably strided problems to the external GEMM.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_COMPLEX_4 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+      /* If any size is unsuitable, fall through to the loops below.  */
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_COMPLEX_4 *a, *b;
+      GFC_COMPLEX_4 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_COMPLEX_4 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: bias the pointers for 1-based (i,j) indexing.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_4)0;
+
+      /* Start turning the crank: walk C in blocks of up to 512 columns.  */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN: number of columns in this block.  */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN: inner-dimension length of this block.  */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN: number of rows in this block.  */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* t1 holds this block of A, transposed; update C.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_COMPLEX_4 *restrict abase_x;
+	  const GFC_COMPLEX_4 *restrict bbase_y;
+	  GFC_COMPLEX_4 *restrict dest_y;
+	  GFC_COMPLEX_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_COMPLEX_4) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_COMPLEX_4 *restrict bbase_y;
+	  GFC_COMPLEX_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_COMPLEX_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_COMPLEX_4 *restrict bbase_y;
+      GFC_COMPLEX_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_COMPLEX_4) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_COMPLEX_4 *restrict abase_x;
+      const GFC_COMPLEX_4 *restrict bbase_y;
+      GFC_COMPLEX_4 *restrict dest_y;
+      GFC_COMPLEX_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_COMPLEX_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+
+/* Compiling main function, with selection code for the processor.  */
+
+/* Currently, this is i386 only.  Adjust for other architectures.  */
+
+#include <config/i386/cpuinfo.h>
+void matmul_c4 (gfc_array_c4 * const restrict retarray, 
+	gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  /* Pick the best kernel for this CPU on first use and cache it in matmul_p.
+     The choice is built in a local and published with a single atomic store,
+     so a concurrent caller sees either NULL or the final kernel.  */
+  static void (*matmul_p) (gfc_array_c4 * const restrict retarray, 
+	gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) = NULL;
+  __typeof__ (matmul_p) matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
+
+  if (matmul_fn == NULL)
+    {
+      matmul_fn = matmul_c4_vanilla;
+      if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
+	{
+	  /* Run down the available processors in order of preference.  */
+#ifdef HAVE_AVX512F
+	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+	    {
+	      matmul_fn = matmul_c4_avx512f;
+	      goto store;
+	    }
+#endif  /* HAVE_AVX512F */
+#ifdef HAVE_AVX2
+	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+	    {
+	      matmul_fn = matmul_c4_avx2;
+	      goto store;
+	    }
+#endif  /* HAVE_AVX2 */
+#ifdef HAVE_AVX
+	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+	    {
+	      matmul_fn = matmul_c4_avx;
+	      goto store;
+	    }
+#endif  /* HAVE_AVX */
+	}
+    store:
+      __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
+    }
+  (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm);
+}
+
+#else  /* Just the vanilla function.  */
+
 void
 matmul_c4 (gfc_array_c4 * const restrict retarray, 
 	gfc_array_c4 * const restrict a, gfc_array_c4 * const restrict b, int try_blas,
@@ -607,4 +2834,10 @@ matmul_c4 (gfc_array_c4 * const restrict retarray,
 	}
     }
 }
+#undef POW3
+#undef min
+#undef max
+
 #endif
+#endif
+
diff --git a/libgfortran/generated/matmul_c8.c b/libgfortran/generated/matmul_c8.c
index 2321b9e..06916c3 100644
--- a/libgfortran/generated/matmul_c8.c
+++ b/libgfortran/generated/matmul_c8.c
@@ -75,6 +75,2233 @@ extern void matmul_c8 (gfc_array_c8 * const restrict retarray,
 	int blas_limit, blas_call gemm);
 export_proto(matmul_c8);
 
+
+
+
+/* Put exhaustive list of possible architectures here, ORed together.  */
+
+#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
+
+#ifdef HAVE_AVX
+static void
+matmul_c8_avx (gfc_array_c8 * const restrict retarray, 
+	gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
+static void
+matmul_c8_avx (gfc_array_c8 * const restrict retarray, 
+	gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_COMPLEX_8 * restrict abase;
+  const GFC_COMPLEX_8 * restrict bbase;
+  GFC_COMPLEX_8 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* Kernel compiled for AVX (see the target attribute on the prototype above).
+   Computes C[xcount,ycount] = A[xcount, count] * B[count,ycount].
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+  /* No storage for the result yet: fill in its dimensions and allocate.  */
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_8));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1].  */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* Hand large unit-stride problems to the caller-supplied gemm if asked.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_COMPLEX_8 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+  /* Blocked kernel: first-dimension stride is 1 for result, A and B.  */
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_COMPLEX_8 *a, *b;
+      GFC_COMPLEX_8 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_COMPLEX_8 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: bias the pointers for 1-based indexing.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first; the blocks below accumulate into it.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN: jsec = columns left in this block, at most 512.  */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN: lsec = inner-dimension slice, at most 256.  */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN: isec = rows in this block, at most 256.  */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* t1 now holds this block of A transposed; update c from it.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_COMPLEX_8 *restrict abase_x;
+	  const GFC_COMPLEX_8 *restrict bbase_y;
+	  GFC_COMPLEX_8 *restrict dest_y;
+	  GFC_COMPLEX_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_COMPLEX_8) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_COMPLEX_8 *restrict bbase_y;
+	  GFC_COMPLEX_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_COMPLEX_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_COMPLEX_8 *restrict bbase_y;
+      GFC_COMPLEX_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_COMPLEX_8) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_COMPLEX_8 *restrict abase_x;
+      const GFC_COMPLEX_8 *restrict bbase_y;
+      GFC_COMPLEX_8 *restrict dest_y;
+      GFC_COMPLEX_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_COMPLEX_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX */
+
+#ifdef HAVE_AVX2
+static void
+matmul_c8_avx2 (gfc_array_c8 * const restrict retarray, 
+	gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+static void
+matmul_c8_avx2 (gfc_array_c8 * const restrict retarray, 
+	gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_COMPLEX_8 * restrict abase;
+  const GFC_COMPLEX_8 * restrict bbase;
+  GFC_COMPLEX_8 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* Kernel compiled for AVX2 (see the target attribute on the prototype above).
+   Computes C[xcount,ycount] = A[xcount, count] * B[count,ycount].
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+  /* No storage for the result yet: fill in its dimensions and allocate.  */
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_8));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1].  */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* Hand large unit-stride problems to the caller-supplied gemm if asked.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_COMPLEX_8 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+  /* Blocked kernel: first-dimension stride is 1 for result, A and B.  */
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_COMPLEX_8 *a, *b;
+      GFC_COMPLEX_8 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_COMPLEX_8 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: bias the pointers for 1-based indexing.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first; the blocks below accumulate into it.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN: jsec = columns left in this block, at most 512.  */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN: lsec = inner-dimension slice, at most 256.  */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN: isec = rows in this block, at most 256.  */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* t1 now holds this block of A transposed; update c from it.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_COMPLEX_8 *restrict abase_x;
+	  const GFC_COMPLEX_8 *restrict bbase_y;
+	  GFC_COMPLEX_8 *restrict dest_y;
+	  GFC_COMPLEX_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_COMPLEX_8) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_COMPLEX_8 *restrict bbase_y;
+	  GFC_COMPLEX_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_COMPLEX_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_COMPLEX_8 *restrict bbase_y;
+      GFC_COMPLEX_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_COMPLEX_8) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_COMPLEX_8 *restrict abase_x;
+      const GFC_COMPLEX_8 *restrict bbase_y;
+      GFC_COMPLEX_8 *restrict dest_y;
+      GFC_COMPLEX_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_COMPLEX_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX2 */
+
+#ifdef HAVE_AVX512F
+static void
+matmul_c8_avx512f (gfc_array_c8 * const restrict retarray, 
+	gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
+static void
+matmul_c8_avx512f (gfc_array_c8 * const restrict retarray, 
+	gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_COMPLEX_8 * restrict abase;
+  const GFC_COMPLEX_8 * restrict bbase;
+  GFC_COMPLEX_8 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_8));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault rather than
+	 an incorrect result.  */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_COMPLEX_8 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_COMPLEX_8 *a, *b;
+      GFC_COMPLEX_8 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_COMPLEX_8 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_COMPLEX_8 *restrict abase_x;
+	  const GFC_COMPLEX_8 *restrict bbase_y;
+	  GFC_COMPLEX_8 *restrict dest_y;
+	  GFC_COMPLEX_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_COMPLEX_8) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_COMPLEX_8 *restrict bbase_y;
+	  GFC_COMPLEX_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_COMPLEX_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_COMPLEX_8 *restrict bbase_y;
+      GFC_COMPLEX_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_COMPLEX_8) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_COMPLEX_8 *restrict abase_x;
+      const GFC_COMPLEX_8 *restrict bbase_y;
+      GFC_COMPLEX_8 *restrict dest_y;
+      GFC_COMPLEX_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_COMPLEX_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif  /* HAVE_AVX512F */
+
+/* Function to fall back to if there is no special processor-specific version.  */
+static void
+matmul_c8_vanilla (gfc_array_c8 * const restrict retarray, 
+	gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_COMPLEX_8 * restrict abase;
+  const GFC_COMPLEX_8 * restrict bbase;
+  GFC_COMPLEX_8 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_COMPLEX_8));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault rather than
+	 an incorrect result.  */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_COMPLEX_8 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_COMPLEX_8 *a, *b;
+      GFC_COMPLEX_8 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_COMPLEX_8 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_COMPLEX_8)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_COMPLEX_8 *restrict abase_x;
+	  const GFC_COMPLEX_8 *restrict bbase_y;
+	  GFC_COMPLEX_8 *restrict dest_y;
+	  GFC_COMPLEX_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_COMPLEX_8) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_COMPLEX_8 *restrict bbase_y;
+	  GFC_COMPLEX_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_COMPLEX_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_COMPLEX_8 *restrict bbase_y;
+      GFC_COMPLEX_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_COMPLEX_8) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_COMPLEX_8 *restrict abase_x;
+      const GFC_COMPLEX_8 *restrict bbase_y;
+      GFC_COMPLEX_8 *restrict dest_y;
+      GFC_COMPLEX_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_COMPLEX_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+
+/* Entry point: on the first call, select a processor-specific variant and
+   cache it in matmul_p; every call is then forwarded to the cached function.
+   The selection is currently i386 only.  Adjust for other architectures.  */
+
+#include <config/i386/cpuinfo.h>
+void matmul_c8 (gfc_array_c8 * const restrict retarray, 
+	gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  static void (*matmul_p) (gfc_array_c8 * const restrict retarray, 
+	gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) = NULL;
+  /* NOTE(review): matmul_p is set without synchronization -- confirm.  */
+  if (matmul_p == NULL)
+    {
+      matmul_p = matmul_c8_vanilla;  /* Used if nothing below matches.  */
+      if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
+	{
+          /* Intel only: check AVX512F, then AVX2, then AVX; first hit wins.  */
+#ifdef HAVE_AVX512F
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+	    {
+	      matmul_p = matmul_c8_avx512f;
+	      goto tailcall;
+	    }
+
+#endif  /* HAVE_AVX512F */
+
+#ifdef HAVE_AVX2
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+	    {
+	      matmul_p = matmul_c8_avx2;
+	      goto tailcall;
+	    }
+
+#endif  /* HAVE_AVX2 */
+
+#ifdef HAVE_AVX
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ 	    {
+              matmul_p = matmul_c8_avx;
+	      goto tailcall;
+	    }
+#endif  /* HAVE_AVX */
+        }
+   }
+  /* Every path ends here: call through the cached pointer.  */
+tailcall:
+   (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm);
+}
+
+#else  /* Just the vanilla function.  */
+
 void
 matmul_c8 (gfc_array_c8 * const restrict retarray, 
 	gfc_array_c8 * const restrict a, gfc_array_c8 * const restrict b, int try_blas,
@@ -607,4 +2834,10 @@ matmul_c8 (gfc_array_c8 * const restrict retarray,
 	}
     }
 }
+#undef POW3
+#undef min
+#undef max
+
 #endif
+#endif
+
diff --git a/libgfortran/generated/matmul_i1.c b/libgfortran/generated/matmul_i1.c
index 81c067b..2cce9d1 100644
--- a/libgfortran/generated/matmul_i1.c
+++ b/libgfortran/generated/matmul_i1.c
@@ -75,6 +75,2233 @@ extern void matmul_i1 (gfc_array_i1 * const restrict retarray,
 	int blas_limit, blas_call gemm);
 export_proto(matmul_i1);
 
+
+
+
+/* Put an exhaustive list of possible architectures here, ORed together.  */
+
+#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
+
+#ifdef HAVE_AVX
+static void
+matmul_i1_avx (gfc_array_i1 * const restrict retarray, 
+	gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
+static void
+matmul_i1_avx (gfc_array_i1 * const restrict retarray, 
+	gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_INTEGER_1 * restrict abase;
+  const GFC_INTEGER_1 * restrict bbase;
+  GFC_INTEGER_1 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+  /* At least one of the operands has to be a rank-2 array.  */
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* Computes C[xcount,ycount] = A[xcount, count] * B[count,ycount] into
+   RETARRAY.  This copy is built with the "avx" target attribute.
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+  /* Allocate the result if it has no storage; else optionally check it.  */
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_1));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+  /* Read the strides of the result.  */
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+  /* Read A's strides and extents; COUNT is its inner extent.  */
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  /* COUNT must equal B's first extent unless neither value is positive.  */
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+  /* Read B's strides and YCOUNT.  */
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 in case it is we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* Large products with suitable strides go to GEMM when TRY_BLAS is set.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_INTEGER_1 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+  /* All first-dimension strides are 1: use the blocked kernel.  */
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_INTEGER_1 *a, *b;
+      GFC_INTEGER_1 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_INTEGER_1 t1[65536], /* was [256][256]; packed copy of a block of a */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: bias pointers so that indices start at 1.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_1)0;
+
+      /* Start turning the crank: blocks of up to 512 (j), 256 (l), 256 (i).  */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* Update c in 4x4 tiles (f11..f44), then the leftovers.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_INTEGER_1 *restrict abase_x;
+	  const GFC_INTEGER_1 *restrict bbase_y;
+	  GFC_INTEGER_1 *restrict dest_y;
+	  GFC_INTEGER_1 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_INTEGER_1) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_INTEGER_1 *restrict bbase_y;
+	  GFC_INTEGER_1 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_INTEGER_1) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_INTEGER_1 *restrict bbase_y;
+      GFC_INTEGER_1 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_INTEGER_1) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_INTEGER_1 *restrict abase_x;
+      const GFC_INTEGER_1 *restrict bbase_y;
+      GFC_INTEGER_1 *restrict dest_y;
+      GFC_INTEGER_1 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_INTEGER_1) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX */
+
+#ifdef HAVE_AVX2
+static void
+matmul_i1_avx2 (gfc_array_i1 * const restrict retarray, 
+	gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+static void
+matmul_i1_avx2 (gfc_array_i1 * const restrict retarray, 
+	gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_INTEGER_1 * restrict abase;
+  const GFC_INTEGER_1 * restrict bbase;
+  GFC_INTEGER_1 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+  /* At least one of the operands has to be a rank-2 array.  */
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* Computes C[xcount,ycount] = A[xcount, count] * B[count,ycount] into
+   RETARRAY.  This copy is built with the "avx2" target attribute.
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+  /* Allocate the result if it has no storage; else optionally check it.  */
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_1));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+  /* Read the strides of the result.  */
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+  /* Read A's strides and extents; COUNT is its inner extent.  */
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  /* COUNT must equal B's first extent unless neither value is positive.  */
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+  /* Read B's strides and YCOUNT.  */
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 in case it is we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* Large products with suitable strides go to GEMM when TRY_BLAS is set.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_INTEGER_1 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+  /* All first-dimension strides are 1: use the blocked kernel.  */
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_INTEGER_1 *a, *b;
+      GFC_INTEGER_1 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_INTEGER_1 t1[65536], /* was [256][256]; packed copy of a block of a */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: bias pointers so that indices start at 1.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_1)0;
+
+      /* Start turning the crank: blocks of up to 512 (j), 256 (l), 256 (i).  */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* Update c in 4x4 tiles (f11..f44), then the leftovers.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_INTEGER_1 *restrict abase_x;
+	  const GFC_INTEGER_1 *restrict bbase_y;
+	  GFC_INTEGER_1 *restrict dest_y;
+	  GFC_INTEGER_1 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_INTEGER_1) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_INTEGER_1 *restrict bbase_y;
+	  GFC_INTEGER_1 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_INTEGER_1) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_INTEGER_1 *restrict bbase_y;
+      GFC_INTEGER_1 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_INTEGER_1) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_INTEGER_1 *restrict abase_x;
+      const GFC_INTEGER_1 *restrict bbase_y;
+      GFC_INTEGER_1 *restrict dest_y;
+      GFC_INTEGER_1 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_INTEGER_1) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX2 */
+
+#ifdef HAVE_AVX512F
+static void
+matmul_i1_avx512f (gfc_array_i1 * const restrict retarray, 
+	gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
+static void
+matmul_i1_avx512f (gfc_array_i1 * const restrict retarray, 
+	gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_INTEGER_1 * restrict abase;
+  const GFC_INTEGER_1 * restrict bbase;
+  GFC_INTEGER_1 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* AVX512F build of C[xcount,ycount] = A[xcount, count] * B[count,ycount].
+   RETARRAY is allocated here when its base_addr is NULL on entry.
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_1));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 in case it is we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* With TRY_BLAS set, products above BLAS_LIMIT cubed may go to GEMM.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_INTEGER_1 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_INTEGER_1 *a, *b;
+      GFC_INTEGER_1 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables; t1 holds the current block of a, 256 slots per row.  */
+      GFC_INTEGER_1 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Empty c first, so that k == 0 still yields an all-zero result.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_1)0;
+
+      /* Nothing left to accumulate if any extent is zero.  */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Blocked product: 512 columns of c, 256 values of l, 256 rows.  */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* jsec = columns in this block; ujsec = those done 4 at a time.  */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* lsec = terms in this block; ulsec = those copied in pairs.  */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* isec = rows in this block; uisec = those copied in pairs.  */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* uisec now counts the rows that are handled 4 at a time.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_INTEGER_1 *restrict abase_x;
+	  const GFC_INTEGER_1 *restrict bbase_y;
+	  GFC_INTEGER_1 *restrict dest_y;
+	  GFC_INTEGER_1 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_INTEGER_1) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_INTEGER_1 *restrict bbase_y;
+	  GFC_INTEGER_1 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_INTEGER_1) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_INTEGER_1 *restrict bbase_y;
+      GFC_INTEGER_1 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_INTEGER_1) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_INTEGER_1 *restrict abase_x;
+      const GFC_INTEGER_1 *restrict bbase_y;
+      GFC_INTEGER_1 *restrict dest_y;
+      GFC_INTEGER_1 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_INTEGER_1) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif  /* HAVE_AVX512F */
+
+/* Function to fall back to if there is no special processor-specific version.  */
+static void
+matmul_i1_vanilla (gfc_array_i1 * const restrict retarray, 
+	gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_INTEGER_1 * restrict abase;
+  const GFC_INTEGER_1 * restrict bbase;
+  GFC_INTEGER_1 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* Generic build of C[xcount,ycount] = A[xcount, count] * B[count,ycount].
+   RETARRAY is allocated here when its base_addr is NULL on entry.
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_1));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 in case it is we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* With TRY_BLAS set, products above BLAS_LIMIT cubed may go to GEMM.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_INTEGER_1 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_INTEGER_1 *a, *b;
+      GFC_INTEGER_1 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables; t1 holds the current block of a, 256 slots per row.  */
+      GFC_INTEGER_1 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Empty c first, so that k == 0 still yields an all-zero result.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_1)0;
+
+      /* Nothing left to accumulate if any extent is zero.  */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Blocked product: 512 columns of c, 256 values of l, 256 rows.  */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* jsec = columns in this block; ujsec = those done 4 at a time.  */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* lsec = terms in this block; ulsec = those copied in pairs.  */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* isec = rows in this block; uisec = those copied in pairs.  */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* uisec now counts the rows that are handled 4 at a time.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_INTEGER_1 *restrict abase_x;
+	  const GFC_INTEGER_1 *restrict bbase_y;
+	  GFC_INTEGER_1 *restrict dest_y;
+	  GFC_INTEGER_1 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_INTEGER_1) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_INTEGER_1 *restrict bbase_y;
+	  GFC_INTEGER_1 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_INTEGER_1) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_INTEGER_1 *restrict bbase_y;
+      GFC_INTEGER_1 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_INTEGER_1) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_INTEGER_1 *restrict abase_x;
+      const GFC_INTEGER_1 *restrict bbase_y;
+      GFC_INTEGER_1 *restrict dest_y;
+      GFC_INTEGER_1 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_INTEGER_1) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+
+/* Compiling main function, with selection code for the processor.  */
+
+/* Currently, this is i386 only.  Adjust for other architectures.  */
+
+#include <config/i386/cpuinfo.h>
+/* Entry point: forward all arguments unchanged to the matmul_i1 variant
+   best suited to the running CPU.  The variant is chosen on the first
+   call and cached in the static pointer matmul_p.  The cache is only
+   accessed through relaxed atomic loads and stores of a local copy, so
+   concurrent callers never see a half-made choice; at worst several of
+   them repeat the selection and store the same result.  */
+void matmul_i1 (gfc_array_i1 * const restrict retarray, 
+	gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  static void (*matmul_p) (gfc_array_i1 * const restrict retarray, 
+	gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) = NULL;
+  __typeof__ (matmul_p) matmul_fn;
+
+  matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
+  if (matmul_fn == NULL)
+    {
+      /* Generic code is the fallback when no special variant applies.  */
+      matmul_fn = matmul_i1_vanilla;
+      if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
+	{
+	  /* Test the variants from least to most preferred, so that the
+	     last feature bit found set decides.  */
+#ifdef HAVE_AVX
+	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+	    matmul_fn = matmul_i1_avx;
+#endif  /* HAVE_AVX */
+#ifdef HAVE_AVX2
+	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+	    matmul_fn = matmul_i1_avx2;
+#endif  /* HAVE_AVX2 */
+#ifdef HAVE_AVX512F
+	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+	    matmul_fn = matmul_i1_avx512f;
+#endif  /* HAVE_AVX512F */
+	}
+
+      /* Publish the final choice only; matmul_p never holds an
+	 intermediate value.  */
+      __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
+    }
+
+  (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm);
+}
+
+#else  /* Just the vanilla function.  */
+
 void
 matmul_i1 (gfc_array_i1 * const restrict retarray, 
 	gfc_array_i1 * const restrict a, gfc_array_i1 * const restrict b, int try_blas,
@@ -607,4 +2834,10 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
 	}
     }
 }
+#undef POW3
+#undef min
+#undef max
+
 #endif
+#endif
+
diff --git a/libgfortran/generated/matmul_i16.c b/libgfortran/generated/matmul_i16.c
index d1b1761..76a605f 100644
--- a/libgfortran/generated/matmul_i16.c
+++ b/libgfortran/generated/matmul_i16.c
@@ -75,6 +75,2233 @@ extern void matmul_i16 (gfc_array_i16 * const restrict retarray,
 	int blas_limit, blas_call gemm);
 export_proto(matmul_i16);
 
+
+
+
+/* Put the exhaustive list of possible architectures here, ORed together.  */
+
+#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
+
+#ifdef HAVE_AVX
+static void
+matmul_i16_avx (gfc_array_i16 * const restrict retarray, 
+	gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
+static void
+matmul_i16_avx (gfc_array_i16 * const restrict retarray, 
+	gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_INTEGER_16 * restrict abase;
+  const GFC_INTEGER_16 * restrict bbase;
+  GFC_INTEGER_16 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* Compute C[xcount,ycount] = A[xcount,count] * B[count,ycount] into
+   RETARRAY; this copy of the kernel is compiled for the "avx" target.
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count,1], so ycount=1.
+*/
+  /* No result storage yet: fill in the result dimensions and allocate.  */
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_16));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+      /* Caller-supplied result: check its extents against the arguments.  */
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+  /* Fetch the strides and extents of the result and of both operands.  */
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  /* The summed extent must equal B's first extent unless neither is positive.  */
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* If requested, hand large unit-stride problems to the external GEMM.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_INTEGER_16 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_INTEGER_16 *a, *b;
+      GFC_INTEGER_16 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables.  NOTE(review): t1 is a 65536-element automatic array.  */
+      GFC_INTEGER_16 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: bias the pointers so 1-based indexing works.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Zero c first; the blocked loops below accumulate into it.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_16)0;
+
+      /* Start turning the crank: columns of c in blocks of up to 512.  */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* Accumulate t1 * b into c in 4x4 blocks.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_INTEGER_16 *restrict abase_x;
+	  const GFC_INTEGER_16 *restrict bbase_y;
+	  GFC_INTEGER_16 *restrict dest_y;
+	  GFC_INTEGER_16 s;
+	  /* A and B both have unit stride along n: plain dot products.  */
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_INTEGER_16) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_INTEGER_16 *restrict bbase_y;
+	  GFC_INTEGER_16 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_INTEGER_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_INTEGER_16 *restrict bbase_y;
+      GFC_INTEGER_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_INTEGER_16) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_INTEGER_16 *restrict abase_x;
+      const GFC_INTEGER_16 *restrict bbase_y;
+      GFC_INTEGER_16 *restrict dest_y;
+      GFC_INTEGER_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_INTEGER_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX */
+
+#ifdef HAVE_AVX2
+static void
+matmul_i16_avx2 (gfc_array_i16 * const restrict retarray, 
+	gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+static void
+matmul_i16_avx2 (gfc_array_i16 * const restrict retarray, 
+	gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_INTEGER_16 * restrict abase;
+  const GFC_INTEGER_16 * restrict bbase;
+  GFC_INTEGER_16 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* Compute C[xcount,ycount] = A[xcount,count] * B[count,ycount] into
+   RETARRAY; this copy of the kernel is compiled for the "avx2" target.
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count,1], so ycount=1.
+*/
+  /* No result storage yet: fill in the result dimensions and allocate.  */
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_16));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+      /* Caller-supplied result: check its extents against the arguments.  */
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+  /* Fetch the strides and extents of the result and of both operands.  */
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  /* The summed extent must equal B's first extent unless neither is positive.  */
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* If requested, hand large unit-stride problems to the external GEMM.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_INTEGER_16 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_INTEGER_16 *a, *b;
+      GFC_INTEGER_16 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables.  NOTE(review): t1 is a 65536-element automatic array.  */
+      GFC_INTEGER_16 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: bias the pointers so 1-based indexing works.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Zero c first; the blocked loops below accumulate into it.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_16)0;
+
+      /* Start turning the crank: columns of c in blocks of up to 512.  */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* Accumulate t1 * b into c in 4x4 blocks.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_INTEGER_16 *restrict abase_x;
+	  const GFC_INTEGER_16 *restrict bbase_y;
+	  GFC_INTEGER_16 *restrict dest_y;
+	  GFC_INTEGER_16 s;
+	  /* A and B both have unit stride along n: plain dot products.  */
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_INTEGER_16) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_INTEGER_16 *restrict bbase_y;
+	  GFC_INTEGER_16 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_INTEGER_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_INTEGER_16 *restrict bbase_y;
+      GFC_INTEGER_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_INTEGER_16) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_INTEGER_16 *restrict abase_x;
+      const GFC_INTEGER_16 *restrict bbase_y;
+      GFC_INTEGER_16 *restrict dest_y;
+      GFC_INTEGER_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_INTEGER_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX2 */
+
+#ifdef HAVE_AVX512F
+static void
+matmul_i16_avx512f (gfc_array_i16 * const restrict retarray, 
+	gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
+static void
+matmul_i16_avx512f (gfc_array_i16 * const restrict retarray, 
+	gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_INTEGER_16 * restrict abase;
+  const GFC_INTEGER_16 * restrict bbase;
+  GFC_INTEGER_16 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* Compute C[xcount,ycount] = A[xcount, count] * B[count,ycount], where C
+   is RETARRAY; RETARRAY is allocated below when its base_addr is NULL.
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_16));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+  /* Strides of the result array.  */
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+  /* Strides and extents of A.  */
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  /* COUNT must equal B's first extent, unless neither of them is positive.  */
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+  /* Strides and extents of B.  */
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* Use the external gemm for large-enough unit-stride problems when requested.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_INTEGER_16 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+  /* Otherwise choose a loop nest based on the strides and on A's rank.  */
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_INTEGER_16 *a, *b;
+      GFC_INTEGER_16 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_INTEGER_16 t1[65536], /* was [256][256]; NOTE(review): large automatic array, check stack use.  */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_16)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+	      /* For each block of rows: pack a's tile into t1, then update c.  */
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* The update of c below works on four rows at a time.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_INTEGER_16 *restrict abase_x;
+	  const GFC_INTEGER_16 *restrict bbase_y;
+	  GFC_INTEGER_16 *restrict dest_y;
+	  GFC_INTEGER_16 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_INTEGER_16) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_INTEGER_16 *restrict bbase_y;
+	  GFC_INTEGER_16 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_INTEGER_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_INTEGER_16 *restrict bbase_y;
+      GFC_INTEGER_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_INTEGER_16) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_INTEGER_16 *restrict abase_x;
+      const GFC_INTEGER_16 *restrict bbase_y;
+      GFC_INTEGER_16 *restrict dest_y;
+      GFC_INTEGER_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_INTEGER_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif  /* HAVE_AVX512F */
+
+/* Function to fall back to if there is no special processor-specific version.  */
+static void
+matmul_i16_vanilla (gfc_array_i16 * const restrict retarray, 
+	gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_INTEGER_16 * restrict abase;
+  const GFC_INTEGER_16 * restrict bbase;
+  GFC_INTEGER_16 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* Compute C[xcount,ycount] = A[xcount, count] * B[count,ycount], where C
+   is RETARRAY; RETARRAY is allocated below when its base_addr is NULL.
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_16));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+  /* Strides of the result array.  */
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+  /* Strides and extents of A.  */
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  /* COUNT must equal B's first extent, unless neither of them is positive.  */
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+  /* Strides and extents of B.  */
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* Use the external gemm for large-enough unit-stride problems when requested.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_INTEGER_16 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+  /* Otherwise choose a loop nest based on the strides and on A's rank.  */
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_INTEGER_16 *a, *b;
+      GFC_INTEGER_16 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_INTEGER_16 t1[65536], /* was [256][256]; NOTE(review): large automatic array, check stack use.  */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_16)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+	      /* For each block of rows: pack a's tile into t1, then update c.  */
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* The update of c below works on four rows at a time.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_INTEGER_16 *restrict abase_x;
+	  const GFC_INTEGER_16 *restrict bbase_y;
+	  GFC_INTEGER_16 *restrict dest_y;
+	  GFC_INTEGER_16 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_INTEGER_16) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_INTEGER_16 *restrict bbase_y;
+	  GFC_INTEGER_16 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_INTEGER_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_INTEGER_16 *restrict bbase_y;
+      GFC_INTEGER_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_INTEGER_16) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_INTEGER_16 *restrict abase_x;
+      const GFC_INTEGER_16 *restrict bbase_y;
+      GFC_INTEGER_16 *restrict dest_y;
+      GFC_INTEGER_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_INTEGER_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+
+/* Main entry point: picks a processor-specific variant on the first call
+   and forwards every call to the variant that was picked.  */
+/* Currently, this is i386 only.  Adjust for other architectures.  */
+
+#include <config/i386/cpuinfo.h>
+void matmul_i16 (gfc_array_i16 * const restrict retarray, 
+	gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  static void (*matmul_p) (gfc_array_i16 * const restrict retarray, 
+	gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) = NULL;
+  /* NULL until the first call.  NOTE(review): no synchronization is visible here -- confirm for threaded callers.  */
+  if (matmul_p == NULL)
+    {
+      matmul_p = matmul_i16_vanilla;  /* Kept unless a better variant is found below.  */
+      if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
+	{
+          /* Run down the available processors in order of preference.  */
+#ifdef HAVE_AVX512F
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+	    {
+	      matmul_p = matmul_i16_avx512f;
+	      goto tailcall;
+	    }
+
+#endif  /* HAVE_AVX512F */
+
+#ifdef HAVE_AVX2
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+	    {
+	      matmul_p = matmul_i16_avx2;
+	      goto tailcall;
+	    }
+
+#endif  /* HAVE_AVX2 */
+
+#ifdef HAVE_AVX
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ 	    {
+              matmul_p = matmul_i16_avx;
+	      goto tailcall;
+	    }
+#endif  /* HAVE_AVX */
+        }
+   }
+   /* Forward the caller's arguments unchanged to the selected variant.  */
+tailcall:
+   (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm);
+}
+
+#else  /* Just the vanilla function.  */
+
 void
 matmul_i16 (gfc_array_i16 * const restrict retarray, 
 	gfc_array_i16 * const restrict a, gfc_array_i16 * const restrict b, int try_blas,
@@ -607,4 +2834,10 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
 	}
     }
 }
+#undef POW3
+#undef min
+#undef max
+
 #endif
+#endif
+
diff --git a/libgfortran/generated/matmul_i2.c b/libgfortran/generated/matmul_i2.c
index 5a06fcc..324197a 100644
--- a/libgfortran/generated/matmul_i2.c
+++ b/libgfortran/generated/matmul_i2.c
@@ -75,6 +75,2233 @@ extern void matmul_i2 (gfc_array_i2 * const restrict retarray,
 	int blas_limit, blas_call gemm);
 export_proto(matmul_i2);
 
+
+
+
+/* Put the exhaustive list of possible architectures here, ORed together.  */
+
+#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
+
+#ifdef HAVE_AVX
+static void
+matmul_i2_avx (gfc_array_i2 * const restrict retarray, 
+	gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
+static void
+matmul_i2_avx (gfc_array_i2 * const restrict retarray, 
+	gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_INTEGER_2 * restrict abase;
+  const GFC_INTEGER_2 * restrict bbase;
+  GFC_INTEGER_2 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* MATMUL kernel for GFC_INTEGER_2, built with __target__("avx"):
+   C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+   retarray is C; its storage is allocated below when base_addr is NULL.
+
+   Either A or B (but not both) can be rank 1:
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_2));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride is meant to be unused for 1-dimensional b; the poison
+	 value should turn a real use into a segfault, not a wrong result.
+	 NOTE(review): it is still read below (b_dim1, y*bystride with y == 0).  */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+  /* Helper macros, #undef'd after the function.  NOTE(review): max is unused.  */
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* Use the caller's gemm for large unit-stride problems if try_blas is set.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_INTEGER_2 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_INTEGER_2 *a, *b;
+      GFC_INTEGER_2 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_INTEGER_2 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: bias pointers for the 1-based indices below.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_2)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* Accumulate t1 * b into c, in 4x4 tiles where possible.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_INTEGER_2 *restrict abase_x;
+	  const GFC_INTEGER_2 *restrict bbase_y;
+	  GFC_INTEGER_2 *restrict dest_y;
+	  GFC_INTEGER_2 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_INTEGER_2) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_INTEGER_2 *restrict bbase_y;
+	  GFC_INTEGER_2 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_INTEGER_2) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_INTEGER_2 *restrict bbase_y;
+      GFC_INTEGER_2 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_INTEGER_2) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_INTEGER_2 *restrict abase_x;
+      const GFC_INTEGER_2 *restrict bbase_y;
+      GFC_INTEGER_2 *restrict dest_y;
+      GFC_INTEGER_2 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_INTEGER_2) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX */
+
+#ifdef HAVE_AVX2
+static void
+matmul_i2_avx2 (gfc_array_i2 * const restrict retarray, 
+	gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+static void
+matmul_i2_avx2 (gfc_array_i2 * const restrict retarray, 
+	gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_INTEGER_2 * restrict abase;
+  const GFC_INTEGER_2 * restrict bbase;
+  GFC_INTEGER_2 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* MATMUL kernel for GFC_INTEGER_2, built with __target__("avx2"):
+   C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+   retarray is C; its storage is allocated below when base_addr is NULL.
+
+   Either A or B (but not both) can be rank 1:
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_2));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride is meant to be unused for 1-dimensional b; the poison
+	 value should turn a real use into a segfault, not a wrong result.
+	 NOTE(review): it is still read below (b_dim1, y*bystride with y == 0).  */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+  /* Helper macros, #undef'd after the function.  NOTE(review): max is unused.  */
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* Use the caller's gemm for large unit-stride problems if try_blas is set.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_INTEGER_2 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_INTEGER_2 *a, *b;
+      GFC_INTEGER_2 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_INTEGER_2 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: bias pointers for the 1-based indices below.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_2)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* Accumulate t1 * b into c, in 4x4 tiles where possible.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_INTEGER_2 *restrict abase_x;
+	  const GFC_INTEGER_2 *restrict bbase_y;
+	  GFC_INTEGER_2 *restrict dest_y;
+	  GFC_INTEGER_2 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_INTEGER_2) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_INTEGER_2 *restrict bbase_y;
+	  GFC_INTEGER_2 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_INTEGER_2) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_INTEGER_2 *restrict bbase_y;
+      GFC_INTEGER_2 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_INTEGER_2) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_INTEGER_2 *restrict abase_x;
+      const GFC_INTEGER_2 *restrict bbase_y;
+      GFC_INTEGER_2 *restrict dest_y;
+      GFC_INTEGER_2 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_INTEGER_2) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX2 */
+
+#ifdef HAVE_AVX512F
+static void
+matmul_i2_avx512f (gfc_array_i2 * const restrict retarray, 
+	gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
+static void
+matmul_i2_avx512f (gfc_array_i2 * const restrict retarray, 
+	gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_INTEGER_2 * restrict abase;
+  const GFC_INTEGER_2 * restrict bbase;
+  GFC_INTEGER_2 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* Store MATMUL (A, B) in RETARRAY, allocating it if its base_addr is NULL:
+   C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+   Either A or B (but not both) can be rank 1:
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+   If TRY_BLAS is set, unit-stride problems with xcount*ycount*count above
+   BLAS_LIMIT cubed are handed to GEMM (see the checks further down).
+   This copy is compiled with __target__("avx512f").  */
+  /* Allocate RETARRAY if the caller did not, else optionally check it.  */
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_2));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+  /* Strides of the result.  */
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+  /* Strides of A, the inner extent COUNT and the row count XCOUNT.  */
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  /* The inner extents of A and B must match (unless neither is positive).  */
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+  /* Strides of B and the column count YCOUNT.  */
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* Large unit-stride problems go to the caller's GEMM if try_blas is set.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_INTEGER_2 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_INTEGER_2 *a, *b;
+      GFC_INTEGER_2 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* Leading dimensions, base offsets and scratch loop bounds.  */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* t1 caches the current tile of a with l contiguous; fXY accumulate c.  */
+      GFC_INTEGER_2 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Bias the pointers so that the 1-based indices used below work.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Empty c first; with k == 0 the result is still all zeros.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_2)0;
+
+      /* Early exit if possible, now that c holds the empty-sum result.  */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Blocked loops: jj over columns of c, ll over k, ii over rows of c.  */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* jsec: columns in this block (at most 512); ujsec: multiple of 4.  */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* lsec: inner-dimension length of this block (at most 256).  */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* isec: rows in this block (at most 256).  */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)  /* Odd last row of the tile.  */
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)  /* Odd last l of the tile.  */
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* Multiply the packed tile by b, accumulating 4x4 blocks of c.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)  /* Rows left over from the 4x4 blocks.  */
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)  /* Columns left over from the 4x4 blocks.  */
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_INTEGER_2 *restrict abase_x;
+	  const GFC_INTEGER_2 *restrict bbase_y;
+	  GFC_INTEGER_2 *restrict dest_y;
+	  GFC_INTEGER_2 s;
+	  /* aystride and bxstride are 1: unit-stride dot products.  */
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_INTEGER_2) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_INTEGER_2 *restrict bbase_y;
+	  GFC_INTEGER_2 s;
+	  /* Rank-1 A: one dot product per column of B.  */
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_INTEGER_2) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_INTEGER_2 *restrict bbase_y;
+      GFC_INTEGER_2 s;
+      /* Rank-1 A with arbitrary strides in B.  */
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_INTEGER_2) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_INTEGER_2 *restrict abase_x;
+      const GFC_INTEGER_2 *restrict bbase_y;
+      GFC_INTEGER_2 *restrict dest_y;
+      GFC_INTEGER_2 s;
+      /* Fully general strides: one dot product per element of dest.  */
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_INTEGER_2) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif  /* HAVE_AVX512F */
+
+/* Baseline version, used when no processor-specific variant is selected.  */
+static void
+matmul_i2_vanilla (gfc_array_i2 * const restrict retarray, 
+	gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_INTEGER_2 * restrict abase;
+  const GFC_INTEGER_2 * restrict bbase;
+  GFC_INTEGER_2 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* Store MATMUL (A, B) in RETARRAY, allocating it if its base_addr is NULL:
+   C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+   Either A or B (but not both) can be rank 1:
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+   If TRY_BLAS is set, unit-stride problems with xcount*ycount*count above
+   BLAS_LIMIT cubed are handed to GEMM (see the checks further down).
+   This copy carries no __target__ attribute.  */
+  /* Allocate RETARRAY if the caller did not, else optionally check it.  */
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_2));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+  /* Strides of the result.  */
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+  /* Strides of A, the inner extent COUNT and the row count XCOUNT.  */
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  /* The inner extents of A and B must match (unless neither is positive).  */
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+  /* Strides of B and the column count YCOUNT.  */
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* Large unit-stride problems go to the caller's GEMM if try_blas is set.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_INTEGER_2 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_INTEGER_2 *a, *b;
+      GFC_INTEGER_2 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* Leading dimensions, base offsets and scratch loop bounds.  */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* t1 caches the current tile of a with l contiguous; fXY accumulate c.  */
+      GFC_INTEGER_2 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Bias the pointers so that the 1-based indices used below work.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Empty c first; with k == 0 the result is still all zeros.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_2)0;
+
+      /* Early exit if possible, now that c holds the empty-sum result.  */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Blocked loops: jj over columns of c, ll over k, ii over rows of c.  */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* jsec: columns in this block (at most 512); ujsec: multiple of 4.  */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* lsec: inner-dimension length of this block (at most 256).  */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* isec: rows in this block (at most 256).  */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)  /* Odd last row of the tile.  */
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)  /* Odd last l of the tile.  */
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* Multiply the packed tile by b, accumulating 4x4 blocks of c.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)  /* Rows left over from the 4x4 blocks.  */
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)  /* Columns left over from the 4x4 blocks.  */
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_INTEGER_2 *restrict abase_x;
+	  const GFC_INTEGER_2 *restrict bbase_y;
+	  GFC_INTEGER_2 *restrict dest_y;
+	  GFC_INTEGER_2 s;
+	  /* aystride and bxstride are 1: unit-stride dot products.  */
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_INTEGER_2) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_INTEGER_2 *restrict bbase_y;
+	  GFC_INTEGER_2 s;
+	  /* Rank-1 A: one dot product per column of B.  */
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_INTEGER_2) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_INTEGER_2 *restrict bbase_y;
+      GFC_INTEGER_2 s;
+      /* Rank-1 A with arbitrary strides in B.  */
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_INTEGER_2) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_INTEGER_2 *restrict abase_x;
+      const GFC_INTEGER_2 *restrict bbase_y;
+      GFC_INTEGER_2 *restrict dest_y;
+      GFC_INTEGER_2 s;
+      /* Fully general strides: one dot product per element of dest.  */
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_INTEGER_2) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+
+/* Compiling main function, with selection code for the processor.  */
+
+/* Currently, this is i386 only.  Adjust for other architectures.  */
+
+#include <config/i386/cpuinfo.h>
+void matmul_i2 (gfc_array_i2 * const restrict retarray, 
+	gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  /* Implementation chosen on the first call, NULL before that.  It is read
+     and written with relaxed atomics so that concurrent first calls do not
+     race; every thread computes the same value, so any store may win.  */
+  static void (*matmul_p) (gfc_array_i2 * const restrict retarray, 
+	gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) = NULL;
+  __typeof__ (matmul_p) matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
+
+  if (matmul_fn == NULL)
+    {
+      matmul_fn = matmul_i2_vanilla;
+      if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
+	{
+          /* Run down the available processors in order of preference.  */
+#ifdef HAVE_AVX512F
+	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+	    {
+	      matmul_fn = matmul_i2_avx512f;
+	      goto store;
+	    }
+#endif  /* HAVE_AVX512F */
+#ifdef HAVE_AVX2
+	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+	    {
+	      matmul_fn = matmul_i2_avx2;
+	      goto store;
+	    }
+#endif  /* HAVE_AVX2 */
+#ifdef HAVE_AVX
+	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+	    {
+	      matmul_fn = matmul_i2_avx;
+	      goto store;
+	    }
+#endif  /* HAVE_AVX */
+	}
+    store:
+      __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
+    }
+  (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm);
+}
+
+#else  /* Just the vanilla function.  */
+
 void
 matmul_i2 (gfc_array_i2 * const restrict retarray, 
 	gfc_array_i2 * const restrict a, gfc_array_i2 * const restrict b, int try_blas,
@@ -607,4 +2834,10 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
 	}
     }
 }
+#undef POW3
+#undef min
+#undef max
+
 #endif
+#endif
+
diff --git a/libgfortran/generated/matmul_i4.c b/libgfortran/generated/matmul_i4.c
index aee8e4d..bd31c7c 100644
--- a/libgfortran/generated/matmul_i4.c
+++ b/libgfortran/generated/matmul_i4.c
@@ -75,6 +75,2233 @@ extern void matmul_i4 (gfc_array_i4 * const restrict retarray,
 	int blas_limit, blas_call gemm);
 export_proto(matmul_i4);
 
+
+
+
+/* Put exhaustive list of possible architectures here, ORed together.  */
+
+#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
+
+#ifdef HAVE_AVX
+static void
+matmul_i4_avx (gfc_array_i4 * const restrict retarray, 
+	gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
+static void
+matmul_i4_avx (gfc_array_i4 * const restrict retarray, 
+	gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_INTEGER_4 * restrict abase;
+  const GFC_INTEGER_4 * restrict bbase;
+  GFC_INTEGER_4 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* AVX build of MATMUL: C[xcount,ycount] = A[xcount, count] * B[count,ycount].
+   RETARRAY is allocated here when its base_addr is NULL; GEMM is only used
+   when TRY_BLAS is nonzero and xcount*ycount*count > BLAS_LIMIT**3.
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_4));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_INTEGER_4 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_INTEGER_4 *a, *b;
+      GFC_INTEGER_4 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_INTEGER_4 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: bias a, b and c so they are indexed from 1.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Empty c first, so that k == 0 still yields an all-zero result.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_4)0;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Start turning the crank: tiles of 512 (n), 256 (k) and 256 (m).  */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_INTEGER_4 *restrict abase_x;
+	  const GFC_INTEGER_4 *restrict bbase_y;
+	  GFC_INTEGER_4 *restrict dest_y;
+	  GFC_INTEGER_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_INTEGER_4) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_INTEGER_4 *restrict bbase_y;
+	  GFC_INTEGER_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_INTEGER_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_INTEGER_4 *restrict bbase_y;
+      GFC_INTEGER_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_INTEGER_4) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_INTEGER_4 *restrict abase_x;
+      const GFC_INTEGER_4 *restrict bbase_y;
+      GFC_INTEGER_4 *restrict dest_y;
+      GFC_INTEGER_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_INTEGER_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX */
+
+#ifdef HAVE_AVX2
+static void
+matmul_i4_avx2 (gfc_array_i4 * const restrict retarray, 
+	gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+static void
+matmul_i4_avx2 (gfc_array_i4 * const restrict retarray, 
+	gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_INTEGER_4 * restrict abase;
+  const GFC_INTEGER_4 * restrict bbase;
+  GFC_INTEGER_4 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* AVX2 build of MATMUL: C[xcount,ycount] = A[xcount, count] * B[count,ycount].
+   RETARRAY is allocated here when its base_addr is NULL; GEMM is only used
+   when TRY_BLAS is nonzero and xcount*ycount*count > BLAS_LIMIT**3.
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_4));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_INTEGER_4 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_INTEGER_4 *a, *b;
+      GFC_INTEGER_4 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_INTEGER_4 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: bias a, b and c so they are indexed from 1.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Empty c first, so that k == 0 still yields an all-zero result.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_4)0;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Start turning the crank: tiles of 512 (n), 256 (k) and 256 (m).  */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_INTEGER_4 *restrict abase_x;
+	  const GFC_INTEGER_4 *restrict bbase_y;
+	  GFC_INTEGER_4 *restrict dest_y;
+	  GFC_INTEGER_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_INTEGER_4) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_INTEGER_4 *restrict bbase_y;
+	  GFC_INTEGER_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_INTEGER_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_INTEGER_4 *restrict bbase_y;
+      GFC_INTEGER_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_INTEGER_4) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_INTEGER_4 *restrict abase_x;
+      const GFC_INTEGER_4 *restrict bbase_y;
+      GFC_INTEGER_4 *restrict dest_y;
+      GFC_INTEGER_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_INTEGER_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX2 */
+
+#ifdef HAVE_AVX512F
+static void
+matmul_i4_avx512f (gfc_array_i4 * const restrict retarray, 
+	gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
+static void
+matmul_i4_avx512f (gfc_array_i4 * const restrict retarray, 
+	gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_INTEGER_4 * restrict abase;
+  const GFC_INTEGER_4 * restrict bbase;
+  GFC_INTEGER_4 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_4));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_INTEGER_4 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_INTEGER_4 *a, *b;
+      GFC_INTEGER_4 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_INTEGER_4 t1[65536], /* was [256][256]; holds a copied block of a */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Zero c first; the loops below accumulate into it.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_4)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN: at most 512 values of j per jj block.  */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;  /* Part of jsec done 4 at a time.  */
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN: at most 256 values of l per ll block.  */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;  /* Part of lsec copied 2 at a time.  */
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN: at most 256 values of i per ii block.  */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;  /* Part of isec copied 2 at a time.  */
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+
+		  uisec = isec - isec % 4;  /* Part of isec multiplied 4 at a time.  */
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_INTEGER_4 *restrict abase_x;
+	  const GFC_INTEGER_4 *restrict bbase_y;
+	  GFC_INTEGER_4 *restrict dest_y;
+	  GFC_INTEGER_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_INTEGER_4) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_INTEGER_4 *restrict bbase_y;
+	  GFC_INTEGER_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_INTEGER_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_INTEGER_4 *restrict bbase_y;
+      GFC_INTEGER_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_INTEGER_4) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_INTEGER_4 *restrict abase_x;
+      const GFC_INTEGER_4 *restrict bbase_y;
+      GFC_INTEGER_4 *restrict dest_y;
+      GFC_INTEGER_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_INTEGER_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif  /* HAVE_AVX512F */
+
+/* Function to fall back to if there is no special processor-specific version.  */
+static void
+matmul_i4_vanilla (gfc_array_i4 * const restrict retarray, 
+	gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_INTEGER_4 * restrict abase;
+  const GFC_INTEGER_4 * restrict bbase;
+  GFC_INTEGER_4 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_4));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_INTEGER_4 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_INTEGER_4 *a, *b;
+      GFC_INTEGER_4 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_INTEGER_4 t1[65536], /* was [256][256]; holds a copied block of a */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Zero c first; the loops below accumulate into it.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_4)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN: at most 512 values of j per jj block.  */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;  /* Part of jsec done 4 at a time.  */
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN: at most 256 values of l per ll block.  */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;  /* Part of lsec copied 2 at a time.  */
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN: at most 256 values of i per ii block.  */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;  /* Part of isec copied 2 at a time.  */
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+
+		  uisec = isec - isec % 4;  /* Part of isec multiplied 4 at a time.  */
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_INTEGER_4 *restrict abase_x;
+	  const GFC_INTEGER_4 *restrict bbase_y;
+	  GFC_INTEGER_4 *restrict dest_y;
+	  GFC_INTEGER_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_INTEGER_4) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_INTEGER_4 *restrict bbase_y;
+	  GFC_INTEGER_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_INTEGER_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_INTEGER_4 *restrict bbase_y;
+      GFC_INTEGER_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_INTEGER_4) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_INTEGER_4 *restrict abase_x;
+      const GFC_INTEGER_4 *restrict bbase_y;
+      GFC_INTEGER_4 *restrict dest_y;
+      GFC_INTEGER_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_INTEGER_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+
+/* Main entry point, with selection code for the processor-specific version.  */
+
+/* Currently, this is i386 only.  Adjust for other architectures.  */
+
+#include <config/i386/cpuinfo.h>
+void matmul_i4 (gfc_array_i4 * const restrict retarray, 
+	gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  static void (*matmul_p) (gfc_array_i4 * const restrict retarray, 
+	gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) = NULL;
+
+  if (matmul_p == NULL)  /* Not selected yet: pick a version now.  */
+    {
+      matmul_p = matmul_i4_vanilla;  /* Default if nothing below matches.  */
+      if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
+	{
+          /* Run down the available versions in order of preference; first hit wins.  */
+#ifdef HAVE_AVX512F
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+	    {
+	      matmul_p = matmul_i4_avx512f;
+	      goto tailcall;
+	    }
+
+#endif  /* HAVE_AVX512F */
+
+#ifdef HAVE_AVX2
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+	    {
+	      matmul_p = matmul_i4_avx2;
+	      goto tailcall;
+	    }
+
+#endif  /* HAVE_AVX2 */
+
+#ifdef HAVE_AVX
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ 	    {
+              matmul_p = matmul_i4_avx;
+	      goto tailcall;
+	    }
+#endif  /* HAVE_AVX */
+        }
+   }
+
+tailcall:
+   (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm);
+}
+
+#else  /* Just the vanilla function.  */
+
 void
 matmul_i4 (gfc_array_i4 * const restrict retarray, 
 	gfc_array_i4 * const restrict a, gfc_array_i4 * const restrict b, int try_blas,
@@ -607,4 +2834,10 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
 	}
     }
 }
+#undef POW3
+#undef min
+#undef max
+
 #endif
+#endif
+
diff --git a/libgfortran/generated/matmul_i8.c b/libgfortran/generated/matmul_i8.c
index 902b284..c4d0327 100644
--- a/libgfortran/generated/matmul_i8.c
+++ b/libgfortran/generated/matmul_i8.c
@@ -75,6 +75,2233 @@ extern void matmul_i8 (gfc_array_i8 * const restrict retarray,
 	int blas_limit, blas_call gemm);
 export_proto(matmul_i8);
 
+
+
+
+/* Put exhaustive list of possible architectures here, ORed together.  */
+
+#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
+
+#ifdef HAVE_AVX
+static void
+matmul_i8_avx (gfc_array_i8 * const restrict retarray, 
+	gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
+static void
+matmul_i8_avx (gfc_array_i8 * const restrict retarray, 
+	gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_INTEGER_8 * restrict abase;
+  const GFC_INTEGER_8 * restrict bbase;
+  GFC_INTEGER_8 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* AVX-targeted copy of the MATMUL kernel for GFC_INTEGER_8 arrays, computing
+   C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+
+   Either A or B (but not both) can be rank 1:
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+  /* No result storage yet: derive its bounds from A and B and allocate it.  */
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_8));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+  /* Strides of the result.  */
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+  /* Strides of A, together with count and xcount.  */
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  /* Extent 0 of B has to match count, unless neither one is positive.  */
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* With try_blas set, a large enough unit-stride product goes to gemm.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_INTEGER_8 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_INTEGER_8 *a, *b;
+      GFC_INTEGER_8 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_INTEGER_8 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Zero-fill c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_8)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* Add the t1 * b products of this section into c.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_INTEGER_8 *restrict abase_x;
+	  const GFC_INTEGER_8 *restrict bbase_y;
+	  GFC_INTEGER_8 *restrict dest_y;
+	  GFC_INTEGER_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_INTEGER_8) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_INTEGER_8 *restrict bbase_y;
+	  GFC_INTEGER_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_INTEGER_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_INTEGER_8 *restrict bbase_y;
+      GFC_INTEGER_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_INTEGER_8) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_INTEGER_8 *restrict abase_x;
+      const GFC_INTEGER_8 *restrict bbase_y;
+      GFC_INTEGER_8 *restrict dest_y;
+      GFC_INTEGER_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_INTEGER_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX */
+
+#ifdef HAVE_AVX2
+static void
+matmul_i8_avx2 (gfc_array_i8 * const restrict retarray, 
+	gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+static void
+matmul_i8_avx2 (gfc_array_i8 * const restrict retarray, 
+	gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_INTEGER_8 * restrict abase;
+  const GFC_INTEGER_8 * restrict bbase;
+  GFC_INTEGER_8 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* AVX2-targeted copy of the MATMUL kernel for GFC_INTEGER_8 arrays, computing
+   C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+
+   Either A or B (but not both) can be rank 1:
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+  /* No result storage yet: derive its bounds from A and B and allocate it.  */
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_8));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+  /* Strides of the result.  */
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+  /* Strides of A, together with count and xcount.  */
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  /* Extent 0 of B has to match count, unless neither one is positive.  */
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* With try_blas set, a large enough unit-stride product goes to gemm.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_INTEGER_8 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_INTEGER_8 *a, *b;
+      GFC_INTEGER_8 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_INTEGER_8 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Zero-fill c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_8)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* Add the t1 * b products of this section into c.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_INTEGER_8 *restrict abase_x;
+	  const GFC_INTEGER_8 *restrict bbase_y;
+	  GFC_INTEGER_8 *restrict dest_y;
+	  GFC_INTEGER_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_INTEGER_8) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_INTEGER_8 *restrict bbase_y;
+	  GFC_INTEGER_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_INTEGER_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_INTEGER_8 *restrict bbase_y;
+      GFC_INTEGER_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_INTEGER_8) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_INTEGER_8 *restrict abase_x;
+      const GFC_INTEGER_8 *restrict bbase_y;
+      GFC_INTEGER_8 *restrict dest_y;
+      GFC_INTEGER_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_INTEGER_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX2 */
+
+#ifdef HAVE_AVX512F
+static void
+matmul_i8_avx512f (gfc_array_i8 * const restrict retarray, 
+	gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
+static void
+matmul_i8_avx512f (gfc_array_i8 * const restrict retarray, 
+	gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_INTEGER_8 * restrict abase;
+  const GFC_INTEGER_8 * restrict bbase;
+  GFC_INTEGER_8 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* Compute C[xcount,ycount] = A[xcount, count] * B[count,ycount] and store
+   it in RETARRAY, which is allocated here when its base_addr is NULL.
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_8));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault, rather than
+	 an incorrect result.  */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_INTEGER_8 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_INTEGER_8 *a, *b;
+      GFC_INTEGER_8 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_INTEGER_8 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Bias a, b and c so that the loops below can use 1-based indices.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Zero c first; the blocked loops below accumulate into it.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_8)0;
+
+      /* Blocked loops: 512 columns of c (jj), 256 of k (ll), 256 rows (ii).  */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_INTEGER_8 *restrict abase_x;
+	  const GFC_INTEGER_8 *restrict bbase_y;
+	  GFC_INTEGER_8 *restrict dest_y;
+	  GFC_INTEGER_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_INTEGER_8) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_INTEGER_8 *restrict bbase_y;
+	  GFC_INTEGER_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_INTEGER_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_INTEGER_8 *restrict bbase_y;
+      GFC_INTEGER_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_INTEGER_8) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_INTEGER_8 *restrict abase_x;
+      const GFC_INTEGER_8 *restrict bbase_y;
+      GFC_INTEGER_8 *restrict dest_y;
+      GFC_INTEGER_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_INTEGER_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif  /* HAVE_AVX512F */
+
+/* Function to fall back to if there is no special processor-specific version.  */
+static void
+matmul_i8_vanilla (gfc_array_i8 * const restrict retarray, 
+	gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_INTEGER_8 * restrict abase;
+  const GFC_INTEGER_8 * restrict bbase;
+  GFC_INTEGER_8 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* Compute C[xcount,ycount] = A[xcount, count] * B[count,ycount] and store
+   it in RETARRAY, which is allocated here when its base_addr is NULL.
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_INTEGER_8));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault, rather than
+	 an incorrect result.  */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_INTEGER_8 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_INTEGER_8 *a, *b;
+      GFC_INTEGER_8 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_INTEGER_8 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Bias a, b and c so that the loops below can use 1-based indices.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Zero c first; the blocked loops below accumulate into it.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_INTEGER_8)0;
+
+      /* Blocked loops: 512 columns of c (jj), 256 of k (ll), 256 rows (ii).  */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_INTEGER_8 *restrict abase_x;
+	  const GFC_INTEGER_8 *restrict bbase_y;
+	  GFC_INTEGER_8 *restrict dest_y;
+	  GFC_INTEGER_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_INTEGER_8) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_INTEGER_8 *restrict bbase_y;
+	  GFC_INTEGER_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_INTEGER_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_INTEGER_8 *restrict bbase_y;
+      GFC_INTEGER_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_INTEGER_8) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_INTEGER_8 *restrict abase_x;
+      const GFC_INTEGER_8 *restrict bbase_y;
+      GFC_INTEGER_8 *restrict dest_y;
+      GFC_INTEGER_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_INTEGER_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+
+/* Compiling main function, with selection code for the processor.  */
+
+/* Currently, this is i386 only.  Adjust for other architectures.  */
+
+#include <config/i386/cpuinfo.h>
+void matmul_i8 (gfc_array_i8 * const restrict retarray, 
+	gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  /* Cached choice, NULL until the first call.  Accessed only atomically and
+     set only to the final variant, so concurrent first calls are safe.  */
+  static void (*matmul_p) (gfc_array_i8 * const restrict retarray, 
+	gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) = NULL;
+  __typeof__ (matmul_p) fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
+
+  if (fn == NULL)
+    {
+      fn = matmul_i8_vanilla;
+      if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
+	{
+          /* Run down the available processors in order of preference.  */
+#ifdef HAVE_AVX512F
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+	    {
+	      fn = matmul_i8_avx512f;
+	      goto store;
+	    }
+#endif  /* HAVE_AVX512F */
+#ifdef HAVE_AVX2
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+	    {
+	      fn = matmul_i8_avx2;
+	      goto store;
+	    }
+#endif  /* HAVE_AVX2 */
+#ifdef HAVE_AVX
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ 	    {
+              fn = matmul_i8_avx;
+	      goto store;
+	    }
+#endif  /* HAVE_AVX */
+        }
+    store:
+      __atomic_store_n (&matmul_p, fn, __ATOMIC_RELAXED);
+    }
+
+  (*fn) (retarray, a, b, try_blas, blas_limit, gemm);
+}
+
+#else  /* Just the vanilla function.  */
+
 void
 matmul_i8 (gfc_array_i8 * const restrict retarray, 
 	gfc_array_i8 * const restrict a, gfc_array_i8 * const restrict b, int try_blas,
@@ -607,4 +2834,10 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
 	}
     }
 }
+#undef POW3
+#undef min
+#undef max
+
 #endif
+#endif
+
diff --git a/libgfortran/generated/matmul_r10.c b/libgfortran/generated/matmul_r10.c
index 8bb1e62..b9fb10b 100644
--- a/libgfortran/generated/matmul_r10.c
+++ b/libgfortran/generated/matmul_r10.c
@@ -75,6 +75,2237 @@ extern void matmul_r10 (gfc_array_r10 * const restrict retarray,
 	int blas_limit, blas_call gemm);
 export_proto(matmul_r10);
 
+#if defined(HAVE_AVX) && defined(HAVE_AVX2)
+/* REAL types generate identical code for AVX and AVX2.  Only generate
+   an AVX2 function if we are dealing with integer.  */
+#undef HAVE_AVX2
+#endif
+
+
+/* Put exhaustive list of possible architectures here, ORed together.  */
+
+#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
+
+#ifdef HAVE_AVX
+static void
+matmul_r10_avx (gfc_array_r10 * const restrict retarray, 
+	gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
+static void
+matmul_r10_avx (gfc_array_r10 * const restrict retarray, 
+	gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_REAL_10 * restrict abase;
+  const GFC_REAL_10 * restrict bbase;
+  GFC_REAL_10 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_10));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_REAL_10 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_REAL_10 *a, *b;
+      GFC_REAL_10 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_REAL_10 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_10)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_REAL_10 *restrict abase_x;
+	  const GFC_REAL_10 *restrict bbase_y;
+	  GFC_REAL_10 *restrict dest_y;
+	  GFC_REAL_10 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_REAL_10) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_REAL_10 *restrict bbase_y;
+	  GFC_REAL_10 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_REAL_10) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_REAL_10 *restrict bbase_y;
+      GFC_REAL_10 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_REAL_10) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_REAL_10 *restrict abase_x;
+      const GFC_REAL_10 *restrict bbase_y;
+      GFC_REAL_10 *restrict dest_y;
+      GFC_REAL_10 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_REAL_10) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX */
+
+#ifdef HAVE_AVX2
+static void
+matmul_r10_avx2 (gfc_array_r10 * const restrict retarray, 
+	gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+static void
+matmul_r10_avx2 (gfc_array_r10 * const restrict retarray, 
+	gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_REAL_10 * restrict abase;
+  const GFC_REAL_10 * restrict bbase;
+  GFC_REAL_10 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_10));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_REAL_10 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_REAL_10 *a, *b;
+      GFC_REAL_10 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_REAL_10 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_10)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_REAL_10 *restrict abase_x;
+	  const GFC_REAL_10 *restrict bbase_y;
+	  GFC_REAL_10 *restrict dest_y;
+	  GFC_REAL_10 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_REAL_10) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_REAL_10 *restrict bbase_y;
+	  GFC_REAL_10 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_REAL_10) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_REAL_10 *restrict bbase_y;
+      GFC_REAL_10 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_REAL_10) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_REAL_10 *restrict abase_x;
+      const GFC_REAL_10 *restrict bbase_y;
+      GFC_REAL_10 *restrict dest_y;
+      GFC_REAL_10 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_REAL_10) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX2 */
+
+#ifdef HAVE_AVX512F
+static void
+matmul_r10_avx512f (gfc_array_r10 * const restrict retarray, 
+	gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
+static void
+matmul_r10_avx512f (gfc_array_r10 * const restrict retarray, 
+	gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_REAL_10 * restrict abase;
+  const GFC_REAL_10 * restrict bbase;
+  GFC_REAL_10 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_10));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+  /* Strides of the result array.  */
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+  /* Strides and extents of A.  */
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_REAL_10 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_REAL_10 *a, *b;
+      GFC_REAL_10 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_REAL_10 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Empty c first; also needed when k == 0, where C is all zeros.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_10)0;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_REAL_10 *restrict abase_x;
+	  const GFC_REAL_10 *restrict bbase_y;
+	  GFC_REAL_10 *restrict dest_y;
+	  GFC_REAL_10 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_REAL_10) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_REAL_10 *restrict bbase_y;
+	  GFC_REAL_10 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_REAL_10) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_REAL_10 *restrict bbase_y;
+      GFC_REAL_10 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_REAL_10) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_REAL_10 *restrict abase_x;
+      const GFC_REAL_10 *restrict bbase_y;
+      GFC_REAL_10 *restrict dest_y;
+      GFC_REAL_10 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_REAL_10) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif  /* HAVE_AVX512F */
+
+/* Function to fall back to if there is no special processor-specific version.  */
+static void
+matmul_r10_vanilla (gfc_array_r10 * const restrict retarray, 
+	gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_REAL_10 * restrict abase;
+  const GFC_REAL_10 * restrict bbase;
+  GFC_REAL_10 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_10));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+  /* Strides of the result array.  */
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+  /* Strides and extents of A.  */
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_REAL_10 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_REAL_10 *a, *b;
+      GFC_REAL_10 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_REAL_10 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Empty c first; also needed when k == 0, where C is all zeros.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_10)0;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_REAL_10 *restrict abase_x;
+	  const GFC_REAL_10 *restrict bbase_y;
+	  GFC_REAL_10 *restrict dest_y;
+	  GFC_REAL_10 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_REAL_10) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_REAL_10 *restrict bbase_y;
+	  GFC_REAL_10 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_REAL_10) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_REAL_10 *restrict bbase_y;
+      GFC_REAL_10 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_REAL_10) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_REAL_10 *restrict abase_x;
+      const GFC_REAL_10 *restrict bbase_y;
+      GFC_REAL_10 *restrict dest_y;
+      GFC_REAL_10 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_REAL_10) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+
+/* Compiling main function, with selection code for the processor.  */
+
+/* Currently, this is i386 only.  Adjust for other architectures.  */
+
+#include <config/i386/cpuinfo.h>
+void matmul_r10 (gfc_array_r10 * const restrict retarray, 
+	gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  static void (*matmul_p) (gfc_array_r10 * const restrict retarray, 
+	gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) = NULL;
+  __typeof__ (matmul_p) matmul_fn = __atomic_load_n (&matmul_p,
+						     __ATOMIC_RELAXED);
+
+  if (matmul_fn == NULL)
+    {
+      /* Decide on a local copy; the single store below publishes it.  */
+      matmul_fn = matmul_r10_vanilla;
+      if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
+	{
+          /* Run down the available processors in order of preference.  */
+#ifdef HAVE_AVX512F
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+	    {
+	      matmul_fn = matmul_r10_avx512f;
+	      goto store;
+	    }
+#endif  /* HAVE_AVX512F */
+#ifdef HAVE_AVX2
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+	    {
+	      matmul_fn = matmul_r10_avx2;
+	      goto store;
+	    }
+#endif  /* HAVE_AVX2 */
+#ifdef HAVE_AVX
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ 	    {
+              matmul_fn = matmul_r10_avx;
+	      goto store;
+	    }
+#endif  /* HAVE_AVX */
+        }
+    store:
+      __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
+    }
+
+  (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm);
+}
+
+#else  /* Just the vanilla function.  */
+
 void
 matmul_r10 (gfc_array_r10 * const restrict retarray, 
 	gfc_array_r10 * const restrict a, gfc_array_r10 * const restrict b, int try_blas,
@@ -607,4 +2838,10 @@ matmul_r10 (gfc_array_r10 * const restrict retarray,
 	}
     }
 }
+#undef POW3
+#undef min
+#undef max
+
 #endif
+#endif
+
diff --git a/libgfortran/generated/matmul_r16.c b/libgfortran/generated/matmul_r16.c
index 4ebd104..65ac801 100644
--- a/libgfortran/generated/matmul_r16.c
+++ b/libgfortran/generated/matmul_r16.c
@@ -75,6 +75,2237 @@ extern void matmul_r16 (gfc_array_r16 * const restrict retarray,
 	int blas_limit, blas_call gemm);
 export_proto(matmul_r16);
 
+#if defined(HAVE_AVX) && defined(HAVE_AVX2)
+/* REAL types generate identical code for AVX and AVX2.  Only generate
+   an AVX2 function if we are dealing with integer.  */
+#undef HAVE_AVX2
+#endif
+
+
+/* Put an exhaustive list of possible architectures here, ORed together.  */
+
+#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
+
+#ifdef HAVE_AVX
+static void
+matmul_r16_avx (gfc_array_r16 * const restrict retarray, 
+	gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
+static void
+matmul_r16_avx (gfc_array_r16 * const restrict retarray, 
+	gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_REAL_16 * restrict abase;
+  const GFC_REAL_16 * restrict bbase;
+  GFC_REAL_16 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* Kernel compiled with __target__("avx"):
+   C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+   An unallocated RETARRAY (base_addr == NULL) is sized and allocated here.
+
+   Either A or B (but not both) can be rank 1:
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_16));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* Prefer the caller-supplied gemm for large, suitably-strided products.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_REAL_16 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+  /* Otherwise choose a loop nest that matches the strides.  */
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_REAL_16 *a, *b;
+      GFC_REAL_16 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_REAL_16 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_16)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* Multiply the packed block t1 by b, accumulating into c.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_REAL_16 *restrict abase_x;
+	  const GFC_REAL_16 *restrict bbase_y;
+	  GFC_REAL_16 *restrict dest_y;
+	  GFC_REAL_16 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_REAL_16) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_REAL_16 *restrict bbase_y;
+	  GFC_REAL_16 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_REAL_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_REAL_16 *restrict bbase_y;
+      GFC_REAL_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_REAL_16) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_REAL_16 *restrict abase_x;
+      const GFC_REAL_16 *restrict bbase_y;
+      GFC_REAL_16 *restrict dest_y;
+      GFC_REAL_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_REAL_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX */
+
+#ifdef HAVE_AVX2
+static void
+matmul_r16_avx2 (gfc_array_r16 * const restrict retarray, 
+	gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+static void
+matmul_r16_avx2 (gfc_array_r16 * const restrict retarray, 
+	gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_REAL_16 * restrict abase;
+  const GFC_REAL_16 * restrict bbase;
+  GFC_REAL_16 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* Kernel compiled with __target__("avx2"):
+   C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+   An unallocated RETARRAY (base_addr == NULL) is sized and allocated here.
+
+   Either A or B (but not both) can be rank 1:
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_16));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* Prefer the caller-supplied gemm for large, suitably-strided products.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_REAL_16 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+  /* Otherwise choose a loop nest that matches the strides.  */
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_REAL_16 *a, *b;
+      GFC_REAL_16 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_REAL_16 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_16)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* Multiply the packed block t1 by b, accumulating into c.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_REAL_16 *restrict abase_x;
+	  const GFC_REAL_16 *restrict bbase_y;
+	  GFC_REAL_16 *restrict dest_y;
+	  GFC_REAL_16 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_REAL_16) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_REAL_16 *restrict bbase_y;
+	  GFC_REAL_16 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_REAL_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_REAL_16 *restrict bbase_y;
+      GFC_REAL_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_REAL_16) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_REAL_16 *restrict abase_x;
+      const GFC_REAL_16 *restrict bbase_y;
+      GFC_REAL_16 *restrict dest_y;
+      GFC_REAL_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_REAL_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX2 */
+
+#ifdef HAVE_AVX512F
+static void
+matmul_r16_avx512f (gfc_array_r16 * const restrict retarray, 
+	gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
+static void
+matmul_r16_avx512f (gfc_array_r16 * const restrict retarray, 
+	gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_REAL_16 * restrict abase;
+  const GFC_REAL_16 * restrict bbase;
+  GFC_REAL_16 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* AVX512F build (see the target attribute) of the GFC_REAL_16 MATMUL kernel:
+   C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_16));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+  /* Gather strides and extents for the result and both operands.  */
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)  /* Both empty: OK.  */
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 in case it is we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* Big, suitably strided products are handed to the gemm callback.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_REAL_16 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_REAL_16 *a, *b;
+      GFC_REAL_16 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_REAL_16 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: bias the pointers so 1-based indices work.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_16)0;
+
+      /* Start turning the crank: jj blocks n by 512, ll k by 256, ii m by 256. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* t1 holds the A block with l contiguous; now accumulate into C.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_REAL_16 *restrict abase_x;
+	  const GFC_REAL_16 *restrict bbase_y;
+	  GFC_REAL_16 *restrict dest_y;
+	  GFC_REAL_16 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_REAL_16) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_REAL_16 *restrict bbase_y;
+	  GFC_REAL_16 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_REAL_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_REAL_16 *restrict bbase_y;
+      GFC_REAL_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_REAL_16) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_REAL_16 *restrict abase_x;
+      const GFC_REAL_16 *restrict bbase_y;
+      GFC_REAL_16 *restrict dest_y;
+      GFC_REAL_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_REAL_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif  /* HAVE_AVX512F */
+
+/* Fallback used when no processor-specific version above is selected.  */
+static void
+matmul_r16_vanilla (gfc_array_r16 * const restrict retarray, 
+	gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_REAL_16 * restrict abase;
+  const GFC_REAL_16 * restrict bbase;
+  GFC_REAL_16 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* Baseline build (no target attribute) of the GFC_REAL_16 MATMUL kernel:
+   C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_16));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+  /* Gather strides and extents for the result and both operands.  */
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)  /* Both empty: OK.  */
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 in case it is we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* Big, suitably strided products are handed to the gemm callback.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_REAL_16 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_REAL_16 *a, *b;
+      GFC_REAL_16 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_REAL_16 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: bias the pointers so 1-based indices work.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_16)0;
+
+      /* Start turning the crank: jj blocks n by 512, ll k by 256, ii m by 256. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* t1 holds the A block with l contiguous; now accumulate into C.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_REAL_16 *restrict abase_x;
+	  const GFC_REAL_16 *restrict bbase_y;
+	  GFC_REAL_16 *restrict dest_y;
+	  GFC_REAL_16 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_REAL_16) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_REAL_16 *restrict bbase_y;
+	  GFC_REAL_16 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_REAL_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_REAL_16 *restrict bbase_y;
+      GFC_REAL_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_REAL_16) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_REAL_16 *restrict abase_x;
+      const GFC_REAL_16 *restrict bbase_y;
+      GFC_REAL_16 *restrict dest_y;
+      GFC_REAL_16 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_REAL_16) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+
+/* Compiling main function, with selection code for the processor.  */
+
+/* Currently, this is i386 only.  Adjust for other architectures.  */
+
+#include <config/i386/cpuinfo.h>
+void matmul_r16 (gfc_array_r16 * const restrict retarray, 
+	gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  /* Kernel picked on the first call (NULL until then).  It is read and
+     written with relaxed atomics so concurrent first calls do not race.  */
+  static void (*matmul_p) (gfc_array_r16 * const restrict retarray, 
+	gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) = NULL;
+  __typeof__ (matmul_p) matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
+
+  if (matmul_fn == NULL)
+    {
+      /* Choose in a local and publish once; vanilla unless an Intel CPU.  */
+      matmul_fn = matmul_r16_vanilla;
+      if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
+	{
+	  /* Run down the available processors in order of preference.  */
+#ifdef HAVE_AVX512F
+	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+	    {
+	      matmul_fn = matmul_r16_avx512f;
+	      goto store;
+	    }
+#endif  /* HAVE_AVX512F */
+#ifdef HAVE_AVX2
+	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+	    {
+	      matmul_fn = matmul_r16_avx2;
+	      goto store;
+	    }
+#endif  /* HAVE_AVX2 */
+#ifdef HAVE_AVX
+	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+	    {
+	      matmul_fn = matmul_r16_avx;
+	      goto store;
+	    }
+#endif  /* HAVE_AVX */
+	}
+    store:
+      __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
+    }
+  (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm);
+}
+
+#else  /* Just the vanilla function.  */
+
 void
 matmul_r16 (gfc_array_r16 * const restrict retarray, 
 	gfc_array_r16 * const restrict a, gfc_array_r16 * const restrict b, int try_blas,
@@ -607,4 +2838,10 @@ matmul_r16 (gfc_array_r16 * const restrict retarray,
 	}
     }
 }
+#undef POW3
+#undef min
+#undef max
+
 #endif
+#endif
+
diff --git a/libgfortran/generated/matmul_r4.c b/libgfortran/generated/matmul_r4.c
index cf3ffa3..2a85d6b 100644
--- a/libgfortran/generated/matmul_r4.c
+++ b/libgfortran/generated/matmul_r4.c
@@ -75,6 +75,2237 @@ extern void matmul_r4 (gfc_array_r4 * const restrict retarray,
 	int blas_limit, blas_call gemm);
 export_proto(matmul_r4);
 
+#if defined(HAVE_AVX) && defined(HAVE_AVX2)
+/* REAL types generate identical code for AVX and AVX2, so when both are
+   available an AVX2 function is only generated for integer types.  */
+#undef HAVE_AVX2
+#endif
+
+
+/* Put the exhaustive list of possible architectures here, ORed together.  */
+
+#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
+
+#ifdef HAVE_AVX
+static void
+matmul_r4_avx (gfc_array_r4 * const restrict retarray, 
+	gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
+static void
+matmul_r4_avx (gfc_array_r4 * const restrict retarray, 
+	gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_REAL_4 * restrict abase;
+  const GFC_REAL_4 * restrict bbase;
+  GFC_REAL_4 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+   This is the variant built with __target__("avx"); RETARRAY receives C.
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+  /* Allocate an unallocated result; else, if checking is on, verify it.  */
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_4));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+  /* Strides of A, the inner dimension COUNT and the row count XCOUNT.  */
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  /* The inner dimensions must agree unless neither extent is positive.  */
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+  /* Helper macros; they are #undef'd again right after this function.  */
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* If TRY_BLAS is set, hand big enough unit-stride problems to GEMM.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_REAL_4 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_REAL_4 *a, *b;
+      GFC_REAL_4 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_REAL_4 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+      /* t1 caches a block of A: t1[(l - ll) + 256 * (i - ii)] = A(i,l).  */
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: rebase so that 1-based [i + j * dim1] works.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_4)0;
+
+      /* Start turning the crank: blocks of 512 (jj), 256 (ll) and 256 (ii). */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* Accumulate into C from the cached block, 4x4 at a time.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_REAL_4 *restrict abase_x;
+	  const GFC_REAL_4 *restrict bbase_y;
+	  GFC_REAL_4 *restrict dest_y;
+	  GFC_REAL_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_REAL_4) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_REAL_4 *restrict bbase_y;
+	  GFC_REAL_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_REAL_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_REAL_4 *restrict bbase_y;
+      GFC_REAL_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_REAL_4) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_REAL_4 *restrict abase_x;
+      const GFC_REAL_4 *restrict bbase_y;
+      GFC_REAL_4 *restrict dest_y;
+      GFC_REAL_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_REAL_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX */
+
+#ifdef HAVE_AVX2
+static void
+matmul_r4_avx2 (gfc_array_r4 * const restrict retarray, 
+	gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+static void
+matmul_r4_avx2 (gfc_array_r4 * const restrict retarray, 
+	gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_REAL_4 * restrict abase;
+  const GFC_REAL_4 * restrict bbase;
+  GFC_REAL_4 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+   This is the variant built with __target__("avx2"); RETARRAY receives C.
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+  /* Allocate an unallocated result; else, if checking is on, verify it.  */
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_4));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+  /* Strides of A, the inner dimension COUNT and the row count XCOUNT.  */
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  /* The inner dimensions must agree unless neither extent is positive.  */
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+  /* Helper macros; they are #undef'd again right after this function.  */
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* If TRY_BLAS is set, hand big enough unit-stride problems to GEMM.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_REAL_4 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_REAL_4 *a, *b;
+      GFC_REAL_4 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_REAL_4 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+      /* t1 caches a block of A: t1[(l - ll) + 256 * (i - ii)] = A(i,l).  */
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: rebase so that 1-based [i + j * dim1] works.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_4)0;
+
+      /* Start turning the crank: blocks of 512 (jj), 256 (ll) and 256 (ii). */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* Accumulate into C from the cached block, 4x4 at a time.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_REAL_4 *restrict abase_x;
+	  const GFC_REAL_4 *restrict bbase_y;
+	  GFC_REAL_4 *restrict dest_y;
+	  GFC_REAL_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_REAL_4) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_REAL_4 *restrict bbase_y;
+	  GFC_REAL_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_REAL_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_REAL_4 *restrict bbase_y;
+      GFC_REAL_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_REAL_4) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_REAL_4 *restrict abase_x;
+      const GFC_REAL_4 *restrict bbase_y;
+      GFC_REAL_4 *restrict dest_y;
+      GFC_REAL_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_REAL_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX2 */
+
+#ifdef HAVE_AVX512F
+static void
+matmul_r4_avx512f (gfc_array_r4 * const restrict retarray, 
+	gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
+static void
+matmul_r4_avx512f (gfc_array_r4 * const restrict retarray, 
+	gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_REAL_4 * restrict abase;
+  const GFC_REAL_4 * restrict bbase;
+  GFC_REAL_4 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount], stored in
+   RETARRAY, which is allocated below when its base_addr is NULL.
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_4));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself: gemm for large suitable cases, else the loops below.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_REAL_4 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_REAL_4 *a, *b;
+      GFC_REAL_4 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_REAL_4 t1[65536], /* was [256][256]; holds a packed block of a */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: bias the pointers for 1-based indexing.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Zero c first; the blocked loops below accumulate into it.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_4)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_REAL_4 *restrict abase_x;
+	  const GFC_REAL_4 *restrict bbase_y;
+	  GFC_REAL_4 *restrict dest_y;
+	  GFC_REAL_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_REAL_4) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_REAL_4 *restrict bbase_y;
+	  GFC_REAL_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_REAL_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_REAL_4 *restrict bbase_y;
+      GFC_REAL_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_REAL_4) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_REAL_4 *restrict abase_x;
+      const GFC_REAL_4 *restrict bbase_y;
+      GFC_REAL_4 *restrict dest_y;
+      GFC_REAL_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_REAL_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif  /* HAVE_AVX512F */
+
+/* Function to fall back to if there is no special processor-specific version.  */
+static void
+matmul_r4_vanilla (gfc_array_r4 * const restrict retarray, 
+	gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_REAL_4 * restrict abase;
+  const GFC_REAL_4 * restrict bbase;
+  GFC_REAL_4 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount], stored in
+   RETARRAY, which is allocated below when its base_addr is NULL.
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_4));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself: gemm for large suitable cases, else the loops below.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_REAL_4 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_REAL_4 *a, *b;
+      GFC_REAL_4 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_REAL_4 t1[65536], /* was [256][256]; holds a packed block of a */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: bias the pointers for 1-based indexing.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Zero c first; the blocked loops below accumulate into it.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_4)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_REAL_4 *restrict abase_x;
+	  const GFC_REAL_4 *restrict bbase_y;
+	  GFC_REAL_4 *restrict dest_y;
+	  GFC_REAL_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_REAL_4) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_REAL_4 *restrict bbase_y;
+	  GFC_REAL_4 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_REAL_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_REAL_4 *restrict bbase_y;
+      GFC_REAL_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_REAL_4) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_REAL_4 *restrict abase_x;
+      const GFC_REAL_4 *restrict bbase_y;
+      GFC_REAL_4 *restrict dest_y;
+      GFC_REAL_4 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_REAL_4) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+
+/* Main entry point, with selection code for the processor.  The first
+   call picks an implementation from the CPU vendor and feature bits and
+   caches it in matmul_p; every call then forwards to that function.
+   Currently, this is i386 only.  Adjust for other architectures.  */
+#include <config/i386/cpuinfo.h>
+void matmul_r4 (gfc_array_r4 * const restrict retarray, 
+	gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  static void (*matmul_p) (gfc_array_r4 * const restrict retarray, 
+	gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) = NULL;
+  /* NOTE(review): plain static set without locking -- confirm thread use.  */
+  if (matmul_p == NULL)
+    {
+      matmul_p = matmul_r4_vanilla;
+      if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
+	{
+          /* Run down the available processors in order of preference.  */
+#ifdef HAVE_AVX512F
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+	    {
+	      matmul_p = matmul_r4_avx512f;
+	      goto tailcall;
+	    }
+
+#endif  /* HAVE_AVX512F */
+
+#ifdef HAVE_AVX2
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+	    {
+	      matmul_p = matmul_r4_avx2;
+	      goto tailcall;
+	    }
+
+#endif  /* HAVE_AVX2 */
+
+#ifdef HAVE_AVX
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ 	    {
+              matmul_p = matmul_r4_avx;
+	      goto tailcall;
+	    }
+#endif  /* HAVE_AVX */
+        }
+   }
+
+tailcall:
+   (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm);
+}
+
+#else  /* Just the vanilla function.  */
+
 void
 matmul_r4 (gfc_array_r4 * const restrict retarray, 
 	gfc_array_r4 * const restrict a, gfc_array_r4 * const restrict b, int try_blas,
@@ -607,4 +2838,10 @@ matmul_r4 (gfc_array_r4 * const restrict retarray,
 	}
     }
 }
+#undef POW3
+#undef min
+#undef max
+
 #endif
+#endif
+
diff --git a/libgfortran/generated/matmul_r8.c b/libgfortran/generated/matmul_r8.c
index 9a70a23..78bf52e 100644
--- a/libgfortran/generated/matmul_r8.c
+++ b/libgfortran/generated/matmul_r8.c
@@ -75,6 +75,2237 @@ extern void matmul_r8 (gfc_array_r8 * const restrict retarray,
 	int blas_limit, blas_call gemm);
 export_proto(matmul_r8);
 
+#if defined(HAVE_AVX) && defined(HAVE_AVX2)
+/* REAL types generate identical code for AVX and AVX2.  Only generate
+   an AVX2 function if we are dealing with integer.  */
+#undef HAVE_AVX2
+#endif
+
+
+/* Put an exhaustive list of possible architectures here, ORed together.  */
+
+#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
+
+#ifdef HAVE_AVX
+static void
+matmul_r8_avx (gfc_array_r8 * const restrict retarray, 
+	gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
+static void
+matmul_r8_avx (gfc_array_r8 * const restrict retarray, 
+	gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_REAL_8 * restrict abase;
+  const GFC_REAL_8 * restrict bbase;
+  GFC_REAL_8 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_8));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_REAL_8 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_REAL_8 *a, *b;
+      GFC_REAL_8 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_REAL_8 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_8)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_REAL_8 *restrict abase_x;
+	  const GFC_REAL_8 *restrict bbase_y;
+	  GFC_REAL_8 *restrict dest_y;
+	  GFC_REAL_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_REAL_8) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_REAL_8 *restrict bbase_y;
+	  GFC_REAL_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_REAL_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_REAL_8 *restrict bbase_y;
+      GFC_REAL_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_REAL_8) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_REAL_8 *restrict abase_x;
+      const GFC_REAL_8 *restrict bbase_y;
+      GFC_REAL_8 *restrict dest_y;
+      GFC_REAL_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_REAL_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX */
+
+#ifdef HAVE_AVX2
+static void
+matmul_r8_avx2 (gfc_array_r8 * const restrict retarray, 
+	gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+static void
+matmul_r8_avx2 (gfc_array_r8 * const restrict retarray, 
+	gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_REAL_8 * restrict abase;
+  const GFC_REAL_8 * restrict bbase;
+  GFC_REAL_8 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_8));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_REAL_8 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_REAL_8 *a, *b;
+      GFC_REAL_8 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_REAL_8 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_8)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_REAL_8 *restrict abase_x;
+	  const GFC_REAL_8 *restrict bbase_y;
+	  GFC_REAL_8 *restrict dest_y;
+	  GFC_REAL_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_REAL_8) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_REAL_8 *restrict bbase_y;
+	  GFC_REAL_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_REAL_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_REAL_8 *restrict bbase_y;
+      GFC_REAL_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_REAL_8) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_REAL_8 *restrict abase_x;
+      const GFC_REAL_8 *restrict bbase_y;
+      GFC_REAL_8 *restrict dest_y;
+      GFC_REAL_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_REAL_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif /* HAVE_AVX2 */
+
+#ifdef HAVE_AVX512F
+static void
+matmul_r8_avx512f (gfc_array_r8 * const restrict retarray, 
+	gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
+static void
+matmul_r8_avx512f (gfc_array_r8 * const restrict retarray, 
+	gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_REAL_8 * restrict abase;
+  const GFC_REAL_8 * restrict bbase;
+  GFC_REAL_8 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* Compute C[xcount,ycount] = A[xcount, count] * B[count,ycount] for
+   GFC_REAL_8 data; this copy is compiled for the "avx512f" target.
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+  /* No result storage yet: set retarray's bounds and allocate it.  */
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_8));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+  /* Pick up the strides of the result.  */
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  /* The shared extent of A and B must agree unless neither is positive.  */
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1].  */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* If asked to, pass sufficiently large problems to the supplied gemm.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_REAL_8 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+  /* Otherwise select a loop nest according to the strides and ranks.  */
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_REAL_8 *a, *b;
+      GFC_REAL_8 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_REAL_8 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: offset the pointers for 1-based indexing.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Zero c first; the loops below accumulate into it.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_8)0;
+
+      /* Start turning the crank; jj, ll, ii step by 512, 256, 256.  */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* Accumulate t1 (the copied part of a) times b into c.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_REAL_8 *restrict abase_x;
+	  const GFC_REAL_8 *restrict bbase_y;
+	  GFC_REAL_8 *restrict dest_y;
+	  GFC_REAL_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_REAL_8) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_REAL_8 *restrict bbase_y;
+	  GFC_REAL_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_REAL_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_REAL_8 *restrict bbase_y;
+      GFC_REAL_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_REAL_8) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_REAL_8 *restrict abase_x;
+      const GFC_REAL_8 *restrict bbase_y;
+      GFC_REAL_8 *restrict dest_y;
+      GFC_REAL_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_REAL_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+#endif  /* HAVE_AVX512F */
+
+/* Function to fall back to if there is no special processor-specific version.  */
+static void
+matmul_r8_vanilla (gfc_array_r8 * const restrict retarray, 
+	gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const GFC_REAL_8 * restrict abase;
+  const GFC_REAL_8 * restrict bbase;
+  GFC_REAL_8 * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* Compute C[xcount,ycount] = A[xcount, count] * B[count,ycount] for
+   GFC_REAL_8 data; this copy carries no target attribute.
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+  /* No result storage yet: set retarray's bounds and allocate it.  */
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_REAL_8));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+
+  /* Pick up the strides of the result.  */
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  /* The shared extent of A and B must agree unless neither is positive.  */
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1].  */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+  /* If asked to, pass sufficiently large problems to the supplied gemm.  */
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const GFC_REAL_8 one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+  /* Otherwise select a loop nest according to the strides and ranks.  */
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const GFC_REAL_8 *a, *b;
+      GFC_REAL_8 *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      GFC_REAL_8 t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments: offset the pointers for 1-based indexing.  */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Zero c first; the loops below accumulate into it.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = (GFC_REAL_8)0;
+
+      /* Start turning the crank; jj, ll, ii step by 512, 256, 256.  */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+		  /* Accumulate t1 (the copied part of a) times b into c.  */
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const GFC_REAL_8 *restrict abase_x;
+	  const GFC_REAL_8 *restrict bbase_y;
+	  GFC_REAL_8 *restrict dest_y;
+	  GFC_REAL_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = (GFC_REAL_8) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const GFC_REAL_8 *restrict bbase_y;
+	  GFC_REAL_8 s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = (GFC_REAL_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const GFC_REAL_8 *restrict bbase_y;
+      GFC_REAL_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = (GFC_REAL_8) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const GFC_REAL_8 *restrict abase_x;
+      const GFC_REAL_8 *restrict bbase_y;
+      GFC_REAL_8 *restrict dest_y;
+      GFC_REAL_8 s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = (GFC_REAL_8) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+
+
+/* Compiling main function, with selection code for the processor.
+   Currently, this is i386 only.  Adjust for other architectures.  */
+#include <config/i386/cpuinfo.h>
+/* Entry point for MATMUL: on first use pick a kernel from __cpu_model and
+   remember it in matmul_p, then forward all arguments to that kernel.  */
+void matmul_r8 (gfc_array_r8 * const restrict retarray, 
+	gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  static void (*matmul_p) (gfc_array_r8 * const restrict retarray, 
+	gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) = NULL;
+  void (*matmul_fn) (gfc_array_r8 * const restrict retarray, 
+	gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm);
+  /* Calls may be concurrent: touch matmul_p only through atomics.  */
+  matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
+  if (matmul_fn == NULL)
+    {
+      matmul_fn = matmul_r8_vanilla;
+      if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
+	{
+          /* Run down the available processors in order of preference.  */
+#ifdef HAVE_AVX512F
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
+	    {
+	      matmul_fn = matmul_r8_avx512f;
+	      goto store;
+	    }
+#endif  /* HAVE_AVX512F */
+#ifdef HAVE_AVX2
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
+	    {
+	      matmul_fn = matmul_r8_avx2;
+	      goto store;
+	    }
+#endif  /* HAVE_AVX2 */
+#ifdef HAVE_AVX
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+	    {
+	      matmul_fn = matmul_r8_avx;
+	      goto store;
+	    }
+#endif  /* HAVE_AVX */
+        }
+   store:
+      __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
+   }
+   (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm);
+}
+
+#else  /* Just the vanilla function.  */
+
 void
 matmul_r8 (gfc_array_r8 * const restrict retarray, 
 	gfc_array_r8 * const restrict a, gfc_array_r8 * const restrict b, int try_blas,
@@ -607,4 +2838,10 @@ matmul_r8 (gfc_array_r8 * const restrict retarray,
 	}
     }
 }
+#undef POW3
+#undef min
+#undef max
+
 #endif
+#endif
+
diff --git a/libgfortran/m4/matmul.m4 b/libgfortran/m4/matmul.m4
index 77ed440..4e5bf60 100644
--- a/libgfortran/m4/matmul.m4
+++ b/libgfortran/m4/matmul.m4
@@ -76,537 +76,105 @@ extern void matmul_'rtype_code` ('rtype` * const restrict retarray,
 	int blas_limit, blas_call gemm);
 export_proto(matmul_'rtype_code`);
 
-void
-matmul_'rtype_code` ('rtype` * const restrict retarray, 
-	'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
-	int blas_limit, blas_call gemm)
-{
-  const 'rtype_name` * restrict abase;
-  const 'rtype_name` * restrict bbase;
-  'rtype_name` * restrict dest;
-
-  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
-  index_type x, y, n, count, xcount, ycount;
-
-  assert (GFC_DESCRIPTOR_RANK (a) == 2
-          || GFC_DESCRIPTOR_RANK (b) == 2);
-
-/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
-
-   Either A or B (but not both) can be rank 1:
-
-   o One-dimensional argument A is implicitly treated as a row matrix
-     dimensioned [1,count], so xcount=1.
-
-   o One-dimensional argument B is implicitly treated as a column matrix
-     dimensioned [count, 1], so ycount=1.
-*/
+'ifelse(rtype_letter,`r',dnl
+`#if defined(HAVE_AVX) && defined(HAVE_AVX2)
+/* REAL types generate identical code for AVX and AVX2.  Only generate
+   an AVX2 function if we are dealing with integer types.  */
+#undef HAVE_AVX2
+#endif')
+`
 
-  if (retarray->base_addr == NULL)
-    {
-      if (GFC_DESCRIPTOR_RANK (a) == 1)
-        {
-	  GFC_DIMENSION_SET(retarray->dim[0], 0,
-	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
-        }
-      else if (GFC_DESCRIPTOR_RANK (b) == 1)
-        {
-	  GFC_DIMENSION_SET(retarray->dim[0], 0,
-	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
-        }
-      else
-        {
-	  GFC_DIMENSION_SET(retarray->dim[0], 0,
-	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
-
-          GFC_DIMENSION_SET(retarray->dim[1], 0,
-	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
-			    GFC_DESCRIPTOR_EXTENT(retarray,0));
-        }
+/* Put an exhaustive list of possible architectures here, ORed together.  */
 
-      retarray->base_addr
-	= xmallocarray (size0 ((array_t *) retarray), sizeof ('rtype_name`));
-      retarray->offset = 0;
-    }
-  else if (unlikely (compile_options.bounds_check))
-    {
-      index_type ret_extent, arg_extent;
+#if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
 
-      if (GFC_DESCRIPTOR_RANK (a) == 1)
-	{
-	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
-	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
-	  if (arg_extent != ret_extent)
-	    runtime_error ("Incorrect extent in return array in"
-			   " MATMUL intrinsic: is %ld, should be %ld",
-			   (long int) ret_extent, (long int) arg_extent);
-	}
-      else if (GFC_DESCRIPTOR_RANK (b) == 1)
-	{
-	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
-	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
-	  if (arg_extent != ret_extent)
-	    runtime_error ("Incorrect extent in return array in"
-			   " MATMUL intrinsic: is %ld, should be %ld",
-			   (long int) ret_extent, (long int) arg_extent);
-	}
-      else
-	{
-	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
-	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
-	  if (arg_extent != ret_extent)
-	    runtime_error ("Incorrect extent in return array in"
-			   " MATMUL intrinsic for dimension 1:"
-			   " is %ld, should be %ld",
-			   (long int) ret_extent, (long int) arg_extent);
-
-	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
-	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
-	  if (arg_extent != ret_extent)
-	    runtime_error ("Incorrect extent in return array in"
-			   " MATMUL intrinsic for dimension 2:"
-			   " is %ld, should be %ld",
-			   (long int) ret_extent, (long int) arg_extent);
-	}
-    }
-'
-sinclude(`matmul_asm_'rtype_code`.m4')dnl
-`
-  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
-    {
-      /* One-dimensional result may be addressed in the code below
-	 either as a row or a column matrix. We want both cases to
-	 work. */
-      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
-    }
-  else
-    {
-      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
-      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
-    }
+#ifdef HAVE_AVX
+'define(`matmul_name',`matmul_'rtype_code`_avx')dnl
+`static void
+'matmul_name` ('rtype` * const restrict retarray, 
+	'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
+static' include(matmul_internal.m4)dnl
+`#endif /* HAVE_AVX */
+
+#ifdef HAVE_AVX2
+'define(`matmul_name',`matmul_'rtype_code`_avx2')dnl
+`static void
+'matmul_name` ('rtype` * const restrict retarray, 
+	'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx2")));
+static' include(matmul_internal.m4)dnl
+`#endif /* HAVE_AVX2 */
+
+#ifdef HAVE_AVX512F
+'define(`matmul_name',`matmul_'rtype_code`_avx512f')dnl
+`static void
+'matmul_name` ('rtype` * const restrict retarray, 
+	'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
+static' include(matmul_internal.m4)dnl
+`#endif  /* HAVE_AVX512F */
 
+/* Function to fall back to if there is no special processor-specific version.  */
+'define(`matmul_name',`matmul_'rtype_code`_vanilla')dnl
+`static' include(matmul_internal.m4)dnl
 
-  if (GFC_DESCRIPTOR_RANK (a) == 1)
-    {
-      /* Treat it as a a row matrix A[1,count]. */
-      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
-      aystride = 1;
-
-      xcount = 1;
-      count = GFC_DESCRIPTOR_EXTENT(a,0);
-    }
-  else
-    {
-      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
-      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+`/* Compiling main function, with selection code for the processor.  */
 
-      count = GFC_DESCRIPTOR_EXTENT(a,1);
-      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
-    }
+/* Currently, this is i386 only.  Adjust for other architectures.  */
 
-  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
-    {
-      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
-	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
-    }
-
-  if (GFC_DESCRIPTOR_RANK (b) == 1)
-    {
-      /* Treat it as a column matrix B[count,1] */
-      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
-
-      /* bystride should never be used for 1-dimensional b.
-	 in case it is we want it to cause a segfault, rather than
-	 an incorrect result. */
-      bystride = 0xDEADBEEF;
-      ycount = 1;
-    }
-  else
-    {
-      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
-      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
-      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
-    }
-
-  abase = a->base_addr;
-  bbase = b->base_addr;
-  dest = retarray->base_addr;
-
-  /* Now that everything is set up, we perform the multiplication
-     itself.  */
-
-#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
-#define min(a,b) ((a) <= (b) ? (a) : (b))
-#define max(a,b) ((a) >= (b) ? (a) : (b))
-
-  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
-      && (bxstride == 1 || bystride == 1)
-      && (((float) xcount) * ((float) ycount) * ((float) count)
-          > POW3(blas_limit)))
-    {
-      const int m = xcount, n = ycount, k = count, ldc = rystride;
-      const 'rtype_name` one = 1, zero = 0;
-      const int lda = (axstride == 1) ? aystride : axstride,
-		ldb = (bxstride == 1) ? bystride : bxstride;
+#include <config/i386/cpuinfo.h>
+void matmul_'rtype_code` ('rtype` * const restrict retarray, 
+	'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  static void (*matmul_p) ('rtype` * const restrict retarray, 
+	'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm) = NULL;
 
-      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
-	{
-	  assert (gemm != NULL);
-	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
-		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
-		&ldc, 1, 1);
-	  return;
-	}
-    }
-
-  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+  if (matmul_p == NULL)
     {
-      /* This block of code implements a tuned matmul, derived from
-         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
-
-               Bo Kagstrom and Per Ling
-               Department of Computing Science
-               Umea University
-               S-901 87 Umea, Sweden
-
-	 from netlib.org, translated to C, and modified for matmul.m4.  */
-
-      const 'rtype_name` *a, *b;
-      'rtype_name` *c;
-      const index_type m = xcount, n = ycount, k = count;
-
-      /* System generated locals */
-      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
-		 i1, i2, i3, i4, i5, i6;
-
-      /* Local variables */
-      'rtype_name` t1[65536], /* was [256][256] */
-		 f11, f12, f21, f22, f31, f32, f41, f42,
-		 f13, f14, f23, f24, f33, f34, f43, f44;
-      index_type i, j, l, ii, jj, ll;
-      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
-
-      a = abase;
-      b = bbase;
-      c = retarray->base_addr;
-
-      /* Parameter adjustments */
-      c_dim1 = rystride;
-      c_offset = 1 + c_dim1;
-      c -= c_offset;
-      a_dim1 = aystride;
-      a_offset = 1 + a_dim1;
-      a -= a_offset;
-      b_dim1 = bystride;
-      b_offset = 1 + b_dim1;
-      b -= b_offset;
-
-      /* Early exit if possible */
-      if (m == 0 || n == 0 || k == 0)
-	return;
-
-      /* Empty c first.  */
-      for (j=1; j<=n; j++)
-	for (i=1; i<=m; i++)
-	  c[i + j * c_dim1] = ('rtype_name`)0;
-
-      /* Start turning the crank. */
-      i1 = n;
-      for (jj = 1; jj <= i1; jj += 512)
+      matmul_p = matmul_'rtype_code`_vanilla;
+      if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
 	{
-	  /* Computing MIN */
-	  i2 = 512;
-	  i3 = n - jj + 1;
-	  jsec = min(i2,i3);
-	  ujsec = jsec - jsec % 4;
-	  i2 = k;
-	  for (ll = 1; ll <= i2; ll += 256)
+          /* Run down the available processors in order of preference.  */
+#ifdef HAVE_AVX512F
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
 	    {
-	      /* Computing MIN */
-	      i3 = 256;
-	      i4 = k - ll + 1;
-	      lsec = min(i3,i4);
-	      ulsec = lsec - lsec % 2;
-
-	      i3 = m;
-	      for (ii = 1; ii <= i3; ii += 256)
-		{
-		  /* Computing MIN */
-		  i4 = 256;
-		  i5 = m - ii + 1;
-		  isec = min(i4,i5);
-		  uisec = isec - isec % 2;
-		  i4 = ll + ulsec - 1;
-		  for (l = ll; l <= i4; l += 2)
-		    {
-		      i5 = ii + uisec - 1;
-		      for (i = ii; i <= i5; i += 2)
-			{
-			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
-					a[i + l * a_dim1];
-			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
-					a[i + (l + 1) * a_dim1];
-			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
-					a[i + 1 + l * a_dim1];
-			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
-					a[i + 1 + (l + 1) * a_dim1];
-			}
-		      if (uisec < isec)
-			{
-			  t1[l - ll + 1 + (isec << 8) - 257] =
-				    a[ii + isec - 1 + l * a_dim1];
-			  t1[l - ll + 2 + (isec << 8) - 257] =
-				    a[ii + isec - 1 + (l + 1) * a_dim1];
-			}
-		    }
-		  if (ulsec < lsec)
-		    {
-		      i4 = ii + isec - 1;
-		      for (i = ii; i<= i4; ++i)
-			{
-			  t1[lsec + ((i - ii + 1) << 8) - 257] =
-				    a[i + (ll + lsec - 1) * a_dim1];
-			}
-		    }
-
-		  uisec = isec - isec % 4;
-		  i4 = jj + ujsec - 1;
-		  for (j = jj; j <= i4; j += 4)
-		    {
-		      i5 = ii + uisec - 1;
-		      for (i = ii; i <= i5; i += 4)
-			{
-			  f11 = c[i + j * c_dim1];
-			  f21 = c[i + 1 + j * c_dim1];
-			  f12 = c[i + (j + 1) * c_dim1];
-			  f22 = c[i + 1 + (j + 1) * c_dim1];
-			  f13 = c[i + (j + 2) * c_dim1];
-			  f23 = c[i + 1 + (j + 2) * c_dim1];
-			  f14 = c[i + (j + 3) * c_dim1];
-			  f24 = c[i + 1 + (j + 3) * c_dim1];
-			  f31 = c[i + 2 + j * c_dim1];
-			  f41 = c[i + 3 + j * c_dim1];
-			  f32 = c[i + 2 + (j + 1) * c_dim1];
-			  f42 = c[i + 3 + (j + 1) * c_dim1];
-			  f33 = c[i + 2 + (j + 2) * c_dim1];
-			  f43 = c[i + 3 + (j + 2) * c_dim1];
-			  f34 = c[i + 2 + (j + 3) * c_dim1];
-			  f44 = c[i + 3 + (j + 3) * c_dim1];
-			  i6 = ll + lsec - 1;
-			  for (l = ll; l <= i6; ++l)
-			    {
-			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
-				      * b[l + j * b_dim1];
-			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
-				      * b[l + j * b_dim1];
-			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
-				      * b[l + (j + 1) * b_dim1];
-			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
-				      * b[l + (j + 1) * b_dim1];
-			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
-				      * b[l + (j + 2) * b_dim1];
-			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
-				      * b[l + (j + 2) * b_dim1];
-			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
-				      * b[l + (j + 3) * b_dim1];
-			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
-				      * b[l + (j + 3) * b_dim1];
-			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
-				      * b[l + j * b_dim1];
-			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
-				      * b[l + j * b_dim1];
-			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
-				      * b[l + (j + 1) * b_dim1];
-			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
-				      * b[l + (j + 1) * b_dim1];
-			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
-				      * b[l + (j + 2) * b_dim1];
-			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
-				      * b[l + (j + 2) * b_dim1];
-			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
-				      * b[l + (j + 3) * b_dim1];
-			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
-				      * b[l + (j + 3) * b_dim1];
-			    }
-			  c[i + j * c_dim1] = f11;
-			  c[i + 1 + j * c_dim1] = f21;
-			  c[i + (j + 1) * c_dim1] = f12;
-			  c[i + 1 + (j + 1) * c_dim1] = f22;
-			  c[i + (j + 2) * c_dim1] = f13;
-			  c[i + 1 + (j + 2) * c_dim1] = f23;
-			  c[i + (j + 3) * c_dim1] = f14;
-			  c[i + 1 + (j + 3) * c_dim1] = f24;
-			  c[i + 2 + j * c_dim1] = f31;
-			  c[i + 3 + j * c_dim1] = f41;
-			  c[i + 2 + (j + 1) * c_dim1] = f32;
-			  c[i + 3 + (j + 1) * c_dim1] = f42;
-			  c[i + 2 + (j + 2) * c_dim1] = f33;
-			  c[i + 3 + (j + 2) * c_dim1] = f43;
-			  c[i + 2 + (j + 3) * c_dim1] = f34;
-			  c[i + 3 + (j + 3) * c_dim1] = f44;
-			}
-		      if (uisec < isec)
-			{
-			  i5 = ii + isec - 1;
-			  for (i = ii + uisec; i <= i5; ++i)
-			    {
-			      f11 = c[i + j * c_dim1];
-			      f12 = c[i + (j + 1) * c_dim1];
-			      f13 = c[i + (j + 2) * c_dim1];
-			      f14 = c[i + (j + 3) * c_dim1];
-			      i6 = ll + lsec - 1;
-			      for (l = ll; l <= i6; ++l)
-				{
-				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
-					  257] * b[l + j * b_dim1];
-				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
-					  257] * b[l + (j + 1) * b_dim1];
-				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
-					  257] * b[l + (j + 2) * b_dim1];
-				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
-					  257] * b[l + (j + 3) * b_dim1];
-				}
-			      c[i + j * c_dim1] = f11;
-			      c[i + (j + 1) * c_dim1] = f12;
-			      c[i + (j + 2) * c_dim1] = f13;
-			      c[i + (j + 3) * c_dim1] = f14;
-			    }
-			}
-		    }
-		  if (ujsec < jsec)
-		    {
-		      i4 = jj + jsec - 1;
-		      for (j = jj + ujsec; j <= i4; ++j)
-			{
-			  i5 = ii + uisec - 1;
-			  for (i = ii; i <= i5; i += 4)
-			    {
-			      f11 = c[i + j * c_dim1];
-			      f21 = c[i + 1 + j * c_dim1];
-			      f31 = c[i + 2 + j * c_dim1];
-			      f41 = c[i + 3 + j * c_dim1];
-			      i6 = ll + lsec - 1;
-			      for (l = ll; l <= i6; ++l)
-				{
-				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
-					  257] * b[l + j * b_dim1];
-				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
-					  257] * b[l + j * b_dim1];
-				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
-					  257] * b[l + j * b_dim1];
-				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
-					  257] * b[l + j * b_dim1];
-				}
-			      c[i + j * c_dim1] = f11;
-			      c[i + 1 + j * c_dim1] = f21;
-			      c[i + 2 + j * c_dim1] = f31;
-			      c[i + 3 + j * c_dim1] = f41;
-			    }
-			  i5 = ii + isec - 1;
-			  for (i = ii + uisec; i <= i5; ++i)
-			    {
-			      f11 = c[i + j * c_dim1];
-			      i6 = ll + lsec - 1;
-			      for (l = ll; l <= i6; ++l)
-				{
-				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
-					  257] * b[l + j * b_dim1];
-				}
-			      c[i + j * c_dim1] = f11;
-			    }
-			}
-		    }
-		}
+	      matmul_p = matmul_'rtype_code`_avx512f;
+	      goto tailcall;
 	    }
-	}
-      return;
-    }
-  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
-    {
-      if (GFC_DESCRIPTOR_RANK (a) != 1)
-	{
-	  const 'rtype_name` *restrict abase_x;
-	  const 'rtype_name` *restrict bbase_y;
-	  'rtype_name` *restrict dest_y;
-	  'rtype_name` s;
 
-	  for (y = 0; y < ycount; y++)
-	    {
-	      bbase_y = &bbase[y*bystride];
-	      dest_y = &dest[y*rystride];
-	      for (x = 0; x < xcount; x++)
-		{
-		  abase_x = &abase[x*axstride];
-		  s = ('rtype_name`) 0;
-		  for (n = 0; n < count; n++)
-		    s += abase_x[n] * bbase_y[n];
-		  dest_y[x] = s;
-		}
-	    }
-	}
-      else
-	{
-	  const 'rtype_name` *restrict bbase_y;
-	  'rtype_name` s;
+#endif  /* HAVE_AVX512F */
 
-	  for (y = 0; y < ycount; y++)
+#ifdef HAVE_AVX2
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
 	    {
-	      bbase_y = &bbase[y*bystride];
-	      s = ('rtype_name`) 0;
-	      for (n = 0; n < count; n++)
-		s += abase[n*axstride] * bbase_y[n];
-	      dest[y*rystride] = s;
+	      matmul_p = matmul_'rtype_code`_avx2;
+	      goto tailcall;
 	    }
-	}
-    }
-  else if (axstride < aystride)
-    {
-      for (y = 0; y < ycount; y++)
-	for (x = 0; x < xcount; x++)
-	  dest[x*rxstride + y*rystride] = ('rtype_name`)0;
-
-      for (y = 0; y < ycount; y++)
-	for (n = 0; n < count; n++)
-	  for (x = 0; x < xcount; x++)
-	    /* dest[x,y] += a[x,n] * b[n,y] */
-	    dest[x*rxstride + y*rystride] +=
-					abase[x*axstride + n*aystride] *
-					bbase[n*bxstride + y*bystride];
-    }
-  else if (GFC_DESCRIPTOR_RANK (a) == 1)
-    {
-      const 'rtype_name` *restrict bbase_y;
-      'rtype_name` s;
 
-      for (y = 0; y < ycount; y++)
-	{
-	  bbase_y = &bbase[y*bystride];
-	  s = ('rtype_name`) 0;
-	  for (n = 0; n < count; n++)
-	    s += abase[n*axstride] * bbase_y[n*bxstride];
-	  dest[y*rxstride] = s;
-	}
-    }
-  else
-    {
-      const 'rtype_name` *restrict abase_x;
-      const 'rtype_name` *restrict bbase_y;
-      'rtype_name` *restrict dest_y;
-      'rtype_name` s;
+#endif
 
-      for (y = 0; y < ycount; y++)
-	{
-	  bbase_y = &bbase[y*bystride];
-	  dest_y = &dest[y*rystride];
-	  for (x = 0; x < xcount; x++)
-	    {
-	      abase_x = &abase[x*axstride];
-	      s = ('rtype_name`) 0;
-	      for (n = 0; n < count; n++)
-		s += abase_x[n*aystride] * bbase_y[n*bxstride];
-	      dest_y[x*rxstride] = s;
+#ifdef HAVE_AVX
+      	  if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
+ 	    {
+              matmul_p = matmul_'rtype_code`_avx;
+	      goto tailcall;
 	    }
-	}
-    }
-}'
+#endif  /* HAVE_AVX */
+        }
+   }
+
+tailcall:
+   (*matmul_p) (retarray, a, b, try_blas, blas_limit, gemm);
+}
+
+#else  /* Just the vanilla function.  */
+
+'define(`matmul_name',`matmul_'rtype_code)dnl
+define(`target_attribute',`')dnl
+include(matmul_internal.m4)dnl
+`#endif
 #endif
+'
diff --git a/libgfortran/m4/matmul_internal.m4 b/libgfortran/m4/matmul_internal.m4
new file mode 100644
index 0000000..d35968b
--- /dev/null
+++ b/libgfortran/m4/matmul_internal.m4
@@ -0,0 +1,537 @@
+`void
+'matmul_name` ('rtype` * const restrict retarray, 
+	'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
+	int blas_limit, blas_call gemm)
+{
+  const 'rtype_name` * restrict abase;
+  const 'rtype_name` * restrict bbase;
+  'rtype_name` * restrict dest;
+
+  index_type rxstride, rystride, axstride, aystride, bxstride, bystride;
+  index_type x, y, n, count, xcount, ycount;
+
+  assert (GFC_DESCRIPTOR_RANK (a) == 2
+          || GFC_DESCRIPTOR_RANK (b) == 2);
+
+/* C[xcount,ycount] = A[xcount, count] * B[count,ycount]
+
+   Either A or B (but not both) can be rank 1:
+
+   o One-dimensional argument A is implicitly treated as a row matrix
+     dimensioned [1,count], so xcount=1.
+
+   o One-dimensional argument B is implicitly treated as a column matrix
+     dimensioned [count, 1], so ycount=1.
+*/
+
+  if (retarray->base_addr == NULL)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
+        }
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+        }
+      else
+        {
+	  GFC_DIMENSION_SET(retarray->dim[0], 0,
+	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
+
+          GFC_DIMENSION_SET(retarray->dim[1], 0,
+	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
+			    GFC_DESCRIPTOR_EXTENT(retarray,0));
+        }
+
+      retarray->base_addr
+	= xmallocarray (size0 ((array_t *) retarray), sizeof ('rtype_name`));
+      retarray->offset = 0;
+    }
+  else if (unlikely (compile_options.bounds_check))
+    {
+      index_type ret_extent, arg_extent;
+
+      if (GFC_DESCRIPTOR_RANK (a) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else if (GFC_DESCRIPTOR_RANK (b) == 1)
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic: is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+      else
+	{
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 1:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+
+	  arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
+	  ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
+	  if (arg_extent != ret_extent)
+	    runtime_error ("Incorrect extent in return array in"
+			   " MATMUL intrinsic for dimension 2:"
+			   " is %ld, should be %ld",
+			   (long int) ret_extent, (long int) arg_extent);
+	}
+    }
+'
+sinclude(`matmul_asm_'rtype_code`.m4')dnl
+`
+  if (GFC_DESCRIPTOR_RANK (retarray) == 1)
+    {
+      /* One-dimensional result may be addressed in the code below
+	 either as a row or a column matrix. We want both cases to
+	 work. */
+      rxstride = rystride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+    }
+  else
+    {
+      rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
+      rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
+    }
+
+
+  if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      /* Treat it as a row matrix A[1,count]. */
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = 1;
+
+      xcount = 1;
+      count = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+  else
+    {
+      axstride = GFC_DESCRIPTOR_STRIDE(a,0);
+      aystride = GFC_DESCRIPTOR_STRIDE(a,1);
+
+      count = GFC_DESCRIPTOR_EXTENT(a,1);
+      xcount = GFC_DESCRIPTOR_EXTENT(a,0);
+    }
+
+  if (count != GFC_DESCRIPTOR_EXTENT(b,0))
+    {
+      if (count > 0 || GFC_DESCRIPTOR_EXTENT(b,0) > 0)
+	runtime_error ("dimension of array B incorrect in MATMUL intrinsic");
+    }
+
+  if (GFC_DESCRIPTOR_RANK (b) == 1)
+    {
+      /* Treat it as a column matrix B[count,1] */
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+
+      /* bystride should never be used for 1-dimensional b.
+	 In case it is, we want it to cause a segfault, rather than
+	 an incorrect result. */
+      bystride = 0xDEADBEEF;
+      ycount = 1;
+    }
+  else
+    {
+      bxstride = GFC_DESCRIPTOR_STRIDE(b,0);
+      bystride = GFC_DESCRIPTOR_STRIDE(b,1);
+      ycount = GFC_DESCRIPTOR_EXTENT(b,1);
+    }
+
+  abase = a->base_addr;
+  bbase = b->base_addr;
+  dest = retarray->base_addr;
+
+  /* Now that everything is set up, we perform the multiplication
+     itself.  */
+
+#define POW3(x) (((float) (x)) * ((float) (x)) * ((float) (x)))
+#define min(a,b) ((a) <= (b) ? (a) : (b))
+#define max(a,b) ((a) >= (b) ? (a) : (b))
+
+  if (try_blas && rxstride == 1 && (axstride == 1 || aystride == 1)
+      && (bxstride == 1 || bystride == 1)
+      && (((float) xcount) * ((float) ycount) * ((float) count)
+          > POW3(blas_limit)))
+    {
+      const int m = xcount, n = ycount, k = count, ldc = rystride;
+      const 'rtype_name` one = 1, zero = 0;
+      const int lda = (axstride == 1) ? aystride : axstride,
+		ldb = (bxstride == 1) ? bystride : bxstride;
+
+      if (lda > 0 && ldb > 0 && ldc > 0 && m > 1 && n > 1 && k > 1)
+	{
+	  assert (gemm != NULL);
+	  gemm (axstride == 1 ? "N" : "T", bxstride == 1 ? "N" : "T", &m,
+		&n, &k,	&one, abase, &lda, bbase, &ldb, &zero, dest,
+		&ldc, 1, 1);
+	  return;
+	}
+    }
+
+  if (rxstride == 1 && axstride == 1 && bxstride == 1)
+    {
+      /* This block of code implements a tuned matmul, derived from
+         Superscalar GEMM-based level 3 BLAS,  Beta version 0.1
+
+               Bo Kagstrom and Per Ling
+               Department of Computing Science
+               Umea University
+               S-901 87 Umea, Sweden
+
+	 from netlib.org, translated to C, and modified for matmul.m4.  */
+
+      const 'rtype_name` *a, *b;
+      'rtype_name` *c;
+      const index_type m = xcount, n = ycount, k = count;
+
+      /* System generated locals */
+      index_type a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset,
+		 i1, i2, i3, i4, i5, i6;
+
+      /* Local variables */
+      'rtype_name` t1[65536], /* was [256][256] */
+		 f11, f12, f21, f22, f31, f32, f41, f42,
+		 f13, f14, f23, f24, f33, f34, f43, f44;
+      index_type i, j, l, ii, jj, ll;
+      index_type isec, jsec, lsec, uisec, ujsec, ulsec;
+
+      a = abase;
+      b = bbase;
+      c = retarray->base_addr;
+
+      /* Parameter adjustments */
+      c_dim1 = rystride;
+      c_offset = 1 + c_dim1;
+      c -= c_offset;
+      a_dim1 = aystride;
+      a_offset = 1 + a_dim1;
+      a -= a_offset;
+      b_dim1 = bystride;
+      b_offset = 1 + b_dim1;
+      b -= b_offset;
+
+      /* Early exit if possible */
+      if (m == 0 || n == 0 || k == 0)
+	return;
+
+      /* Empty c first.  */
+      for (j=1; j<=n; j++)
+	for (i=1; i<=m; i++)
+	  c[i + j * c_dim1] = ('rtype_name`)0;
+
+      /* Start turning the crank. */
+      i1 = n;
+      for (jj = 1; jj <= i1; jj += 512)
+	{
+	  /* Computing MIN */
+	  i2 = 512;
+	  i3 = n - jj + 1;
+	  jsec = min(i2,i3);
+	  ujsec = jsec - jsec % 4;
+	  i2 = k;
+	  for (ll = 1; ll <= i2; ll += 256)
+	    {
+	      /* Computing MIN */
+	      i3 = 256;
+	      i4 = k - ll + 1;
+	      lsec = min(i3,i4);
+	      ulsec = lsec - lsec % 2;
+
+	      i3 = m;
+	      for (ii = 1; ii <= i3; ii += 256)
+		{
+		  /* Computing MIN */
+		  i4 = 256;
+		  i5 = m - ii + 1;
+		  isec = min(i4,i5);
+		  uisec = isec - isec % 2;
+		  i4 = ll + ulsec - 1;
+		  for (l = ll; l <= i4; l += 2)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 2)
+			{
+			  t1[l - ll + 1 + ((i - ii + 1) << 8) - 257] =
+					a[i + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 1) << 8) - 257] =
+					a[i + (l + 1) * a_dim1];
+			  t1[l - ll + 1 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + l * a_dim1];
+			  t1[l - ll + 2 + ((i - ii + 2) << 8) - 257] =
+					a[i + 1 + (l + 1) * a_dim1];
+			}
+		      if (uisec < isec)
+			{
+			  t1[l - ll + 1 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + l * a_dim1];
+			  t1[l - ll + 2 + (isec << 8) - 257] =
+				    a[ii + isec - 1 + (l + 1) * a_dim1];
+			}
+		    }
+		  if (ulsec < lsec)
+		    {
+		      i4 = ii + isec - 1;
+		      for (i = ii; i<= i4; ++i)
+			{
+			  t1[lsec + ((i - ii + 1) << 8) - 257] =
+				    a[i + (ll + lsec - 1) * a_dim1];
+			}
+		    }
+
+		  uisec = isec - isec % 4;
+		  i4 = jj + ujsec - 1;
+		  for (j = jj; j <= i4; j += 4)
+		    {
+		      i5 = ii + uisec - 1;
+		      for (i = ii; i <= i5; i += 4)
+			{
+			  f11 = c[i + j * c_dim1];
+			  f21 = c[i + 1 + j * c_dim1];
+			  f12 = c[i + (j + 1) * c_dim1];
+			  f22 = c[i + 1 + (j + 1) * c_dim1];
+			  f13 = c[i + (j + 2) * c_dim1];
+			  f23 = c[i + 1 + (j + 2) * c_dim1];
+			  f14 = c[i + (j + 3) * c_dim1];
+			  f24 = c[i + 1 + (j + 3) * c_dim1];
+			  f31 = c[i + 2 + j * c_dim1];
+			  f41 = c[i + 3 + j * c_dim1];
+			  f32 = c[i + 2 + (j + 1) * c_dim1];
+			  f42 = c[i + 3 + (j + 1) * c_dim1];
+			  f33 = c[i + 2 + (j + 2) * c_dim1];
+			  f43 = c[i + 3 + (j + 2) * c_dim1];
+			  f34 = c[i + 2 + (j + 3) * c_dim1];
+			  f44 = c[i + 3 + (j + 3) * c_dim1];
+			  i6 = ll + lsec - 1;
+			  for (l = ll; l <= i6; ++l)
+			    {
+			      f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f22 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f23 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f24 += t1[l - ll + 1 + ((i - ii + 2) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + j * b_dim1];
+			      f32 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f42 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 1) * b_dim1];
+			      f33 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f43 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 2) * b_dim1];
+			      f34 += t1[l - ll + 1 + ((i - ii + 3) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			      f44 += t1[l - ll + 1 + ((i - ii + 4) << 8) - 257]
+				      * b[l + (j + 3) * b_dim1];
+			    }
+			  c[i + j * c_dim1] = f11;
+			  c[i + 1 + j * c_dim1] = f21;
+			  c[i + (j + 1) * c_dim1] = f12;
+			  c[i + 1 + (j + 1) * c_dim1] = f22;
+			  c[i + (j + 2) * c_dim1] = f13;
+			  c[i + 1 + (j + 2) * c_dim1] = f23;
+			  c[i + (j + 3) * c_dim1] = f14;
+			  c[i + 1 + (j + 3) * c_dim1] = f24;
+			  c[i + 2 + j * c_dim1] = f31;
+			  c[i + 3 + j * c_dim1] = f41;
+			  c[i + 2 + (j + 1) * c_dim1] = f32;
+			  c[i + 3 + (j + 1) * c_dim1] = f42;
+			  c[i + 2 + (j + 2) * c_dim1] = f33;
+			  c[i + 3 + (j + 2) * c_dim1] = f43;
+			  c[i + 2 + (j + 3) * c_dim1] = f34;
+			  c[i + 3 + (j + 3) * c_dim1] = f44;
+			}
+		      if (uisec < isec)
+			{
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f12 = c[i + (j + 1) * c_dim1];
+			      f13 = c[i + (j + 2) * c_dim1];
+			      f14 = c[i + (j + 3) * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f12 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 1) * b_dim1];
+				  f13 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 2) * b_dim1];
+				  f14 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + (j + 3) * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + (j + 1) * c_dim1] = f12;
+			      c[i + (j + 2) * c_dim1] = f13;
+			      c[i + (j + 3) * c_dim1] = f14;
+			    }
+			}
+		    }
+		  if (ujsec < jsec)
+		    {
+		      i4 = jj + jsec - 1;
+		      for (j = jj + ujsec; j <= i4; ++j)
+			{
+			  i5 = ii + uisec - 1;
+			  for (i = ii; i <= i5; i += 4)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      f21 = c[i + 1 + j * c_dim1];
+			      f31 = c[i + 2 + j * c_dim1];
+			      f41 = c[i + 3 + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f21 += t1[l - ll + 1 + ((i - ii + 2) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f31 += t1[l - ll + 1 + ((i - ii + 3) << 8) -
+					  257] * b[l + j * b_dim1];
+				  f41 += t1[l - ll + 1 + ((i - ii + 4) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			      c[i + 1 + j * c_dim1] = f21;
+			      c[i + 2 + j * c_dim1] = f31;
+			      c[i + 3 + j * c_dim1] = f41;
+			    }
+			  i5 = ii + isec - 1;
+			  for (i = ii + uisec; i <= i5; ++i)
+			    {
+			      f11 = c[i + j * c_dim1];
+			      i6 = ll + lsec - 1;
+			      for (l = ll; l <= i6; ++l)
+				{
+				  f11 += t1[l - ll + 1 + ((i - ii + 1) << 8) -
+					  257] * b[l + j * b_dim1];
+				}
+			      c[i + j * c_dim1] = f11;
+			    }
+			}
+		    }
+		}
+	    }
+	}
+      return;
+    }
+  else if (rxstride == 1 && aystride == 1 && bxstride == 1)
+    {
+      if (GFC_DESCRIPTOR_RANK (a) != 1)
+	{
+	  const 'rtype_name` *restrict abase_x;
+	  const 'rtype_name` *restrict bbase_y;
+	  'rtype_name` *restrict dest_y;
+	  'rtype_name` s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      dest_y = &dest[y*rystride];
+	      for (x = 0; x < xcount; x++)
+		{
+		  abase_x = &abase[x*axstride];
+		  s = ('rtype_name`) 0;
+		  for (n = 0; n < count; n++)
+		    s += abase_x[n] * bbase_y[n];
+		  dest_y[x] = s;
+		}
+	    }
+	}
+      else
+	{
+	  const 'rtype_name` *restrict bbase_y;
+	  'rtype_name` s;
+
+	  for (y = 0; y < ycount; y++)
+	    {
+	      bbase_y = &bbase[y*bystride];
+	      s = ('rtype_name`) 0;
+	      for (n = 0; n < count; n++)
+		s += abase[n*axstride] * bbase_y[n];
+	      dest[y*rystride] = s;
+	    }
+	}
+    }
+  else if (axstride < aystride)
+    {
+      for (y = 0; y < ycount; y++)
+	for (x = 0; x < xcount; x++)
+	  dest[x*rxstride + y*rystride] = ('rtype_name`)0;
+
+      for (y = 0; y < ycount; y++)
+	for (n = 0; n < count; n++)
+	  for (x = 0; x < xcount; x++)
+	    /* dest[x,y] += a[x,n] * b[n,y] */
+	    dest[x*rxstride + y*rystride] +=
+					abase[x*axstride + n*aystride] *
+					bbase[n*bxstride + y*bystride];
+    }
+  else if (GFC_DESCRIPTOR_RANK (a) == 1)
+    {
+      const 'rtype_name` *restrict bbase_y;
+      'rtype_name` s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  s = ('rtype_name`) 0;
+	  for (n = 0; n < count; n++)
+	    s += abase[n*axstride] * bbase_y[n*bxstride];
+	  dest[y*rxstride] = s;
+	}
+    }
+  else
+    {
+      const 'rtype_name` *restrict abase_x;
+      const 'rtype_name` *restrict bbase_y;
+      'rtype_name` *restrict dest_y;
+      'rtype_name` s;
+
+      for (y = 0; y < ycount; y++)
+	{
+	  bbase_y = &bbase[y*bystride];
+	  dest_y = &dest[y*rystride];
+	  for (x = 0; x < xcount; x++)
+	    {
+	      abase_x = &abase[x*axstride];
+	      s = ('rtype_name`) 0;
+	      for (n = 0; n < count; n++)
+		s += abase_x[n*aystride] * bbase_y[n*bxstride];
+	      dest_y[x*rxstride] = s;
+	    }
+	}
+    }
+}
+#undef POW3
+#undef min
+#undef max
+'
-- 
cgit v1.1


From f9b6b9291db6eab79e3f4c177cf4cf264aac6dda Mon Sep 17 00:00:00 2001
From: John David Anglin <danglin@gcc.gnu.org>
Date: Sat, 3 Dec 2016 16:10:43 +0000
Subject: baseline_symbols.txt: Regenerate.

	* config/abi/post/hppa-linux-gnu/baseline_symbols.txt: Regenerate.

From-SVN: r243220
---
 libstdc++-v3/ChangeLog                             |  4 +
 .../abi/post/hppa-linux-gnu/baseline_symbols.txt   | 95 ++++++++++++++++++++++
 2 files changed, 99 insertions(+)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 08d9229..451b6ee 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,7 @@
+2016-12-03  John David Anglin  <danglin@gcc.gnu.org>
+
+	* config/abi/post/hppa-linux-gnu/baseline_symbols.txt: Regenerate.
+
 2016-12-01  David Edelsohn  <dje.gcc@gmail.com>
 
 	* testsuite/26_numerics/headers/cmath/hypot.cc: XFAIL on AIX.
diff --git a/libstdc++-v3/config/abi/post/hppa-linux-gnu/baseline_symbols.txt b/libstdc++-v3/config/abi/post/hppa-linux-gnu/baseline_symbols.txt
index ab37045..47b4b62 100644
--- a/libstdc++-v3/config/abi/post/hppa-linux-gnu/baseline_symbols.txt
+++ b/libstdc++-v3/config/abi/post/hppa-linux-gnu/baseline_symbols.txt
@@ -1,3 +1,72 @@
+FUNC:_ZGTtNKSt11logic_error4whatEv@@GLIBCXX_3.4.22
+FUNC:_ZGTtNKSt13bad_exception4whatEv@@CXXABI_1.3.10
+FUNC:_ZGTtNKSt13bad_exceptionD1Ev@@CXXABI_1.3.10
+FUNC:_ZGTtNKSt13runtime_error4whatEv@@GLIBCXX_3.4.22
+FUNC:_ZGTtNKSt9exception4whatEv@@CXXABI_1.3.10
+FUNC:_ZGTtNKSt9exceptionD1Ev@@CXXABI_1.3.10
+FUNC:_ZGTtNSt11logic_errorC1EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11logic_errorC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11logic_errorC2EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11logic_errorC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11logic_errorD0Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11logic_errorD1Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11logic_errorD2Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11range_errorC1EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11range_errorC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11range_errorC2EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11range_errorC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11range_errorD0Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11range_errorD1Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt11range_errorD2Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12domain_errorC1EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12domain_errorC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12domain_errorC2EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12domain_errorC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12domain_errorD0Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12domain_errorD1Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12domain_errorD2Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12length_errorC1EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12length_errorC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12length_errorC2EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12length_errorC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12length_errorD0Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12length_errorD1Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12length_errorD2Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12out_of_rangeC1EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12out_of_rangeC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12out_of_rangeC2EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12out_of_rangeC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12out_of_rangeD0Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12out_of_rangeD1Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt12out_of_rangeD2Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt13runtime_errorC1EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt13runtime_errorC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt13runtime_errorC2EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt13runtime_errorC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt13runtime_errorD0Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt13runtime_errorD1Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt13runtime_errorD2Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt14overflow_errorC1EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt14overflow_errorC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt14overflow_errorC2EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt14overflow_errorC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt14overflow_errorD0Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt14overflow_errorD1Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt14overflow_errorD2Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt15underflow_errorC1EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt15underflow_errorC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt15underflow_errorC2EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt15underflow_errorC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt15underflow_errorD0Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt15underflow_errorD1Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt15underflow_errorD2Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt16invalid_argumentC1EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt16invalid_argumentC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt16invalid_argumentC2EPKc@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt16invalid_argumentC2ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt16invalid_argumentD0Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt16invalid_argumentD1Ev@@GLIBCXX_3.4.22
+FUNC:_ZGTtNSt16invalid_argumentD2Ev@@GLIBCXX_3.4.22
 FUNC:_ZN10__cxxabiv116__enum_type_infoD0Ev@@CXXABI_1.3
 FUNC:_ZN10__cxxabiv116__enum_type_infoD1Ev@@CXXABI_1.3
 FUNC:_ZN10__cxxabiv116__enum_type_infoD2Ev@@CXXABI_1.3
@@ -100,6 +169,7 @@ FUNC:_ZN9__gnu_cxx6__poolILb1EE16_M_get_thread_idEv@@GLIBCXX_3.4.4
 FUNC:_ZN9__gnu_cxx6__poolILb1EE16_M_reclaim_blockEPcj@@GLIBCXX_3.4.4
 FUNC:_ZN9__gnu_cxx6__poolILb1EE16_M_reserve_blockEjj@@GLIBCXX_3.4.4
 FUNC:_ZN9__gnu_cxx6__poolILb1EE21_M_destroy_thread_keyEPv@@GLIBCXX_3.4.4
+FUNC:_ZN9__gnu_cxx9__freeresEv@@CXXABI_1.3.10
 FUNC:_ZN9__gnu_cxx9free_list6_M_getEj@@GLIBCXX_3.4.4
 FUNC:_ZN9__gnu_cxx9free_list8_M_clearEv@@GLIBCXX_3.4.4
 FUNC:_ZNK10__cxxabiv117__class_type_info10__do_catchEPKSt9type_infoPPvj@@CXXABI_1.3
@@ -1510,6 +1580,7 @@ FUNC:_ZNSsC1EPKcRKSaIcE@@GLIBCXX_3.4
 FUNC:_ZNSsC1EPKcjRKSaIcE@@GLIBCXX_3.4
 FUNC:_ZNSsC1ERKSaIcE@@GLIBCXX_3.4
 FUNC:_ZNSsC1ERKSs@@GLIBCXX_3.4
+FUNC:_ZNSsC1ERKSsjRKSaIcE@@GLIBCXX_3.4.23
 FUNC:_ZNSsC1ERKSsjj@@GLIBCXX_3.4
 FUNC:_ZNSsC1ERKSsjjRKSaIcE@@GLIBCXX_3.4
 FUNC:_ZNSsC1ESt16initializer_listIcERKSaIcE@@GLIBCXX_3.4.11
@@ -1523,6 +1594,7 @@ FUNC:_ZNSsC2EPKcRKSaIcE@@GLIBCXX_3.4
 FUNC:_ZNSsC2EPKcjRKSaIcE@@GLIBCXX_3.4
 FUNC:_ZNSsC2ERKSaIcE@@GLIBCXX_3.4
 FUNC:_ZNSsC2ERKSs@@GLIBCXX_3.4
+FUNC:_ZNSsC2ERKSsjRKSaIcE@@GLIBCXX_3.4.23
 FUNC:_ZNSsC2ERKSsjj@@GLIBCXX_3.4
 FUNC:_ZNSsC2ERKSsjjRKSaIcE@@GLIBCXX_3.4
 FUNC:_ZNSsC2ESt16initializer_listIcERKSaIcE@@GLIBCXX_3.4.11
@@ -2162,6 +2234,7 @@ FUNC:_ZNSt15_List_node_base8transferEPS_S0_@@GLIBCXX_3.4
 FUNC:_ZNSt15_List_node_base9_M_unhookEv@@GLIBCXX_3.4.14
 FUNC:_ZNSt15__exception_ptr13exception_ptr4swapERS0_@@CXXABI_1.3.3
 FUNC:_ZNSt15__exception_ptr13exception_ptrC1EMS0_FvvE@@CXXABI_1.3.3
+FUNC:_ZNSt15__exception_ptr13exception_ptrC1EPv@@CXXABI_1.3.11
 FUNC:_ZNSt15__exception_ptr13exception_ptrC1ERKS0_@@CXXABI_1.3.3
 FUNC:_ZNSt15__exception_ptr13exception_ptrC1Ev@@CXXABI_1.3.3
 FUNC:_ZNSt15__exception_ptr13exception_ptrC2EMS0_FvvE@@CXXABI_1.3.3
@@ -2707,7 +2780,9 @@ FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_disposeEv@@GLIBCX
 FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_M_replaceEjjPKcj@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE10_S_compareEjj@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE11_M_capacityEj@@GLIBCXX_3.4.21
+FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_Alloc_hiderC1EPcOS3_@@GLIBCXX_3.4.23
 FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_Alloc_hiderC1EPcRKS3_@@GLIBCXX_3.4.21
+FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_Alloc_hiderC2EPcOS3_@@GLIBCXX_3.4.23
 FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_Alloc_hiderC2EPcRKS3_@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructEjc@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE12_M_constructIN9__gnu_cxx17__normal_iteratorIPKcS4_EEEEvT_SB_St20forward_iterator_tag@@GLIBCXX_3.4.21
@@ -2806,6 +2881,7 @@ FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC1EPKcjRKS3_@@GLIBCXX_
 FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC1ERKS3_@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC1ERKS4_@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC1ERKS4_RKS3_@@GLIBCXX_3.4.21
+FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC1ERKS4_jRKS3_@@GLIBCXX_3.4.23
 FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC1ERKS4_jj@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC1ERKS4_jjRKS3_@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC1ESt16initializer_listIcERKS3_@@GLIBCXX_3.4.21
@@ -2821,6 +2897,7 @@ FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC2EPKcjRKS3_@@GLIBCXX_
 FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC2ERKS3_@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC2ERKS4_@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC2ERKS4_RKS3_@@GLIBCXX_3.4.21
+FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC2ERKS4_jRKS3_@@GLIBCXX_3.4.23
 FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC2ERKS4_jj@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC2ERKS4_jjRKS3_@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEC2ESt16initializer_listIcERKS3_@@GLIBCXX_3.4.21
@@ -2846,7 +2923,9 @@ FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE10_M_disposeEv@@GLIBCX
 FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE10_M_replaceEjjPKwj@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE10_S_compareEjj@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE11_M_capacityEj@@GLIBCXX_3.4.21
+FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE12_Alloc_hiderC1EPwOS3_@@GLIBCXX_3.4.23
 FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE12_Alloc_hiderC1EPwRKS3_@@GLIBCXX_3.4.21
+FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE12_Alloc_hiderC2EPwOS3_@@GLIBCXX_3.4.23
 FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE12_Alloc_hiderC2EPwRKS3_@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE12_M_constructEjw@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE12_M_constructIN9__gnu_cxx17__normal_iteratorIPKwS4_EEEEvT_SB_St20forward_iterator_tag@@GLIBCXX_3.4.21
@@ -2945,6 +3024,7 @@ FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEEC1EPKwjRKS3_@@GLIBCXX_
 FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEEC1ERKS3_@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEEC1ERKS4_@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEEC1ERKS4_RKS3_@@GLIBCXX_3.4.21
+FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEEC1ERKS4_jRKS3_@@GLIBCXX_3.4.23
 FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEEC1ERKS4_jj@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEEC1ERKS4_jjRKS3_@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEEC1ESt16initializer_listIwERKS3_@@GLIBCXX_3.4.21
@@ -2960,6 +3040,7 @@ FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEEC2EPKwjRKS3_@@GLIBCXX_
 FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEEC2ERKS3_@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEEC2ERKS4_@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEEC2ERKS4_RKS3_@@GLIBCXX_3.4.21
+FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEEC2ERKS4_jRKS3_@@GLIBCXX_3.4.23
 FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEEC2ERKS4_jj@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEEC2ERKS4_jjRKS3_@@GLIBCXX_3.4.21
 FUNC:_ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEEC2ESt16initializer_listIwERKS3_@@GLIBCXX_3.4.21
@@ -3829,14 +3910,24 @@ FUNC:_ZTv0_n12_NSt9strstreamD0Ev@@GLIBCXX_3.4
 FUNC:_ZTv0_n12_NSt9strstreamD1Ev@@GLIBCXX_3.4
 FUNC:_ZdaPv@@GLIBCXX_3.4
 FUNC:_ZdaPvRKSt9nothrow_t@@GLIBCXX_3.4
+FUNC:_ZdaPvSt11align_val_t@@CXXABI_1.3.11
+FUNC:_ZdaPvSt11align_val_tRKSt9nothrow_t@@CXXABI_1.3.11
 FUNC:_ZdaPvj@@CXXABI_1.3.9
+FUNC:_ZdaPvjSt11align_val_t@@CXXABI_1.3.11
 FUNC:_ZdlPv@@GLIBCXX_3.4
 FUNC:_ZdlPvRKSt9nothrow_t@@GLIBCXX_3.4
+FUNC:_ZdlPvSt11align_val_t@@CXXABI_1.3.11
+FUNC:_ZdlPvSt11align_val_tRKSt9nothrow_t@@CXXABI_1.3.11
 FUNC:_ZdlPvj@@CXXABI_1.3.9
+FUNC:_ZdlPvjSt11align_val_t@@CXXABI_1.3.11
 FUNC:_Znaj@@GLIBCXX_3.4
 FUNC:_ZnajRKSt9nothrow_t@@GLIBCXX_3.4
+FUNC:_ZnajSt11align_val_t@@CXXABI_1.3.11
+FUNC:_ZnajSt11align_val_tRKSt9nothrow_t@@CXXABI_1.3.11
 FUNC:_Znwj@@GLIBCXX_3.4
 FUNC:_ZnwjRKSt9nothrow_t@@GLIBCXX_3.4
+FUNC:_ZnwjSt11align_val_t@@CXXABI_1.3.11
+FUNC:_ZnwjSt11align_val_tRKSt9nothrow_t@@CXXABI_1.3.11
 FUNC:__atomic_flag_for_address@@GLIBCXX_3.4.11
 FUNC:__atomic_flag_wait_explicit@@GLIBCXX_3.4.11
 FUNC:__cxa_allocate_dependent_exception@@CXXABI_1.3.6
@@ -3857,6 +3948,7 @@ FUNC:__cxa_get_globals_fast@@CXXABI_1.3
 FUNC:__cxa_guard_abort@@CXXABI_1.3
 FUNC:__cxa_guard_acquire@@CXXABI_1.3
 FUNC:__cxa_guard_release@@CXXABI_1.3
+FUNC:__cxa_init_primary_exception@@CXXABI_1.3.11
 FUNC:__cxa_pure_virtual@@CXXABI_1.3
 FUNC:__cxa_rethrow@@CXXABI_1.3
 FUNC:__cxa_thread_atexit@@CXXABI_1.3.7
@@ -3903,6 +3995,8 @@ FUNC:tanhl@GLIBCXX_3.4
 FUNC:tanl@GLIBCXX_3.4
 OBJECT:0:CXXABI_1.3
 OBJECT:0:CXXABI_1.3.1
+OBJECT:0:CXXABI_1.3.10
+OBJECT:0:CXXABI_1.3.11
 OBJECT:0:CXXABI_1.3.2
 OBJECT:0:CXXABI_1.3.3
 OBJECT:0:CXXABI_1.3.4
@@ -3928,6 +4022,7 @@ OBJECT:0:GLIBCXX_3.4.2
 OBJECT:0:GLIBCXX_3.4.20
 OBJECT:0:GLIBCXX_3.4.21
 OBJECT:0:GLIBCXX_3.4.22
+OBJECT:0:GLIBCXX_3.4.23
 OBJECT:0:GLIBCXX_3.4.3
 OBJECT:0:GLIBCXX_3.4.4
 OBJECT:0:GLIBCXX_3.4.5
-- 
cgit v1.1


From 7c3cace17c44e3fa1bf8f7128d2f9b8fefb4f865 Mon Sep 17 00:00:00 2001
From: Eric Botcazou <ebotcazou@gcc.gnu.org>
Date: Sat, 3 Dec 2016 17:29:43 +0000
Subject: constraints.md (U): Adjust comment.

	* config/sparc/constraints.md (U): Adjust comment.
	* config/sparc/sparc.md (lra): New attribute.
	(enabled): For base instructions, if the lra attribute is set,
	return 1 if it is in keeping with TARGET_LRA.
	(*movdi_insn_sp32): Add lra attribute for alternatives mentioning U
	constraint and duplicate them with U replaced by r.
	(*movdf_insn_sp32): Likewise.
	(*mov<VM64:mode>_insn_sp32): Likewise.
	(*movtf_insn_sp32): Remove alternatives mentioning U constraint.

From-SVN: r243221
---
 gcc/ChangeLog                   | 15 +++++++++-
 gcc/config/sparc/constraints.md |  8 ++---
 gcc/config/sparc/sparc.md       | 65 +++++++++++++++++++++++++----------------
 3 files changed, 58 insertions(+), 30 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 96ae900..396580a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,4 +1,17 @@
-2016-12-01  Jeff Law  <law@redhat.com>
+2016-12-03  Eric Botcazou  <ebotcazou@adacore.com>
+            David S. Miller  <davem@davemloft.net>
+
+	* config/sparc/constraints.md (U): Adjust comment.
+	* config/sparc/sparc.md (lra): New attribute.
+	(enabled): For base instructions, if the lra attribute is set,
+	return 1 if it is in keeping with TARGET_LRA.
+	(*movdi_insn_sp32): Add lra attribute for alternatives mentioning U
+	constraint and duplicate them with U replaced by r.
+	(*movdf_insn_sp32): Likewise.
+	(*mov<VM64:mode>_insn_sp32): Likewise.
+	(*movtf_insn_sp32): Remove alternatives mentioning U constraint.
+
+2016-12-02  Jeff Law  <law@redhat.com>
 
 	* config/arm/arm.c (arm_handle_cmse_nonsecure_call): Remove unused
 	variable main_variant.
diff --git a/gcc/config/sparc/constraints.md b/gcc/config/sparc/constraints.md
index 191c0bf..626ced7 100644
--- a/gcc/config/sparc/constraints.md
+++ b/gcc/config/sparc/constraints.md
@@ -166,10 +166,10 @@
 ;; example, we have a non-offsetable MEM.  Reload will notice this
 ;; case and reload the address into a single hard register.
 ;;
-;; The real downfall of this awkward register constraint is that it does
-;; not evaluate to a true register class like a bonafide use of
-;; define_register_constraint would.  This currently means that we cannot
-;; use LRA on Sparc, since the constraint processing of LRA really depends
+;; The real downfall of this awkward register constraint is that it
+;; does not evaluate to a true register class like a bona fide use of
+;; define_register_constraint would.  This means that we cannot use
+;; it with LRA, since the constraint processing of LRA really depends
 ;; upon whether an extra constraint is for registers or not.  It uses
 ;; reg_class_for_constraint, and checks it against NO_REGS.
 (define_constraint "U"
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 896ce4b..000c5a3 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -254,8 +254,14 @@
 (define_attr "cpu_feature" "none,fpu,fpunotv9,v9,vis,vis3,vis4"
   (const_string "none"))
 
+(define_attr "lra" "none,disabled,enabled"
+  (const_string "none"))
+
 (define_attr "enabled" ""
-  (cond [(eq_attr "cpu_feature" "none") (const_int 1)
+  (cond [(eq_attr "cpu_feature" "none")
+	   (cond [(eq_attr "lra" "disabled") (symbol_ref "!TARGET_LRA")
+		  (eq_attr "lra" "enabled") (symbol_ref "TARGET_LRA")]
+		 (const_int 1))
          (eq_attr "cpu_feature" "fpu") (symbol_ref "TARGET_FPU")
          (eq_attr "cpu_feature" "fpunotv9") (symbol_ref "TARGET_FPU && !TARGET_V9")
          (eq_attr "cpu_feature" "v9") (symbol_ref "TARGET_V9")
@@ -1701,9 +1707,9 @@
 
 (define_insn "*movdi_insn_sp32"
   [(set (match_operand:DI 0 "nonimmediate_operand"
-			    "=T,o,T,U,o,r,r,r,?T,?*f,?*f,?o,?*e,?*e,  r,?*f,?*e,?W,b,b")
+			    "=T,o,T,T,U,r,o,r,r,r,?T,?*f,?*f,?o,?*e,?*e,  r,?*f,?*e,?W,b,b")
         (match_operand:DI 1 "input_operand"
-			    " J,J,U,T,r,o,i,r,*f,  T,  o,*f, *e, *e,?*f,  r,  W,*e,J,P"))]
+			    " J,J,U,r,T,T,r,o,i,r,*f,  T,  o,*f, *e, *e,?*f,  r,  W,*e,J,P"))]
   "TARGET_ARCH32
    && (register_operand (operands[0], DImode)
        || register_or_zero_operand (operands[1], DImode))"
@@ -1711,6 +1717,8 @@
    stx\t%%g0, %0
    #
    std\t%1, %0
+   std\t%1, %0
+   ldd\t%1, %0
    ldd\t%1, %0
    #
    #
@@ -1728,11 +1736,12 @@
    std\t%1, %0
    fzero\t%0
    fone\t%0"
-  [(set_attr "type" "store,store,store,load,*,*,*,*,fpstore,fpload,*,*,fpmove,*,*,*,fpload,fpstore,visl,visl")
-   (set_attr "length" "*,2,*,*,2,2,2,2,*,*,2,2,*,2,2,2,*,*,*,*")
-   (set_attr "fptype" "*,*,*,*,*,*,*,*,*,*,*,*,double,*,*,*,*,*,double,double")
-   (set_attr "cpu_feature" "v9,*,*,*,*,*,*,*,fpu,fpu,fpu,fpu,v9,fpunotv9,vis3,vis3,fpu,fpu,vis,vis")
-   (set_attr "v3pipe" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,true,true")])
+  [(set_attr "type" "store,store,store,store,load,load,*,*,*,*,fpstore,fpload,*,*,fpmove,*,*,*,fpload,fpstore,visl,visl")
+   (set_attr "length" "*,2,*,*,*,*,2,2,2,2,*,*,2,2,*,2,2,2,*,*,*,*")
+   (set_attr "fptype" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,double,*,*,*,*,*,double,double")
+   (set_attr "cpu_feature" "v9,*,*,*,*,*,*,*,*,*,fpu,fpu,fpu,fpu,v9,fpunotv9,vis3,vis3,fpu,fpu,vis,vis")
+   (set_attr "v3pipe" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,true,true")
+   (set_attr "lra" "*,*,disabled,enabled,disabled,enabled,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")])
 
 (define_insn "*movdi_insn_sp64"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r, m, r,*e,?*e,?*e,?W,b,b")
@@ -2372,9 +2381,9 @@
 
 (define_insn "*movdf_insn_sp32"
   [(set (match_operand:DF 0 "nonimmediate_operand"
-			    "=b,b,e,e,*r, f,  e,T,W,U,T,  f,  *r,  o,o")
+			    "=b,b,e,e,*r, f,  e,T,W,U,r,T,T,  f,  *r,  o,o")
 	(match_operand:DF 1 "input_operand"
-			    " G,C,e,e, f,*r,W#F,G,e,T,U,o#F,*roF,*rG,f"))]
+			    " G,C,e,e, f,*r,W#F,G,e,T,T,U,r,o#F,*roF,*rG,f"))]
   "TARGET_ARCH32
    && (register_operand (operands[0], DFmode)
        || register_or_zero_or_all_ones_operand (operands[1], DFmode))"
@@ -2389,16 +2398,19 @@
   stx\t%r1, %0
   std\t%1, %0
   ldd\t%1, %0
+  ldd\t%1, %0
+  std\t%1, %0
   std\t%1, %0
   #
   #
   #
   #"
-  [(set_attr "type" "visl,visl,fpmove,*,*,*,fpload,store,fpstore,load,store,*,*,*,*")
-   (set_attr "length" "*,*,*,2,2,2,*,*,*,*,*,2,2,2,2")
-   (set_attr "fptype" "double,double,double,*,*,*,*,*,*,*,*,*,*,*,*")
-   (set_attr "cpu_feature" "vis,vis,v9,fpunotv9,vis3,vis3,fpu,v9,fpu,*,*,fpu,*,*,fpu")
-   (set_attr "v3pipe" "true,true,*,*,*,*,*,*,*,*,*,*,*,*,*")])
+  [(set_attr "type" "visl,visl,fpmove,*,*,*,fpload,store,fpstore,load,load,store,store,*,*,*,*")
+   (set_attr "length" "*,*,*,2,2,2,*,*,*,*,*,*,*,2,2,2,2")
+   (set_attr "fptype" "double,double,double,*,*,*,*,*,*,*,*,*,*,*,*,*,*")
+   (set_attr "cpu_feature" "vis,vis,v9,fpunotv9,vis3,vis3,fpu,v9,fpu,*,*,*,*,fpu,*,*,fpu")
+   (set_attr "v3pipe" "true,true,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")
+   (set_attr "lra" "*,*,*,*,*,*,*,*,*,disabled,enabled,disabled,enabled,*,*,*,*")])
 
 (define_insn "*movdf_insn_sp64"
   [(set (match_operand:DF 0 "nonimmediate_operand" "=b,b,e,*r, e,  e,W, *r,*r,  m,*r")
@@ -2609,14 +2621,14 @@
 })
 
 (define_insn "*movtf_insn_sp32"
-  [(set (match_operand:TF 0 "nonimmediate_operand" "=b, e,o,  o,U,  r")
-	(match_operand:TF 1 "input_operand"        " G,oe,e,rGU,o,roG"))]
+  [(set (match_operand:TF 0 "nonimmediate_operand" "=b, e,o, o,  r")
+	(match_operand:TF 1 "input_operand"        " G,oe,e,rG,roG"))]
   "TARGET_ARCH32
    && (register_operand (operands[0], TFmode)
        || register_or_zero_operand (operands[1], TFmode))"
   "#"
-  [(set_attr "length" "4,4,4,4,4,4")
-   (set_attr "cpu_feature" "fpu,fpu,fpu,*,*,*")])
+  [(set_attr "length" "4,4,4,4,4")
+   (set_attr "cpu_feature" "fpu,fpu,fpu,*,*")])
 
 (define_insn "*movtf_insn_sp64"
   [(set (match_operand:TF 0 "nonimmediate_operand" "=b, e,o, o,  r")
@@ -8636,8 +8648,8 @@
    (set_attr "v3pipe" "true,true,true,*,*,*,*,*,*,*,*")])
 
 (define_insn "*mov<VM64:mode>_insn_sp32"
-  [(set (match_operand:VM64 0 "nonimmediate_operand" "=e,e,e,*r, f,e,m,m,U,T, o,*r")
-	(match_operand:VM64 1 "input_operand"         "Y,C,e, f,*r,m,e,Y,T,U,*r,*r"))]
+  [(set (match_operand:VM64 0 "nonimmediate_operand" "=e,e,e,*r, f,e,m,m,U,r,T,T, o,*r")
+	(match_operand:VM64 1 "input_operand"         "Y,C,e, f,*r,m,e,Y,T,T,U,r,*r,*r"))]
   "TARGET_VIS
    && TARGET_ARCH32
    && (register_operand (operands[0], <VM64:MODE>mode)
@@ -8652,13 +8664,16 @@
   std\t%1, %0
   stx\t%r1, %0
   ldd\t%1, %0
+  ldd\t%1, %0
+  std\t%1, %0
   std\t%1, %0
   #
   #"
-  [(set_attr "type" "visl,visl,vismv,*,*,fpload,fpstore,store,load,store,*,*")
-   (set_attr "length" "*,*,*,2,2,*,*,*,*,*,2,2")
-   (set_attr "cpu_feature" "vis,vis,vis,vis3,vis3,*,*,*,*,*,*,*")
-   (set_attr "v3pipe" "true,true,true,*,*,*,*,*,*,*,*,*")])
+  [(set_attr "type" "visl,visl,vismv,*,*,fpload,fpstore,store,load,load,store,store,*,*")
+   (set_attr "length" "*,*,*,2,2,*,*,*,*,*,*,*,2,2")
+   (set_attr "cpu_feature" "vis,vis,vis,vis3,vis3,*,*,*,*,*,*,*,*,*")
+   (set_attr "v3pipe" "true,true,true,*,*,*,*,*,*,*,*,*,*,*")
+   (set_attr "lra" "*,*,*,*,*,*,*,*,disabled,enabled,disabled,enabled,*,*")])
 
 (define_split
   [(set (match_operand:VM64 0 "memory_operand" "")
-- 
cgit v1.1


From 54b84aa9456004ceee2bceb813843525ee11bd7d Mon Sep 17 00:00:00 2001
From: Eric Botcazou <ebotcazou@adacore.com>
Date: Sat, 3 Dec 2016 17:37:13 +0000
Subject: lra-constraints.c (emit_spill_move): Use gen_lowpart_SUBREG in all
 cases to build a lowpart SUBREG.

	* lra-constraints.c (emit_spill_move): Use gen_lowpart_SUBREG in all
	cases to build a lowpart SUBREG.

From-SVN: r243222
---
 gcc/ChangeLog         | 5 +++++
 gcc/lra-constraints.c | 6 +++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 396580a..496c691 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,4 +1,9 @@
 2016-12-03  Eric Botcazou  <ebotcazou@adacore.com>
+
+	* lra-constraints.c (emit_spill_move): Use gen_lowpart_SUBREG in all
+	cases to build a lowpart SUBREG.
+
+2016-12-03  Eric Botcazou  <ebotcazou@adacore.com>
             David S. Miller  <davem@davemloft.net>
 
 	* config/sparc/constraints.md (U): Adjust comment.
diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
index 260591a..e661aef 100644
--- a/gcc/lra-constraints.c
+++ b/gcc/lra-constraints.c
@@ -1109,9 +1109,9 @@ emit_spill_move (bool to_p, rtx mem_pseudo, rtx val)
 	 dependent macro HARD_REGNO_CALLER_SAVE_MODE.  */
       if (! MEM_P (val))
 	{
-	  val = gen_rtx_SUBREG (GET_MODE (mem_pseudo),
-				GET_CODE (val) == SUBREG ? SUBREG_REG (val) : val,
-				0);
+	  val = gen_lowpart_SUBREG (GET_MODE (mem_pseudo),
+				    GET_CODE (val) == SUBREG
+				    ? SUBREG_REG (val) : val);
 	  LRA_SUBREG_P (val) = 1;
 	}
       else
-- 
cgit v1.1


From 20fee4a901c0cf097df1f1ba00e226f1eb8e973c Mon Sep 17 00:00:00 2001
From: Janus Weil <janus@gcc.gnu.org>
Date: Sat, 3 Dec 2016 19:37:57 +0100
Subject: re PR fortran/42188 ([OOP] F03:C612. The leftmost part-name shall be
 the name of a data object.)

2016-12-03  Janus Weil  <janus@gcc.gnu.org>

	PR fortran/42188
	* primary.c (gfc_match_rvalue): Add a new check that gives better error
	messages.

2016-12-03  Janus Weil  <janus@gcc.gnu.org>

	PR fortran/42188
	* gfortran.dg/derived_result_2.f90: New test case.

From-SVN: r243223
---
 gcc/fortran/ChangeLog                          |  6 ++++
 gcc/fortran/primary.c                          |  9 ++++++
 gcc/testsuite/ChangeLog                        |  5 +++
 gcc/testsuite/gfortran.dg/derived_result_2.f90 | 45 ++++++++++++++++++++++++++
 4 files changed, 65 insertions(+)
 create mode 100644 gcc/testsuite/gfortran.dg/derived_result_2.f90

diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index 7a007c3..eaae696 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,5 +1,11 @@
 2016-12-03  Janus Weil  <janus@gcc.gnu.org>
 
+	PR fortran/42188
+	* primary.c (gfc_match_rvalue): Add a new check that gives better error
+	messages.
+
+2016-12-03  Janus Weil  <janus@gcc.gnu.org>
+
 	PR fortran/58175
 	* resolve.c (gfc_resolve_finalizers): Prevent bogus warning.
 
diff --git a/gcc/fortran/primary.c b/gcc/fortran/primary.c
index eb2d780..2cdc9a4 100644
--- a/gcc/fortran/primary.c
+++ b/gcc/fortran/primary.c
@@ -3298,6 +3298,15 @@ gfc_match_rvalue (gfc_expr **result)
       if (sym->result == NULL)
 	sym->result = sym;
 
+      gfc_gobble_whitespace ();
+      /* F08:C612.  */
+      if (gfc_peek_ascii_char() == '%')
+	{
+	  gfc_error ("The leftmost part-ref in a data-ref can not be a "
+		     "function reference at %C");
+	  m = MATCH_ERROR;
+	}
+
       m = MATCH_YES;
       break;
 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 39a5c59..0d95973 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,10 @@
 2016-12-03  Janus Weil  <janus@gcc.gnu.org>
 
+	PR fortran/42188
+	* gfortran.dg/derived_result_2.f90: New test case.
+
+2016-12-03  Janus Weil  <janus@gcc.gnu.org>
+
 	PR fortran/58175
 	* gfortran.dg/finalize_30.f90: Extend test case.
 
diff --git a/gcc/testsuite/gfortran.dg/derived_result_2.f90 b/gcc/testsuite/gfortran.dg/derived_result_2.f90
new file mode 100644
index 0000000..51f5b86
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/derived_result_2.f90
@@ -0,0 +1,45 @@
+! { dg-do compile }
+!
+! PR 42188: [OOP] F03:C612. The leftmost part-name shall be the name of a data object
+!
+! Contributed by Janus Weil <janus@gcc.gnu.org>
+
+module grid_module
+ implicit none
+ type grid
+ contains
+   procedure :: new_grid
+   procedure :: new_int
+ end type
+contains
+ subroutine new_grid(this)
+   class(grid) :: this
+ end subroutine
+ integer function new_int(this)
+   class(grid) :: this
+   new_int = 42
+ end function
+end module
+
+module field_module
+ use grid_module
+ implicit none
+
+ type field
+   type(grid) :: mesh
+ end type
+
+contains
+
+ type(field) function new_field()
+ end function
+
+ subroutine test
+   integer :: i
+   type(grid) :: g
+   g = new_field()%mesh              ! { dg-error "can not be a function reference" }
+   call new_field()%mesh%new_grid()  ! { dg-error "Syntax error" }
+   i = new_field() % mesh%new_int()  ! { dg-error "can not be a function reference" }
+ end subroutine
+
+end module
-- 
cgit v1.1


From 4ceda20498ec82d0fd24a2ab1e56c6c1e38b0174 Mon Sep 17 00:00:00 2001
From: Janus Weil <janus@gcc.gnu.org>
Date: Sat, 3 Dec 2016 19:48:48 +0100
Subject: re PR fortran/43207 ([OOP] invalid (pointer) assignment to and from
 abstract non-polymorphic expressions)

2016-12-03  Janus Weil  <janus@gcc.gnu.org>

	PR fortran/43207
	* primary.c (gfc_match_varspec): Reject nonpolymorphic references to
	abstract types.

2016-12-03  Janus Weil  <janus@gcc.gnu.org>

	PR fortran/43207
	* gfortran.dg/abstract_type_9.f90: New test case.

From-SVN: r243224
---
 gcc/fortran/ChangeLog                         |  6 +++++
 gcc/fortran/primary.c                         | 10 +++++++-
 gcc/testsuite/ChangeLog                       |  5 ++++
 gcc/testsuite/gfortran.dg/abstract_type_9.f90 | 34 +++++++++++++++++++++++++++
 4 files changed, 54 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gfortran.dg/abstract_type_9.f90

diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index eaae696..3489bc4 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,5 +1,11 @@
 2016-12-03  Janus Weil  <janus@gcc.gnu.org>
 
+	PR fortran/43207
+	* primary.c (gfc_match_varspec): Reject nonpolymorphic references to
+	abstract types.
+
+2016-12-03  Janus Weil  <janus@gcc.gnu.org>
+
 	PR fortran/42188
 	* primary.c (gfc_match_rvalue): Add a new check that gives better error
 	messages.
diff --git a/gcc/fortran/primary.c b/gcc/fortran/primary.c
index 2cdc9a4..f13b0f0 100644
--- a/gcc/fortran/primary.c
+++ b/gcc/fortran/primary.c
@@ -2222,7 +2222,15 @@ check_substring:
 	}
     }
 
-  /* F2008, C727.  */
+  /* F08:C611.  */
+  if (primary->ts.type == BT_DERIVED && primary->ref
+      && primary->ts.u.derived && primary->ts.u.derived->attr.abstract)
+    {
+      gfc_error ("Nonpolymorphic reference to abstract type at %C");
+      return MATCH_ERROR;
+    }
+
+  /* F08:C727.  */
   if (primary->expr_type == EXPR_PPC && gfc_is_coindexed (primary))
     {
       gfc_error ("Coindexed procedure-pointer component at %C");
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 0d95973..84fc6fa 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,10 @@
 2016-12-03  Janus Weil  <janus@gcc.gnu.org>
 
+	PR fortran/43207
+	* gfortran.dg/abstract_type_9.f90: New test case.
+
+2016-12-03  Janus Weil  <janus@gcc.gnu.org>
+
 	PR fortran/42188
 	* gfortran.dg/derived_result_2.f90.f90: New test case.
 
diff --git a/gcc/testsuite/gfortran.dg/abstract_type_9.f90 b/gcc/testsuite/gfortran.dg/abstract_type_9.f90
new file mode 100644
index 0000000..77d48ba
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/abstract_type_9.f90
@@ -0,0 +1,34 @@
+! { dg-do compile }
+!
+! PR 43207: [OOP] invalid (pointer) assignment to and from abstract non-polymorphic expressions
+!
+! Contributed by Tobias Burnus <burnus@gcc.gnu.org>
+
+  implicit none
+  type, abstract :: parent
+    integer :: i
+  end type
+  type, extends(parent) :: child
+    class(parent), pointer :: comp
+  end type
+
+  type(child), target :: c1
+  class(child), allocatable :: c2
+  class(parent), pointer :: cp
+
+  c1%parent = c1%parent  ! { dg-error "Nonpolymorphic reference to abstract type" }
+  c2%parent = c1%parent  ! { dg-error "Nonpolymorphic reference to abstract type" }
+
+  cp => c1%comp
+  cp => c1%parent        ! { dg-error "Nonpolymorphic reference to abstract type" }
+
+  call sub(c1%comp)
+  call sub(c1%parent)    ! { dg-error "Nonpolymorphic reference to abstract type" }
+
+contains
+
+  subroutine sub(arg)
+    class(parent) :: arg
+  end subroutine
+
+end
-- 
cgit v1.1


From c818397a989cda38cf335a411360307dea311c99 Mon Sep 17 00:00:00 2001
From: GCC Administrator <gccadmin@gcc.gnu.org>
Date: Sun, 4 Dec 2016 00:16:17 +0000
Subject: Daily bump.

From-SVN: r243227
---
 gcc/DATESTAMP | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP
index 8caabe1..f179554 100644
--- a/gcc/DATESTAMP
+++ b/gcc/DATESTAMP
@@ -1 +1 @@
-20161203
+20161204
-- 
cgit v1.1


From 6b7d84532342ed038a07d850ddacf7a86106a998 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <uros@gcc.gnu.org>
Date: Sun, 4 Dec 2016 15:38:05 +0100
Subject: re PR target/70322 (STV doesn't optimize andn)

	PR target/70322
	* config/i386/i386.c (dimode_scalar_to_vector_candidate_p): Handle NEG.
	(dimode_scalar_chain::compute_convert_gain): Ditto.
	(dimode_scalar_chain::convert_insn): Ditto.

testsuite/ChangeLog:

	PR target/70322
	* gcc.target/i386/pr70322-4.c: New test.

From-SVN: r243228
---
 gcc/ChangeLog                             |  9 ++++++++-
 gcc/config/i386/i386.c                    | 15 +++++++++++++--
 gcc/testsuite/ChangeLog                   |  5 +++++
 gcc/testsuite/gcc.target/i386/pr70322-4.c | 13 +++++++++++++
 4 files changed, 39 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr70322-4.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 496c691..e888c03 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,10 +1,17 @@
+2016-12-04  Uros Bizjak  <ubizjak@gmail.com>
+
+	PR target/70322
+	* config/i386/i386.c (dimode_scalar_to_vector_candidate_p): Handle NEG.
+	(dimode_scalar_chain::compute_convert_gain): Ditto.
+	(dimode_scalar_chain::convert_insn): Ditto.
+
 2016-12-03  Eric Botcazou  <ebotcazou@adacore.com>
 
 	* lra-constraints.c (emit_spill_move): Use gen_lowpart_SUBREG in all
 	cases to build a lowpart SUBREG.
 
 2016-12-03  Eric Botcazou  <ebotcazou@adacore.com>
-            David S. Miller  <davem@davemloft.net>
+	    David S. Miller  <davem@davemloft.net>
 
 	* config/sparc/constraints.md (U): Adjust comment.
 	* config/sparc/sparc.md (lra): New attribute.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 0bee09b..41717da 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2826,6 +2826,7 @@ dimode_scalar_to_vector_candidate_p (rtx_insn *insn)
 	return false;
       break;
 
+    case NEG:
     case NOT:
       break;
 
@@ -2851,7 +2852,8 @@ dimode_scalar_to_vector_candidate_p (rtx_insn *insn)
 
   if ((GET_MODE (XEXP (src, 0)) != DImode
        && !CONST_INT_P (XEXP (src, 0)))
-      || (GET_CODE (src) != NOT
+      || (GET_CODE (src) != NEG
+	  && GET_CODE (src) != NOT
 	  && GET_MODE (XEXP (src, 1)) != DImode
 	  && !CONST_INT_P (XEXP (src, 1))))
     return false;
@@ -3419,7 +3421,8 @@ dimode_scalar_chain::compute_convert_gain ()
 	  if (CONST_INT_P (XEXP (src, 1)))
 	    gain -= vector_const_cost (XEXP (src, 1));
 	}
-      else if (GET_CODE (src) == NOT)
+      else if (GET_CODE (src) == NEG
+	       || GET_CODE (src) == NOT)
 	gain += ix86_cost->add - COSTS_N_INSNS (1);
       else if (GET_CODE (src) == COMPARE)
 	{
@@ -3776,6 +3779,14 @@ dimode_scalar_chain::convert_insn (rtx_insn *insn)
       PUT_MODE (src, V2DImode);
       break;
 
+    case NEG:
+      src = XEXP (src, 0);
+      convert_op (&src, insn);
+      subreg = gen_reg_rtx (V2DImode);
+      emit_insn_before (gen_move_insn (subreg, CONST0_RTX (V2DImode)), insn);
+      src = gen_rtx_MINUS (V2DImode, subreg, src);
+      break;
+
     case NOT:
       src = XEXP (src, 0);
       convert_op (&src, insn);
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 84fc6fa..a323678 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-04  Uros Bizjak  <ubizjak@gmail.com>
+
+	PR target/70322
+	* gcc.target/i386/pr70322-4.c: New test.
+
 2016-12-03  Janus Weil  <janus@gcc.gnu.org>
 
 	PR fortran/43207
diff --git a/gcc/testsuite/gcc.target/i386/pr70322-4.c b/gcc/testsuite/gcc.target/i386/pr70322-4.c
new file mode 100644
index 0000000..8a02b9b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr70322-4.c
@@ -0,0 +1,13 @@
+/* PR target/70322 */
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2 -msse2 -mstv" } */
+/* { dg-final { scan-assembler "psub" } } */
+/* { dg-final { scan-assembler "por" } } */
+
+extern long long z;
+
+void
+foo (long long x, long long y)
+{
+  z = -x | y;
+}
-- 
cgit v1.1


From 5dcf45618031c9d0e44ba306102a6562eed879d3 Mon Sep 17 00:00:00 2001
From: Martin Sebor <msebor@redhat.com>
Date: Sun, 4 Dec 2016 17:48:44 +0000
Subject: PR c/78668 - aligned_alloc, realloc, et al. missing attribute
 alloc_size

gcc/ChangeLog:

	PR c/78668
        * builtin-attrs.def (ATTR_ALLOC_SIZE, ATTR_RETURNS_NONNULL): New
        identifier tree nodes.
        (ATTR_ALLOCA_SIZE_1_NOTHROW_LEAF_LIST): New attribute list.
        (ATTR_MALLOC_SIZE_1_NOTHROW_LIST): Same.
        (ATTR_MALLOC_SIZE_1_NOTHROW_LEAF_LIST): Same.
        (ATTR_MALLOC_SIZE_1_2_NOTHROW_LEAF_LIST): Same.
        (ATTR_ALLOC_SIZE_2_NOTHROW_LEAF_LIST): Same.
        * builtins.def (aligned_alloc, calloc, malloc, realloc):
        Add attribute alloc_size.
        (alloca): Add attribute alloc_size and returns_nonnull.

gcc/testsuite/ChangeLog:

	PR c/78668
	* gcc.dg/builtin-alloc-size.c: New test.

From-SVN: r243231
---
 gcc/ChangeLog                             | 14 ++++++
 gcc/builtin-attrs.def                     | 21 +++++++++
 gcc/builtins.def                          | 10 ++---
 gcc/testsuite/ChangeLog                   |  5 +++
 gcc/testsuite/gcc.dg/builtin-alloc-size.c | 72 +++++++++++++++++++++++++++++++
 5 files changed, 117 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/builtin-alloc-size.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e888c03..c33ad2f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,17 @@
+2016-12-04  Martin Sebor  <msebor@redhat.com>
+
+	PR c/78668
+        * builtin-attrs.def (ATTR_ALLOC_SIZE, ATTR_RETURNS_NONNULL): New
+        identifier tree nodes.
+        (ATTR_ALLOCA_SIZE_1_NOTHROW_LEAF_LIST): New attribute list.
+        (ATTR_MALLOC_SIZE_1_NOTHROW_LIST): Same.
+        (ATTR_MALLOC_SIZE_1_NOTHROW_LEAF_LIST): Same.
+        (ATTR_MALLOC_SIZE_1_2_NOTHROW_LEAF_LIST): Same.
+        (ATTR_ALLOC_SIZE_2_NOTHROW_LEAF_LIST): Same.
+        * builtins.def (aligned_alloc, calloc, malloc, realloc):
+        Add attribute alloc_size.
+        (alloca): Add attribute alloc_size and returns_nonnull.
+
 2016-12-04  Uros Bizjak  <ubizjak@gmail.com>
 
 	PR target/70322
diff --git a/gcc/builtin-attrs.def b/gcc/builtin-attrs.def
index 88c9bd1..1520d15 100644
--- a/gcc/builtin-attrs.def
+++ b/gcc/builtin-attrs.def
@@ -83,6 +83,7 @@ DEF_LIST_INT_INT (5,6)
 #undef DEF_LIST_INT_INT
 
 /* Construct trees for identifiers.  */
+DEF_ATTR_IDENT (ATTR_ALLOC_SIZE, "alloc_size")
 DEF_ATTR_IDENT (ATTR_COLD, "cold")
 DEF_ATTR_IDENT (ATTR_CONST, "const")
 DEF_ATTR_IDENT (ATTR_FORMAT, "format")
@@ -151,6 +152,26 @@ DEF_ATTR_TREE_LIST (ATTR_SENTINEL_NOTHROW_LEAF_LIST, ATTR_SENTINEL,	\
 DEF_ATTR_TREE_LIST (ATTR_COLD_CONST_NORETURN_NOTHROW_LEAF_LIST, ATTR_CONST,\
 			ATTR_NULL, ATTR_COLD_NORETURN_NOTHROW_LEAF_LIST)
 
+/* Allocation functions like malloc whose first argument specifies
+   the size of the allocated object.  */
+DEF_ATTR_TREE_LIST (ATTR_MALLOC_SIZE_1_NOTHROW_LIST, ATTR_ALLOC_SIZE,	\
+			ATTR_LIST_1, ATTR_MALLOC_NOTHROW_LIST)
+DEF_ATTR_TREE_LIST (ATTR_MALLOC_SIZE_1_NOTHROW_LEAF_LIST, ATTR_ALLOC_SIZE, \
+		        ATTR_LIST_1, ATTR_MALLOC_NOTHROW_LEAF_LIST)
+/* Alloca is just like malloc except that it never returns null.  */
+DEF_ATTR_TREE_LIST (ATTR_ALLOCA_SIZE_1_NOTHROW_LEAF_LIST, ATTR_RETURNS_NONNULL,
+		    ATTR_NULL, ATTR_MALLOC_SIZE_1_NOTHROW_LEAF_LIST)
+
+/* Allocation functions like calloc the product of whose first two arguments
+   specifies the size of the allocated object.  */
+DEF_ATTR_TREE_LIST (ATTR_MALLOC_SIZE_1_2_NOTHROW_LEAF_LIST, ATTR_ALLOC_SIZE, \
+		        ATTR_LIST_1_2, ATTR_MALLOC_NOTHROW_LEAF_LIST)
+
+/* Allocation functions like realloc whose second argument specifies
+   the size of the allocated object.  */
+DEF_ATTR_TREE_LIST (ATTR_ALLOC_SIZE_2_NOTHROW_LEAF_LIST, ATTR_ALLOC_SIZE, \
+		        ATTR_LIST_2, ATTR_NOTHROW_LEAF_LIST)
+
 /* Functions whose pointer parameter(s) are all nonnull.  */
 DEF_ATTR_TREE_LIST (ATTR_NONNULL_LIST, ATTR_NONNULL, ATTR_NULL, ATTR_NULL)
 /* Functions whose first parameter is a nonnull pointer.  */
diff --git a/gcc/builtins.def b/gcc/builtins.def
index 6766975..9cd24e8 100644
--- a/gcc/builtins.def
+++ b/gcc/builtins.def
@@ -297,7 +297,7 @@ DEF_C99_BUILTIN        (BUILT_IN_ACOSH, "acosh", BT_FN_DOUBLE_DOUBLE, ATTR_MATHF
 DEF_C99_BUILTIN        (BUILT_IN_ACOSHF, "acoshf", BT_FN_FLOAT_FLOAT, ATTR_MATHFN_FPROUNDING_ERRNO)
 DEF_C99_BUILTIN        (BUILT_IN_ACOSHL, "acoshl", BT_FN_LONGDOUBLE_LONGDOUBLE, ATTR_MATHFN_FPROUNDING_ERRNO)
 DEF_C99_C90RES_BUILTIN (BUILT_IN_ACOSL, "acosl", BT_FN_LONGDOUBLE_LONGDOUBLE, ATTR_MATHFN_FPROUNDING_ERRNO)
-DEF_C11_BUILTIN        (BUILT_IN_ALIGNED_ALLOC, "aligned_alloc", BT_FN_PTR_SIZE_SIZE, ATTR_MALLOC_NOTHROW_LIST)
+DEF_C11_BUILTIN        (BUILT_IN_ALIGNED_ALLOC, "aligned_alloc", BT_FN_PTR_SIZE_SIZE, ATTR_MALLOC_SIZE_1_NOTHROW_LIST)
 DEF_LIB_BUILTIN        (BUILT_IN_ASIN, "asin", BT_FN_DOUBLE_DOUBLE, ATTR_MATHFN_FPROUNDING_ERRNO)
 DEF_C99_C90RES_BUILTIN (BUILT_IN_ASINF, "asinf", BT_FN_FLOAT_FLOAT, ATTR_MATHFN_FPROUNDING_ERRNO)
 DEF_C99_BUILTIN        (BUILT_IN_ASINH, "asinh", BT_FN_DOUBLE_DOUBLE, ATTR_MATHFN_FPROUNDING)
@@ -777,7 +777,7 @@ DEF_GCC_BUILTIN        (BUILT_IN_UMULLL_OVERFLOW, "umulll_overflow", BT_FN_BOOL_
 DEF_LIB_BUILTIN        (BUILT_IN_ABORT, "abort", BT_FN_VOID, ATTR_TMPURE_NORETURN_NOTHROW_LEAF_LIST)
 DEF_LIB_BUILTIN        (BUILT_IN_ABS, "abs", BT_FN_INT_INT, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_AGGREGATE_INCOMING_ADDRESS, "aggregate_incoming_address", BT_FN_PTR_VAR, ATTR_LEAF_LIST)
-DEF_EXT_LIB_BUILTIN    (BUILT_IN_ALLOCA, "alloca", BT_FN_PTR_SIZE, ATTR_MALLOC_NOTHROW_LEAF_LIST)
+DEF_EXT_LIB_BUILTIN    (BUILT_IN_ALLOCA, "alloca", BT_FN_PTR_SIZE, ATTR_ALLOCA_SIZE_1_NOTHROW_LEAF_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_APPLY, "apply", BT_FN_PTR_PTR_FN_VOID_VAR_PTR_SIZE, ATTR_NULL)
 DEF_GCC_BUILTIN        (BUILT_IN_APPLY_ARGS, "apply_args", BT_FN_PTR_VAR, ATTR_LEAF_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_BSWAP16, "bswap16", BT_FN_UINT16_UINT16, ATTR_CONST_NOTHROW_LEAF_LIST)
@@ -785,7 +785,7 @@ DEF_GCC_BUILTIN        (BUILT_IN_BSWAP32, "bswap32", BT_FN_UINT32_UINT32, ATTR_C
 DEF_GCC_BUILTIN        (BUILT_IN_BSWAP64, "bswap64", BT_FN_UINT64_UINT64, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_EXT_LIB_BUILTIN    (BUILT_IN_CLEAR_CACHE, "__clear_cache", BT_FN_VOID_PTR_PTR, ATTR_NOTHROW_LEAF_LIST)
 /* [trans-mem]: Adjust BUILT_IN_TM_CALLOC if BUILT_IN_CALLOC is changed.  */
-DEF_LIB_BUILTIN        (BUILT_IN_CALLOC, "calloc", BT_FN_PTR_SIZE_SIZE, ATTR_MALLOC_NOTHROW_LEAF_LIST)
+DEF_LIB_BUILTIN        (BUILT_IN_CALLOC, "calloc", BT_FN_PTR_SIZE_SIZE, ATTR_MALLOC_SIZE_1_2_NOTHROW_LEAF_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_CLASSIFY_TYPE, "classify_type", BT_FN_INT_VAR, ATTR_LEAF_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_CLZ, "clz", BT_FN_INT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_CLZIMAX, "clzimax", BT_FN_INT_UINTMAX, ATTR_CONST_NOTHROW_LEAF_LIST)
@@ -861,7 +861,7 @@ DEF_LIB_BUILTIN        (BUILT_IN_LABS, "labs", BT_FN_LONG_LONG, ATTR_CONST_NOTHR
 DEF_C99_BUILTIN        (BUILT_IN_LLABS, "llabs", BT_FN_LONGLONG_LONGLONG, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_LONGJMP, "longjmp", BT_FN_VOID_PTR_INT, ATTR_NORETURN_NOTHROW_LIST)
 /* [trans-mem]: Adjust BUILT_IN_TM_MALLOC if BUILT_IN_MALLOC is changed.  */
-DEF_LIB_BUILTIN        (BUILT_IN_MALLOC, "malloc", BT_FN_PTR_SIZE, ATTR_MALLOC_NOTHROW_LEAF_LIST)
+DEF_LIB_BUILTIN        (BUILT_IN_MALLOC, "malloc", BT_FN_PTR_SIZE, ATTR_MALLOC_SIZE_1_NOTHROW_LEAF_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_NEXT_ARG, "next_arg", BT_FN_PTR_VAR, ATTR_LEAF_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_PARITY, "parity", BT_FN_INT_UINT, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_PARITYIMAX, "parityimax", BT_FN_INT_UINTMAX, ATTR_CONST_NOTHROW_LEAF_LIST)
@@ -873,7 +873,7 @@ DEF_GCC_BUILTIN        (BUILT_IN_POPCOUNTL, "popcountl", BT_FN_INT_ULONG, ATTR_C
 DEF_GCC_BUILTIN        (BUILT_IN_POPCOUNTLL, "popcountll", BT_FN_INT_ULONGLONG, ATTR_CONST_NOTHROW_LEAF_LIST)
 DEF_EXT_LIB_BUILTIN    (BUILT_IN_POSIX_MEMALIGN, "posix_memalign", BT_FN_INT_PTRPTR_SIZE_SIZE, ATTR_NOTHROW_NONNULL_LEAF)
 DEF_GCC_BUILTIN        (BUILT_IN_PREFETCH, "prefetch", BT_FN_VOID_CONST_PTR_VAR, ATTR_NOVOPS_LEAF_LIST)
-DEF_LIB_BUILTIN        (BUILT_IN_REALLOC, "realloc", BT_FN_PTR_PTR_SIZE, ATTR_NOTHROW_LEAF_LIST)
+DEF_LIB_BUILTIN        (BUILT_IN_REALLOC, "realloc", BT_FN_PTR_PTR_SIZE, ATTR_ALLOC_SIZE_2_NOTHROW_LEAF_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_RETURN, "return", BT_FN_VOID_PTR, ATTR_NORETURN_NOTHROW_LEAF_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_RETURN_ADDRESS, "return_address", BT_FN_PTR_UINT, ATTR_LEAF_LIST)
 DEF_GCC_BUILTIN        (BUILT_IN_SAVEREGS, "saveregs", BT_FN_PTR_VAR, ATTR_NULL)
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index a323678..838fca5 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-04  Martin Sebor  <msebor@redhat.com>
+
+	PR c/78668
+	* gcc.dg/builtin-alloc-size.c: New test.
+
 2016-12-04  Uros Bizjak  <ubizjak@gmail.com>
 
 	PR target/70322
diff --git a/gcc/testsuite/gcc.dg/builtin-alloc-size.c b/gcc/testsuite/gcc.dg/builtin-alloc-size.c
new file mode 100644
index 0000000..5a40862
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/builtin-alloc-size.c
@@ -0,0 +1,72 @@
+/* PR c/78668 - aligned_alloc, realloc, et al. missing attribute alloc_size
+   Test to verify that memory allocation built-ins are decorated with
+   attribute alloc_size that __builtin_object_size can make use of (or
+   are treated as if they were for that purpose).
+   { dg-do compile }
+   { dg-additional-options "-O2 -fdump-tree-optimized" } */
+
+void sink (void*);
+
+unsigned size (unsigned n)
+{
+  return n;
+}
+
+void test_aligned_alloc (unsigned a)
+{
+  unsigned n = size (7);
+
+  void *p = __builtin_aligned_alloc (n, a);
+  if (__builtin_object_size (p, 0) != n)
+    __builtin_abort ();
+  sink (p);
+}
+
+void test_alloca (void)
+{
+  unsigned n = size (13);
+
+  void *p = __builtin_alloca (n);
+
+  /* Also verify that alloca is declared with attribute returns_nonnull
+     (or treated as if it were, as the case may be).  */
+  if (!p)
+    __builtin_abort ();
+
+  if (__builtin_object_size (p, 0) != n)
+    __builtin_abort ();
+  sink (p);
+}
+
+void test_calloc (void)
+{
+  unsigned m = size (19);
+  unsigned n = size (23);
+
+  void *p = __builtin_calloc (m, n);
+  if (__builtin_object_size (p, 0) != m * n)
+    __builtin_abort ();
+  sink (p);
+}
+
+void test_malloc (void)
+{
+  unsigned n = size (17);
+
+  void *p = __builtin_malloc (n);
+  if (__builtin_object_size (p, 0) != n)
+    __builtin_abort ();
+  sink (p);
+}
+
+void test_realloc (void *p)
+{
+  unsigned n = size (31);
+
+  p = __builtin_realloc (p, n);
+  if (__builtin_object_size (p, 0) != n)
+    __builtin_abort ();
+  sink (p);
+}
+
+/* { dg-final { scan-tree-dump-not "abort" "optimized" } } */
-- 
cgit v1.1


From aa7cfe40579670e5bc0df3220181aad0bee763ab Mon Sep 17 00:00:00 2001
From: Janus Weil <janus@gcc.gnu.org>
Date: Sun, 4 Dec 2016 21:31:26 +0100
Subject: re PR fortran/78618 (ICE in gfc_check_rank, at fortran/check.c:3670)

2016-12-04  Janus Weil  <janus@gcc.gnu.org>

	PR fortran/78618
	* intrinsic.c (gfc_convert_type_warn): Do not set the full typespec for
	the conversion symbol, but only type and kind. Set the full typespec
	for the expression.
	(gfc_convert_chartype): Ditto.

From-SVN: r243232
---
 gcc/fortran/ChangeLog   | 8 ++++++++
 gcc/fortran/intrinsic.c | 8 ++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index 3489bc4..2c06b31 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,3 +1,11 @@
+2016-12-04  Janus Weil  <janus@gcc.gnu.org>
+
+	PR fortran/78618
+	* intrinsic.c (gfc_convert_type_warn): Do not set the full typespec for
+	the conversion symbol, but only type and kind. Set the full typespec
+	for the expression.
+	(gfc_convert_chartype): Ditto.
+
 2016-12-03  Janus Weil  <janus@gcc.gnu.org>
 
 	PR fortran/43207
diff --git a/gcc/fortran/intrinsic.c b/gcc/fortran/intrinsic.c
index fdc11d8..fb83402 100644
--- a/gcc/fortran/intrinsic.c
+++ b/gcc/fortran/intrinsic.c
@@ -4984,12 +4984,14 @@ gfc_convert_type_warn (gfc_expr *expr, gfc_typespec *ts, int eflag, int wflag)
   new_expr->value.function.name = sym->lib_name;
   new_expr->value.function.isym = sym;
   new_expr->where = old_where;
+  new_expr->ts = *ts;
   new_expr->rank = rank;
   new_expr->shape = gfc_copy_shape (shape, rank);
 
   gfc_get_ha_sym_tree (sym->name, &new_expr->symtree);
   new_expr->symtree->n.sym->result = new_expr->symtree->n.sym;
-  new_expr->symtree->n.sym->ts = *ts;
+  new_expr->symtree->n.sym->ts.type = ts->type;
+  new_expr->symtree->n.sym->ts.kind = ts->kind;
   new_expr->symtree->n.sym->attr.flavor = FL_PROCEDURE;
   new_expr->symtree->n.sym->attr.function = 1;
   new_expr->symtree->n.sym->attr.elemental = 1;
@@ -5055,11 +5057,13 @@ gfc_convert_chartype (gfc_expr *expr, gfc_typespec *ts)
   new_expr->value.function.name = sym->lib_name;
   new_expr->value.function.isym = sym;
   new_expr->where = old_where;
+  new_expr->ts = *ts;
   new_expr->rank = rank;
   new_expr->shape = gfc_copy_shape (shape, rank);
 
   gfc_get_ha_sym_tree (sym->name, &new_expr->symtree);
-  new_expr->symtree->n.sym->ts = *ts;
+  new_expr->symtree->n.sym->ts.type = ts->type;
+  new_expr->symtree->n.sym->ts.kind = ts->kind;
   new_expr->symtree->n.sym->attr.flavor = FL_PROCEDURE;
   new_expr->symtree->n.sym->attr.function = 1;
   new_expr->symtree->n.sym->attr.elemental = 1;
-- 
cgit v1.1


From b2a8d083c5f5a0484a1a228e0db7fa7b8cf59473 Mon Sep 17 00:00:00 2001
From: GCC Administrator <gccadmin@gcc.gnu.org>
Date: Mon, 5 Dec 2016 00:16:15 +0000
Subject: Daily bump.

From-SVN: r243235
---
 gcc/DATESTAMP | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP
index f179554..1c033b9 100644
--- a/gcc/DATESTAMP
+++ b/gcc/DATESTAMP
@@ -1 +1 @@
-20161204
+20161205
-- 
cgit v1.1


From dc7342d2911eb77f0d6a0e2e65782b7f47acb5ad Mon Sep 17 00:00:00 2001
From: Eric Botcazou <ebotcazou@adacore.com>
Date: Mon, 5 Dec 2016 09:30:57 +0000
Subject: sparc-protos.h (sparc_splitdi_legitimate): Rename to...

	* config/sparc/sparc-protos.h (sparc_splitdi_legitimate): Rename to...
	(sparc_split_reg_mem_legitimate): ...this.
	(sparc_split_reg_mem): Declare.
	(sparc_split_mem_reg): Likewise.
	(sparc_split_regreg_legitimate): Rename to...
	(sparc_split_reg_reg_legitimate): ...this.
	* config/sparc/sparc.c (sparc_splitdi_legitimate): Rename to...
	(sparc_split_reg_mem_legitimate): ...this.
	(sparc_split_reg_mem): New function.
	(sparc_split_mem_reg): Likewise.
	(sparc_split_regreg_legitimate): Rename to...
	(sparc_split_reg_reg_legitimate): ...this.
	(sparc_split_reg_reg): New function.
	* config/sparc/sparc.md (lra): Remove "none" value.
	(enabled): Adjust to above change.
	(*movdi_insn_sp32): Remove new (r,T) alternative and reorder others.
	(DImode splitters): Adjust to above renamings and use new functions.
	(*movdf_insn_sp32): Remove new (r,T) alternative and reorder others.
	(DFmode splitters): Adjust to above renamings and use new functions.
	(*mov<VM64:mode>_insn_sp64): Replace C with Z constraint and use W
	constraint in conjunction with e.
	(*mov<VM64:mode>_insn_sp32): Remove new (r,T) alternative, add (o,Y)
	alternative and reorder others.
	(VM64:mode splitters): Adjust to above renamings and use new functions.

From-SVN: r243238
---
 gcc/ChangeLog                   |  27 +++
 gcc/config/sparc/sparc-protos.h |   7 +-
 gcc/config/sparc/sparc.c        |  80 +++++++--
 gcc/config/sparc/sparc.md       | 383 +++++++++++++++-------------------------
 4 files changed, 242 insertions(+), 255 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c33ad2f..730044c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,30 @@
+2016-12-05  Eric Botcazou  <ebotcazou@adacore.com>
+
+	* config/sparc/sparc-protos.h (sparc_splitdi_legitimate): Rename to...
+	(sparc_split_reg_mem_legitimate): ...this.
+	(sparc_split_reg_mem): Declare.
+	(sparc_split_mem_reg): Likewise.
+	(sparc_split_regreg_legitimate): Rename to...
+	(sparc_split_reg_reg_legitimate): ...this.
+	* config/sparc/sparc.c (sparc_splitdi_legitimate): Rename to...
+	(sparc_split_reg_mem_legitimate): ...this.
+	(sparc_split_reg_mem): New function.
+	(sparc_split_mem_reg): Likewise.
+	(sparc_split_regreg_legitimate): Rename to...
+	(sparc_split_reg_reg_legitimate): ...this.
+	(sparc_split_reg_reg): New function.
+	* config/sparc/sparc.md (lra): Remove "none" value.
+	(enabled): Adjust to above change.
+	(*movdi_insn_sp32): Remove new (r,T) alternative and reorder others.
+	(DImode splitters): Adjust to above renamings and use new functions.
+	(*movdf_insn_sp32): Remove new (r,T) alternative and reorder others.
+	(DFmode splitters): Adjust to above renamings and use new functions.
+	(*mov<VM64:mode>_insn_sp64): Replace C with Z constraint and use W
+	constraint in conjunction with e.
+	(*mov<VM64:mode>_insn_sp32): Remove new (r,T) alternative, add (o,Y)
+	alternative and reorder others.
+	(VM64:mode splitters): Adjust to above renamings and use new functions.
+
 2016-12-04  Martin Sebor  <msebor@redhat.com>
 
 	PR c/78668
diff --git a/gcc/config/sparc/sparc-protos.h b/gcc/config/sparc/sparc-protos.h
index 4e23a44..931e693 100644
--- a/gcc/config/sparc/sparc-protos.h
+++ b/gcc/config/sparc/sparc-protos.h
@@ -68,8 +68,11 @@ extern void sparc_emit_call_insn (rtx, rtx);
 extern void sparc_defer_case_vector (rtx, rtx, int);
 extern bool sparc_expand_move (machine_mode, rtx *);
 extern void sparc_emit_set_symbolic_const64 (rtx, rtx, rtx);
-extern int sparc_splitdi_legitimate (rtx, rtx);
-extern int sparc_split_regreg_legitimate (rtx, rtx);
+extern int sparc_split_reg_mem_legitimate (rtx, rtx);
+extern void sparc_split_reg_mem (rtx, rtx, machine_mode);
+extern void sparc_split_mem_reg (rtx, rtx, machine_mode);
+extern int sparc_split_reg_reg_legitimate (rtx, rtx);
+extern void sparc_split_reg_reg (rtx, rtx, machine_mode);
 extern const char *output_ubranch (rtx, rtx_insn *);
 extern const char *output_cbranch (rtx, rtx, int, int, int, rtx_insn *);
 extern const char *output_return (rtx_insn *);
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index e17552a..a5537a1 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -8484,46 +8484,82 @@ order_regs_for_local_alloc (void)
 }
 
 /* Return 1 if REG and MEM are legitimate enough to allow the various
-   mem<-->reg splits to be run.  */
+   MEM<-->REG splits to be run.  */
 
 int
-sparc_splitdi_legitimate (rtx reg, rtx mem)
+sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
 {
   /* Punt if we are here by mistake.  */
   gcc_assert (reload_completed);
 
   /* We must have an offsettable memory reference.  */
-  if (! offsettable_memref_p (mem))
+  if (!offsettable_memref_p (mem))
     return 0;
 
   /* If we have legitimate args for ldd/std, we do not want
      the split to happen.  */
-  if ((REGNO (reg) % 2) == 0
-      && mem_min_alignment (mem, 8))
+  if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
     return 0;
 
   /* Success.  */
   return 1;
 }
 
-/* Like sparc_splitdi_legitimate but for REG <--> REG moves.  */
+/* Split a REG <-- MEM move into a pair of moves in MODE.  */
+
+void
+sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
+{
+  rtx high_part = gen_highpart (mode, dest);
+  rtx low_part = gen_lowpart (mode, dest);
+  rtx word0 = adjust_address (src, mode, 0);
+  rtx word1 = adjust_address (src, mode, 4);
+
+  if (reg_overlap_mentioned_p (high_part, word1))
+    {
+      emit_move_insn_1 (low_part, word1);
+      emit_move_insn_1 (high_part, word0);
+    }
+  else
+    {
+      emit_move_insn_1 (high_part, word0);
+      emit_move_insn_1 (low_part, word1);
+    }
+}
+
+/* Split a MEM <-- REG move into a pair of moves in MODE.  */
+
+void
+sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
+{
+  rtx word0 = adjust_address (dest, mode, 0);
+  rtx word1 = adjust_address (dest, mode, 4);
+  rtx high_part = gen_highpart (mode, src);
+  rtx low_part = gen_lowpart (mode, src);
+
+  emit_move_insn_1 (word0, high_part);
+  emit_move_insn_1 (word1, low_part);
+}
+
+/* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves.  */
 
 int
-sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
+sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
 {
-  int regno1, regno2;
+  /* Punt if we are here by mistake.  */
+  gcc_assert (reload_completed);
 
   if (GET_CODE (reg1) == SUBREG)
     reg1 = SUBREG_REG (reg1);
   if (GET_CODE (reg1) != REG)
     return 0;
-  regno1 = REGNO (reg1);
+  const int regno1 = REGNO (reg1);
 
   if (GET_CODE (reg2) == SUBREG)
     reg2 = SUBREG_REG (reg2);
   if (GET_CODE (reg2) != REG)
     return 0;
-  regno2 = REGNO (reg2);
+  const int regno2 = REGNO (reg2);
 
   if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
     return 1;
@@ -8538,6 +8574,30 @@ sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
   return 0;
 }
 
+/* Split a REG <--> REG move into a pair of moves in MODE.  */
+
+void
+sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
+{
+  rtx dest1 = gen_highpart (mode, dest);
+  rtx dest2 = gen_lowpart (mode, dest);
+  rtx src1 = gen_highpart (mode, src);
+  rtx src2 = gen_lowpart (mode, src);
+
+  /* Now emit using the real source and destination we found, swapping
+     the order if we detect overlap.  */
+  if (reg_overlap_mentioned_p (dest1, src2))
+    {
+      emit_move_insn_1 (dest2, src2);
+      emit_move_insn_1 (dest1, src1);
+    }
+  else
+    {
+      emit_move_insn_1 (dest1, src1);
+      emit_move_insn_1 (dest2, src2);
+    }
+}
+
 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
    This makes them candidates for using ldd and std insns.
 
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 000c5a3..767d508 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -254,14 +254,12 @@
 (define_attr "cpu_feature" "none,fpu,fpunotv9,v9,vis,vis3,vis4"
   (const_string "none"))
 
-(define_attr "lra" "none,disabled,enabled"
-  (const_string "none"))
+(define_attr "lra" "disabled,enabled"
+  (const_string "enabled"))
 
 (define_attr "enabled" ""
   (cond [(eq_attr "cpu_feature" "none")
-	   (cond [(eq_attr "lra" "disabled") (symbol_ref "!TARGET_LRA")
-		  (eq_attr "lra" "enabled") (symbol_ref "TARGET_LRA")]
-		 (const_int 1))
+           (cond [(eq_attr "lra" "disabled") (symbol_ref "!TARGET_LRA")] (const_int 1))
          (eq_attr "cpu_feature" "fpu") (symbol_ref "TARGET_FPU")
          (eq_attr "cpu_feature" "fpunotv9") (symbol_ref "TARGET_FPU && !TARGET_V9")
          (eq_attr "cpu_feature" "v9") (symbol_ref "TARGET_V9")
@@ -1707,25 +1705,23 @@
 
 (define_insn "*movdi_insn_sp32"
   [(set (match_operand:DI 0 "nonimmediate_operand"
-			    "=T,o,T,T,U,r,o,r,r,r,?T,?*f,?*f,?o,?*e,?*e,  r,?*f,?*e,?W,b,b")
+			    "=T,o,U,T,r,o,r,r,?*f,?T,?*f,?o,?*e,?*e,  r,?*f,?*e,?W,*b,*b")
         (match_operand:DI 1 "input_operand"
-			    " J,J,U,r,T,T,r,o,i,r,*f,  T,  o,*f, *e, *e,?*f,  r,  W,*e,J,P"))]
+			    " J,J,T,U,o,r,i,r,  T,*f,  o,*f, *e, *e,?*f,  r,  W,*e, J, P"))]
   "TARGET_ARCH32
    && (register_operand (operands[0], DImode)
        || register_or_zero_operand (operands[1], DImode))"
   "@
-   stx\t%%g0, %0
+   stx\t%r1, %0
    #
-   std\t%1, %0
-   std\t%1, %0
    ldd\t%1, %0
+   std\t%1, %0
    ldd\t%1, %0
+   std\t%1, %0
    #
    #
-   #
-   #
-   std\t%1, %0
    ldd\t%1, %0
+   std\t%1, %0
    #
    #
    fmovd\t%1, %0
@@ -1736,12 +1732,12 @@
    std\t%1, %0
    fzero\t%0
    fone\t%0"
-  [(set_attr "type" "store,store,store,store,load,load,*,*,*,*,fpstore,fpload,*,*,fpmove,*,*,*,fpload,fpstore,visl,visl")
-   (set_attr "length" "*,2,*,*,*,*,2,2,2,2,*,*,2,2,*,2,2,2,*,*,*,*")
-   (set_attr "fptype" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,double,*,*,*,*,*,double,double")
-   (set_attr "cpu_feature" "v9,*,*,*,*,*,*,*,*,*,fpu,fpu,fpu,fpu,v9,fpunotv9,vis3,vis3,fpu,fpu,vis,vis")
-   (set_attr "v3pipe" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,true,true")
-   (set_attr "lra" "*,*,disabled,enabled,disabled,enabled,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")])
+  [(set_attr "type" "store,*,load,store,load,store,*,*,fpload,fpstore,*,*,fpmove,*,*,*,fpload,fpstore,visl,visl")
+   (set_attr "length" "*,2,*,*,*,*,2,2,*,*,2,2,*,2,2,2,*,*,*,*")
+   (set_attr "fptype" "*,*,*,*,*,*,*,*,*,*,*,*,double,*,*,*,*,*,double,double")
+   (set_attr "cpu_feature" "v9,*,*,*,*,*,*,*,fpu,fpu,fpu,fpu,v9,fpunotv9,vis3,vis3,fpu,fpu,vis,vis")
+   (set_attr "v3pipe" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,true,true")
+   (set_attr "lra" "*,*,disabled,disabled,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")])
 
 (define_insn "*movdi_insn_sp64"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r, m, r,*e,?*e,?*e,?W,b,b")
@@ -1997,30 +1993,27 @@
 (define_split
   [(set (match_operand:DI 0 "register_operand" "")
         (match_operand:DI 1 "const_int_operand" ""))]
-  "TARGET_ARCH32
+  "reload_completed
+   && TARGET_ARCH32
    && ((GET_CODE (operands[0]) == REG
         && SPARC_INT_REG_P (REGNO (operands[0])))
        || (GET_CODE (operands[0]) == SUBREG
            && GET_CODE (SUBREG_REG (operands[0])) == REG
-           && SPARC_INT_REG_P (REGNO (SUBREG_REG (operands[0])))))
-   && reload_completed"
+           && SPARC_INT_REG_P (REGNO (SUBREG_REG (operands[0])))))"
   [(clobber (const_int 0))]
 {
-  HOST_WIDE_INT low, high;
+  HOST_WIDE_INT low = trunc_int_for_mode (INTVAL (operands[1]), SImode);
+  HOST_WIDE_INT high = trunc_int_for_mode (INTVAL (operands[1]) >> 32, SImode);
+  rtx high_part = gen_highpart (SImode, operands[0]);
+  rtx low_part = gen_lowpart (SImode, operands[0]);
 
-  low = trunc_int_for_mode (INTVAL (operands[1]), SImode);
-  high = trunc_int_for_mode (INTVAL (operands[1]) >> 32, SImode);
-  emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), GEN_INT (high)));
+  emit_move_insn_1 (high_part, GEN_INT (high));
 
-  /* Slick... but this trick loses if this subreg constant part
-     can be done in one insn.  */
-  if (low == high
-      && !SPARC_SETHI32_P (high)
-      && !SPARC_SIMM13_P (high))
-    emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]),
-			  gen_highpart (SImode, operands[0])));
+  /* Slick... but this loses if the constant can be done in one insn.  */
+  if (low == high && !SPARC_SETHI32_P (high) && !SPARC_SIMM13_P (high))
+    emit_move_insn_1 (low_part, high_part);
   else
-    emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), GEN_INT (low)));
+    emit_move_insn_1 (low_part, GEN_INT (low));
 
   DONE;
 })
@@ -2031,31 +2024,10 @@
   "reload_completed
    && (!TARGET_V9
        || (TARGET_ARCH32
-           && sparc_split_regreg_legitimate (operands[0], operands[1])))"
+           && sparc_split_reg_reg_legitimate (operands[0], operands[1])))"
   [(clobber (const_int 0))]
 {
-  rtx set_dest = operands[0];
-  rtx set_src = operands[1];
-  rtx dest1, dest2;
-  rtx src1, src2;
-
-  dest1 = gen_highpart (SImode, set_dest);
-  dest2 = gen_lowpart (SImode, set_dest);
-  src1 = gen_highpart (SImode, set_src);
-  src2 = gen_lowpart (SImode, set_src);
-
-  /* Now emit using the real source and destination we found, swapping
-     the order if we detect overlap.  */
-  if (reg_overlap_mentioned_p (dest1, src2))
-    {
-      emit_insn (gen_movsi (dest2, src2));
-      emit_insn (gen_movsi (dest1, src1));
-    }
-  else
-    {
-      emit_insn (gen_movsi (dest1, src1));
-      emit_insn (gen_movsi (dest2, src2));
-    }
+  sparc_split_reg_reg (operands[0], operands[1], SImode);
   DONE;
 })
 
@@ -2064,41 +2036,24 @@
 (define_split
   [(set (match_operand:DI 0 "register_operand" "")
         (match_operand:DI 1 "memory_operand" ""))]
-  "(TARGET_ARCH32
-    && reload_completed
-    && sparc_splitdi_legitimate (operands[0], operands[1]))"
+  "reload_completed
+   && TARGET_ARCH32
+   && sparc_split_reg_mem_legitimate (operands[0], operands[1])"
   [(clobber (const_int 0))]
 {
-  rtx word0 = adjust_address (operands[1], SImode, 0);
-  rtx word1 = adjust_address (operands[1], SImode, 4);
-  rtx high_part = gen_highpart (SImode, operands[0]);
-  rtx low_part = gen_lowpart (SImode, operands[0]);
-
-  if (reg_overlap_mentioned_p (high_part, word1))
-    {
-      emit_insn (gen_movsi (low_part, word1));
-      emit_insn (gen_movsi (high_part, word0));
-    }
-  else
-    {
-      emit_insn (gen_movsi (high_part, word0));
-      emit_insn (gen_movsi (low_part, word1));
-    }
+  sparc_split_reg_mem (operands[0], operands[1], SImode);
   DONE;
 })
 
 (define_split
   [(set (match_operand:DI 0 "memory_operand" "")
         (match_operand:DI 1 "register_operand" ""))]
-  "(TARGET_ARCH32
-    && reload_completed
-    && sparc_splitdi_legitimate (operands[1], operands[0]))"
+  "reload_completed
+   && TARGET_ARCH32
+   && sparc_split_reg_mem_legitimate (operands[1], operands[0])"
   [(clobber (const_int 0))]
 {
-  emit_insn (gen_movsi (adjust_address (operands[0], SImode, 0),
-			gen_highpart (SImode, operands[1])));
-  emit_insn (gen_movsi (adjust_address (operands[0], SImode, 4),
-			gen_lowpart (SImode, operands[1])));
+  sparc_split_mem_reg (operands[0], operands[1], SImode);
   DONE;
 })
 
@@ -2112,8 +2067,8 @@
    && offsettable_memref_p (operands[0])"
   [(clobber (const_int 0))]
 {
-  emit_insn (gen_movsi (adjust_address (operands[0], SImode, 0), const0_rtx));
-  emit_insn (gen_movsi (adjust_address (operands[0], SImode, 4), const0_rtx));
+  emit_move_insn_1 (adjust_address (operands[0], SImode, 0), const0_rtx);
+  emit_move_insn_1 (adjust_address (operands[0], SImode, 4), const0_rtx);
   DONE;
 })
 
@@ -2381,13 +2336,15 @@
 
 (define_insn "*movdf_insn_sp32"
   [(set (match_operand:DF 0 "nonimmediate_operand"
-			    "=b,b,e,e,*r, f,  e,T,W,U,r,T,T,  f,  *r,  o,o")
+			    "=T,o,b,b,e,e,*r, f,  e,W,U,T,  f,o, *r,*r, o")
 	(match_operand:DF 1 "input_operand"
-			    " G,C,e,e, f,*r,W#F,G,e,T,T,U,r,o#F,*roF,*rG,f"))]
+			    " G,G,G,C,e,e, f,*r,W#F,e,T,U,o#F,f,*rF, o,*r"))]
   "TARGET_ARCH32
    && (register_operand (operands[0], DFmode)
        || register_or_zero_or_all_ones_operand (operands[1], DFmode))"
   "@
+  stx\t%r1, %0
+  #
   fzero\t%0
   fone\t%0
   fmovd\t%1, %0
@@ -2395,22 +2352,20 @@
   #
   #
   ldd\t%1, %0
-  stx\t%r1, %0
   std\t%1, %0
   ldd\t%1, %0
-  ldd\t%1, %0
-  std\t%1, %0
   std\t%1, %0
   #
   #
   #
-  #"
-  [(set_attr "type" "visl,visl,fpmove,*,*,*,fpload,store,fpstore,load,load,store,store,*,*,*,*")
-   (set_attr "length" "*,*,*,2,2,2,*,*,*,*,*,*,*,2,2,2,2")
-   (set_attr "fptype" "double,double,double,*,*,*,*,*,*,*,*,*,*,*,*,*,*")
-   (set_attr "cpu_feature" "vis,vis,v9,fpunotv9,vis3,vis3,fpu,v9,fpu,*,*,*,*,fpu,*,*,fpu")
-   (set_attr "v3pipe" "true,true,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")
-   (set_attr "lra" "*,*,*,*,*,*,*,*,*,disabled,enabled,disabled,enabled,*,*,*,*")])
+  ldd\t%1, %0
+  std\t%1, %0"
+  [(set_attr "type" "store,*,visl,visl,fpmove,*,*,*,fpload,fpstore,load,store,*,*,*,load,store")
+   (set_attr "length" "*,2,*,*,*,2,2,2,*,*,*,*,2,2,2,*,*")
+   (set_attr "fptype" "*,*,double,double,double,*,*,*,*,*,*,*,*,*,*,*,*")
+   (set_attr "cpu_feature" "v9,*,vis,vis,v9,fpunotv9,vis3,vis3,fpu,fpu,*,*,fpu,fpu,*,*,*")
+   (set_attr "v3pipe" "*,*,true,true,*,*,*,*,*,*,*,*,*,*,*,*,*")
+   (set_attr "lra" "*,*,*,*,*,*,*,*,*,*,disabled,disabled,*,*,*,*,*")])
 
 (define_insn "*movdf_insn_sp64"
   [(set (match_operand:DF 0 "nonimmediate_operand" "=b,b,e,*r, e,  e,W, *r,*r,  m,*r")
@@ -2440,44 +2395,38 @@
 (define_split
   [(set (match_operand:DF 0 "register_operand" "")
         (match_operand:DF 1 "const_double_operand" ""))]
-  "REG_P (operands[0])
+  "reload_completed
+   && REG_P (operands[0])
    && SPARC_INT_REG_P (REGNO (operands[0]))
-   && !const_zero_operand (operands[1], GET_MODE (operands[0]))
-   && reload_completed"
+   && !const_zero_operand (operands[1], GET_MODE (operands[0]))"
   [(clobber (const_int 0))]
 {
   operands[0] = gen_raw_REG (DImode, REGNO (operands[0]));
 
   if (TARGET_ARCH64)
     {
-      machine_mode mode = GET_MODE (operands[1]);
-      rtx tem = simplify_subreg (DImode, operands[1], mode, 0);
+      rtx tem = simplify_subreg (DImode, operands[1], DFmode, 0);
       emit_insn (gen_movdi (operands[0], tem));
     }
   else
     {
-      machine_mode mode = GET_MODE (operands[1]);
-      rtx hi = simplify_subreg (SImode, operands[1], mode, 0);
-      rtx lo = simplify_subreg (SImode, operands[1], mode, 4);
+      rtx hi = simplify_subreg (SImode, operands[1], DFmode, 0);
+      rtx lo = simplify_subreg (SImode, operands[1], DFmode, 4);
+      rtx high_part = gen_highpart (SImode, operands[0]);
+      rtx low_part = gen_lowpart (SImode, operands[0]);
 
       gcc_assert (GET_CODE (hi) == CONST_INT);
       gcc_assert (GET_CODE (lo) == CONST_INT);
 
-      emit_insn (gen_movsi (gen_highpart (SImode, operands[0]), hi));
+      emit_move_insn_1 (high_part, hi);
 
-      /* Slick... but this trick loses if this subreg constant part
-         can be done in one insn.  */
+      /* Slick... but this loses if the constant can be done in one insn.  */
       if (lo == hi
 	  && !SPARC_SETHI32_P (INTVAL (hi))
 	  && !SPARC_SIMM13_P (INTVAL (hi)))
-        {
-          emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]),
-			        gen_highpart (SImode, operands[0])));
-        }
+	emit_move_insn_1 (low_part, high_part);
       else
-        {
-          emit_insn (gen_movsi (gen_lowpart (SImode, operands[0]), lo));
-        }
+	emit_move_insn_1 (low_part, lo);
     }
   DONE;
 })
@@ -2489,35 +2438,31 @@
 ;; register DFmode cases must be handled.
 (define_split
   [(set (match_operand:DF 0 "register_operand" "")
-        (match_operand:DF 1 "register_operand" ""))]
-  "(!TARGET_V9
-    || (TARGET_ARCH32
-        && sparc_split_regreg_legitimate (operands[0], operands[1])))
-   && reload_completed"
+        (match_operand:DF 1 "const_zero_operand" ""))]
+  "reload_completed
+   && TARGET_ARCH32
+   && ((GET_CODE (operands[0]) == REG
+	&& SPARC_INT_REG_P (REGNO (operands[0])))
+       || (GET_CODE (operands[0]) == SUBREG
+	   && GET_CODE (SUBREG_REG (operands[0])) == REG
+	   && SPARC_INT_REG_P (REGNO (SUBREG_REG (operands[0])))))"
   [(clobber (const_int 0))]
 {
-  rtx set_dest = operands[0];
-  rtx set_src = operands[1];
-  rtx dest1, dest2;
-  rtx src1, src2;
-
-  dest1 = gen_highpart (SFmode, set_dest);
-  dest2 = gen_lowpart (SFmode, set_dest);
-  src1 = gen_highpart (SFmode, set_src);
-  src2 = gen_lowpart (SFmode, set_src);
+  emit_move_insn_1 (gen_highpart (SFmode, operands[0]), CONST0_RTX (SFmode));
+  emit_move_insn_1 (gen_lowpart (SFmode, operands[0]), CONST0_RTX (SFmode));
+  DONE;
+})
 
-  /* Now emit using the real source and destination we found, swapping
-     the order if we detect overlap.  */
-  if (reg_overlap_mentioned_p (dest1, src2))
-    {
-      emit_move_insn_1 (dest2, src2);
-      emit_move_insn_1 (dest1, src1);
-    }
-  else
-    {
-      emit_move_insn_1 (dest1, src1);
-      emit_move_insn_1 (dest2, src2);
-    }
+(define_split
+  [(set (match_operand:DF 0 "register_operand" "")
+        (match_operand:DF 1 "register_operand" ""))]
+  "reload_completed
+   && (!TARGET_V9
+       || (TARGET_ARCH32
+	   && sparc_split_reg_reg_legitimate (operands[0], operands[1])))"
+  [(clobber (const_int 0))]
+{
+  sparc_split_reg_reg (operands[0], operands[1], SFmode);
   DONE;
 })
 
@@ -2526,26 +2471,10 @@
 	(match_operand:DF 1 "memory_operand" ""))]
   "reload_completed
    && TARGET_ARCH32
-   && (((REGNO (operands[0]) % 2) != 0)
-       || !mem_min_alignment (operands[1], 8))
-   && offsettable_memref_p (operands[1])"
+   && sparc_split_reg_mem_legitimate (operands[0], operands[1])"
   [(clobber (const_int 0))]
 {
-  rtx word0, word1;
-
-  word0 = adjust_address (operands[1], SFmode, 0);
-  word1 = adjust_address (operands[1], SFmode, 4);
-
-  if (reg_overlap_mentioned_p (gen_highpart (SFmode, operands[0]), word1))
-    {
-      emit_move_insn_1 (gen_lowpart (SFmode, operands[0]), word1);
-      emit_move_insn_1 (gen_highpart (SFmode, operands[0]), word0);
-    }
-  else
-    {
-      emit_move_insn_1 (gen_highpart (SFmode, operands[0]), word0);
-      emit_move_insn_1 (gen_lowpart (SFmode, operands[0]), word1);
-    }
+  sparc_split_reg_mem (operands[0], operands[1], SFmode);
   DONE;
 })
 
@@ -2554,18 +2483,10 @@
 	(match_operand:DF 1 "register_operand" ""))]
   "reload_completed
    && TARGET_ARCH32
-   && (((REGNO (operands[1]) % 2) != 0)
-       || !mem_min_alignment (operands[0], 8))
-   && offsettable_memref_p (operands[0])"
+   && sparc_split_reg_mem_legitimate (operands[1], operands[0])"
   [(clobber (const_int 0))]
 {
-  rtx word0, word1;
-
-  word0 = adjust_address (operands[0], SFmode, 0);
-  word1 = adjust_address (operands[0], SFmode, 4);
-
-  emit_move_insn_1 (word0, gen_highpart (SFmode, operands[1]));
-  emit_move_insn_1 (word1, gen_lowpart (SFmode, operands[1]));
+  sparc_split_mem_reg (operands[0], operands[1], SFmode);
   DONE;
 })
 
@@ -2579,35 +2500,8 @@
    && offsettable_memref_p (operands[0])"
   [(clobber (const_int 0))]
 {
-  rtx dest1, dest2;
-
-  dest1 = adjust_address (operands[0], SFmode, 0);
-  dest2 = adjust_address (operands[0], SFmode, 4);
-
-  emit_move_insn_1 (dest1, CONST0_RTX (SFmode));
-  emit_move_insn_1 (dest2, CONST0_RTX (SFmode));
-  DONE;
-})
-
-(define_split
-  [(set (match_operand:DF 0 "register_operand" "")
-        (match_operand:DF 1 "const_zero_operand" ""))]
-  "reload_completed
-   && TARGET_ARCH32
-   && ((GET_CODE (operands[0]) == REG
-	&& SPARC_INT_REG_P (REGNO (operands[0])))
-       || (GET_CODE (operands[0]) == SUBREG
-	   && GET_CODE (SUBREG_REG (operands[0])) == REG
-	   && SPARC_INT_REG_P (REGNO (SUBREG_REG (operands[0])))))"
-  [(clobber (const_int 0))]
-{
-  rtx set_dest = operands[0];
-  rtx dest1, dest2;
-
-  dest1 = gen_highpart (SFmode, set_dest);
-  dest2 = gen_lowpart (SFmode, set_dest);
-  emit_move_insn_1 (dest1, CONST0_RTX (SFmode));
-  emit_move_insn_1 (dest2, CONST0_RTX (SFmode));
+  emit_move_insn_1 (adjust_address (operands[0], SFmode, 0), CONST0_RTX (SFmode));
+  emit_move_insn_1 (adjust_address (operands[0], SFmode, 4), CONST0_RTX (SFmode));
   DONE;
 })
 
@@ -8625,8 +8519,8 @@
    (set_attr "v3pipe" "true,true,true,*,*,*,*,*,*,true,true")])
 
 (define_insn "*mov<VM64:mode>_insn_sp64"
-  [(set (match_operand:VM64 0 "nonimmediate_operand" "=e,e,e,e,m,m,*r, m,*r, e,*r")
-	(match_operand:VM64 1 "input_operand"         "Y,C,e,m,e,Y, m,*r, e,*r,*r"))]
+  [(set (match_operand:VM64 0 "nonimmediate_operand" "=e,e,e,e,W,m,*r, m,*r, e,*r")
+	(match_operand:VM64 1 "input_operand"         "Y,Z,e,W,e,Y, m,*r, e,*r,*r"))]
   "TARGET_VIS
    && TARGET_ARCH64
    && (register_operand (operands[0], <VM64:MODE>mode)
@@ -8648,13 +8542,17 @@
    (set_attr "v3pipe" "true,true,true,*,*,*,*,*,*,*,*")])
 
 (define_insn "*mov<VM64:mode>_insn_sp32"
-  [(set (match_operand:VM64 0 "nonimmediate_operand" "=e,e,e,*r, f,e,m,m,U,r,T,T, o,*r")
-	(match_operand:VM64 1 "input_operand"         "Y,C,e, f,*r,m,e,Y,T,T,U,r,*r,*r"))]
+  [(set (match_operand:VM64 0 "nonimmediate_operand"
+			      "=T,o,e,e,e,*r, f,e,W,U,T,e,o,*r,*r, o")
+	(match_operand:VM64 1 "input_operand"
+			      " Y,Y,Y,Z,e, f,*r,W,e,T,U,o,e,*r, o,*r"))]
   "TARGET_VIS
    && TARGET_ARCH32
    && (register_operand (operands[0], <VM64:MODE>mode)
        || register_or_zero_or_all_ones_operand (operands[1], <VM64:MODE>mode))"
   "@
+  stx\t%r1, %0
+  #
   fzero\t%0
   fone\t%0
   fsrc2\t%1, %0
@@ -8662,71 +8560,70 @@
   #
   ldd\t%1, %0
   std\t%1, %0
-  stx\t%r1, %0
-  ldd\t%1, %0
   ldd\t%1, %0
   std\t%1, %0
-  std\t%1, %0
   #
-  #"
-  [(set_attr "type" "visl,visl,vismv,*,*,fpload,fpstore,store,load,load,store,store,*,*")
-   (set_attr "length" "*,*,*,2,2,*,*,*,*,*,*,*,2,2")
-   (set_attr "cpu_feature" "vis,vis,vis,vis3,vis3,*,*,*,*,*,*,*,*,*")
-   (set_attr "v3pipe" "true,true,true,*,*,*,*,*,*,*,*,*,*,*")
-   (set_attr "lra" "*,*,*,*,*,*,*,*,disabled,enabled,disabled,enabled,*,*")])
+  #
+  #
+  ldd\t%1, %0
+  std\t%1, %0"
+  [(set_attr "type" "store,*,visl,visl,vismv,*,*,fpload,fpstore,load,store,*,*,*,load,store")
+   (set_attr "length" "*,2,*,*,*,2,2,*,*,*,*,2,2,2,*,*")
+   (set_attr "cpu_feature" "*,*,vis,vis,vis,vis3,vis3,*,*,*,*,*,*,*,*,*")
+   (set_attr "v3pipe" "*,*,true,true,true,*,*,*,*,*,*,*,*,*,*,*")
+   (set_attr "lra" "*,*,*,*,*,*,*,*,*,disabled,disabled,*,*,*,*,*")])
 
 (define_split
-  [(set (match_operand:VM64 0 "memory_operand" "")
+  [(set (match_operand:VM64 0 "register_operand" "")
         (match_operand:VM64 1 "register_operand" ""))]
   "reload_completed
    && TARGET_VIS
    && TARGET_ARCH32
-   && (((REGNO (operands[1]) % 2) != 0)
-       || !mem_min_alignment (operands[0], 8))
-   && offsettable_memref_p (operands[0])"
+   && sparc_split_reg_reg_legitimate (operands[0], operands[1])"
   [(clobber (const_int 0))]
 {
-  rtx word0, word1;
-
-  word0 = adjust_address (operands[0], SImode, 0);
-  word1 = adjust_address (operands[0], SImode, 4);
-
-  emit_move_insn_1 (word0, gen_highpart (SImode, operands[1]));
-  emit_move_insn_1 (word1, gen_lowpart (SImode, operands[1]));
+  sparc_split_reg_reg (operands[0], operands[1], SImode);
   DONE;
 })
 
 (define_split
   [(set (match_operand:VM64 0 "register_operand" "")
-        (match_operand:VM64 1 "register_operand" ""))]
+        (match_operand:VM64 1 "memory_operand" ""))]
   "reload_completed
    && TARGET_VIS
    && TARGET_ARCH32
-   && sparc_split_regreg_legitimate (operands[0], operands[1])"
+   && sparc_split_reg_mem_legitimate (operands[0], operands[1])"
   [(clobber (const_int 0))]
 {
-  rtx set_dest = operands[0];
-  rtx set_src = operands[1];
-  rtx dest1, dest2;
-  rtx src1, src2;
+  sparc_split_reg_mem (operands[0], operands[1], SImode);
+  DONE;
+})
 
-  dest1 = gen_highpart (SImode, set_dest);
-  dest2 = gen_lowpart (SImode, set_dest);
-  src1 = gen_highpart (SImode, set_src);
-  src2 = gen_lowpart (SImode, set_src);
+(define_split
+  [(set (match_operand:VM64 0 "memory_operand" "")
+        (match_operand:VM64 1 "register_operand" ""))]
+  "reload_completed
+   && TARGET_VIS
+   && TARGET_ARCH32
+   && sparc_split_reg_mem_legitimate (operands[1], operands[0])"
+  [(clobber (const_int 0))]
+{
+  sparc_split_mem_reg (operands[0], operands[1], SImode);
+  DONE;
+})
 
-  /* Now emit using the real source and destination we found, swapping
-     the order if we detect overlap.  */
-  if (reg_overlap_mentioned_p (dest1, src2))
-    {
-      emit_insn (gen_movsi (dest2, src2));
-      emit_insn (gen_movsi (dest1, src1));
-    }
-  else
-    {
-      emit_insn (gen_movsi (dest1, src1));
-      emit_insn (gen_movsi (dest2, src2));
-    }
+(define_split
+  [(set (match_operand:VM64 0 "memory_operand" "")
+        (match_operand:VM64 1 "const_zero_operand" ""))]
+  "reload_completed
+   && TARGET_VIS
+   && TARGET_ARCH32
+   && !mem_min_alignment (operands[0], 8)
+   && offsettable_memref_p (operands[0])"
+  [(clobber (const_int 0))]
+{
+  emit_move_insn_1 (adjust_address (operands[0], SImode, 0), const0_rtx);
+  emit_move_insn_1 (adjust_address (operands[0], SImode, 4), const0_rtx);
   DONE;
 })
 
-- 
cgit v1.1


From 648e17d28df124d8eac937babae804262d605499 Mon Sep 17 00:00:00 2001
From: James Greenhalgh <james.greenhalgh@arm.com>
Date: Mon, 5 Dec 2016 09:35:28 +0000
Subject: [Patch 2/2 PR78561] Recalculate constant pool size before emitting it

gcc/testsuite/

	PR rtl-optimization/78561
	* gcc.target/aarch64/pr78561.c: Add missing testcase from r243183.

From-SVN: r243239
---
 gcc/testsuite/ChangeLog                    | 5 +++++
 gcc/testsuite/gcc.target/aarch64/pr78561.c | 9 +++++++++
 2 files changed, 14 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr78561.c

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 838fca5..2fe40d0 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-04  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	PR rtl-optimization/78561
+	* gcc.target/aarch64/pr78561.c: Add missing testcase from r243183.
+
 2016-12-04  Martin Sebor  <msebor@redhat.com>
 
 	PR c/78668
diff --git a/gcc/testsuite/gcc.target/aarch64/pr78561.c b/gcc/testsuite/gcc.target/aarch64/pr78561.c
new file mode 100644
index 0000000..048d2d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr78561.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-Og -O3 -mcmodel=tiny" } */
+
+int
+main (__fp16 x)
+{
+  __fp16 a = 6.5504e4;
+  return (x <= a);
+}
-- 
cgit v1.1


From 5ffd5f36962aa5d2984f7453b553ef8076bd2e84 Mon Sep 17 00:00:00 2001
From: Andre Vieira <andre.simoesdiasvieira@arm.com>
Date: Mon, 5 Dec 2016 09:44:24 +0000
Subject: Fix arm-netbsdelf bootstrap.

    2016-12-05  Andre Vieira  <andre.simoesdiasvieira@arm.com>

	* config/arm/arm.c (TARGET_ASM_INIT_SECTIONS): Fix wrong undef
	location.

From-SVN: r243240
---
 gcc/ChangeLog        | 5 +++++
 gcc/config/arm/arm.c | 3 ++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 730044c..5d66a96 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-025  Andre Vieira  <andre.simoesdiasvieira@arm.com>
+
+	* config/arm/arm.c (TARGET_ASM_INIT_SECTIONS): Fix wrong undef
+	location.
+
 2016-12-05  Eric Botcazou  <ebotcazou@adacore.com>
 
 	* config/sparc/sparc-protos.h (sparc_splitdi_legitimate): Rename to...
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index ec1f5fc..437da6f 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -595,8 +595,9 @@ static const struct attribute_spec arm_attribute_table[] =
 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
 
-#undef TARGET_ASM_INIT_SECTIONS
 #endif /* ARM_UNWIND_INFO */
+
+#undef TARGET_ASM_INIT_SECTIONS
 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
 
 #undef TARGET_DWARF_REGISTER_SPAN
-- 
cgit v1.1


From aad6838ec786662b6ffb28c494564aba29ad1a4e Mon Sep 17 00:00:00 2001
From: Eric Botcazou <ebotcazou@adacore.com>
Date: Mon, 5 Dec 2016 11:15:17 +0000
Subject: system-darwin-ppc.ads (Support_Atomic_Primitives): Set to True only
 if the word size is 64.

	* system-darwin-ppc.ads (Support_Atomic_Primitives): Set to True only
	if the word size is 64.

From-SVN: r243243
---
 gcc/ada/ChangeLog             | 5 +++++
 gcc/ada/system-darwin-ppc.ads | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog
index 3f221d2..1a9f161 100644
--- a/gcc/ada/ChangeLog
+++ b/gcc/ada/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-05  Eric Botcazou  <ebotcazou@adacore.com>
+
+	* system-darwin-ppc.ads (Support_Atomic_Primitives): Set to True only
+	if the word size is 64.
+
 2016-11-30  Gary Dismukes  <dismukes@adacore.com>
 
 	* sem_prag.adb, sem_ch6.adb: Minor reformatting and typo fixes.
diff --git a/gcc/ada/system-darwin-ppc.ads b/gcc/ada/system-darwin-ppc.ads
index b58cfed..7809e14 100644
--- a/gcc/ada/system-darwin-ppc.ads
+++ b/gcc/ada/system-darwin-ppc.ads
@@ -161,7 +161,7 @@ private
    Stack_Check_Probes        : constant Boolean := False;
    Stack_Check_Limits        : constant Boolean := False;
    Support_Aggregates        : constant Boolean := True;
-   Support_Atomic_Primitives : constant Boolean := True;
+   Support_Atomic_Primitives : constant Boolean := Word_Size = 64;
    Support_Composite_Assign  : constant Boolean := True;
    Support_Composite_Compare : constant Boolean := True;
    Support_Long_Shifts       : constant Boolean := True;
-- 
cgit v1.1


From 5a5c5784d89008664ab42c17efcab7198b132456 Mon Sep 17 00:00:00 2001
From: Claudiu Zissulescu <claziss@synopsys.com>
Date: Mon, 5 Dec 2016 12:16:38 +0100
Subject: [ARC] Remove unused patterns, refactor unspec+offset pattern gen.

2016-12-05  Claudiu Zissulescu  <claziss@synopsys.com>

	* config/arc/arc-protos.h (insn_is_tls_gd_dispatch): Remove.
	* config/arc/arc.c (arc_unspec_offset): New function.
	(arc_finalize_pic): Change.
	(arc_emit_call_tls_get_addr): Likewise.
	(arc_legitimize_tls_address): Likewise.
	(arc_legitimize_pic_address): Likewise.
	(insn_is_tls_gd_dispatch): Remove.
	* config/arc/arc.h (INSN_REFERENCES_ARE_DELAYED): Change.
	* config/arc/arc.md (tls_gd_load): Remove unused pattern.
	(tls_gd_dispatch): Likewise.

From-SVN: r243244
---
 gcc/ChangeLog               | 13 +++++++++++++
 gcc/config/arc/arc-protos.h |  1 -
 gcc/config/arc/arc.c        | 41 ++++++++++++++++++-----------------------
 gcc/config/arc/arc.h        |  2 +-
 gcc/config/arc/arc.md       | 34 ----------------------------------
 5 files changed, 32 insertions(+), 59 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5d66a96..c5095c2 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,16 @@
+2016-12-05  Claudiu Zissulescu  <claziss@synopsys.com>
+
+	* config/arc/arc-protos.h (insn_is_tls_gd_dispatch): Remove.
+	* config/arc/arc.c (arc_unspec_offset): New function.
+	(arc_finalize_pic): Change.
+	(arc_emit_call_tls_get_addr): Likewise.
+	(arc_legitimize_tls_address): Likewise.
+	(arc_legitimize_pic_address): Likewise.
+	(insn_is_tls_gd_dispatch): Remove.
+	* config/arc/arc.h (INSN_REFERENCES_ARE_DELAYED): Change.
+	* config/arc/arc.md (ls_gd_load): Remove unused pattern.
+	(tls_gd_dispatch): Likewise.
+
 2016-12-025  Andre Vieira  <andre.simoesdiasvieira@arm.com>
 
 	* config/arm/arm.c (TARGET_ASM_INIT_SECTIONS): Fix wrong undef
diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
index 83a0b73..bdbf7ce 100644
--- a/gcc/config/arc/arc-protos.h
+++ b/gcc/config/arc/arc-protos.h
@@ -121,6 +121,5 @@ extern int regno_clobbered_p (unsigned int, rtx_insn *, machine_mode, int);
 extern int arc_return_slot_offset (void);
 extern bool arc_legitimize_reload_address (rtx *, machine_mode, int, int);
 extern void arc_secondary_reload_conv (rtx, rtx, rtx, bool);
-extern bool insn_is_tls_gd_dispatch (rtx_insn *);
 extern void arc_cpu_cpp_builtins (cpp_reader *);
 extern bool arc_store_addr_hazard_p (rtx_insn *, rtx_insn *);
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 832f567..a0aa16e 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -2771,6 +2771,15 @@ arc_return_slot_offset ()
 
 /* PIC */
 
+/* Helper to generate unspec constant.  */
+
+static rtx
+arc_unspec_offset (rtx loc, int unspec)
+{
+  return gen_rtx_CONST (Pmode, gen_rtx_UNSPEC (Pmode, gen_rtvec (1, loc),
+					       unspec));
+}
+
 /* Emit special PIC prologues and epilogues.  */
 /* If the function has any GOTOFF relocations, then the GOTBASE
    register has to be setup in the prologue
@@ -2796,9 +2805,7 @@ arc_finalize_pic (void)
   gcc_assert (flag_pic != 0);
 
   pat = gen_rtx_SYMBOL_REF (Pmode, "_DYNAMIC");
-  pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pat), ARC_UNSPEC_GOT);
-  pat = gen_rtx_CONST (Pmode, pat);
-
+  pat = arc_unspec_offset (pat, ARC_UNSPEC_GOT);
   pat = gen_rtx_SET (baseptr_rtx, pat);
 
   emit_insn (pat);
@@ -4866,8 +4873,7 @@ arc_emit_call_tls_get_addr (rtx sym, int reloc, rtx eqv)
 
   start_sequence ();
 
-  rtx x = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, sym), reloc);
-  x = gen_rtx_CONST (Pmode, x);
+  rtx x = arc_unspec_offset (sym, reloc);
   emit_move_insn (r0, x);
   use_reg (&call_fusage, r0);
 
@@ -4923,17 +4929,18 @@ arc_legitimize_tls_address (rtx addr, enum tls_model model)
       addr = gen_rtx_CONST (Pmode, addr);
       base = arc_legitimize_tls_address (base, TLS_MODEL_GLOBAL_DYNAMIC);
       return gen_rtx_PLUS (Pmode, force_reg (Pmode, base), addr);
+
     case TLS_MODEL_GLOBAL_DYNAMIC:
       return arc_emit_call_tls_get_addr (addr, UNSPEC_TLS_GD, addr);
+
     case TLS_MODEL_INITIAL_EXEC:
-      addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLS_IE);
-      addr = gen_rtx_CONST (Pmode, addr);
+      addr = arc_unspec_offset (addr, UNSPEC_TLS_IE);
       addr = copy_to_mode_reg (Pmode, gen_const_mem (Pmode, addr));
       return gen_rtx_PLUS (Pmode, arc_get_tp (), addr);
+
     case TLS_MODEL_LOCAL_EXEC:
     local_exec:
-      addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLS_OFF);
-      addr = gen_rtx_CONST (Pmode, addr);
+      addr = arc_unspec_offset (addr, UNSPEC_TLS_OFF);
       return gen_rtx_PLUS (Pmode, arc_get_tp (), addr);
     default:
       gcc_unreachable ();
@@ -4964,14 +4971,11 @@ arc_legitimize_pic_address (rtx orig, rtx oldx)
       else if (!flag_pic)
 	return orig;
       else if (CONSTANT_POOL_ADDRESS_P (addr) || SYMBOL_REF_LOCAL_P (addr))
-	return gen_rtx_CONST (Pmode,
-			      gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
-			      ARC_UNSPEC_GOTOFFPC));
+	return arc_unspec_offset (addr, ARC_UNSPEC_GOTOFFPC);
 
       /* This symbol must be referenced via a load from the Global
 	 Offset Table (@GOTPC).  */
-      pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ARC_UNSPEC_GOT);
-      pat = gen_rtx_CONST (Pmode, pat);
+      pat = arc_unspec_offset (addr, ARC_UNSPEC_GOT);
       pat = gen_const_mem (Pmode, pat);
 
       if (oldx == NULL)
@@ -9985,15 +9989,6 @@ arc_dwarf_register_span (rtx rtl)
    return p;
 }
 
-/* We can't inline this in INSN_REFERENCES_ARE_DELAYED because
-   resource.h doesn't include the required header files.  */
-
-bool
-insn_is_tls_gd_dispatch (rtx_insn *insn)
-{
-  return recog_memoized (insn) == CODE_FOR_tls_gd_dispatch;
-}
-
 /* Return true if OP is an acceptable memory operand for ARCompact
    16-bit load instructions of MODE.
 
diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h
index 611ef54..64bd9e0 100644
--- a/gcc/config/arc/arc.h
+++ b/gcc/config/arc/arc.h
@@ -1612,7 +1612,7 @@ extern enum arc_function_type arc_compute_function_type (struct function *);
    && (get_attr_type (X) == TYPE_CALL || get_attr_type (X) == TYPE_SFUNC))
 
 #define INSN_REFERENCES_ARE_DELAYED(insn)				\
-  (INSN_SETS_ARE_DELAYED (insn) && !insn_is_tls_gd_dispatch (insn))
+  (INSN_SETS_ARE_DELAYED (insn))
 
 #define CALL_ATTR(X, NAME) \
   ((CALL_P (X) || NONJUMP_INSN_P (X)) \
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index 925fcd6..18bc68f 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -5486,21 +5486,6 @@
   [(set_attr "is_sfunc" "yes")
    (set_attr "predicable" "yes")])
 
-(define_insn "tls_gd_load"
-  [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq#q,c")
-	(unspec:SI [(match_operand:SI 1 "register_operand" "Rcq#q,c")
-		    (match_operand:SI 2 "symbolic_operand" "X,X")]
-	 UNSPEC_TLS_GD))]
-  ""
-  ".tls_gd_ld %2`ld%? %0,[%1]"
-  [(set_attr "type" "load")
-   ; if the linker has to patch this into IE, we need a long insn
-   ; (FIXME: or two short insn, ld_s / jl_s.  missing -Os optimization.)
-   (set_attr_alternative "iscompact"
-     [(cond [(ne (symbol_ref "arc_tp_regno == 30") (const_int 0))
-	     (const_string "*")] (const_string "maybe"))
-      (const_string "*")])])
-
 (define_insn "tls_gd_get_addr"
   [(set (reg:SI R0_REG)
 	(call:SI (mem:SI (unspec:SI [(match_operand:SI 0
@@ -5514,25 +5499,6 @@
    ; With TARGET_MEDIUM_CALLS, plt calls are not predicable.
    (set_attr "predicable" "no")])
 
-; We make this call specific to the tls symbol to avoid commoning this
-; with calls for other symbols; we want the linker to be able to
-(define_insn "tls_gd_dispatch"
-  [(set (reg:SI R0_REG)
-	(unspec:SI
-	  [(reg:SI R0_REG)
-	   (call (mem:SI (match_operand:SI 0 "register_operand" "Rcq,q,c"))
-		 (const_int 0))
-	   (match_operand:SI 1 "symbolic_operand" "X,X,X")]
-	 UNSPEC_TLS_GD))
-   (clobber (reg:SI RETURN_ADDR_REGNUM))
-   (clobber (reg:DI R10_REG))
-   (clobber (reg:SI R12_REG))]
-  ""
-  ".tls_gd_call %1`jl%!%* [%0]"
-  [(set_attr "type" "call")
-   (set_attr "iscompact" "maybe,false,*")
-   (set_attr "predicable" "no,no,yes")])
-
 ;; For thread pointer builtins
 (define_expand "get_thread_pointersi"
   [(set (match_operand:SI 0 "register_operand") (match_dup 1))]
-- 
cgit v1.1


From 62440b4f0e4d6e0dc193d315e79753bb4c5edd99 Mon Sep 17 00:00:00 2001
From: Cupertino Miranda <cmiranda@synopsys.com>
Date: Mon, 5 Dec 2016 11:16:52 +0000
Subject: [ARC] Fix PIE.

gcc/
2016-12-05  Cupertino Miranda  <cmiranda@synopsys.com>

	* config/arc/arc.h (STARTFILE_SPEC): Use default linux specs.
	(ENDFILE_SPEC): Likewise.

libgcc/
2016-12-05  Cupertino Miranda  <cmiranda@synopsys.com>

	* config.host (arc*-*-linux-uclibc*): Use default extra
	objects. Include linux-android header.
	* config/arc/crti.S (_init): Declare symbol as function.
	(_fini): Likewise.

From-SVN: r243245
---
 gcc/ChangeLog            |  5 +++++
 gcc/config.gcc           |  2 +-
 gcc/config/arc/arc.h     | 10 ++++------
 libgcc/ChangeLog         |  7 +++++++
 libgcc/config.host       |  4 ++--
 libgcc/config/arc/crti.S |  2 ++
 6 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c5095c2..ad903f9 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-05  Cupertino Miranda  <cmiranda@synopsys.com>
+
+	* config/arc/arc.h (STARTFILE_SPEC): Use default linux specs.
+	(ENDFILE_SPEC): Likewise.
+
 2016-12-05  Claudiu Zissulescu  <claziss@synopsys.com>
 
 	* config/arc/arc-protos.h (insn_is_tls_gd_dispatch): Remove.
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 189073e..e034bc3 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -1023,7 +1023,7 @@ arc*-*-elf*)
 	;;
 arc*-*-linux-uclibc*)
 	extra_headers="arc-simd.h"
-	tm_file="arc/arc-arch.h dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h ${tm_file}"
+	tm_file="arc/arc-arch.h dbxelf.h elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file}"
 	tmake_file="${tmake_file} arc/t-uClibc arc/t-arc"
 	tm_defines="${tm_defines} TARGET_SDATA_DEFAULT=0"
 	tm_defines="${tm_defines} TARGET_MMEDIUM_CALLS_DEFAULT=1"
diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h
index 64bd9e0..f9512c4 100644
--- a/gcc/config/arc/arc.h
+++ b/gcc/config/arc/arc.h
@@ -138,17 +138,15 @@ extern const char *arc_cpu_to_as (int argc, const char **argv);
 #define STARTFILE_SPEC "%{!shared:crt0.o%s} crti%O%s %{pg|p:crtg.o%s} " \
   "%(arc_tls_extra_start_spec) crtbegin.o%s"
 #else
-#define STARTFILE_SPEC   "%{!shared:%{!mkernel:crt1.o%s}} crti.o%s \
-  %{!shared:%{pg|p|profile:crtg.o%s} crtbegin.o%s} %{shared:crtbeginS.o%s}"
-
+#define STARTFILE_SPEC							\
+  LINUX_OR_ANDROID_LD (GNU_USER_TARGET_STARTFILE_SPEC, ANDROID_STARTFILE_SPEC)
 #endif
 
 #if DEFAULT_LIBC != LIBC_UCLIBC
 #define ENDFILE_SPEC "%{pg|p:crtgend.o%s} crtend.o%s crtn%O%s"
 #else
-#define ENDFILE_SPEC "%{!shared:%{pg|p|profile:crtgend.o%s} crtend.o%s} \
-  %{shared:crtendS.o%s} crtn.o%s"
-
+#define ENDFILE_SPEC							\
+  LINUX_OR_ANDROID_LD (GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC)
 #endif
 
 #if DEFAULT_LIBC == LIBC_UCLIBC
diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index efadedf..d4a1bad 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,10 @@
+2016-12-05  Cupertino Miranda  <cmiranda@synopsys.com>
+
+	* config.host (arc*-*-linux-uclibc*): Use default extra
+	objects. Include linux-android header.
+	* config/arc/crti.S (_init): Declare symbol as function.
+	(_fini): Likewise.
+
 2016-12-03  Thomas Koenig  <tkoenig@gcc.gnu.org>
 
 	PR fortran/78379
diff --git a/libgcc/config.host b/libgcc/config.host
index e7e5413..b1a2be6 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -374,8 +374,8 @@ arc*-*-elf*)
 	;;
 arc*-*-linux-uclibc*)
 	tmake_file="${tmake_file} t-slibgcc-libgcc t-slibgcc-nolc-override arc/t-arc700-uClibc arc/t-arc"
-	extra_parts="crti.o crtn.o crtend.o crtbegin.o crtendS.o crtbeginS.o libgmon.a crtg.o crtgend.o"
-	extra_parts="${extra_parts} crttls.o"
+	extra_parts="$extra_parts crti.o crtn.o libgmon.a crtg.o crtgend.o"
+	extra_parts="$extra_parts crttls.o"
 	;;
 arm-wrs-vxworks)
 	tmake_file="$tmake_file arm/t-arm arm/t-elf t-softfp-sfdf t-softfp-excl arm/t-softfp t-softfp"
diff --git a/libgcc/config/arc/crti.S b/libgcc/config/arc/crti.S
index 7f64305..6867ca9 100644
--- a/libgcc/config/arc/crti.S
+++ b/libgcc/config/arc/crti.S
@@ -31,11 +31,13 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 	.section .init
 	.global _init
 	.word 0
+	.type	_init,@function
 _init:
 	push_s	blink
 
 	.section .fini
 	.global _fini
 	.word 0
+	.type	_fini,@function
 _fini:
 	push_s	blink
-- 
cgit v1.1


From fb5e7daea59060603a2e526cab4e35b32a8e9438 Mon Sep 17 00:00:00 2001
From: Mikael Pettersson <mikpe@it.uu.se>
Date: Mon, 5 Dec 2016 12:27:55 +0100
Subject: re PR ada/48835 (porting GNAT to m68k-linux)

	PR ada/48835
	* gcc-interface/Makefile.in: Add support for m68k-linux.
	* system-linux-m68k.ads: New file.

From-SVN: r243247
---
 gcc/ada/ChangeLog                 |   6 ++
 gcc/ada/gcc-interface/Makefile.in |  29 +++++++
 gcc/ada/system-linux-m68k.ads     | 155 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 190 insertions(+)
 create mode 100644 gcc/ada/system-linux-m68k.ads

diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog
index 1a9f161..858f8cb 100644
--- a/gcc/ada/ChangeLog
+++ b/gcc/ada/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-05  Mikael Pettersson  <mikpe@it.uu.se>
+
+	PR ada/48835
+	* gcc-interface/Makefile.in: Add support for m68k-linux.
+	* system-linux-m68k.ads: New file.
+
 2016-12-05  Eric Botcazou  <ebotcazou@adacore.com>
 
 	* system-darwin-ppc.ads (Support_Atomic_Primitives): Set to True only
diff --git a/gcc/ada/gcc-interface/Makefile.in b/gcc/ada/gcc-interface/Makefile.in
index ec8aa07..98889c0 100644
--- a/gcc/ada/gcc-interface/Makefile.in
+++ b/gcc/ada/gcc-interface/Makefile.in
@@ -2049,6 +2049,35 @@ ifeq ($(strip $(filter-out hppa% linux%,$(target_cpu) $(target_os))),)
   LIBRARY_VERSION := $(LIB_VERSION)
 endif
 
+# M68K Linux
+ifeq ($(strip $(filter-out m68k% linux%,$(target_cpu) $(target_os))),)
+  LIBGNAT_TARGET_PAIRS = \
+  a-intnam.ads<a-intnam-linux.ads \
+  s-inmaop.adb<s-inmaop-posix.adb \
+  s-intman.adb<s-intman-posix.adb \
+  s-linux.ads<s-linux.ads \
+  s-osinte.adb<s-osinte-posix.adb \
+  s-osinte.ads<s-osinte-linux.ads \
+  s-osprim.adb<s-osprim-posix.adb \
+  s-taprop.adb<s-taprop-linux.adb \
+  s-tasinf.ads<s-tasinf-linux.ads \
+  s-tasinf.adb<s-tasinf-linux.adb \
+  s-taspri.ads<s-taspri-posix.ads \
+  s-tpopsp.adb<s-tpopsp-posix-foreign.adb \
+  system.ads<system-linux-m68k.ads
+
+  TOOLS_TARGET_PAIRS =  \
+    mlib-tgt-specific.adb<mlib-tgt-specific-linux.adb \
+    indepsw.adb<indepsw-gnu.adb
+
+  EXTRA_GNATRTL_TASKING_OBJS=s-linux.o
+  EH_MECHANISM=-gcc
+  THREADSLIB = -lpthread
+  GNATLIB_SHARED = gnatlib-shared-dual
+  GMEM_LIB = gmemlib
+  LIBRARY_VERSION := $(LIB_VERSION)
+endif
+
 # SH4 Linux
 ifeq ($(strip $(filter-out sh4% linux%,$(target_cpu) $(target_os))),)
   LIBGNAT_TARGET_PAIRS = \
diff --git a/gcc/ada/system-linux-m68k.ads b/gcc/ada/system-linux-m68k.ads
new file mode 100644
index 0000000..9aa6143
--- /dev/null
+++ b/gcc/ada/system-linux-m68k.ads
@@ -0,0 +1,155 @@
+------------------------------------------------------------------------------
+--                                                                          --
+--                        GNAT RUN-TIME COMPONENTS                          --
+--                                                                          --
+--                               S Y S T E M                                --
+--                                                                          --
+--                                 S p e c                                  --
+--                         (GNU/Linux/m68k Version)                         --
+--                                                                          --
+--          Copyright (C) 2014-2016, Free Software Foundation, Inc.         --
+--                                                                          --
+-- This specification is derived from the Ada Reference Manual for use with --
+-- GNAT. The copyright notice above, and the license provisions that follow --
+-- apply solely to the  contents of the part following the private keyword. --
+--                                                                          --
+-- GNAT is free software;  you can  redistribute it  and/or modify it under --
+-- terms of the  GNU General Public License as published  by the Free Soft- --
+-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
+-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
+-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
+-- or FITNESS FOR A PARTICULAR PURPOSE.                                     --
+--                                                                          --
+-- As a special exception under Section 7 of GPL version 3, you are granted --
+-- additional permissions described in the GCC Runtime Library Exception,   --
+-- version 3.1, as published by the Free Software Foundation.               --
+--                                                                          --
+-- You should have received a copy of the GNU General Public License and    --
+-- a copy of the GCC Runtime Library Exception along with this program;     --
+-- see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see    --
+-- <http://www.gnu.org/licenses/>.                                          --
+--                                                                          --
+-- GNAT was originally developed  by the GNAT team at  New York University. --
+-- Extensive contributions were provided by Ada Core Technologies Inc.      --
+--                                                                          --
+------------------------------------------------------------------------------
+
+package System is
+   pragma Pure;
+   --  Note that we take advantage of the implementation permission to make
+   --  this unit Pure instead of Preelaborable; see RM 13.7.1(15). In Ada
+   --  2005, this is Pure in any case (AI-362).
+
+   type Name is (SYSTEM_NAME_GNAT);
+   System_Name : constant Name := SYSTEM_NAME_GNAT;
+
+   --  System-Dependent Named Numbers
+
+   Min_Int               : constant := Long_Long_Integer'First;
+   Max_Int               : constant := Long_Long_Integer'Last;
+
+   Max_Binary_Modulus    : constant := 2 ** Long_Long_Integer'Size;
+   Max_Nonbinary_Modulus : constant := 2 ** Integer'Size - 1;
+
+   Max_Base_Digits       : constant := Long_Long_Float'Digits;
+   Max_Digits            : constant := Long_Long_Float'Digits;
+
+   Max_Mantissa          : constant := 63;
+   Fine_Delta            : constant := 2.0 ** (-Max_Mantissa);
+
+   Tick                  : constant := 0.000_001;
+
+   --  Storage-related Declarations
+
+   type Address is private;
+   pragma Preelaborable_Initialization (Address);
+   Null_Address : constant Address;
+
+   Storage_Unit : constant := 8;
+   Word_Size    : constant := 32;
+   Memory_Size  : constant := 2 ** 32;
+
+   --  Address comparison
+
+   function "<"  (Left, Right : Address) return Boolean;
+   function "<=" (Left, Right : Address) return Boolean;
+   function ">"  (Left, Right : Address) return Boolean;
+   function ">=" (Left, Right : Address) return Boolean;
+   function "="  (Left, Right : Address) return Boolean;
+
+   pragma Import (Intrinsic, "<");
+   pragma Import (Intrinsic, "<=");
+   pragma Import (Intrinsic, ">");
+   pragma Import (Intrinsic, ">=");
+   pragma Import (Intrinsic, "=");
+
+   --  Other System-Dependent Declarations
+
+   type Bit_Order is (High_Order_First, Low_Order_First);
+   Default_Bit_Order : constant Bit_Order := High_Order_First;
+   pragma Warnings (Off, Default_Bit_Order); -- kill constant condition warning
+
+   --  Priority-related Declarations (RM D.1)
+
+   --  Is the following actually true for GNU/Linux/m68k?
+   --
+   --  0 .. 98 corresponds to the system priority range 1 .. 99.
+   --
+   --  If the scheduling policy is SCHED_FIFO or SCHED_RR the runtime makes use
+   --  of the entire range provided by the system.
+   --
+   --  If the scheduling policy is SCHED_OTHER the only valid system priority
+   --  is 1 and other values are simply ignored.
+
+   Max_Priority           : constant Positive := 97;
+   Max_Interrupt_Priority : constant Positive := 98;
+
+   subtype Any_Priority       is Integer      range  0 .. 98;
+   subtype Priority           is Any_Priority range  0 .. 97;
+   subtype Interrupt_Priority is Any_Priority range 98 .. 98;
+
+   Default_Priority : constant Priority := 48;
+
+private
+
+   type Address is mod Memory_Size;
+   Null_Address : constant Address := 0;
+
+   --------------------------------------
+   -- System Implementation Parameters --
+   --------------------------------------
+
+   --  These parameters provide information about the target that is used
+   --  by the compiler. They are in the private part of System, where they
+   --  can be accessed using the special circuitry in the Targparm unit
+   --  whose source should be consulted for more detailed descriptions
+   --  of the individual switch values.
+
+   Backend_Divide_Checks     : constant Boolean := False;
+   Backend_Overflow_Checks   : constant Boolean := False;
+   Command_Line_Args         : constant Boolean := True;
+   Configurable_Run_Time     : constant Boolean := False;
+   Denorm                    : constant Boolean := True;
+   Duration_32_Bits          : constant Boolean := False;
+   Exit_Status_Supported     : constant Boolean := True;
+   Fractional_Fixed_Ops      : constant Boolean := False;
+   Frontend_Layout           : constant Boolean := False;
+   Machine_Overflows         : constant Boolean := False;
+   Machine_Rounds            : constant Boolean := True;
+   Preallocated_Stacks       : constant Boolean := False;
+   Signed_Zeros              : constant Boolean := True;
+   Stack_Check_Default       : constant Boolean := False;
+   Stack_Check_Probes        : constant Boolean := False;
+   Stack_Check_Limits        : constant Boolean := False;
+   Support_Aggregates        : constant Boolean := True;
+   Support_Atomic_Primitives : constant Boolean := True;
+   Support_Composite_Assign  : constant Boolean := True;
+   Support_Composite_Compare : constant Boolean := True;
+   Support_Long_Shifts       : constant Boolean := True;
+   Always_Compatible_Rep     : constant Boolean := False;
+   Suppress_Standard_Library : constant Boolean := False;
+   Use_Ada_Main_Program_Name : constant Boolean := False;
+   Frontend_Exceptions       : constant Boolean := False;
+   ZCX_By_Default            : constant Boolean := True;
+
+end System;
-- 
cgit v1.1


From d0ea9f0aa270f9791df42eb409e90c718575ad9a Mon Sep 17 00:00:00 2001
From: Nathan Sidwell <nathan@acm.org>
Date: Mon, 5 Dec 2016 12:24:39 +0000
Subject: diagnostic.c (diagnostic_check_max_errors): New, broken out of ...

	gcc/
	* diagnostic.c (diagnostic_check_max_errors): New, broken out of ...
	(diagnostic_action_after_output): ... here.
	(diagnostic_report_diagnostic): Call it for non-notes.
	* diagnostic.h (struct diagnostic_context): Make max_errors signed
	int.
	(diagnostic_check_max_errors): Declare.

	gcc/fortran/
	* error.c (gfc_warning_check): Call diagnostic_check_max_errors.
	(gfc_error_check): Likewise.

	gcc/testsuite/
	* c-c++-common/fmax-errors.c: Check notes after last error are
	emitted.

From-SVN: r243254
---
 gcc/ChangeLog                            |  9 +++++++
 gcc/diagnostic.c                         | 40 ++++++++++++++++++++++----------
 gcc/diagnostic.h                         |  3 ++-
 gcc/fortran/ChangeLog                    |  5 ++++
 gcc/fortran/error.c                      |  2 ++
 gcc/testsuite/ChangeLog                  |  5 ++++
 gcc/testsuite/c-c++-common/fmax-errors.c | 14 +++++++++--
 7 files changed, 63 insertions(+), 15 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ad903f9..9488b0f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2016-12-05  Nathan Sidwell  <nathan@acm.org>
+
+	* diagnostic.c (diagnostic_check_max_errors): New, broken out of ...
+	(diagnostic_action_after_output): ... here.
+	(diagnostic_report_diagnostic): Call it for non-notes.
+	* diagnostic.h (struct diagnostic_context): Make max_errors signed
+	int.
+	(diagnostic_check_max_errors): Declare.
+
 2016-12-05  Cupertino Miranda  <cmiranda@synopsys.com>
 
 	* config/arc/arc.h (STARTFILE_SPEC): Use default linux specs.
diff --git a/gcc/diagnostic.c b/gcc/diagnostic.c
index 4278a10..c06d266 100644
--- a/gcc/diagnostic.c
+++ b/gcc/diagnostic.c
@@ -446,6 +446,31 @@ bt_err_callback (void *data ATTRIBUTE_UNUSED, const char *msg, int errnum)
 	   errnum == 0 ? "" : xstrerror (errnum));
 }
 
+/* Check if we've met the maximum error limit, and if so fatally exit
+   with a message.  CONTEXT is the context to check, and FLUSH
+   indicates whether a diagnostic_finish call is needed.  */
+
+void
+diagnostic_check_max_errors (diagnostic_context *context, bool flush)
+{
+  if (!context->max_errors)
+    return;
+
+  int count = (diagnostic_kind_count (context, DK_ERROR)
+	       + diagnostic_kind_count (context, DK_SORRY)
+	       + diagnostic_kind_count (context, DK_WERROR));
+
+  if (count >= context->max_errors)
+    {
+      fnotice (stderr,
+	       "compilation terminated due to -fmax-errors=%u.\n",
+	       context->max_errors);
+      if (flush)
+	diagnostic_finish (context);
+      exit (FATAL_EXIT_CODE);
+    }
+}
+
 /* Take any action which is expected to happen after the diagnostic
    is written out.  This function does not always return.  */
 void
@@ -470,18 +495,6 @@ diagnostic_action_after_output (diagnostic_context *context,
 	  diagnostic_finish (context);
 	  exit (FATAL_EXIT_CODE);
 	}
-      if (context->max_errors != 0
-	  && ((unsigned) (diagnostic_kind_count (context, DK_ERROR)
-			  + diagnostic_kind_count (context, DK_SORRY)
-			  + diagnostic_kind_count (context, DK_WERROR))
-	      >= context->max_errors))
-	{
-	  fnotice (stderr,
-		   "compilation terminated due to -fmax-errors=%u.\n",
-		   context->max_errors);
-	  diagnostic_finish (context);
-	  exit (FATAL_EXIT_CODE);
-	}
       break;
 
     case DK_ICE:
@@ -890,6 +903,9 @@ diagnostic_report_diagnostic (diagnostic_context *context,
 	return false;
     }
 
+  if (diagnostic->kind != DK_NOTE)
+    diagnostic_check_max_errors (context);
+
   context->lock++;
 
   if (diagnostic->kind == DK_ICE || diagnostic->kind == DK_ICE_NOBT)
diff --git a/gcc/diagnostic.h b/gcc/diagnostic.h
index ead4d2a..f3bb494 100644
--- a/gcc/diagnostic.h
+++ b/gcc/diagnostic.h
@@ -143,7 +143,7 @@ struct diagnostic_context
   bool dc_warn_system_headers;
 
   /* Maximum number of errors to report.  */
-  unsigned int max_errors;
+  int max_errors;
 
   /* This function is called before any message is printed out.  It is
      responsible for preparing message prefix and such.  For example, it
@@ -320,6 +320,7 @@ void default_diagnostic_start_span_fn (diagnostic_context *,
 void default_diagnostic_finalizer (diagnostic_context *, diagnostic_info *);
 void diagnostic_set_caret_max_width (diagnostic_context *context, int value);
 void diagnostic_action_after_output (diagnostic_context *, diagnostic_t);
+void diagnostic_check_max_errors (diagnostic_context *, bool flush = false);
 
 void diagnostic_file_cache_fini (void);
 
diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index 2c06b31..f1858ea 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-05  Nathan Sidwell  <nathan@acm.org>
+
+	* error.c (gfc_warning_check): Call diagnostic_check_max_errors.
+	(gfc_error_check): Likewise.
+
 2016-12-04  Janus Weil  <janus@gcc.gnu.org>
 
 	PR fortran/78618
diff --git a/gcc/fortran/error.c b/gcc/fortran/error.c
index 0fd8a4e..757f7e2 100644
--- a/gcc/fortran/error.c
+++ b/gcc/fortran/error.c
@@ -1226,6 +1226,7 @@ gfc_warning_check (void)
       diagnostic_action_after_output (global_dc,
 				      warningcount_buffered
 				      ? DK_WARNING : DK_ERROR);
+      diagnostic_check_max_errors (global_dc, true);
     }
 }
 
@@ -1370,6 +1371,7 @@ gfc_error_check (void)
       gcc_assert (gfc_output_buffer_empty_p (pp_error_buffer));
       pp->buffer = tmp_buffer;
       diagnostic_action_after_output (global_dc, DK_ERROR);
+      diagnostic_check_max_errors (global_dc, true);
       return true;
     }
 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 2fe40d0..c40ffd6 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-05  Nathan Sidwell  <nathan@acm.org>
+
+	* c-c++-common/fmax_errors.c: Check notes after last error are
+	emitted.
+
 2016-12-04  James Greenhalgh  <james.greenhalgh@arm.com>
 
 	PR rtl-optimization/78561
diff --git a/gcc/testsuite/c-c++-common/fmax-errors.c b/gcc/testsuite/c-c++-common/fmax-errors.c
index 1ef78eb..b44e238 100644
--- a/gcc/testsuite/c-c++-common/fmax-errors.c
+++ b/gcc/testsuite/c-c++-common/fmax-errors.c
@@ -1,11 +1,21 @@
 /* PR c/44782 */
 /* { dg-do compile } */
-/* { dg-options "-fmax-errors=3" } */
+/* { dg-options "-fmax-errors=3 -Wall" } */
 
 void foo (unsigned int i, unsigned int j)
 {
   (i) ();			/* { dg-error "" } */
   (j) ();			/* { dg-error "" } */
-  (i+j) ();			/* { dg-error "" } */
+
+  i + j; /* { dg-warning "" }  */
+
+  (k) ();			/* { dg-error "" } */
+  /* Make sure we see the notes related to the final error we emit.  */
+  /* { dg-message "identifier" "" { target c } 12 } */
+
+  /* Warnings after the final error should not appear.  */
+  i + j; /* no warning.  */
+
   (i*j) ();			/* no error here due to -fmax-errors */
+
 } /* { dg-prune-output "compilation terminated" } */
-- 
cgit v1.1


From 519e0faa00bda70c9ffd66b7e7a6011c5e742d2b Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <bonzini@gcc.gnu.org>
Date: Mon, 5 Dec 2016 13:19:34 +0000
Subject: match.pd: Simplify X ? C : 0 where C is a power of 2 and X tests a
 single bit.

gcc:
* match.pd: Simplify X ? C : 0 where C is a power of 2 and
X tests a single bit.

gcc/testsuite:
* gcc.dg/fold-and-lshift.c, gcc.dg/fold-and-rshift-1.c,
gcc.dg/fold-and-rshift-2.c: New testcases.

From-SVN: r243255
---
 gcc/ChangeLog                            |  7 ++++++-
 gcc/match.pd                             | 28 +++++++++++++++++++++++++
 gcc/testsuite/ChangeLog                  |  5 +++++
 gcc/testsuite/gcc.dg/fold-and-lshift.c   | 35 ++++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/fold-and-rshift-1.c | 35 ++++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.dg/fold-and-rshift-2.c | 25 +++++++++++++++++++++++
 6 files changed, 134 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/fold-and-lshift.c
 create mode 100644 gcc/testsuite/gcc.dg/fold-and-rshift-1.c
 create mode 100644 gcc/testsuite/gcc.dg/fold-and-rshift-2.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9488b0f..2ba0302 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-05  Paolo Bonzini  <bonzini@gnu.org>
+
+	* match.pd: Simplify X ? C : 0 where C is a power of 2 and
+	X tests a single bit.
+
 2016-12-05  Nathan Sidwell  <nathan@acm.org>
 
 	* diagnostic.c (diagnostic_check_max_errors): New, broken out of ...
@@ -25,7 +30,7 @@
 	* config/arc/arc.md (ls_gd_load): Remove unused pattern.
 	(tls_gd_dispatch): Likewise.
 
-2016-12-025  Andre Vieira  <andre.simoesdiasvieira@arm.com>
+2016-12-05  Andre Vieira  <andre.simoesdiasvieira@arm.com>
 
 	* config/arm/arm.c (TARGET_ASM_INIT_SECTIONS): Fix wrong undef
 	location.
diff --git a/gcc/match.pd b/gcc/match.pd
index dbb9103..1fe003b 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2737,6 +2737,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (cmp (bit_and@2 @0 integer_pow2p@1) @1)
   (icmp @2 { build_zero_cst (TREE_TYPE (@0)); })))
  
+/* If we have (A & C) != 0 ? D : 0 where C and D are powers of 2,
+   convert this into a shift followed by ANDing with D.  */
+(simplify
+ (cond
+  (ne (bit_and @0 integer_pow2p@1) integer_zerop)
+  integer_pow2p@2 integer_zerop)
+ (with {
+    int shift = wi::exact_log2 (@2) - wi::exact_log2 (@1);
+  }
+  (if (shift > 0)
+   (bit_and
+    (lshift (convert @0) { build_int_cst (integer_type_node, shift); }) @2)
+   (bit_and
+    (convert (rshift @0 { build_int_cst (integer_type_node, -shift); })) @2))))
+
 /* If we have (A & C) != 0 where C is the sign bit of A, convert
    this into A < 0.  Similarly for (A & C) == 0 into A >= 0.  */
 (for cmp (eq ne)
@@ -2751,6 +2766,19 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    (with { tree stype = signed_type_for (TREE_TYPE (@0)); }
     (ncmp (convert:stype @0) { build_zero_cst (stype); })))))
 
+/* If we have A < 0 ? C : 0 where C is a power of 2, convert
+   this into a right shift followed by ANDing with C.  */
+(simplify
+ (cond
+  (lt @0 integer_zerop)
+  integer_pow2p@1 integer_zerop)
+ (with {
+    int shift = element_precision (@0) - wi::exact_log2 (@1) - 1;
+  }
+  (bit_and
+   (convert (rshift @0 { build_int_cst (integer_type_node, shift); }))
+   @1)))
+
 /* When the addresses are not directly of decls compare base and offset.
    This implements some remaining parts of fold_comparison address
    comparisons but still no complete part of it.  Still it is good
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c40ffd6..d9edb52 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-05  Paolo Bonzini  <bonzini@gnu.org>
+
+	* gcc.dg/fold-and-lshift.c, gcc.dg/fold-and-rshift-1.c,
+	gcc.dg/fold-and-rshift-2.c: New testcases.
+
 2016-12-05  Nathan Sidwell  <nathan@acm.org>
 
 	* c-c++-common/fmax_errors.c: Check notes after last error are
diff --git a/gcc/testsuite/gcc.dg/fold-and-lshift.c b/gcc/testsuite/gcc.dg/fold-and-lshift.c
new file mode 100644
index 0000000..2905095
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-and-lshift.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-original" } */
+
+int f(int x)
+{
+	return (x << 2) & 128;
+}
+
+int g(int x)
+{
+	return !!(x & 32) << 7;
+}
+
+int h(int x)
+{
+	return ((x >> 5) & 1) << 7;
+}
+
+int i(int x)
+{
+	return (x & 32) >> 5 << 7;
+}
+
+int j(int x)
+{
+	return ((x >> 5) & 1) ? 128 : 0;
+}
+
+int k(int x)
+{
+	return (x & 32) ? 128 : 0;
+}
+
+/* { dg-final { scan-tree-dump-not " \\? " "original" } } */
+/* { dg-final { scan-assembler-not "sarl" { target i?86-*-* x86_64-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/fold-and-rshift-1.c b/gcc/testsuite/gcc.dg/fold-and-rshift-1.c
new file mode 100644
index 0000000..11c13d5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-and-rshift-1.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-original" } */
+
+int f(int x)
+{
+	return (x >> 2) & 128;
+}
+
+int g(int x)
+{
+	return !!(x & 512) << 7;
+}
+
+int h(int x)
+{
+	return ((x >> 9) & 1) << 7;
+}
+
+int i(int x)
+{
+	return (x & 512) >> 9 << 7;
+}
+
+int j(int x)
+{
+	return ((x >> 9) & 1) ? 128 : 0;
+}
+
+int k(int x)
+{
+	return (x & 512) ? 128 : 0;
+}
+
+/* { dg-final { scan-tree-dump-not " \\? " "original" } } */
+/* { dg-final { scan-assembler-not "sall" { target i?86-*-* x86_64-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/fold-and-rshift-2.c b/gcc/testsuite/gcc.dg/fold-and-rshift-2.c
new file mode 100644
index 0000000..f88d48d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/fold-and-rshift-2.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-original" } */
+
+unsigned f(unsigned x)
+{
+	return (x >> 29) & 32;
+}
+
+unsigned g(unsigned x)
+{
+	return !!(x & 0x80000000) << 5;
+}
+
+unsigned j(unsigned x)
+{
+	return ((x >> 31) & 1) ? 32 : 0;
+}
+
+unsigned k(unsigned x)
+{
+	return (x & 0x80000000) ? 32 : 0;
+}
+
+/* { dg-final { scan-tree-dump-not " \\? " "original" } } */
+/* { dg-final { scan-assembler-not "sall" { target i?86-*-* x86_64-*-* } } } */
-- 
cgit v1.1


From a80b4f579a10d01a6cfdfff37150cfccd134dc41 Mon Sep 17 00:00:00 2001
From: Segher Boessenkool <segher@kernel.crashing.org>
Date: Mon, 5 Dec 2016 14:54:42 +0100
Subject: Revert "Do not simplify "(and (reg) (const bit)" to
 if_then_else."

	* combine.c: Revert r243162.

From-SVN: r243256
---
 gcc/ChangeLog |  4 ++++
 gcc/combine.c | 12 ------------
 2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 2ba0302..1ace8b0 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,7 @@
+2016-12-05  Segher Boessenkool  <segher@kernel.crashing.org>
+
+	* combine.c: Revert r243162.
+
 2016-12-05  Paolo Bonzini  <bonzini@gnu.org>
 
 	* match.pd: Simplify X ? C : 0 where C is a power of 2 and
diff --git a/gcc/combine.c b/gcc/combine.c
index 7ba634a..b429453 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -5602,18 +5602,6 @@ combine_simplify_rtx (rtx x, machine_mode op0_mode, int in_dest,
 		     && OBJECT_P (SUBREG_REG (XEXP (x, 0)))))))
     {
       rtx cond, true_rtx, false_rtx;
-      unsigned HOST_WIDE_INT nz;
-
-      /* If the operation is an AND wrapped in a SIGN_EXTEND or ZERO_EXTEND with
-	 either operand being just a constant single bit value, do nothing since
-	 IF_THEN_ELSE is likely to increase the expression's complexity.  */
-      if (HWI_COMPUTABLE_MODE_P (mode)
-	  && pow2p_hwi (nz = nonzero_bits (x, mode))
-	  && ! ((code == SIGN_EXTEND || code == ZERO_EXTEND)
-		&& GET_CODE (XEXP (x, 0)) == AND
-		&& CONST_INT_P (XEXP (XEXP (x, 0), 0))
-		&& UINTVAL (XEXP (XEXP (x, 0), 0)) == nz))
-	      return x;
 
       cond = if_then_else_cond (x, &true_rtx, &false_rtx);
       if (cond != 0
-- 
cgit v1.1


From 6901ea625b473fd0f13194bcaaf2a1f9ac458d62 Mon Sep 17 00:00:00 2001
From: Andrew Senkevich <andrew.senkevich@intel.com>
Date: Mon, 5 Dec 2016 17:18:42 +0000
Subject: Add AVX512 k-mask intrinsics

gcc/

2016-12-05  Andrew Senkevich  <andrew.senkevich@intel.com>

	* config/i386/avx512bwintrin.h: Add new k-mask intrinsics.
	* config/i386/avx512dqintrin.h: Ditto.
	* config/i386/avx512fintrin.h: Ditto.
	* config/i386/i386-builtin-types.def (UCHAR_FTYPE_UQI_UQI_PUCHAR,
	UCHAR_FTYPE_UHI_UHI_PUCHAR, UCHAR_FTYPE_USI_USI_PUCHAR,
	UCHAR_FTYPE_UDI_UDI_PUCHAR, UCHAR_FTYPE_UQI_UQI, UCHAR_FTYPE_UHI_UHI,
	UCHAR_FTYPE_USI_USI, UCHAR_FTYPE_UDI_UDI, UQI_FTYPE_UQI_INT,
	UHI_FTYPE_UHI_INT, USI_FTYPE_USI_INT, UDI_FTYPE_UDI_INT,
	UQI_FTYPE_UQI, USI_FTYPE_USI, UDI_FTYPE_UDI, UQI_FTYPE_UQI_UQI): New
	function types.
	* config/i386/i386-builtin.def (__builtin_ia32_knotqi,
	__builtin_ia32_knotsi, __builtin_ia32_knotdi,
	__builtin_ia32_korqi, __builtin_ia32_korsi, __builtin_ia32_kordi,
	__builtin_ia32_kxnorqi, __builtin_ia32_kxnorsi,
	__builtin_ia32_kxnordi, __builtin_ia32_kxorqi, __builtin_ia32_kxorsi,
	__builtin_ia32_kxordi, __builtin_ia32_kandqi,
	__builtin_ia32_kandsi, __builtin_ia32_kanddi, __builtin_ia32_kandnqi,
	__builtin_ia32_kandnsi, __builtin_ia32_kandndi): New.
	* config/i386/i386.c (ix86_expand_args_builtin): Handle new types.

gcc/testsuite/

2016-12-05  Andrew Senkevich  <andrew.senkevich@intel.com>

	* gcc.target/i386/avx512bw-kandd-1.c: New.
	* gcc.target/i386/avx512bw-kandnd-1.c: Ditto.
	* gcc.target/i386/avx512bw-kandnq-1.c: Ditto.
	* gcc.target/i386/avx512bw-kandq-1.c: Ditto.
	* gcc.target/i386/avx512bw-knotd-1.c: Ditto.
	* gcc.target/i386/avx512bw-knotq-1.c: Ditto.
	* gcc.target/i386/avx512bw-kord-1.c: Ditto.
	* gcc.target/i386/avx512bw-korq-1.c: Ditto.
	* gcc.target/i386/avx512bw-kunpckdq-3.c: Ditto.
	* gcc.target/i386/avx512bw-kunpckwd-3.c: Ditto.
	* gcc.target/i386/avx512bw-kxnord-1.c: Ditto.
	* gcc.target/i386/avx512bw-kxnorq-1.c: Ditto.
	* gcc.target/i386/avx512bw-kxord-1.c: Ditto.
	* gcc.target/i386/avx512bw-kxorq-1.c: Ditto.
	* gcc.target/i386/avx512dq-kandb-1.c: Ditto.
	* gcc.target/i386/avx512dq-kandnb-1.c: Ditto.
	* gcc.target/i386/avx512dq-knotb-1.c: Ditto.
	* gcc.target/i386/avx512dq-korb-1.c: Ditto.
	* gcc.target/i386/avx512dq-kxnorb-1.c: Ditto.
	* gcc.target/i386/avx512dq-kxorb-1.c: Ditto.
	* gcc.target/i386/avx512f-kunpckbw-3.c: Ditto.
	* gcc.target/i386/avx512f-kandnw-1.c: Removed unneeded check.

From-SVN: r243265
---
 gcc/ChangeLog                                      |  22 +++++
 gcc/config/i386/avx512bwintrin.h                   | 100 +++++++++++++++++++++
 gcc/config/i386/avx512dqintrin.h                   |  42 +++++++++
 gcc/config/i386/avx512fintrin.h                    |  17 +++-
 gcc/config/i386/i386-builtin-types.def             |  28 ++++++
 gcc/config/i386/i386-builtin.def                   |  18 ++++
 gcc/config/i386/i386.c                             |  10 +++
 gcc/testsuite/ChangeLog                            |  25 ++++++
 gcc/testsuite/gcc.target/i386/avx512bw-kandd-1.c   |  18 ++++
 gcc/testsuite/gcc.target/i386/avx512bw-kandnd-1.c  |  18 ++++
 gcc/testsuite/gcc.target/i386/avx512bw-kandnq-1.c  |  18 ++++
 gcc/testsuite/gcc.target/i386/avx512bw-kandq-1.c   |  18 ++++
 gcc/testsuite/gcc.target/i386/avx512bw-knotd-1.c   |  18 ++++
 gcc/testsuite/gcc.target/i386/avx512bw-knotq-1.c   |  18 ++++
 gcc/testsuite/gcc.target/i386/avx512bw-kord-1.c    |  18 ++++
 gcc/testsuite/gcc.target/i386/avx512bw-korq-1.c    |  18 ++++
 .../gcc.target/i386/avx512bw-kunpckdq-3.c          |  16 ++++
 .../gcc.target/i386/avx512bw-kunpckwd-3.c          |  16 ++++
 gcc/testsuite/gcc.target/i386/avx512bw-kxnord-1.c  |  18 ++++
 gcc/testsuite/gcc.target/i386/avx512bw-kxnorq-1.c  |  18 ++++
 gcc/testsuite/gcc.target/i386/avx512bw-kxord-1.c   |  18 ++++
 gcc/testsuite/gcc.target/i386/avx512bw-kxorq-1.c   |  18 ++++
 gcc/testsuite/gcc.target/i386/avx512dq-kandb-1.c   |  18 ++++
 gcc/testsuite/gcc.target/i386/avx512dq-kandnb-1.c  |  18 ++++
 gcc/testsuite/gcc.target/i386/avx512dq-knotb-1.c   |  18 ++++
 gcc/testsuite/gcc.target/i386/avx512dq-korb-1.c    |  18 ++++
 gcc/testsuite/gcc.target/i386/avx512dq-kxnorb-1.c  |  18 ++++
 gcc/testsuite/gcc.target/i386/avx512dq-kxorb-1.c   |  18 ++++
 gcc/testsuite/gcc.target/i386/avx512f-kandnw-1.c   |   1 -
 gcc/testsuite/gcc.target/i386/avx512f-kunpckbw-3.c |  18 ++++
 30 files changed, 635 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-kandd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-kandnd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-kandnq-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-kandq-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-knotd-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-knotq-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-kord-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-korq-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-kunpckdq-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-kxnord-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-kxnorq-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-kxord-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512bw-kxorq-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512dq-kandb-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512dq-kandnb-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512dq-knotb-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512dq-korb-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512dq-kxnorb-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512dq-kxorb-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/avx512f-kunpckbw-3.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 1ace8b0..02d560d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,25 @@
+2016-12-05  Andrew Senkevich  <andrew.senkevich@intel.com>
+
+	* config/i386/avx512bwintrin.h: Add new k-mask intrinsics.
+	* config/i386/avx512dqintrin.h: Ditto.
+	* config/i386/avx512fintrin.h: Ditto.
+	* config/i386/i386-builtin-types.def (UCHAR_FTYPE_UQI_UQI_PUCHAR,
+	UCHAR_FTYPE_UHI_UHI_PUCHAR, UCHAR_FTYPE_USI_USI_PUCHAR,
+	UCHAR_FTYPE_UDI_UDI_PUCHAR, UCHAR_FTYPE_UQI_UQI, UCHAR_FTYPE_UHI_UHI,
+	UCHAR_FTYPE_USI_USI, UCHAR_FTYPE_UDI_UDI, UQI_FTYPE_UQI_INT,
+	UHI_FTYPE_UHI_INT, USI_FTYPE_USI_INT, UDI_FTYPE_UDI_INT,
+	UQI_FTYPE_UQI, USI_FTYPE_USI, UDI_FTYPE_UDI, UQI_FTYPE_UQI_UQI): New
+	function types.
+	* config/i386/i386-builtin.def (__builtin_ia32_knotqi,
+	__builtin_ia32_knotsi, __builtin_ia32_knotdi,
+	__builtin_ia32_korqi, __builtin_ia32_korsi, __builtin_ia32_kordi,
+	__builtin_ia32_kxnorqi, __builtin_ia32_kxnorsi,
+	__builtin_ia32_kxnordi, __builtin_ia32_kxorqi, __builtin_ia32_kxorsi,
+	__builtin_ia32_kxordi, __builtin_ia32_kandqi,
+	__builtin_ia32_kandsi, __builtin_ia32_kanddi, __builtin_ia32_kandnqi,
+	__builtin_ia32_kandnsi, __builtin_ia32_kandndi): New.
+	* config/i386/i386.c (ix86_expand_args_builtin): Handle new types.
+
 2016-12-05  Segher Boessenkool  <segher@kernel.crashing.org>
 
 	* combine.c: Revert r243162.
diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h
index 4069802..9e6e0ce 100644
--- a/gcc/config/i386/avx512bwintrin.h
+++ b/gcc/config/i386/avx512bwintrin.h
@@ -40,6 +40,90 @@ typedef char __v64qi __attribute__ ((__vector_size__ (64)));
 
 typedef unsigned long long __mmask64;
 
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_knot_mask32 (__mmask32 __A)
+{
+  return (__mmask32) __builtin_ia32_knotsi ((__mmask32) __A);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_knot_mask64 (__mmask64 __A)
+{
+  return (__mmask64) __builtin_ia32_knotdi ((__mmask64) __A);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kor_mask32 (__mmask32 __A, __mmask32 __B)
+{
+  return (__mmask32) __builtin_ia32_korsi ((__mmask32) __A, (__mmask32) __B);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kor_mask64 (__mmask64 __A, __mmask64 __B)
+{
+  return (__mmask64) __builtin_ia32_kordi ((__mmask64) __A, (__mmask64) __B);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kxnor_mask32 (__mmask32 __A, __mmask32 __B)
+{
+  return (__mmask32) __builtin_ia32_kxnorsi ((__mmask32) __A, (__mmask32) __B);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kxnor_mask64 (__mmask64 __A, __mmask64 __B)
+{
+  return (__mmask64) __builtin_ia32_kxnordi ((__mmask64) __A, (__mmask64) __B);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kxor_mask32 (__mmask32 __A, __mmask32 __B)
+{
+  return (__mmask32) __builtin_ia32_kxorsi ((__mmask32) __A, (__mmask32) __B);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kxor_mask64 (__mmask64 __A, __mmask64 __B)
+{
+  return (__mmask64) __builtin_ia32_kxordi ((__mmask64) __A, (__mmask64) __B);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kand_mask32 (__mmask32 __A, __mmask32 __B)
+{
+  return (__mmask32) __builtin_ia32_kandsi ((__mmask32) __A, (__mmask32) __B);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kand_mask64 (__mmask64 __A, __mmask64 __B)
+{
+  return (__mmask64) __builtin_ia32_kanddi ((__mmask64) __A, (__mmask64) __B);
+}
+
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kandn_mask32 (__mmask32 __A, __mmask32 __B)
+{
+  return (__mmask32) __builtin_ia32_kandnsi ((__mmask32) __A, (__mmask32) __B);
+}
+
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kandn_mask64 (__mmask64 __A, __mmask64 __B)
+{
+  return (__mmask64) __builtin_ia32_kandndi ((__mmask64) __A, (__mmask64) __B);
+}
+
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
@@ -114,6 +198,14 @@ _mm512_kunpackw (__mmask32 __A, __mmask32 __B)
 					      (__mmask32) __B);
 }
 
+extern __inline __mmask32
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kunpackw_mask32 (__mmask16 __A, __mmask16 __B)
+{
+  return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
+					      (__mmask32) __B);
+}
+
 extern __inline __mmask64
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_kunpackd (__mmask64 __A, __mmask64 __B)
@@ -122,6 +214,14 @@ _mm512_kunpackd (__mmask64 __A, __mmask64 __B)
 					      (__mmask64) __B);
 }
 
+extern __inline __mmask64
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kunpackd_mask64 (__mmask32 __A, __mmask32 __B)
+{
+  return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
+					      (__mmask64) __B);
+}
+
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P)
diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h
index 4b954f9..d2405c3 100644
--- a/gcc/config/i386/avx512dqintrin.h
+++ b/gcc/config/i386/avx512dqintrin.h
@@ -34,6 +34,48 @@
 #define __DISABLE_AVX512DQ__
 #endif /* __AVX512DQ__ */
 
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_knot_mask8 (__mmask8 __A)
+{
+  return (__mmask8) __builtin_ia32_knotqi ((__mmask8) __A);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kor_mask8 (__mmask8 __A, __mmask8 __B)
+{
+  return (__mmask8) __builtin_ia32_korqi ((__mmask8) __A, (__mmask8) __B);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kxnor_mask8 (__mmask8 __A, __mmask8 __B)
+{
+  return (__mmask8) __builtin_ia32_kxnorqi ((__mmask8) __A, (__mmask8) __B);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kxor_mask8 (__mmask8 __A, __mmask8 __B)
+{
+  return (__mmask8) __builtin_ia32_kxorqi ((__mmask8) __A, (__mmask8) __B);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kand_mask8 (__mmask8 __A, __mmask8 __B)
+{
+  return (__mmask8) __builtin_ia32_kandqi ((__mmask8) __A, (__mmask8) __B);
+}
+
+extern __inline __mmask8
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kandn_mask8 (__mmask8 __A, __mmask8 __B)
+{
+  return (__mmask8) __builtin_ia32_kandnqi ((__mmask8) __A, (__mmask8) __B);
+}
+
 extern __inline __m512d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_broadcast_f64x2 (__m128d __A)
diff --git a/gcc/config/i386/avx512fintrin.h b/gcc/config/i386/avx512fintrin.h
index 2372c83..ab1704b 100644
--- a/gcc/config/i386/avx512fintrin.h
+++ b/gcc/config/i386/avx512fintrin.h
@@ -9977,6 +9977,13 @@ _mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P)
 }
 
 /* Mask arithmetic operations */
+#define _kand_mask16 _mm512_kand
+#define _kandn_mask16 _mm512_kandn
+#define _knot_mask16 _mm512_knot
+#define _kor_mask16 _mm512_kor
+#define _kxnor_mask16 _mm512_kxnor
+#define _kxor_mask16 _mm512_kxor
+
 extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_kand (__mmask16 __A, __mmask16 __B)
@@ -9988,7 +9995,8 @@ extern __inline __mmask16
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm512_kandn (__mmask16 __A, __mmask16 __B)
 {
-  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
+  return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A,
+					     (__mmask16) __B);
 }
 
 extern __inline __mmask16
@@ -10042,6 +10050,13 @@ _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
   return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
 }
 
+extern __inline __mmask16
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_kunpackb_mask16 (__mmask8 __A, __mmask8 __B)
+{
+  return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
+}
+
 #ifdef __OPTIMIZE__
 extern __inline __m512i
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 4a38c12..6e938eb 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -139,6 +139,12 @@ DEF_POINTER_TYPE (PLONGLONG, LONGLONG)
 DEF_POINTER_TYPE (PULONGLONG, ULONGLONG)
 DEF_POINTER_TYPE (PUNSIGNED, UNSIGNED)
 
+DEF_POINTER_TYPE (PUQI, UQI)
+DEF_POINTER_TYPE (PUHI, UHI)
+DEF_POINTER_TYPE (PUSI, USI)
+DEF_POINTER_TYPE (PUDI, UDI)
+DEF_POINTER_TYPE (PUCHAR, UCHAR)
+
 DEF_POINTER_TYPE (PV2SI, V2SI)
 DEF_POINTER_TYPE (PV2DF, V2DF)
 DEF_POINTER_TYPE (PV2DI, V2DI)
@@ -536,7 +542,28 @@ DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, V16SI, V16SI, V16SI, PCV4SI)
 
 
 # Instructions returning mask
+DEF_FUNCTION_TYPE (UCHAR, UQI, UQI, PUCHAR)
+DEF_FUNCTION_TYPE (UCHAR, UQI, UQI)
+DEF_FUNCTION_TYPE (UCHAR, UHI, UHI, PUCHAR)
+DEF_FUNCTION_TYPE (UCHAR, UHI, UHI)
+DEF_FUNCTION_TYPE (UCHAR, USI, USI, PUCHAR)
+DEF_FUNCTION_TYPE (UCHAR, USI, USI)
+DEF_FUNCTION_TYPE (UCHAR, UDI, UDI, PUCHAR)
+DEF_FUNCTION_TYPE (UCHAR, UDI, UDI)
+
+DEF_FUNCTION_TYPE (USI, UQI)
+DEF_FUNCTION_TYPE (USI, UHI)
+DEF_FUNCTION_TYPE (UQI, USI)
+DEF_FUNCTION_TYPE (UHI, USI)
+
+DEF_FUNCTION_TYPE (UQI, UQI, INT)
+DEF_FUNCTION_TYPE (UHI, UHI, INT)
+DEF_FUNCTION_TYPE (USI, USI, INT)
+DEF_FUNCTION_TYPE (UDI, UDI, INT)
+DEF_FUNCTION_TYPE (UQI, UQI)
 DEF_FUNCTION_TYPE (UHI, UHI)
+DEF_FUNCTION_TYPE (USI, USI)
+DEF_FUNCTION_TYPE (UDI, UDI)
 DEF_FUNCTION_TYPE (UHI, V16QI)
 DEF_FUNCTION_TYPE (USI, V32QI)
 DEF_FUNCTION_TYPE (UDI, V64QI)
@@ -549,6 +576,7 @@ DEF_FUNCTION_TYPE (UHI, V16SI)
 DEF_FUNCTION_TYPE (UQI, V2DI)
 DEF_FUNCTION_TYPE (UQI, V4DI)
 DEF_FUNCTION_TYPE (UQI, V8DI)
+DEF_FUNCTION_TYPE (UQI, UQI, UQI)
 DEF_FUNCTION_TYPE (UHI, UHI, UHI)
 DEF_FUNCTION_TYPE (USI, USI, USI)
 DEF_FUNCTION_TYPE (UDI, UDI, UDI)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index a9c272a..83a5089 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -1436,15 +1436,33 @@ BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__bu
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND)
 
 /* Mask arithmetic operations */
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_kandqi, "__builtin_ia32_kandqi", IX86_BUILTIN_KAND8, UNKNOWN, (int) UQI_FTYPE_UQI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kandhi, "__builtin_ia32_kandhi", IX86_BUILTIN_KAND16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kandsi, "__builtin_ia32_kandsi", IX86_BUILTIN_KAND32, UNKNOWN, (int) USI_FTYPE_USI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kanddi, "__builtin_ia32_kanddi", IX86_BUILTIN_KAND64, UNKNOWN, (int) UDI_FTYPE_UDI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_kandnqi, "__builtin_ia32_kandnqi", IX86_BUILTIN_KANDN8, UNKNOWN, (int) UQI_FTYPE_UQI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kandnhi, "__builtin_ia32_kandnhi", IX86_BUILTIN_KANDN16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kandnsi, "__builtin_ia32_kandnsi", IX86_BUILTIN_KANDN32, UNKNOWN, (int) USI_FTYPE_USI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kandndi, "__builtin_ia32_kandndi", IX86_BUILTIN_KANDN64, UNKNOWN, (int) UDI_FTYPE_UDI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_knotqi, "__builtin_ia32_knotqi", IX86_BUILTIN_KNOT8, UNKNOWN, (int) UQI_FTYPE_UQI)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_knothi, "__builtin_ia32_knothi", IX86_BUILTIN_KNOT16, UNKNOWN, (int) UHI_FTYPE_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_knotsi, "__builtin_ia32_knotsi", IX86_BUILTIN_KNOT32, UNKNOWN, (int) USI_FTYPE_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_knotdi, "__builtin_ia32_knotdi", IX86_BUILTIN_KNOT64, UNKNOWN, (int) UDI_FTYPE_UDI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_kiorqi, "__builtin_ia32_korqi", IX86_BUILTIN_KOR8, UNKNOWN, (int) UQI_FTYPE_UQI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kiorhi, "__builtin_ia32_korhi", IX86_BUILTIN_KOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kiorsi, "__builtin_ia32_korsi", IX86_BUILTIN_KOR32, UNKNOWN, (int) USI_FTYPE_USI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kiordi, "__builtin_ia32_kordi", IX86_BUILTIN_KOR64, UNKNOWN, (int) UDI_FTYPE_UDI_UDI)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestchi, "__builtin_ia32_kortestchi", IX86_BUILTIN_KORTESTC16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kortestzhi, "__builtin_ia32_kortestzhi", IX86_BUILTIN_KORTESTZ16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kunpckhi, "__builtin_ia32_kunpckhi", IX86_BUILTIN_KUNPCKBW, UNKNOWN, (int) UHI_FTYPE_UHI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_kxnorqi, "__builtin_ia32_kxnorqi", IX86_BUILTIN_KXNOR8, UNKNOWN, (int) UQI_FTYPE_UQI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kxnorhi, "__builtin_ia32_kxnorhi", IX86_BUILTIN_KXNOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kxnorsi, "__builtin_ia32_kxnorsi", IX86_BUILTIN_KXNOR32, UNKNOWN, (int) USI_FTYPE_USI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kxnordi, "__builtin_ia32_kxnordi", IX86_BUILTIN_KXNOR64, UNKNOWN, (int) UDI_FTYPE_UDI_UDI)
+BDESC (OPTION_MASK_ISA_AVX512DQ, CODE_FOR_kxorqi, "__builtin_ia32_kxorqi", IX86_BUILTIN_KXOR8, UNKNOWN, (int) UQI_FTYPE_UQI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kxorhi, "__builtin_ia32_kxorhi", IX86_BUILTIN_KXOR16, UNKNOWN, (int) UHI_FTYPE_UHI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kxorsi, "__builtin_ia32_kxorsi", IX86_BUILTIN_KXOR32, UNKNOWN, (int) USI_FTYPE_USI_USI)
+BDESC (OPTION_MASK_ISA_AVX512BW, CODE_FOR_kxordi, "__builtin_ia32_kxordi", IX86_BUILTIN_KXOR64, UNKNOWN, (int) UDI_FTYPE_UDI_UDI)
 BDESC (OPTION_MASK_ISA_AVX512F, CODE_FOR_kmovw, "__builtin_ia32_kmov16", IX86_BUILTIN_KMOV16, UNKNOWN, (int) UHI_FTYPE_UHI)
 
 /* SHA */
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 41717da..003439f 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -34842,7 +34842,12 @@ ix86_expand_args_builtin (const struct builtin_description *d,
     case V4DI_FTYPE_V8HI:
     case V4DI_FTYPE_V4SI:
     case V4DI_FTYPE_V2DI:
+    case UQI_FTYPE_UQI:
     case UHI_FTYPE_UHI:
+    case USI_FTYPE_USI:
+    case USI_FTYPE_UQI:
+    case USI_FTYPE_UHI:
+    case UDI_FTYPE_UDI:
     case UHI_FTYPE_V16QI:
     case USI_FTYPE_V32QI:
     case UDI_FTYPE_V64QI:
@@ -34976,6 +34981,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
     case UINT_FTYPE_UINT_UCHAR:
     case UINT16_FTYPE_UINT16_INT:
     case UINT8_FTYPE_UINT8_INT:
+    case UQI_FTYPE_UQI_UQI:
     case UHI_FTYPE_UHI_UHI:
     case USI_FTYPE_USI_USI:
     case UDI_FTYPE_UDI_UDI:
@@ -35023,6 +35029,10 @@ ix86_expand_args_builtin (const struct builtin_description *d,
     case V4DI_FTYPE_V8DI_INT:
     case QI_FTYPE_V4SF_INT:
     case QI_FTYPE_V2DF_INT:
+    case UQI_FTYPE_UQI_INT:
+    case UHI_FTYPE_UHI_INT:
+    case USI_FTYPE_USI_INT:
+    case UDI_FTYPE_UDI_INT:
       nargs = 2;
       nargs_constant = 1;
       break;
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index d9edb52..3b0a8fa 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,28 @@
+2016-12-05  Andrew Senkevich  <andrew.senkevich@intel.com>
+
+	* gcc.target/i386/avx512bw-kandd-1.c: New.
+	* gcc.target/i386/avx512bw-kandnd-1.c: Ditto.
+	* gcc.target/i386/avx512bw-kandnq-1.c: Ditto.
+	* gcc.target/i386/avx512bw-kandq-1.c: Ditto.
+	* gcc.target/i386/avx512bw-knotd-1.c: Ditto.
+	* gcc.target/i386/avx512bw-knotq-1.c: Ditto.
+	* gcc.target/i386/avx512bw-kord-1.c: Ditto.
+	* gcc.target/i386/avx512bw-korq-1.c: Ditto.
+	* gcc.target/i386/avx512bw-kunpckdq-3.c: Ditto.
+	* gcc.target/i386/avx512bw-kunpckwd-3.c: Ditto.
+	* gcc.target/i386/avx512bw-kxnord-1.c: Ditto.
+	* gcc.target/i386/avx512bw-kxnorq-1.c: Ditto.
+	* gcc.target/i386/avx512bw-kxord-1.c: Ditto.
+	* gcc.target/i386/avx512bw-kxorq-1.c: Ditto.
+	* gcc.target/i386/avx512dq-kandb-1.c: Ditto.
+	* gcc.target/i386/avx512dq-kandnb-1.c: Ditto.
+	* gcc.target/i386/avx512dq-knotb-1.c: Ditto.
+	* gcc.target/i386/avx512dq-korb-1.c: Ditto.
+	* gcc.target/i386/avx512dq-kxnorb-1.c: Ditto.
+	* gcc.target/i386/avx512dq-kxorb-1.c: Ditto.
+	* gcc.target/i386/avx512f-kunpckbw-3.c: Ditto.
+	* gcc.target/i386/avx512f-kandnw-1.c: Removed unneeded check.
+
 2016-12-05  Paolo Bonzini  <bonzini@gnu.org>
 
 	* gcc.dg/fold-and-lshift.c, gcc.dg/fold-and-rshift-1.c,
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kandd-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-kandd-1.c
new file mode 100644
index 0000000..2a934f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kandd-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kandd\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask32 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_epi32();
+
+  __asm__( "kmovd %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovd %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kand_mask32 (k1, k2);
+  x = _mm512_mask_add_epi16 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kandnd-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-kandnd-1.c
new file mode 100644
index 0000000..69cbe04
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kandnd-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kandnd\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask32 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovd %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovd %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kandn_mask32 (k1, k2);
+  x = _mm512_mask_add_epi16 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kandnq-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-kandnq-1.c
new file mode 100644
index 0000000..e8b7a5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kandnq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kandnq\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask64 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovq %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovq %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kandn_mask64 (k1, k2);
+  x = _mm512_mask_add_epi8 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kandq-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-kandq-1.c
new file mode 100644
index 0000000..a1aaed6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kandq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kandq\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask64 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_epi32();
+
+  __asm__( "kmovq %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovq %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kand_mask64 (k1, k2);
+  x = _mm512_mask_add_epi8 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-knotd-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-knotd-1.c
new file mode 100644
index 0000000..8a7e033
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-knotd-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "knotd\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask32 k1, k2;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovd %1, %0" : "=k" (k1) : "r" (45) );
+
+  k2 = _knot_mask32 (k1);
+  x = _mm512_mask_add_epi16 (x, k1, x, x);
+  x = _mm512_mask_add_epi16 (x, k2, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-knotq-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-knotq-1.c
new file mode 100644
index 0000000..deb6579
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-knotq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "knotq\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask64 k1, k2;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovq %1, %0" : "=k" (k1) : "r" (45) );
+
+  k2 = _knot_mask64 (k1);
+  x = _mm512_mask_add_epi8 (x, k1, x, x);
+  x = _mm512_mask_add_epi8 (x, k2, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kord-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-kord-1.c
new file mode 100644
index 0000000..4c35a81
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kord-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kord\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask32 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovd %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovd %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kor_mask32 (k1, k2);
+  x = _mm512_mask_add_epi16 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-korq-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-korq-1.c
new file mode 100644
index 0000000..89753f0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-korq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "korq\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask64 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovq %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovq %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kor_mask64 (k1, k2);
+  x = _mm512_mask_add_epi8 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kunpckdq-3.c b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckdq-3.c
new file mode 100644
index 0000000..951260f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckdq-3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kunpckdq\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test () {
+  volatile __mmask64 k3;
+  __mmask32 k1, k2;
+
+  __asm__( "kmovd %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovd %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kunpackd_mask64 (k1, k2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-3.c b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-3.c
new file mode 100644
index 0000000..c68ad8c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kunpckwd-3.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kunpckwd\[ \\t\]+\[^\{\n\]*%k\[1-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test () {
+  volatile __mmask32 k3;
+  __mmask16 k1, k2;
+
+  __asm__( "kmovw %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovw %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kunpackw_mask32 (k1, k2);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kxnord-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-kxnord-1.c
new file mode 100644
index 0000000..d93d61e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kxnord-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kxnord\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask32 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovd %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovd %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kxnor_mask32 (k1, k2);
+  x = _mm512_mask_add_epi16 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kxnorq-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-kxnorq-1.c
new file mode 100644
index 0000000..ba72e1f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kxnorq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kxnorq\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask64 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovq %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovq %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kxnor_mask64 (k1, k2);
+  x = _mm512_mask_add_epi8 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kxord-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-kxord-1.c
new file mode 100644
index 0000000..97ea291
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kxord-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kxord\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask32 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovd %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovd %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kxor_mask32 (k1, k2);
+  x = _mm512_mask_add_epi16 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-kxorq-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-kxorq-1.c
new file mode 100644
index 0000000..abf4280
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-kxorq-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -O2" } */
+/* { dg-final { scan-assembler-times "kxorq\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512bw_test ()
+{
+  __mmask64 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_si512 ();
+
+  __asm__( "kmovq %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovq %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kxor_mask64 (k1, k2);
+  x = _mm512_mask_add_epi8 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kandb-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-kandb-1.c
new file mode 100644
index 0000000..b5b5367
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-kandb-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "kandb\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512dq_test ()
+{
+  __mmask8 k1, k2, k3;
+  volatile __m512i x = _mm512_setzero_epi32();
+
+  __asm__( "kmovb %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovb %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kand_mask8 (k1, k2);
+  x = _mm512_mask_add_epi64 (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kandnb-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-kandnb-1.c
new file mode 100644
index 0000000..a0e96fd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-kandnb-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "kandnb\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512dq_test ()
+{
+  __mmask8 k1, k2, k3;
+  volatile __m512d x = _mm512_setzero_pd();
+
+  __asm__( "kmovb %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovb %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kandn_mask8 (k1, k2);
+  x = _mm512_mask_add_pd (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-knotb-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-knotb-1.c
new file mode 100644
index 0000000..03bbf83
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-knotb-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "knotb\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512dq_test ()
+{
+  __mmask8 k1, k2;
+  volatile __m512d x = _mm512_setzero_pd();
+
+  __asm__( "kmovb %1, %0" : "=k" (k1) : "r" (45) );
+
+  k2 = _knot_mask8 (k1);
+  x = _mm512_mask_add_pd (x, k1, x, x);
+  x = _mm512_mask_add_pd (x, k2, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-korb-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-korb-1.c
new file mode 100644
index 0000000..7717aee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-korb-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "korb\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512dq_test ()
+{
+  __mmask8 k1, k2, k3;
+  volatile __m512d x = _mm512_setzero_pd();
+
+  __asm__( "kmovb %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovb %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kor_mask8 (k1, k2);
+  x = _mm512_mask_add_pd (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kxnorb-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-kxnorb-1.c
new file mode 100644
index 0000000..faa974f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-kxnorb-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "kxnorb\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512dq_test ()
+{
+  __mmask8 k1, k2, k3;
+  volatile __m512d x = _mm512_setzero_pd();
+
+  __asm__( "kmovb %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovb %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kxnor_mask8 (k1, k2);
+  x = _mm512_mask_add_pd (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512dq-kxorb-1.c b/gcc/testsuite/gcc.target/i386/avx512dq-kxorb-1.c
new file mode 100644
index 0000000..a21830b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512dq-kxorb-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512dq -O2" } */
+/* { dg-final { scan-assembler-times "kxorb\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512dq_test ()
+{
+  __mmask8 k1, k2, k3;
+  volatile __m512d x = _mm512_setzero_pd();
+
+  __asm__( "kmovb %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovb %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kxor_mask8 (k1, k2);
+  x = _mm512_mask_add_pd (x, k3, x, x);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kandnw-1.c b/gcc/testsuite/gcc.target/i386/avx512f-kandnw-1.c
index 727a589..17b7b29 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-kandnw-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-kandnw-1.c
@@ -1,7 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mavx512f -O2" } */
 /* { dg-final { scan-assembler-times "kandnw\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
-/* { dg-final { scan-assembler-times "kmovw" 2 } } */
 
 #include <immintrin.h>
 
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-kunpckbw-3.c b/gcc/testsuite/gcc.target/i386/avx512f-kunpckbw-3.c
new file mode 100644
index 0000000..2061f0a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-kunpckbw-3.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "kunpckbw\[ \\t\]+\[^\{\n\]*%k\[0-7\](?:\n|\[ \\t\]+#)" 1 } } */
+
+#include <immintrin.h>
+
+void
+avx512f_test () {
+  __mmask8 k1, k2;
+  __mmask16 k3;
+  volatile __m512 x = _mm512_setzero_ps(); 
+
+  __asm__( "kmovb %1, %0" : "=k" (k1) : "r" (1) );
+  __asm__( "kmovb %1, %0" : "=k" (k2) : "r" (2) );
+
+  k3 = _kunpackb_mask16 (k1, k2);
+  x = _mm512_mask_add_ps (x, k3, x, x);
+}
-- 
cgit v1.1


From 09955a32593b5ac9aedf40d1a0a9d715e840b5a3 Mon Sep 17 00:00:00 2001
From: Waldemar Brodkorb <wbx@openadk.org>
Date: Mon, 5 Dec 2016 17:48:39 +0000
Subject: * config.gcc (*-*-uclinux*): Enable posix threads.

From-SVN: r243268
---
 gcc/ChangeLog  | 4 ++++
 gcc/config.gcc | 3 +++
 2 files changed, 7 insertions(+)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 02d560d..793f710 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,7 @@
+2016-12-05  Waldemar Brodkorb  <wbx@openadk.org>
+
+	* config.gcc (*-*-uclinux*): Enable posix threads.
+
 2016-12-05  Andrew Senkevich  <andrew.senkevich@intel.com>
 
 	* config/i386/avx512bwintrin.h: Add new k-mask intrinsics.
diff --git a/gcc/config.gcc b/gcc/config.gcc
index e034bc3..d11e579 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -831,6 +831,9 @@ case ${target} in
 *-*-uclinux*)
   extra_options="$extra_options gnu-user.opt"
   use_gcc_stdint=wrap
+  case ${enable_threads} in
+    "" | yes | posix) thread_file='posix' ;;
+  esac
   tm_defines="$tm_defines DEFAULT_LIBC=LIBC_UCLIBC SINGLE_LIBC"
   ;;
 *-*-rdos*)
-- 
cgit v1.1


From e3cc0f6bb67b012c26d3e7a5e2e76a90456c4d16 Mon Sep 17 00:00:00 2001
From: Jeff Law <law@gcc.gnu.org>
Date: Mon, 5 Dec 2016 10:49:41 -0700
Subject: re PR target/71721 (uclinux posix threads)

	PR target/71721
	* config.gcc (*-*-uclinux*): Enable posix threads.
Adding BZ marker

From-SVN: r243269
---
 gcc/ChangeLog | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 793f710..57c2450 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,6 @@
 2016-12-05  Waldemar Brodkorb  <wbx@openadk.org>
 
+	PR target/71721
 	* config.gcc (*-*-uclinux*): Enable posix threads.
 
 2016-12-05  Andrew Senkevich  <andrew.senkevich@intel.com>
-- 
cgit v1.1


From 8907a722fdfbe6992b4c5ee7ee5fa939213154af Mon Sep 17 00:00:00 2001
From: Bill Schmidt <wschmidt@linux.vnet.ibm.com>
Date: Mon, 5 Dec 2016 21:48:27 +0000
Subject: re PR tree-optimization/78646 (incorrect result type for pointer
 addition in slsr)

2016-12-05  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
	    Stefan Freudenberger  <stefan@reservoir.com>

	PR tree-optimization/78646
	* gimple-ssa-strength-reduction.c (replace_ref): The pointer
	addition used for the memory base expression should have the type
	of the candidate.


Co-Authored-By: Stefan Freudenberger <stefan@reservoir.com>

From-SVN: r243272
---
 gcc/ChangeLog                       | 8 ++++++++
 gcc/gimple-ssa-strength-reduction.c | 2 +-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 57c2450..beef921 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2016-12-05  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
+	    Stefan Freudenberger  <stefan@reservoir.com>
+
+	PR tree-optimization/78646
+	* gimple-ssa-strength-reduction.c (replace_ref): The pointer
+	addition used for the memory base expression should have the type
+	of the candidate.
+
 2016-12-05  Waldemar Brodkorb  <wbx@openadk.org>
 
 	PR target/71721
diff --git a/gcc/gimple-ssa-strength-reduction.c b/gcc/gimple-ssa-strength-reduction.c
index bdfdb9a..21dcbb0 100644
--- a/gcc/gimple-ssa-strength-reduction.c
+++ b/gcc/gimple-ssa-strength-reduction.c
@@ -1921,7 +1921,7 @@ replace_ref (tree *expr, slsr_cand_t c)
   if (align < TYPE_ALIGN (acc_type))
     acc_type = build_aligned_type (acc_type, align);
 
-  add_expr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (c->base_expr),
+  add_expr = fold_build2 (POINTER_PLUS_EXPR, c->cand_type,
 			  c->base_expr, c->stride);
   mem_ref = fold_build2 (MEM_REF, acc_type, add_expr,
 			 wide_int_to_tree (c->cand_type, c->index));
-- 
cgit v1.1


From 51d20f21607bc03375f687424a8c11c66dd37416 Mon Sep 17 00:00:00 2001
From: Joseph Myers <joseph@codesourcery.com>
Date: Mon, 5 Dec 2016 22:49:31 +0000
Subject: * es.po, fr.po: Update.

From-SVN: r243273
---
 gcc/po/ChangeLog |    4 +
 gcc/po/es.po     |  278 +++-----------
 gcc/po/fr.po     | 1133 ++++++++++++++++++++++--------------------------------
 3 files changed, 524 insertions(+), 891 deletions(-)

diff --git a/gcc/po/ChangeLog b/gcc/po/ChangeLog
index 4b6ca4b..fa76569 100644
--- a/gcc/po/ChangeLog
+++ b/gcc/po/ChangeLog
@@ -1,3 +1,7 @@
+2016-12-05  Joseph Myers  <joseph@codesourcery.com>
+
+	* es.po, fr.po: Update.
+
 2016-12-01  Joseph Myers  <joseph@codesourcery.com>
 
 	* es.po: Update.
diff --git a/gcc/po/es.po b/gcc/po/es.po
index cf04cb3..4172026 100644
--- a/gcc/po/es.po
+++ b/gcc/po/es.po
@@ -35,7 +35,7 @@ msgstr ""
 "Project-Id-Version: gcc 6.2.0\n"
 "Report-Msgid-Bugs-To: http://gcc.gnu.org/bugs.html\n"
 "POT-Creation-Date: 2016-08-19 21:03+0000\n"
-"PO-Revision-Date: 2016-12-01 23:28+0100\n"
+"PO-Revision-Date: 2016-12-04 14:54+0100\n"
 "Last-Translator: Antonio Ceballos <aceballos@gmail.com>\n"
 "Language-Team: Spanish <es@tp.org.es>\n"
 "Language: es\n"
@@ -8972,42 +8972,30 @@ msgid "Known code models (for use with the -mcmodel= option):"
 msgstr "Modelos de código conocidos (para uso con la opción -mcmodel=):"
 
 #: config/i386/i386.opt:296
-#, fuzzy
-#| msgid "Use complex addressing modes"
 msgid "Use given address mode."
-msgstr "Usar modos de direccionamiento complejos."
+msgstr "Usar el modo de dirección dado."
 
 #: config/i386/i386.opt:300
-#, fuzzy
-#| msgid "Known code models (for use with the -mcmodel= option):"
 msgid "Known address mode (for use with the -maddress-mode= option):"
-msgstr "Modelos de código conocidos (para uso con la opción -mcmodel=):"
+msgstr "Modo de dirección conocido (para uso con la opción -maddress-mode=):"
 
 #: config/i386/i386.opt:309
 msgid "%<-mcpu=%> is deprecated; use %<-mtune=%> or %<-march=%> instead"
 msgstr "%<-mcpu=%> es obsoleto; utilice %<-mtune=%> o %<-march%> en su lugar"
 
 #: config/i386/i386.opt:313
-#, fuzzy
-#| msgid "Generate sin, cos, sqrt for FPU"
 msgid "Generate sin, cos, sqrt for FPU."
 msgstr "Genera sin, cos, sqrt para FPU."
 
 #: config/i386/i386.opt:317
-#, fuzzy
-#| msgid "Always use Dynamic Realigned Argument Pointer (DRAP) to realign stack"
 msgid "Always use Dynamic Realigned Argument Pointer (DRAP) to realign stack."
 msgstr "Usar siempre el Puntero de Argumento Realineado Dinámicamente (DRAP) para realinear la pila."
 
 #: config/i386/i386.opt:321
-#, fuzzy
-#| msgid "Return values of functions in FPU registers"
 msgid "Return values of functions in FPU registers."
 msgstr "Devuelve los valores de funciones en registros FPU."
 
 #: config/i386/i386.opt:325
-#, fuzzy
-#| msgid "Generate floating point mathematics using given instruction set"
 msgid "Generate floating point mathematics using given instruction set."
 msgstr "Genera matemáticas de coma flotante usando el conjunto de instrucciones dado."
 
@@ -9016,14 +9004,10 @@ msgid "Valid arguments to -mfpmath=:"
 msgstr "Argumentos válidos para -mfpmath=:"
 
 #: config/i386/i386.opt:362
-#, fuzzy
-#| msgid "Inline all known string operations"
 msgid "Inline all known string operations."
 msgstr "Incluye en línea todas las operaciones de cadenas conocidas."
 
 #: config/i386/i386.opt:366
-#, fuzzy
-#| msgid "Inline memset/memcpy string operations, but perform inline version only for small blocks"
 msgid "Inline memset/memcpy string operations, but perform inline version only for small blocks."
 msgstr "Incluye en línea las operaciones de cadena memset/memcpy, pero realiza la versión inline sólo para los bloques pequeños."
 
@@ -9032,100 +9016,70 @@ msgid "%<-mintel-syntax%> and %<-mno-intel-syntax%> are deprecated; use %<-masm=
 msgstr "%<-mintel-syntax%> y %<-mno-intel-syntax%> son obsoletos; utilice %<-masm=intel%> y %<-masm=att%> en su lugar"
 
 #: config/i386/i386.opt:374
-#, fuzzy
-#| msgid "Use native (MS) bitfield layout"
 msgid "Use native (MS) bitfield layout."
 msgstr "Usa la disposición de campos de bits nativos (MS)."
 
 #: config/i386/i386.opt:394
-#, fuzzy
-#| msgid "Set 80387 floating-point precision to 32-bit"
 msgid "Set 80387 floating-point precision to 32-bit."
 msgstr "Establece la precisión de coma flotante 80387 a 32-bit."
 
 #: config/i386/i386.opt:398
-#, fuzzy
-#| msgid "Set 80387 floating-point precision to 64-bit"
 msgid "Set 80387 floating-point precision to 64-bit."
 msgstr "Establece la precisión de coma flotante 80387 a 64-bit."
 
 #: config/i386/i386.opt:402
-#, fuzzy
-#| msgid "Set 80387 floating-point precision to 80-bit"
 msgid "Set 80387 floating-point precision to 80-bit."
 msgstr "Establece la precisión de coma flotante 80387 a 80-bit."
 
 #: config/i386/i386.opt:406
-#, fuzzy
-#| msgid "Attempt to keep stack aligned to this power of 2"
 msgid "Attempt to keep stack aligned to this power of 2."
 msgstr "Trata de mantenter la pila alineada a esta potencia de 2."
 
 #: config/i386/i386.opt:410
-#, fuzzy
-#| msgid "Assume incoming stack aligned to this power of 2"
 msgid "Assume incoming stack aligned to this power of 2."
 msgstr "Asume que la pila de entrada está alineada a esta potencia de 2."
 
 #: config/i386/i386.opt:414
-#, fuzzy
-#| msgid "Use push instructions to save outgoing arguments"
 msgid "Use push instructions to save outgoing arguments."
 msgstr "Usa instrucciones push para guardar los argumentos de salida."
 
 #: config/i386/i386.opt:418
-#, fuzzy
-#| msgid "Use red-zone in the x86-64 code"
 msgid "Use red-zone in the x86-64 code."
 msgstr "Usa la zona roja en el código x86-64."
 
 #: config/i386/i386.opt:422
-#, fuzzy
-#| msgid "Number of registers used to pass integer arguments"
 msgid "Number of registers used to pass integer arguments."
 msgstr "Número de registros usados para pasar argumentos enteros."
 
 #: config/i386/i386.opt:426
-#, fuzzy
-#| msgid "Alternate calling convention"
 msgid "Alternate calling convention."
-msgstr "Convención de llamada alternativa."
+msgstr "Convenio de llamada alternativo."
 
 #: config/i386/i386.opt:430 config/alpha/alpha.opt:23
-#, fuzzy
-#| msgid "Do not use hardware fp"
 msgid "Do not use hardware fp."
 msgstr "No usa fp de hardware."
 
 #: config/i386/i386.opt:434
-#, fuzzy
-#| msgid "Use SSE register passing conventions for SF and DF mode"
 msgid "Use SSE register passing conventions for SF and DF mode."
-msgstr "Usa las convenciones de paso de registro SSE para los modos SF y DF."
+msgstr "Usa los convenios de paso de registro SSE para los modos SF y DF."
 
 #: config/i386/i386.opt:438
-#, fuzzy
-#| msgid "Realign stack in prologue"
 msgid "Realign stack in prologue."
 msgstr "Realinea la pila en el prólogo."
 
 #: config/i386/i386.opt:442
-#, fuzzy
-#| msgid "Enable stack probing"
 msgid "Enable stack probing."
 msgstr "Habilita la prueba de la pila."
 
 #: config/i386/i386.opt:446
 msgid "Specify memcpy expansion strategy when expected size is known."
-msgstr ""
+msgstr "Especifica la estrategia de expansión de memcpy cuando se conoce el tamaño esperado."
 
 #: config/i386/i386.opt:450
 msgid "Specify memset expansion strategy when expected size is known."
-msgstr ""
+msgstr "Especifica la estrategia de expansión de memset cuando se conoce el tamaño esperado."
 
 #: config/i386/i386.opt:454
-#, fuzzy
-#| msgid "Chose strategy to generate stringop using"
 msgid "Chose strategy to generate stringop using."
 msgstr "Escoge la estrategia para generar stringop using."
 
@@ -9134,8 +9088,6 @@ msgid "Valid arguments to -mstringop-strategy=:"
 msgstr "Argumentos válidos para -mstringop-strategy=:"
 
 #: config/i386/i386.opt:486
-#, fuzzy
-#| msgid "Use given thread-local storage dialect"
 msgid "Use given thread-local storage dialect."
 msgstr "Usa el dialecto de almacenamiento thread-local dado."
 
@@ -9144,30 +9096,23 @@ msgid "Known TLS dialects (for use with the -mtls-dialect= option):"
 msgstr "Dialectos TLS conocidos (para usar con la opción -mtls-dialect=):"
 
 #: config/i386/i386.opt:500
-#, fuzzy, c-format
-#| msgid "Use direct references against %gs when accessing tls data"
+#, c-format
 msgid "Use direct references against %gs when accessing tls data."
-msgstr "Usa referencias directas contra %gs cuando se accesen datos tls."
+msgstr "Usa referencias directas contra %gs cuando se acceden datos tls."
 
 #: config/i386/i386.opt:508
 msgid "Fine grain control of tune features."
-msgstr ""
+msgstr "Control fino de las características de ajuste."
 
 #: config/i386/i386.opt:512
-#, fuzzy
-#| msgid "Allow all ugly features"
 msgid "Clear all tune features."
-msgstr "Desactiva todas las características feas."
+msgstr "Quita todas las características de ajuste."
 
 #: config/i386/i386.opt:519
-#, fuzzy
-#| msgid "Generate code that conforms to the given ABI"
 msgid "Generate code that conforms to Intel MCU psABI."
-msgstr "Genera código que cumpla con la ABI dada."
+msgstr "Genera código que cumpla con Intel MCU psABI."
 
 #: config/i386/i386.opt:523
-#, fuzzy
-#| msgid "Generate code that conforms to the given ABI"
 msgid "Generate code that conforms to the given ABI."
 msgstr "Genera código que cumpla con la ABI dada."
 
@@ -9176,8 +9121,6 @@ msgid "Known ABIs (for use with the -mabi= option):"
 msgstr "ABIs conocidas (para usar con la opción -mabi=):"
 
 #: config/i386/i386.opt:537 config/rs6000/rs6000.opt:189
-#, fuzzy
-#| msgid "Vector library ABI to use"
 msgid "Vector library ABI to use."
 msgstr "ABI de biblioteca de vectores a utilizar."
 
@@ -9186,8 +9129,6 @@ msgid "Known vectorization library ABIs (for use with the -mveclibabi= option):"
 msgstr "ABIs de biblioteca de vectorización conocidas (para usar con la opción -mveclibabi=):"
 
 #: config/i386/i386.opt:551
-#, fuzzy
-#| msgid "Return 8-byte vectors in memory"
 msgid "Return 8-byte vectors in memory."
 msgstr "Devuelve vectores de 8 bytes en memoria."
 
@@ -9207,101 +9148,72 @@ msgstr "Genera la instrucción cld en el prólogo de función."
 msgid "Generate vzeroupper instruction before a transfer of control flow out of"
 msgstr "Genera la instrucción vzeroupper antes de una transferencia de flujo de control fuera de"
 
+# TODO review
 #: config/i386/i386.opt:572
 msgid "Disable Scalar to Vector optimization pass transforming 64-bit integer"
-msgstr ""
+msgstr "Desactiva el paso de optimización de escalar a vector al transformar enteros de 64 bits"
 
 #: config/i386/i386.opt:577
-#, fuzzy
-#| msgid "Do dispatch scheduling if processor is bdver1 or bdver2 and Haifa scheduling"
 msgid "Do dispatch scheduling if processor is bdver1, bdver2, bdver3, bdver4"
-msgstr "Despacha al planificador si el procesador es bdver1 o bdver2 y la planificación es Haifa"
+msgstr "Despacha al planificador si el procesador es bdver1, bdver2, bdver3, bdver4"
 
 #: config/i386/i386.opt:582
 msgid "Use 128-bit AVX instructions instead of 256-bit AVX instructions in the auto-vectorizer."
 msgstr "Usa instrucciones AVX de 128-bit en lugar de instrucciones AVX de 256-bit en el auto-vectorizador."
 
 #: config/i386/i386.opt:588
-#, fuzzy
-#| msgid "Generate 32bit i386 code"
 msgid "Generate 32bit i386 code."
 msgstr "Genera código i386 de 32bit."
 
 #: config/i386/i386.opt:592
-#, fuzzy
-#| msgid "Generate 64bit x86-64 code"
 msgid "Generate 64bit x86-64 code."
 msgstr "Genera código x86-64 de 64bit."
 
 #: config/i386/i386.opt:596
-#, fuzzy
-#| msgid "Generate 32bit x86-64 code"
 msgid "Generate 32bit x86-64 code."
 msgstr "Genera código x86-64 de 32bit."
 
 #: config/i386/i386.opt:600
-#, fuzzy
-#| msgid "Generate 32bit i386 code"
 msgid "Generate 16bit i386 code."
-msgstr "Genera código i386 de 32bit."
+msgstr "Genera código i386 de 16bit."
 
 #: config/i386/i386.opt:604
-#, fuzzy
-#| msgid "Support MMX built-in functions"
 msgid "Support MMX built-in functions."
 msgstr "Admite funciones internas MMX."
 
 #: config/i386/i386.opt:608
-#, fuzzy
-#| msgid "Support 3DNow! built-in functions"
 msgid "Support 3DNow! built-in functions."
 msgstr "Admite funciones internas 3DNow!."
 
 #: config/i386/i386.opt:612
-#, fuzzy
-#| msgid "Support Athlon 3Dnow! built-in functions"
 msgid "Support Athlon 3Dnow! built-in functions."
 msgstr "Admite funciones internas Athlon 3DNow!."
 
 #: config/i386/i386.opt:616
-#, fuzzy
-#| msgid "Support MMX and SSE built-in functions and code generation"
 msgid "Support MMX and SSE built-in functions and code generation."
 msgstr "Admite funciones internas y generación de código MMX y SSE."
 
 #: config/i386/i386.opt:620
-#, fuzzy
-#| msgid "Support MMX, SSE and SSE2 built-in functions and code generation"
 msgid "Support MMX, SSE and SSE2 built-in functions and code generation."
 msgstr "Admite funciones internas y generación de código MMX, SSE y SSE2."
 
 #: config/i386/i386.opt:624
-#, fuzzy
-#| msgid "Support MMX, SSE, SSE2 and SSE3 built-in functions and code generation"
 msgid "Support MMX, SSE, SSE2 and SSE3 built-in functions and code generation."
 msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2 y SSE3."
 
 #: config/i386/i386.opt:628
-#, fuzzy
-#| msgid "Support MMX, SSE, SSE2, SSE3 and SSSE3 built-in functions and code generation"
 msgid "Support MMX, SSE, SSE2, SSE3 and SSSE3 built-in functions and code generation."
 msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3 y SSSE3."
 
 #: config/i386/i386.opt:632
-#, fuzzy
-#| msgid "Support MMX, SSE, SSE2, SSE3, SSSE3 and SSE4.1 built-in functions and code generation"
 msgid "Support MMX, SSE, SSE2, SSE3, SSSE3 and SSE4.1 built-in functions and code generation."
 msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3, SSSE3 y SSE4.1."
 
 #: config/i386/i386.opt:636 config/i386/i386.opt:640
-#, fuzzy
-#| msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation"
 msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 and SSE4.2 built-in functions and code generation."
 msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1 y SSE4.2."
 
 #: config/i386/i386.opt:644
-#, fuzzy
-#| msgid "Do not support SSE4.1 and SSE4.2 built-in functions and code generation"
 msgid "Do not support SSE4.1 and SSE4.2 built-in functions and code generation."
 msgstr "No admite funciones internas y generación de código SSE4.1 y SSE4.2."
 
@@ -9310,100 +9222,68 @@ msgid "%<-msse5%> was removed"
 msgstr "se eliminó %<-msse5%>"
 
 #: config/i386/i386.opt:652
-#, fuzzy
-#| msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2 and AVX built-in functions and code generation"
 msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2 and AVX built-in functions and code generation."
 msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2 y AVX."
 
 #: config/i386/i386.opt:656
-#, fuzzy
-#| msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and AVX2 built-in functions and code generation"
 msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and AVX2 built-in functions and code generation."
 msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX y AVX2."
 
 #: config/i386/i386.opt:660
-#, fuzzy
-#| msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and AVX2 built-in functions and code generation"
 msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F built-in functions and code generation."
-msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX y AVX2."
+msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 y AVX512F."
 
 #: config/i386/i386.opt:664
-#, fuzzy
-#| msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and AVX2 built-in functions and code generation"
 msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512PF built-in functions and code generation."
-msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX y AVX2."
+msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F y AVX512PF."
 
 #: config/i386/i386.opt:668
-#, fuzzy
-#| msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and AVX2 built-in functions and code generation"
 msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512ER built-in functions and code generation."
-msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX y AVX2."
+msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F y AVX512ER."
 
 #: config/i386/i386.opt:672
-#, fuzzy
-#| msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and AVX2 built-in functions and code generation"
 msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512CD built-in functions and code generation."
-msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX y AVX2."
+msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F y AVX512CD."
 
 #: config/i386/i386.opt:676
-#, fuzzy
-#| msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and AVX2 built-in functions and code generation"
 msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512DQ built-in functions and code generation."
-msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX y AVX2."
+msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F y AVX512DQ."
 
 #: config/i386/i386.opt:680
-#, fuzzy
-#| msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and AVX2 built-in functions and code generation"
 msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512BW built-in functions and code generation."
-msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX y AVX2."
+msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F y AVX512BW."
 
 #: config/i386/i386.opt:684
-#, fuzzy
-#| msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and AVX2 built-in functions and code generation"
 msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512VL built-in functions and code generation."
-msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX y AVX2."
+msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F y AVX512VL."
 
 #: config/i386/i386.opt:688
-#, fuzzy
-#| msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and AVX2 built-in functions and code generation"
 msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512IFMA built-in functions and code generation."
-msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX y AVX2."
+msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F y AVX512IFMA."
 
 #: config/i386/i386.opt:692
-#, fuzzy
-#| msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and AVX2 built-in functions and code generation"
 msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and AVX512F and AVX512VBMI built-in functions and code generation."
-msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX y AVX2."
+msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX512F y AVX512VBMI."
 
 #: config/i386/i386.opt:696
-#, fuzzy
-#| msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and FMA built-in functions and code generation"
 msgid "Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX and FMA built-in functions and code generation."
 msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX y FMA."
 
 #: config/i386/i386.opt:700
-#, fuzzy
-#| msgid "Support MMX, SSE, SSE2, SSE3 and SSE4A built-in functions and code generation"
 msgid "Support MMX, SSE, SSE2, SSE3 and SSE4A built-in functions and code generation."
 msgstr "Admite funciones internas y generación de código MMX, SSE, SSE2, SSE3 y SSE4A."
 
 #: config/i386/i386.opt:704
-#, fuzzy
-#| msgid "Support FMA4 built-in functions and code generation "
 msgid "Support FMA4 built-in functions and code generation."
-msgstr "Admite funciones internas FMA4 y generación de código ."
+msgstr "Admite funciones internas FMA4 y generación de código."
 
 #: config/i386/i386.opt:708
-#, fuzzy
-#| msgid "Support XOP built-in functions and code generation "
 msgid "Support XOP built-in functions and code generation."
-msgstr "Admite funciones internas XOP y generación de código ."
+msgstr "Admite funciones internas XOP y generación de código."
 
 #: config/i386/i386.opt:712
-#, fuzzy
-#| msgid "Support LWP built-in functions and code generation "
 msgid "Support LWP built-in functions and code generation."
-msgstr "Admite funciones internas LWP y generación de código ."
+msgstr "Admite funciones internas LWP y generación de código."
 
 #: config/i386/i386.opt:716
 msgid "Support code generation of Advanced Bit Manipulation (ABM) instructions."
@@ -9414,36 +9294,28 @@ msgid "Support code generation of popcnt instruction."
 msgstr "Admite la generación de código de la instrucción popcnt."
 
 #: config/i386/i386.opt:724
-#, fuzzy
-#| msgid "Support BMI built-in functions and code generation"
 msgid "Support BMI built-in functions and code generation."
 msgstr "Admite funciones internas y generación de código BMI."
 
 #: config/i386/i386.opt:728
-#, fuzzy
-#| msgid "Support BMI2 built-in functions and code generation"
 msgid "Support BMI2 built-in functions and code generation."
 msgstr "Admite funciones internas y generación de código BMI2."
 
 #: config/i386/i386.opt:732
-#, fuzzy
-#| msgid "Support LZCNT built-in function and code generation"
 msgid "Support LZCNT built-in function and code generation."
 msgstr "Admite funciones internas y generación de código LZCNT."
 
 #: config/i386/i386.opt:736
 msgid "Support Hardware Lock Elision prefixes."
-msgstr ""
+msgstr "Admite prefijos de elisión de bloqueo de hardware."
 
 #: config/i386/i386.opt:740
-#, fuzzy
-#| msgid "no support for induction"
 msgid "Support RDSEED instruction."
-msgstr "no se admite la inducción."
+msgstr "Admite la instrucción RDSEED."
 
 #: config/i386/i386.opt:744
 msgid "Support PREFETCHW instruction."
-msgstr ""
+msgstr "Admite la instrucción PREFETCHW."
 
 #: config/i386/i386.opt:748
 #, fuzzy
@@ -9453,49 +9325,37 @@ msgstr "Admite la generación de código de la instrucción crc32."
 
 #: config/i386/i386.opt:752
 msgid "Support CLFLUSHOPT instructions."
-msgstr ""
+msgstr "Admite las instrucciones CLFLUSHOPT."
 
 #: config/i386/i386.opt:756
-#, fuzzy
-#| msgid "no support for induction"
 msgid "Support CLWB instruction."
-msgstr "no se admite la inducción."
+msgstr "Admite la instrucción CLWB."
 
 #: config/i386/i386.opt:760
-#, fuzzy
-#| msgid "Support MMX built-in functions"
 msgid "Support PCOMMIT instruction."
-msgstr "Admite funciones internas MMX."
+msgstr "Admite la instrucción PCOMMIT."
 
 #: config/i386/i386.opt:764
 msgid "Support FXSAVE and FXRSTOR instructions."
-msgstr ""
+msgstr "Admite las instrucciones FXSAVE y FXRSTOR."
 
 #: config/i386/i386.opt:768
-#, fuzzy
-#| msgid "Support code generation of movbe instruction."
 msgid "Support XSAVE and XRSTOR instructions."
-msgstr "Admite la generación de código de la instrucción movbe."
+msgstr "Admite las instrucciones XSAVE y XRSTOR."
 
 #: config/i386/i386.opt:772
-#, fuzzy
-#| msgid "Support MMX built-in functions"
 msgid "Support XSAVEOPT instruction."
-msgstr "Admite funciones internas MMX."
+msgstr "Admite la instrucción XSAVEOPT."
 
 #: config/i386/i386.opt:776
-#, fuzzy
-#| msgid "Support MMX built-in functions"
 msgid "Support XSAVEC instructions."
-msgstr "Admite funciones internas MMX."
+msgstr "Admite las instrucciones XSAVEC."
 
 #: config/i386/i386.opt:780
 msgid "Support XSAVES and XRSTORS instructions."
-msgstr ""
+msgstr "Admite las instrucciones XSAVES y XRSTORS."
 
 #: config/i386/i386.opt:784
-#, fuzzy
-#| msgid "Support TBM built-in functions and code generation"
 msgid "Support TBM built-in functions and code generation."
 msgstr "Admite funciones internas y generación de código TBM."
 
@@ -9516,52 +9376,36 @@ msgid "Support code generation of crc32 instruction."
 msgstr "Admite la generación de código de la instrucción crc32."
 
 #: config/i386/i386.opt:804
-#, fuzzy
-#| msgid "Support AES built-in functions and code generation"
 msgid "Support AES built-in functions and code generation."
 msgstr "Admite funciones internas y generación de código AES."
 
 #: config/i386/i386.opt:808
-#, fuzzy
-#| msgid "Support MMX and SSE built-in functions and code generation"
 msgid "Support SHA1 and SHA256 built-in functions and code generation."
-msgstr "Admite funciones internas y generación de código MMX y SSE."
+msgstr "Admite funciones internas y generación de código SHA1 y SHA256."
 
 #: config/i386/i386.opt:812
-#, fuzzy
-#| msgid "Support PCLMUL built-in functions and code generation"
 msgid "Support PCLMUL built-in functions and code generation."
 msgstr "Admite funciones internas y generación de código PCLMUL."
 
 #: config/i386/i386.opt:816
-#, fuzzy
-#| msgid "Encode SSE instructions with VEX prefix"
 msgid "Encode SSE instructions with VEX prefix."
 msgstr "Codifica las instrucciones SSE con el prefijo VEX."
 
 #: config/i386/i386.opt:820
-#, fuzzy
-#| msgid "Support FSGSBASE built-in functions and code generation"
 msgid "Support FSGSBASE built-in functions and code generation."
 msgstr "Admite funciones internas y generación de código FSGSBASE."
 
 #: config/i386/i386.opt:824
-#, fuzzy
-#| msgid "Support RDRND built-in functions and code generation"
 msgid "Support RDRND built-in functions and code generation."
 msgstr "Admite funciones internas y generación de código RDRND."
 
 #: config/i386/i386.opt:828
-#, fuzzy
-#| msgid "Support F16C built-in functions and code generation"
 msgid "Support F16C built-in functions and code generation."
 msgstr "Admite funciones internas y generación de código F16C."
 
 #: config/i386/i386.opt:832
-#, fuzzy
-#| msgid "Support F16C built-in functions and code generation"
 msgid "Support PREFETCHWT1 built-in functions and code generation."
-msgstr "Admite funciones internas y generación de código F16C."
+msgstr "Admite funciones internas y generación de código PREFETCHWT1."
 
 #: config/i386/i386.opt:836
 msgid "Emit profiling counter call at function entry before prologue."
@@ -9569,77 +9413,59 @@ msgstr "Emite llamada a contador de perfil en la entrada de función antes del p
 
 #: config/i386/i386.opt:840
 msgid "Generate __mcount_loc section with all mcount or __fentry__ calls."
-msgstr ""
+msgstr "Genera sección __mcount_loc con todos los mcount o llamadas __fentry__."
 
 #: config/i386/i386.opt:844
 msgid "Generate mcount/__fentry__ calls as nops. To activate they need to be"
-msgstr ""
+msgstr "Genera llamadas mcount/__fentry__ como nops. Para activarlo han de ser"
 
 #: config/i386/i386.opt:849
 msgid "Skip setting up RAX register when passing variable arguments."
-msgstr ""
+msgstr "Salta configurar registro RAX cuando se pasan argumentos variables."
 
 #: config/i386/i386.opt:853
-#, fuzzy
-#| msgid "Expand 32bit/64bit integer divide into 8bit unsigned integer divide with run-time check"
 msgid "Expand 32bit/64bit integer divide into 8bit unsigned integer divide with run-time check."
 msgstr "Expande la división entera de 32bit/64bit en división entera sin signo de 8bit con revisión en tiempo de ejecución."
 
 #: config/i386/i386.opt:857
-#, fuzzy
-#| msgid "Split 32-byte AVX unaligned load"
 msgid "Split 32-byte AVX unaligned load."
 msgstr "Divide load sin alinear AVX de 32-byte."
 
 #: config/i386/i386.opt:861
-#, fuzzy
-#| msgid "Split 32-byte AVX unaligned store"
 msgid "Split 32-byte AVX unaligned store."
 msgstr "Divide store sin alinear AVX de 32-byte."
 
 #: config/i386/i386.opt:865
-#, fuzzy
-#| msgid "Support TBM built-in functions and code generation"
 msgid "Support RTM built-in functions and code generation."
-msgstr "Admite funciones internas y generación de código TBM."
+msgstr "Admite funciones internas y generación de código RTM."
 
 #: config/i386/i386.opt:869
-#, fuzzy
-#| msgid "Support code generation of movbe instruction."
 msgid "Support MPX code generation."
-msgstr "Admite la generación de código de la instrucción movbe."
+msgstr "Admite la generación de código MPX."
 
 #: config/i386/i386.opt:873
-#, fuzzy
-#| msgid "Support MMX and SSE built-in functions and code generation"
 msgid "Support MWAITX and MONITORX built-in functions and code generation."
-msgstr "Admite funciones internas y generación de código MMX y SSE."
+msgstr "Admite funciones internas y generación de código MWAITX y MONITORX."
 
 #: config/i386/i386.opt:877
-#, fuzzy
-#| msgid "Support AES built-in functions and code generation"
 msgid "Support CLZERO built-in functions and code generation."
-msgstr "Admite funciones internas y generación de código AES."
+msgstr "Admite funciones internas y generación de código CLZERO."
 
 #: config/i386/i386.opt:881
-#, fuzzy
-#| msgid "Support PCLMUL built-in functions and code generation"
 msgid "Support PKU built-in functions and code generation."
-msgstr "Admite funciones internas y generación de código PCLMUL."
+msgstr "Admite funciones internas y generación de código PKU."
 
 #: config/i386/i386.opt:885
 msgid "Use given stack-protector guard."
-msgstr ""
+msgstr "Usa la guarda de protección de la pila dada."
 
 #: config/i386/i386.opt:889
-#, fuzzy
-#| msgid "Known ARM architectures (for use with the -march= option):"
 msgid "Known stack protector guard (for use with the -mstack-protector-guard= option):"
-msgstr "Arquitecturas ARM conocidas (para usar con la opción -march=):"
+msgstr "Guarda de protección de pila conocida (para usar con la opción -mstack-protector-guard=):"
 
 #: config/i386/i386.opt:899
 msgid "Attempt to avoid generating instruction sequences containing ret bytes."
-msgstr ""
+msgstr "Trata de evitar que se generen secuencias de instrucciones que contengan ret bytes."
 
 #: config/i386/stringop.opt:8
 msgid "the Free Software Foundation; either version 3, or (at your option)"
diff --git a/gcc/po/fr.po b/gcc/po/fr.po
index 6b7518c..96a8789 100644
--- a/gcc/po/fr.po
+++ b/gcc/po/fr.po
@@ -133,7 +133,7 @@ msgstr ""
 "Project-Id-Version: gcc 6.2.0\n"
 "Report-Msgid-Bugs-To: http://gcc.gnu.org/bugs.html\n"
 "POT-Creation-Date: 2016-08-19 21:03+0000\n"
-"PO-Revision-Date: 2016-11-27 16:58+0100\n"
+"PO-Revision-Date: 2016-12-03 14:29+0100\n"
 "Last-Translator: Frédéric Marchal <fmarchal@perso.be>\n"
 "Language-Team: French <traduc@traduc.org>\n"
 "Language: fr\n"
@@ -2284,336 +2284,327 @@ msgstr "Le nombre de registres de chaque classe laissés inutilisés par les dé
 #: params.def:814
 #, no-c-format
 msgid "The max number of reload pseudos which are considered during spilling a non-reload pseudo."
-msgstr "Le nombre maximum de pseudos qui peuvent être rechargés qui doivent être considérés pendant le versage de pseudos qui ne peuvent pas être rechargés."
+msgstr "Le nombre maximum de pseudos rechargeables qui doivent être considérés pendant le versage de pseudos qui ne peuvent pas être rechargés."
 
 #: params.def:819
 #, no-c-format
 msgid "Minimal fall-through edge probability in percentage used to add BB to inheritance EBB in LRA."
-msgstr ""
+msgstr "La probabilité minimale, en pourcents, qu'une arête enchaîne immédiatement sur une autre pour décider d'ajouter un BB à un EBB d'héritage dans LRA."
 
 #: params.def:827
 #, no-c-format
 msgid "The maximum ratio between array size and switch branches for a switch conversion to take place."
-msgstr ""
+msgstr "Le rapport maximum entre la taille du tableau et les branchements du switch pour qu'une conversion du switch ait lieu."
 
 #: params.def:835
 #, no-c-format
 msgid "size of tiles for loop blocking."
-msgstr ""
+msgstr "la taille des blocs pour le découpage de boucle en blocs (loop blocking)."
 
 #: params.def:842
 #, no-c-format
 msgid "maximum number of parameters in a SCoP."
-msgstr "Le nombre maximum de paramètres dans un scope."
+msgstr "le nombre maximum de paramètres dans un SCoP."
 
 #: params.def:849
 #, no-c-format
 msgid "maximum number of basic blocks per function to be analyzed by Graphite."
-msgstr ""
+msgstr "le nombre maximum de blocs de base par fonction à analyser par Graphite."
 
 #: params.def:856
 #, no-c-format
 msgid "maximum number of arrays per scop."
-msgstr "Le nombre maximum de tableaux par scope."
+msgstr "le nombre maximum de tableaux par scop."
 
 #: params.def:863
 #, no-c-format
 msgid "minimal number of loops per function to be analyzed by Graphite."
-msgstr ""
+msgstr "le nombre minimum de boucles par fonction à analyser par Graphite."
 
 #: params.def:868
 #, no-c-format
 msgid "maximum number of isl operations, 0 means unlimited"
-msgstr "Le nombre maximum d'opérations isl, 0 pour illimité"
+msgstr "le nombre maximum d'opérations isl, 0 pour illimité"
 
 #: params.def:874
 #, no-c-format
 msgid "Maximum number of datarefs in loop for building loop data dependencies."
-msgstr ""
+msgstr "Le nombre maximum de datarefs dans une boucle pour construire les dépendances de données de la boucle."
 
 #: params.def:881
 #, no-c-format
 msgid "Max basic blocks number in loop for loop invariant motion."
-msgstr ""
+msgstr "Le nombre max de blocs de base dans une boucle pour un déplacement de boucle invariant."
 
 #: params.def:889
-#, fuzzy, no-c-format
-#| msgid "cannot find file for class %s"
+#, no-c-format
 msgid "use internal function id in profile lookup."
-msgstr "Ne peut repérer le fichier pour la classe %s."
+msgstr "utiliser un ID de fonction interne dans la recherche de profilage."
 
 #: params.def:897
 #, no-c-format
 msgid "track topn target addresses in indirect-call profile."
-msgstr ""
+msgstr "pister les N adresses cibles les plus fréquentes dans le profilage d'appels indirects."
 
 #: params.def:903
-#, fuzzy, no-c-format
-#| msgid "The maximum number of instructions in a single function eligible for inlining"
+#, no-c-format
 msgid "Maximum number of instructions in basic block to be considered for SLP vectorization."
-msgstr "Le nombre maximum d'instructions dans une fonction simple éligible au type enligne"
+msgstr "Le nombre maximum d'instructions dans un bloc de base à considérer pour la vectorisation SLP."
 
 #: params.def:908
 #, no-c-format
 msgid "Min. ratio of insns to prefetches to enable prefetching for a loop with an unknown trip count."
-msgstr ""
+msgstr "Le rapport min entre les insns et les pré-extractions pour activer la pré-extraction dans une boucle avec un nombre inconnu d'itérations."
 
 #: params.def:914
 #, no-c-format
 msgid "Min. ratio of insns to mem ops to enable prefetching in a loop."
-msgstr ""
+msgstr "Le rapport min entre les insns et les opérandes mémoire pour activer la pré-extraction dans une boucle."
 
 #: params.def:921
 #, no-c-format
 msgid "Max. size of var tracking hash tables."
-msgstr ""
+msgstr "La taille max des tables de hachage pour le pistage des variables."
 
 #: params.def:929
 #, no-c-format
 msgid "Max. recursion depth for expanding var tracking expressions."
-msgstr ""
+msgstr "La profondeur de récursion max pour développer les expressions de pistage de variables."
 
 #: params.def:937
 #, no-c-format
 msgid "Max. size of loc list for which reverse ops should be added."
-msgstr ""
+msgstr "La taille max de la liste des emplacements pour laquelle des opérations inverses devraient être ajoutées."
 
 #: params.def:944
 #, no-c-format
 msgid "The minimum UID to be used for a nondebug insn."
-msgstr ""
+msgstr "Le UID minimum à utiliser pour une insn qui n'est pas pour le débogage."
 
 #: params.def:949
 #, no-c-format
 msgid "Maximum allowed growth of size of new parameters ipa-sra replaces a pointer to an aggregate with."
-msgstr ""
+msgstr "La croissance maximale de la taille autorisée quand IPA-SRA remplace un pointeur vers un agrégat par de nouveaux paramètres."
 
 #: params.def:955
 #, no-c-format
 msgid "Size in bytes after which thread-local aggregates should be instrumented with the logging functions instead of save/restore pairs."
-msgstr ""
+msgstr "La taille en octets au-delà de laquelle des agrégats locaux au thread doivent être manipulés par les fonctions de journalisation au lieu de paires de save/restore."
 
 #: params.def:962
 #, no-c-format
 msgid "Maximum size, in storage units, of an aggregate which should be considered for scalarization when compiling for speed."
-msgstr ""
+msgstr "La taille maximale, en unité de stockage, d'un agrégat qui devrait être considéré pour une conversion scalaire lors d'une compilation pour la vitesse."
 
 #: params.def:968
 #, no-c-format
 msgid "Maximum size, in storage units, of an aggregate which should be considered for scalarization when compiling for size."
-msgstr ""
+msgstr "La taille maximale, en unité de stockage, d'un agrégat qui devrait être considéré pour une conversion scalaire lors d'une compilation pour l'espace."
 
 #: params.def:974
 #, no-c-format
 msgid "Maximum size of a list of values associated with each parameter for interprocedural constant propagation."
-msgstr ""
+msgstr "La taille maximale d'une liste de valeurs associées à chaque paramètre pour la propagation de constantes interprocédurale."
 
 #: params.def:980
 #, no-c-format
 msgid "Threshold ipa-cp opportunity evaluation that is still considered beneficial to clone.."
-msgstr ""
+msgstr "Le seuil de l'évaluation de l'opportunité de IPA-CP qui est encore considérée comme bénéfique au clone."
 
 #: params.def:986
 #, no-c-format
 msgid "Percentage penalty the recursive functions will receive when they are evaluated for cloning.."
-msgstr ""
+msgstr "Le pourcentage de pénalité que des fonctions récursives recevront quand elles seront évaluées pour un clonage."
 
 #: params.def:992
 #, no-c-format
 msgid "Percentage penalty functions containg a single call to another function will receive when they are evaluated for cloning.."
-msgstr ""
+msgstr "Le pourcentage de pénalité que des fonctions contenant un unique appel à une autre fonction recevront quand elles seront évaluées pour un clonage."
 
 #: params.def:998
 #, no-c-format
 msgid "Maximum number of aggregate content items for a parameter in jump functions and lattices."
-msgstr ""
+msgstr "Le nombre maximum d'éléments contenus dans un agrégat d'un paramètre dans des fonctions de saut ou des maillages."
 
 #: params.def:1004
 #, no-c-format
 msgid "Compile-time bonus IPA-CP assigns to candidates which make loop bounds or strides known.."
-msgstr ""
+msgstr "Bonus à la compilation que IPA-CP assigne aux candidats qui rendent les limites ou les pas des boucles connus."
 
 #: params.def:1010
 #, no-c-format
 msgid "Compile-time bonus IPA-CP assigns to candidates which make an array index known.."
-msgstr ""
+msgstr "Bonus à la compilation que IPA-CP assigne aux candidats qui rendent connu l'index dans un tableau."
 
 #: params.def:1016
 #, no-c-format
 msgid "Maximum number of statements that will be visited by IPA formal parameter analysis based on alias analysis in any given function."
-msgstr ""
+msgstr "Le nombre maximum d'expressions qui seront visitées par l'analyse des paramètres formels de IPA basée sur l'analyse d'alias dans une fonction quelconque."
 
 #: params.def:1024
 #, no-c-format
 msgid "Number of partitions the program should be split to."
-msgstr ""
+msgstr "Le nombre de partitions dans lesquelles il faudrait scinder le programme."
 
 #: params.def:1029
 #, no-c-format
 msgid "Minimal size of a partition for LTO (in estimated instructions)."
-msgstr ""
+msgstr "La taille minimale d'une partition pour LTO (en instructions estimées)."
 
 #: params.def:1036
-#, fuzzy, no-c-format
-#| msgid "The maximum number of instructions to consider to fill a delay slot"
+#, no-c-format
 msgid "Maximum number of namespaces to search for alternatives when name lookup fails."
-msgstr "Le nombre maximum d'instructions à considérer pour remplir une slot délai"
+msgstr "Le nombre maximum d'espaces de noms à parcourir pour chercher des alternatives quand la recherche de nom a échoué."
 
 #: params.def:1043
 #, no-c-format
 msgid "Maximum number of conditional store pairs that can be sunk."
-msgstr ""
+msgstr "Le nombre maximum de paires de stockage conditionnel qui peuvent être descendues."
 
 #: params.def:1051
 #, no-c-format
 msgid "The smallest number of different values for which it is best to use a jump-table instead of a tree of conditional branches, if 0, use the default for the machine."
-msgstr ""
+msgstr "Le plus petit nombre de valeurs différentes pour lequel il est préférable d'utiliser une table de sauts plutôt qu'un arbre de branchements conditionnels. Si 0, utilise le choix par défaut pour la machine."
 
 #: params.def:1059
 #, no-c-format
 msgid "Allow new data races on stores to be introduced."
-msgstr ""
+msgstr "Permet l'introduction de compétitions sur de nouvelles données durant un stockage."
 
 #: params.def:1065
 #, no-c-format
 msgid "Set the maximum number of instructions executed in parallel in reassociated tree. If 0, use the target dependent heuristic.."
-msgstr ""
+msgstr "Fixe le nombre maximum d'instructions exécutées en parallèle dans un arbre ré-associé. Si 0, utilise l'heuristique dépendant de la cible."
 
 #: params.def:1071
 #, no-c-format
 msgid "Maximum amount of similar bbs to compare a bb with."
-msgstr ""
+msgstr "La quantité maximale de bbs similaires à comparer avec un bb."
 
 #: params.def:1076
 #, no-c-format
 msgid "Maximum amount of iterations of the pass over a function."
-msgstr ""
+msgstr "Le nombre maximum d'itérations du passage sur une fonction."
 
 #: params.def:1083
 #, no-c-format
 msgid "Maximum number of strings for which strlen optimization pass will track string lengths."
-msgstr ""
+msgstr "Le nombre maximum de chaînes pour lesquelles l'optimisation de strlen conserve la longueur des chaînes."
 
 #: params.def:1090
 #, no-c-format
 msgid "Which -fsched-pressure algorithm to apply."
-msgstr ""
+msgstr "Quel algorithme de -fsched-pressure appliquer."
 
 #: params.def:1096
-#, fuzzy, no-c-format
-#| msgid "Perform strength reduction optimizations"
+#, no-c-format
 msgid "Maximum length of candidate scans for straight-line strength reduction."
-msgstr "Exécuter un réduction en force des optimisations"
+msgstr "La longueur maximale de la recherche dans les candidats pour la simplification des instructions dupliquées lors de l'aplatissage d'une boucle (Straight-Line Strength Reduction)."
 
 #: params.def:1102
 #, no-c-format
 msgid "Enable asan stack protection."
-msgstr "Autoriser la protection de pile asan"
+msgstr "Autoriser ASan à protéger la pile."
 
 #: params.def:1107
 #, no-c-format
 msgid "Enable asan globals protection."
-msgstr "Autoriser la protection des constantes asan."
+msgstr "Autoriser ASan à protéger les globales."
 
 #: params.def:1112
-#, fuzzy, no-c-format
-#| msgid "Enable parallel instructions"
+#, no-c-format
 msgid "Enable asan store operations protection."
-msgstr "Autoriser les instructions parallèles"
+msgstr "Autoriser ASan à protéger les opérations d'écriture."
 
 #: params.def:1117
-#, fuzzy, no-c-format
-#| msgid "Enable parallel instructions"
+#, no-c-format
 msgid "Enable asan load operations protection."
-msgstr "Autoriser les instructions parallèles"
+msgstr "Autoriser ASan à protéger les opérations de chargement."
 
 #: params.def:1122
-#, fuzzy, no-c-format
-#| msgid "Enable function profiling"
+#, no-c-format
 msgid "Enable asan builtin functions protection."
-msgstr "Autoriser le profilage de fonction"
+msgstr "Autoriser ASan à protéger les fonctions intégrées."
 
 #: params.def:1127
 #, no-c-format
 msgid "Enable asan detection of use-after-return bugs."
-msgstr ""
+msgstr "Autoriser la détection par ASan des bogues d'utilisation après retour."
 
 #: params.def:1132
 #, no-c-format
 msgid "Use callbacks instead of inline code if number of accesses in function becomes greater or equal to this number."
-msgstr ""
+msgstr "Utiliser une fonction de rappel au lieu de code en ligne si le nombre d'accès dans la fonction devient plus grand ou égal à ce nombre."
 
 #: params.def:1138
 #, no-c-format
 msgid "Maximum number of nested calls to search for control dependencies during uninitialized variable analysis."
-msgstr ""
+msgstr "Nombre maximum d'appels imbriqués pour rechercher une dépendance de contrôle pendant l'analyse d'une variable non initialisée."
 
 #: params.def:1144
 #, no-c-format
 msgid "Maximum number of statements to be included into a single static constructor generated by Pointer Bounds Checker."
-msgstr ""
+msgstr "Le nombre maximum d'expressions à inclure dans un seul constructeur statique généré par Pointer Bounds Checker."
 
 #: params.def:1150
 #, no-c-format
 msgid "Scale factor to apply to the number of statements in a threading path when comparing to the number of (scaled) blocks."
-msgstr ""
+msgstr "Facteur d'échelle à appliquer au nombre d'expressions dans un chemin de sauts enchaînés lors de la comparaison avec le nombre de blocs (mis à l'échelle)."
 
 #: params.def:1155
 #, no-c-format
 msgid "Maximum number of arguments a PHI may have before the FSM threader will not try to thread through its block."
-msgstr ""
+msgstr "Le nombre maximum d'arguments qu'un PHI peut avoir avant que le FSM qui est responsable de l'enchaînement arrête d'essayer d'enchaîner les sauts dans ses blocs."
 
 #: params.def:1160
 #, no-c-format
 msgid "Scale factor to apply to the number of blocks in a threading path when comparing to the number of (scaled) statements."
-msgstr ""
+msgstr "Facteur d'échelle à appliquer au nombre de blocs dans un chemin de sauts enchaînés lors de la comparaison avec le nombre d'expressions (mises à l'échelle)."
 
 #: params.def:1165
 #, no-c-format
 msgid "Maximum number of instructions to copy when duplicating blocks on a finite state automaton jump thread path."
-msgstr ""
+msgstr "Le nombre maximum d'instructions à copier en dupliquant des blocs le long du chemin de sauts enchaînés d'un automate à états finis."
 
 #: params.def:1170
 #, no-c-format
 msgid "Maximum number of basic blocks on a finite state automaton jump thread path."
-msgstr ""
+msgstr "Le nombre maximum de blocs de base sur le chemin de sauts enchaînés d'un automate à états finis."
 
 #: params.def:1175
-#, fuzzy, no-c-format
-#| msgid "The maximum number of instructions to consider to fill a delay slot"
+#, no-c-format
 msgid "Maximum number of new jump thread paths to create for a finite state automaton."
-msgstr "Le nombre maximum d'instructions à considérer pour remplir une slot délai"
+msgstr "Le nombre maximum de nouveaux chemins pour enchaîner des sauts pour un automate à états finis."
 
 #: params.def:1180
 #, no-c-format
 msgid "Chunk size of omp schedule for loops parallelized by parloops."
-msgstr ""
+msgstr "Taille des fragments de l'ordonnanceur OMP pour des boucles parallélisées par parloops."
 
 #: params.def:1185
 #, no-c-format
 msgid "Schedule type of omp schedule for loops parallelized by parloops (static, dynamic, guided, auto, runtime)."
-msgstr ""
+msgstr "Type d'ordonnancement de l'ordonnanceur OMP pour des boucles parallélisées par parloops (static, dynamic, guided, auto, runtime)."
 
 #: params.def:1192
 #, no-c-format
 msgid "Maximum recursion depth allowed when querying a property of an SSA name."
-msgstr ""
+msgstr "La profondeur de récursion maximum permise en interrogeant une propriété d'un nom SSA."
 
 #: params.def:1198
-#, fuzzy, no-c-format
-#| msgid "The maximum number of instructions in a single function eligible for inlining"
+#, no-c-format
 msgid "Maximum number of insns in a basic block to consider for RTL if-conversion."
-msgstr "Le nombre maximum d'instructions dans une fonction simple éligible au type enligne"
+msgstr "Le nombre maximum d'insns dans un bloc de base pour considérer une conversion du if par RTL."
 
 #: params.def:1204
 #, no-c-format
 msgid "Level of hsa debug stores verbosity"
-msgstr ""
+msgstr "Le niveau de verbosité des stockages de débogage de hsa"
 
 #: params.def:1209
 #, no-c-format
 msgid "Maximum number of may-defs visited when devirtualizing speculatively"
-msgstr ""
+msgstr "Le nombre maximum de may-defs visités lors d'une dévirtualisation spéculative"
 
 #: c-family/c-format.c:417
 msgid "format"
@@ -2729,7 +2720,7 @@ msgstr "fanion « q »"
 
 #: c-family/c-format.c:593
 msgid "the 'q' diagnostic flag"
-msgstr "le fanion « d » de diagnostique"
+msgstr "le fanion « q » de diagnostique"
 
 #: c-family/c-format.c:606 config/i386/msformat-c.c:63
 msgid "assignment suppression"
@@ -2737,7 +2728,7 @@ msgstr "suppression d'affectation"
 
 #: c-family/c-format.c:606 config/i386/msformat-c.c:63
 msgid "the assignment suppression scanf feature"
-msgstr "options de scanf pour la suppression d'affectation"
+msgstr "la fonctionnalité de suppression d'affectation de scanf"
 
 #: c-family/c-format.c:607 config/i386/msformat-c.c:64
 msgid "'a' flag"
@@ -2865,19 +2856,19 @@ msgstr "largeur de champ dans le format de strfmon"
 
 #: c-family/c-format.c:660
 msgid "left precision"
-msgstr "précision de gauche"
+msgstr "précision à gauche"
 
 #: c-family/c-format.c:660
 msgid "left precision in strfmon format"
-msgstr "précision de gauche dans le format de strfmon"
+msgstr "précision à gauche dans le format de strfmon"
 
 #: c-family/c-format.c:661
 msgid "right precision"
-msgstr "précision de droite"
+msgstr "précision à droite"
 
 #: c-family/c-format.c:661
 msgid "right precision in strfmon format"
-msgstr "précision de droite dans le format de strfmon"
+msgstr "précision à droite dans le format de strfmon"
 
 #: c-family/c-format.c:662
 msgid "length modifier in strfmon format"
@@ -2892,7 +2883,7 @@ msgstr "<ligne-de-commande>"
 #: config/arm/arm.c:21997 config/nios2/nios2.c:2642
 #, c-format
 msgid "Unsupported operand for code '%c'"
-msgstr "opérande non supportée pour le code « %c »"
+msgstr "Opérande non supporté pour le code « %c »"
 
 #: config/aarch64/aarch64.c:4463 config/aarch64/aarch64.c:4479
 #: config/aarch64/aarch64.c:4492 config/aarch64/aarch64.c:4504
@@ -2900,18 +2891,18 @@ msgstr "opérande non supportée pour le code « %c »"
 #: config/aarch64/aarch64.c:4591 config/aarch64/aarch64.c:4794
 #, c-format
 msgid "invalid operand for '%%%c'"
-msgstr "opérande invalide pour « %%%c »"
+msgstr "opérande invalide pour « %%%c »"
 
 #: config/aarch64/aarch64.c:4558 config/aarch64/aarch64.c:4571
 #: config/aarch64/aarch64.c:4581
 #, c-format
 msgid "incompatible floating point / vector register operand for '%%%c'"
-msgstr ""
+msgstr "opérande en virgule flottante ou registre vecteur incompatible pour « %%%c »"
 
 #: config/aarch64/aarch64.c:4627 config/arm/arm.c:22504
 #, c-format
 msgid "missing operand"
-msgstr "opérande manquante"
+msgstr "opérande manquant"
 
 #: config/aarch64/aarch64.c:4689
 #, c-format
@@ -2926,13 +2917,13 @@ msgstr "opérande invalide"
 #: config/aarch64/aarch64.c:4805
 #, c-format
 msgid "invalid operand prefix '%%%c'"
-msgstr "préfixe d'opérande invalide « %%%c »"
+msgstr "préfixe d'opérande invalide « %%%c »"
 
 #: config/alpha/alpha.c:5102 config/i386/i386.c:17140
 #: config/rs6000/rs6000.c:21150 config/sparc/sparc.c:8749
 #, c-format
 msgid "'%%&' used without any local dynamic TLS references"
-msgstr ""
+msgstr "« %%& » utilisé sans référence à un TLS dynamique local"
 
 #: config/alpha/alpha.c:5160 config/bfin/bfin.c:1423
 #, c-format
@@ -3078,16 +3069,14 @@ msgid "invalid shift operand"
 msgstr "opérande shift invalide"
 
 #: config/arm/arm.c:21835 config/arm/arm.c:21853
-#, fuzzy, c-format
-#| msgid "Generate char instructions"
+#, c-format
 msgid "predicated Thumb instruction"
-msgstr "Générer des instructions « char »"
+msgstr "instruction Thumb établie"
 
 #: config/arm/arm.c:21841
-#, fuzzy, c-format
-#| msgid "ret instruction not implemented"
+#, c-format
 msgid "predicated instruction in conditional sequence"
-msgstr "instruction ret n'est pas implantée"
+msgstr "instruction établie dans la séquence conditionnelle"
 
 #: config/arm/arm.c:22074 config/arm/arm.c:22096 config/arm/arm.c:22106
 #: config/arm/arm.c:22116 config/arm/arm.c:22126 config/arm/arm.c:22165
@@ -3113,66 +3102,53 @@ msgstr "instruction jamais exécutée"
 #: config/arm/arm.c:22199
 #, c-format
 msgid "obsolete Maverick format code '%c'"
-msgstr ""
+msgstr "code de format Maverick « %c » obsolète"
 
 #: config/arm/arm.c:23618
-#, fuzzy
-#| msgid "function returns an aggregate"
 msgid "function parameters cannot have __fp16 type"
-msgstr "fonction retourne un agrégat"
+msgstr "les paramètres de fonction ne peuvent pas avoir le type __fp16"
 
 #: config/arm/arm.c:23628
-#, fuzzy
-#| msgid "function does not return string type"
 msgid "functions cannot return __fp16 type"
-msgstr "fonction ne retourne pas un type « string »"
+msgstr "les fonctions ne peuvent pas retourner le type __fp16"
 
 #: config/avr/avr.c:2124
-#, fuzzy, c-format
-#| msgid "read-write constraint does not allow a register"
+#, c-format
 msgid "address operand requires constraint for X, Y, or Z register"
-msgstr "contrainte de lecture-écriture ne permet pas de registre"
+msgstr "l'opérande d'adresse requiert une contrainte sur le registre X, Y ou Z"
 
 #: config/avr/avr.c:2282
-#, fuzzy
-#| msgid "output operand %d must use `&' constraint"
 msgid "operands to %T/%t must be reg + const_int:"
-msgstr "opérande de sortie %d doit utiliser la contrainte « & »"
+msgstr "les opérandes de %T/%t doivent être reg + const_int:"
 
 #: config/avr/avr.c:2332 config/avr/avr.c:2399
-#, fuzzy
-#| msgid "bad address, not (reg+disp):"
 msgid "bad address, not an I/O address:"
-msgstr "adresse erronée, pas (reg+disp):"
+msgstr "mauvaise adresse, pas une adresse E/S:"
 
 #: config/avr/avr.c:2341
-#, fuzzy
-#| msgid "address offset not a constant"
 msgid "bad address, not a constant:"
-msgstr "décalage d'adresse n'est pas une constante"
+msgstr "mauvaise adresse, pas une constante:"
 
 #: config/avr/avr.c:2359 config/avr/avr.c:2366
 msgid "bad address, not (reg+disp):"
-msgstr "adresse erronée, pas (reg+disp):"
+msgstr "mauvaise adresse, pas (reg+disp):"
 
 #: config/avr/avr.c:2373
-#, fuzzy
-#| msgid "bad address, not (reg+disp):"
 msgid "bad address, not post_inc or pre_dec:"
-msgstr "adresse erronée, pas (reg+disp):"
+msgstr "mauvaise adresse, pas post_inc ou pre_dec:"
 
 #: config/avr/avr.c:2385
 msgid "internal compiler error.  Bad address:"
-msgstr "erreur internal du compilateur. Adresse erronée:"
+msgstr "erreur interne du compilateur. Mauvaise adresse:"
 
 #: config/avr/avr.c:2418
 #, c-format
 msgid "Unsupported code '%c' for fixed-point:"
-msgstr ""
+msgstr "Code « %c » non supporté en virgule fixe:"
 
 #: config/avr/avr.c:2426
 msgid "internal compiler error.  Unknown mode:"
-msgstr "erreur internal du compilateur. Mode inconnu:"
+msgstr "erreur interne du compilateur. Mode inconnu:"
 
 #: config/avr/avr.c:3419 config/avr/avr.c:4349 config/avr/avr.c:4798
 msgid "invalid insn:"
@@ -3183,35 +3159,34 @@ msgstr "insn invalide :"
 #: config/avr/avr.c:4201 config/avr/avr.c:4485 config/avr/avr.c:4691
 #: config/avr/avr.c:4855 config/avr/avr.c:4949 config/avr/avr.c:5145
 msgid "incorrect insn:"
-msgstr "insn incorrect :"
+msgstr "insn incorrecte :"
 
 #: config/avr/avr.c:3717 config/avr/avr.c:3992 config/avr/avr.c:4272
 #: config/avr/avr.c:4557 config/avr/avr.c:4737 config/avr/avr.c:5005
 #: config/avr/avr.c:5203
 msgid "unknown move insn:"
-msgstr "insn de déplacement inconnu :"
+msgstr "insn de déplacement inconnue :"
 
 #: config/avr/avr.c:5634
 msgid "bad shift insn:"
-msgstr "décalage insn erroné :"
+msgstr "insn de décalage erronée :"
 
 #: config/avr/avr.c:5742 config/avr/avr.c:6223 config/avr/avr.c:6638
 msgid "internal compiler error.  Incorrect shift:"
-msgstr "erreur internal du compilateur. Décalage incorrect:"
+msgstr "erreur interne du compilateur. Décalage incorrect:"
 
 #: config/avr/avr.c:7975
-#, fuzzy
-#| msgid "unsupported version"
 msgid "unsupported fixed-point conversion"
-msgstr "version non reconnue"
+msgstr "conversion en virgule fixe non supportée"
 
 #: config/avr/driver-avr.c:71
-#, fuzzy, c-format
-#| msgid "unknown spec function `%s'"
+#, c-format
 msgid ""
 "Running spec function '%s' with %d args\n"
 "\n"
-msgstr "spécification de fonction inconnue « %s »:"
+msgstr ""
+"Exécution de la fonction spec « %s » avec %d args\n"
+"\n"
 
 #: config/avr/driver-avr.c:118
 #, c-format
@@ -3220,6 +3195,9 @@ msgid ""
 "'%s': specfile='%s'\n"
 "\n"
 msgstr ""
+"« %s »: mmcu=« %s »\n"
+"« %s »: specfile=« %s »\n"
+"\n"
 
 #: config/bfin/bfin.c:1385
 #, c-format
@@ -3254,16 +3232,12 @@ msgid "invalid operand for 'b' modifier"
 msgstr "opérande invalide pour le modificateur « b »"
 
 #: config/cris/cris.c:761
-#, fuzzy
-#| msgid "invalid operand for 'b' modifier"
 msgid "invalid operand for 'o' modifier"
-msgstr "opérande invalide pour le modificateur « b »"
+msgstr "opérande invalide pour le modificateur « o »"
 
 #: config/cris/cris.c:780
-#, fuzzy
-#| msgid "invalid operand for 'b' modifier"
 msgid "invalid operand for 'O' modifier"
-msgstr "opérande invalide pour le modificateur « b »"
+msgstr "opérande invalide pour le modificateur « O »"
 
 #: config/cris/cris.c:813
 msgid "invalid operand for 'p' modifier"
@@ -3279,7 +3253,7 @@ msgstr "opérande invalide pour le modificateur « H »"
 
 #: config/cris/cris.c:926
 msgid "bad register"
-msgstr "registre erroné"
+msgstr "mauvais registre"
 
 #: config/cris/cris.c:970
 msgid "invalid operand for 'e' modifier"
@@ -3303,15 +3277,15 @@ msgstr "opérande invalide pour le modificateur « T »"
 
 #: config/cris/cris.c:1116 config/ft32/ft32.c:230 config/moxie/moxie.c:173
 msgid "invalid operand modifier letter"
-msgstr "opérande invalide pour le modificateur de lettre"
+msgstr "lettre de modificateur d'opérande invalide"
 
 #: config/cris/cris.c:1170
 msgid "unexpected multiplicative operand"
-msgstr "opérande multiplicative inattendue"
+msgstr "opérande multiplicatif inattendu"
 
 #: config/cris/cris.c:1190 config/ft32/ft32.c:253 config/moxie/moxie.c:198
 msgid "unexpected operand"
-msgstr "opérande inattendue"
+msgstr "opérande inattendu"
 
 #: config/cris/cris.c:1229 config/cris/cris.c:1239
 msgid "unrecognized address"
@@ -3319,21 +3293,21 @@ msgstr "adresse non reconnue"
 
 #: config/cris/cris.c:2559
 msgid "unrecognized supposed constant"
-msgstr "supposée constante non reconnue"
+msgstr "constante supposée non reconnue"
 
 #: config/cris/cris.c:2958 config/cris/cris.c:3016
 msgid "unexpected side-effects in address"
-msgstr "effets de bord inattendue dans l'adresse"
+msgstr "effets de bord inattendus dans l'adresse"
 
 #. Can't possibly get anything else for a function-call, right?
 #: config/cris/cris.c:3844
 msgid "unidentifiable call op"
-msgstr ""
+msgstr "opérande d'appel non identifiable"
 
 #: config/cris/cris.c:3906
 #, c-format
 msgid "PIC register isn't set up"
-msgstr "le registre n'est pas initialisé"
+msgstr "le registre PIC n'est pas initialisé"
 
 #: config/fr30/fr30.c:496
 #, c-format
@@ -3343,12 +3317,12 @@ msgstr "fr30_print_operand_address: adresse non traitée"
 #: config/fr30/fr30.c:520
 #, c-format
 msgid "fr30_print_operand: unrecognized %%p code"
-msgstr "fr30_print_operand: code %%p non reconnue"
+msgstr "fr30_print_operand: code %%p non reconnu"
 
 #: config/fr30/fr30.c:540
 #, c-format
 msgid "fr30_print_operand: unrecognized %%b code"
-msgstr "fr30_print_operand: code %%b non reconnue"
+msgstr "fr30_print_operand: code %%b non reconnu"
 
 #: config/fr30/fr30.c:561
 #, c-format
@@ -3358,7 +3332,7 @@ msgstr "fr30_print_operand: code %%B non reconnu"
 #: config/fr30/fr30.c:569
 #, c-format
 msgid "fr30_print_operand: invalid operand to %%A code"
-msgstr "fr30_print_operand: opérande invalide pour code %%A"
+msgstr "fr30_print_operand: opérande invalide pour le code %%A"
 
 #: config/fr30/fr30.c:586
 #, c-format
@@ -3382,23 +3356,17 @@ msgid "fr30_print_operand: unhandled MEM"
 msgstr "fr30_print_operand: MEM non traité"
 
 #: config/frv/frv.c:2507
-#, fuzzy
-#| msgid "Bad insn to frv_print_operand_address:"
 msgid "bad insn to frv_print_operand_address:"
-msgstr "insn erroné pour frv_print_operand_addresse:"
+msgstr "mauvaise insn pour frv_print_operand_address:"
 
 #: config/frv/frv.c:2518
-#, fuzzy
-#| msgid "Bad register to frv_print_operand_memory_reference_reg:"
 msgid "bad register to frv_print_operand_memory_reference_reg:"
-msgstr "registre erroné pour frv_print_operand_memory_reference_reg:"
+msgstr "mauvais registre pour frv_print_operand_memory_reference_reg:"
 
 #: config/frv/frv.c:2557 config/frv/frv.c:2567 config/frv/frv.c:2576
 #: config/frv/frv.c:2597 config/frv/frv.c:2602
-#, fuzzy
-#| msgid "Bad insn to frv_print_operand_memory_reference:"
 msgid "bad insn to frv_print_operand_memory_reference:"
-msgstr "insn erroné pour frv_print_operand_memory_reference:"
+msgstr "mauvaise insn pour frv_print_operand_memory_reference:"
 
 #: config/frv/frv.c:2688
 #, c-format
@@ -3406,92 +3374,64 @@ msgid "bad condition code"
 msgstr "mauvais code de condition"
 
 #: config/frv/frv.c:2762
-#, fuzzy
-#| msgid "Bad insn in frv_print_operand, bad const_double"
 msgid "bad insn in frv_print_operand, bad const_double"
-msgstr "insn erroné dans frv_print_operand, bad const_double"
+msgstr "mauvaise insn dans frv_print_operand, mauvais const_double"
 
 #: config/frv/frv.c:2823
-#, fuzzy
-#| msgid "Bad insn to frv_print_operand, 'e' modifier:"
 msgid "bad insn to frv_print_operand, 'e' modifier:"
-msgstr "insn erroné pour frv_print_operand, modificateur « e »:"
+msgstr "mauvaise insn pour frv_print_operand, modificateur « e »:"
 
 #: config/frv/frv.c:2831
-#, fuzzy
-#| msgid "Bad insn to frv_print_operand, 'F' modifier:"
 msgid "bad insn to frv_print_operand, 'F' modifier:"
-msgstr "insn erroné pour frv_print_operand, modificateur « F »:"
+msgstr "mauvaise insn pour frv_print_operand, modificateur « F »:"
 
 #: config/frv/frv.c:2847
-#, fuzzy
-#| msgid "Bad insn to frv_print_operand, 'f' modifier:"
 msgid "bad insn to frv_print_operand, 'f' modifier:"
-msgstr "insn erroné pour frv_print_operand, modificateur « f »:"
+msgstr "mauvaise insn pour frv_print_operand, modificateur « f »:"
 
 #: config/frv/frv.c:2861
-#, fuzzy
-#| msgid "Bad insn to frv_print_operand, 'C' modifier:"
 msgid "bad insn to frv_print_operand, 'g' modifier:"
-msgstr "insn erroné pour frv_print_operand, modificateur « C »:"
+msgstr "mauvaise insn pour frv_print_operand, modificateur « g »:"
 
 #: config/frv/frv.c:2909
-#, fuzzy
-#| msgid "Bad insn to frv_print_operand, 'L' modifier:"
 msgid "bad insn to frv_print_operand, 'L' modifier:"
-msgstr "insn erroné pour frv_print_operand, modificateur « L »:"
+msgstr "mauvaise insn pour frv_print_operand, modificateur « L »:"
 
 #: config/frv/frv.c:2922
-#, fuzzy
-#| msgid "Bad insn to frv_print_operand, 'M/N' modifier:"
 msgid "bad insn to frv_print_operand, 'M/N' modifier:"
-msgstr "insn erroné pour frv_print_operand, modificateur « M/N »:"
+msgstr "mauvaise insn pour frv_print_operand, modificateur « M/N »:"
 
 #: config/frv/frv.c:2943
-#, fuzzy
-#| msgid "Bad insn to frv_print_operand, 'O' modifier:"
 msgid "bad insn to frv_print_operand, 'O' modifier:"
-msgstr "insn erroné pour frv_print_operand, modificateur « O »:"
+msgstr "mauvaise insn pour frv_print_operand, modificateur « O »:"
 
 #: config/frv/frv.c:2961
-#, fuzzy
-#| msgid "Bad insn to frv_print_operand, P modifier:"
 msgid "bad insn to frv_print_operand, P modifier:"
-msgstr "insn erroné pour frv_print_operand, modificateur « P »:"
+msgstr "mauvaise insn pour frv_print_operand, modificateur « P »:"
 
 #: config/frv/frv.c:2981
-#, fuzzy
-#| msgid "Bad insn in frv_print_operand, z case"
 msgid "bad insn in frv_print_operand, z case"
-msgstr "insn erroné dans frv_print_operand, case z"
+msgstr "mauvaise insn dans frv_print_operand, case z"
 
 #: config/frv/frv.c:3012
-#, fuzzy
-#| msgid "Bad insn in frv_print_operand, 0 case"
 msgid "bad insn in frv_print_operand, 0 case"
-msgstr "insn erroné dans frv_print_operand, case 0"
+msgstr "mauvaise insn dans frv_print_operand, case 0"
 
 #: config/frv/frv.c:3017
 msgid "frv_print_operand: unknown code"
 msgstr "frv_print_operand: code inconnu"
 
 #: config/frv/frv.c:4421
-#, fuzzy
-#| msgid "Bad output_move_single operand"
 msgid "bad output_move_single operand"
-msgstr "opérande output_move_single erronée"
+msgstr "mauvais opérande output_move_single"
 
 #: config/frv/frv.c:4548
-#, fuzzy
-#| msgid "Bad output_move_double operand"
 msgid "bad output_move_double operand"
-msgstr "opérande output_move_double erronée"
+msgstr "mauvais opérande output_move_double"
 
 #: config/frv/frv.c:4690
-#, fuzzy
-#| msgid "Bad output_condmove_single operand"
 msgid "bad output_condmove_single operand"
-msgstr "opérande output_condmove_single erronée"
+msgstr "mauvais opérande output_condmove_single"
 
 #: config/i386/i386.c:16060
 #, c-format
@@ -3516,7 +3456,7 @@ msgstr "type opérande invalide utilisé avec le code d'opérande « Z »"
 #: config/i386/i386.c:16874
 #, c-format
 msgid "invalid operand size for operand code 'Z'"
-msgstr "taille opérande invalide pour le code d'opérande « Z »"
+msgstr "taille d'opérande invalide pour le code d'opérande « Z »"
 
 #: config/i386/i386.c:16950
 #, c-format
@@ -3534,10 +3474,9 @@ msgid "operand is not a condition code, invalid operand code '%c'"
 msgstr "l'opérande n'est pas du code de condition, code d'opérande invalide « %c »"
 
 #: config/i386/i386.c:17053
-#, fuzzy, c-format
-#| msgid "operand is neither a constant nor a condition code, invalid operand code 'c'"
+#, c-format
 msgid "operand is not an offsettable memory reference, invalid operand code 'H'"
-msgstr "l'opérande n'est ni une constante ni du code de condition, code d'opérande invalide « c »"
+msgstr "l'opérande n'est pas une référence mémoire avec décalage, code d'opérande invalide « H »"
 
 #: config/i386/i386.c:17218
 #, c-format
@@ -3554,10 +3493,9 @@ msgid "unknown insn mode"
 msgstr "mode insn inconnu"
 
 #: config/i386/djgpp.h:146
-#, fuzzy, c-format
-#| msgid "-f%s not supported: ignored"
+#, c-format
 msgid "-f%s ignored (not supported for DJGPP)\n"
-msgstr "-f%s n'est pas supporté: ignoré"
+msgstr "-f%s ignoré (pas supporté par DJGPP)\n"
 
 #: config/i386/i386-interix.h:77
 msgid "Use native (MS) bitfield layout"
@@ -3608,15 +3546,15 @@ msgstr "utilisation invalide de %%d, %%x, ou %%X"
 #: config/lm32/lm32.c:507
 #, c-format
 msgid "only 0.0 can be loaded as an immediate"
-msgstr ""
+msgstr "seul 0.0 peut être chargé comme valeur immédiate"
 
 #: config/lm32/lm32.c:577
 msgid "bad operand"
-msgstr "opérande erronée"
+msgstr "mauvais opérande"
 
 #: config/lm32/lm32.c:589
 msgid "can't use non gp relative absolute address"
-msgstr ""
+msgstr "impossible d'utiliser une adresse absolue pas relative à gp"
 
 #: config/lm32/lm32.c:593
 msgid "invalid addressing mode"
@@ -3634,7 +3572,7 @@ msgstr "opérande invalide pour le code %%p"
 
 #: config/m32r/m32r.c:2137
 msgid "bad insn for 'A'"
-msgstr "insn erroné pour « A »"
+msgstr "mauvaise insn pour « A »"
 
 #: config/m32r/m32r.c:2184
 #, c-format
@@ -3648,20 +3586,20 @@ msgstr "opérande invalide pour le code %%N"
 
 #: config/m32r/m32r.c:2240
 msgid "pre-increment address is not a register"
-msgstr "pré-incrément d'adresse n'est pas un registre"
+msgstr "le pré-incrément d'adresse n'est pas un registre"
 
 #: config/m32r/m32r.c:2247
 msgid "pre-decrement address is not a register"
-msgstr "pré-décrément d'adresse n'est pas un registre"
+msgstr "le pré-décrément d'adresse n'est pas un registre"
 
 #: config/m32r/m32r.c:2254
 msgid "post-increment address is not a register"
-msgstr "post-incrément d'adresse n'est pas un registre"
+msgstr "le post-incrément d'adresse n'est pas un registre"
 
 #: config/m32r/m32r.c:2328 config/m32r/m32r.c:2343
 #: config/rs6000/rs6000.c:32640
 msgid "bad address"
-msgstr "adresse erronée"
+msgstr "mauvaise adresse"
 
 #: config/m32r/m32r.c:2348
 msgid "lo_sum not of register"
@@ -3685,12 +3623,12 @@ msgstr "pointeur null"
 #: config/microblaze/microblaze.c:2234
 #, c-format
 msgid "PRINT_OPERAND, invalid insn for %%C"
-msgstr "PRINT_OPERAND insn invalide pour %%C"
+msgstr "PRINT_OPERAND, insn invalide pour %%C"
 
 #: config/microblaze/microblaze.c:2263
 #, c-format
 msgid "PRINT_OPERAND, invalid insn for %%N"
-msgstr "PRINT_OPERAND insn invalide pour %%N"
+msgstr "PRINT_OPERAND, insn invalide pour %%N"
 
 #: config/microblaze/microblaze.c:2283 config/microblaze/microblaze.c:2458
 msgid "insn contains an invalid address !"
@@ -3720,44 +3658,44 @@ msgstr "utilisation invalide de « %%%c »"
 
 #: config/mmix/mmix.c:1547 config/mmix/mmix.c:1677
 msgid "MMIX Internal: Expected a CONST_INT, not this"
-msgstr "MMIX interne: attendait CONST_INT, pas ceci"
+msgstr "MMIX interne: Attendait CONST_INT, pas ceci"
 
 #: config/mmix/mmix.c:1626
 msgid "MMIX Internal: Bad value for 'm', not a CONST_INT"
-msgstr "MMIX interne: valeur erronée pour « m », pas un CONST_INT"
+msgstr "MMIX interne: Valeur erronée pour « m », pas un CONST_INT"
 
 #: config/mmix/mmix.c:1645
 msgid "MMIX Internal: Expected a register, not this"
-msgstr "MMIX interne: attendait un registre, pas ceci"
+msgstr "MMIX interne: Attendait un registre, pas ceci"
 
 #: config/mmix/mmix.c:1655
 msgid "MMIX Internal: Expected a constant, not this"
-msgstr "MMIX interne: attendait une constante, pas ceci"
+msgstr "MMIX interne: Attendait une constante, pas ceci"
 
 #. We need the original here.
 #: config/mmix/mmix.c:1739
 msgid "MMIX Internal: Cannot decode this operand"
-msgstr "MMIX interne: ne peut décoder cette opérande"
+msgstr "MMIX interne: Ne peut décoder cet opérande"
 
 #: config/mmix/mmix.c:1795
 msgid "MMIX Internal: This is not a recognized address"
-msgstr "MMIX interne: ce n'est pas une adresse reconnue"
+msgstr "MMIX interne: Ce n'est pas une adresse reconnue"
 
 #: config/mmix/mmix.c:2671
 msgid "MMIX Internal: Trying to output invalidly reversed condition:"
-msgstr "MMIX interne: tentative de produire incorrectement une condition renversée:"
+msgstr "MMIX interne: Tentative de produire une condition incorrectement inversée:"
 
 #: config/mmix/mmix.c:2678
 msgid "MMIX Internal: What's the CC of this?"
-msgstr "MMIX interne: quel sorte de CC est-ce?"
+msgstr "MMIX interne: Quel est le CC de ceci ?"
 
 #: config/mmix/mmix.c:2682
 msgid "MMIX Internal: What is the CC of this?"
-msgstr "MMIX interne: quel sorte de CC est-ce?"
+msgstr "MMIX interne: Quel est le CC de ceci ?"
 
 #: config/mmix/mmix.c:2724
 msgid "MMIX Internal: This is not a constant:"
-msgstr "interne MMIX: ce n'est pas une constante:"
+msgstr "MMIX interne: Ce n'est pas une constante:"
 
 #: config/msp430/msp430.c:3609
 #, c-format
@@ -3767,12 +3705,12 @@ msgstr "préfixe d'opérande invalide"
 #: config/msp430/msp430.c:3643
 #, c-format
 msgid "invalid zero extract"
-msgstr ""
+msgstr "« zero extract » invalide"
 
 #: config/rl78/rl78.c:1797 config/rl78/rl78.c:1883
 #, c-format
 msgid "q/Q modifiers invalid for symbol references"
-msgstr ""
+msgstr "modificateurs q/Q invalides pour des références de symboles"
 
 #: config/rs6000/host-darwin.c:94
 #, c-format
@@ -3782,11 +3720,11 @@ msgstr "Manque d'espace sur la pile.\n"
 #: config/rs6000/host-darwin.c:115
 #, c-format
 msgid "Try running '%s' in the shell to raise its limit.\n"
-msgstr "Essayer d'exécuter « %s » dans le shell pour augmenter la limite.\n"
+msgstr "Essayez d'exécuter « %s » dans le shell pour augmenter sa limite.\n"
 
 #: config/rs6000/rs6000.c:3959
 msgid "-maltivec=le not allowed for big-endian targets"
-msgstr ""
+msgstr "-maltivec=le pas permis pour des cibles gros-boutistes"
 
 #: config/rs6000/rs6000.c:3971
 msgid "-mvsx requires hardware floating point"
@@ -3794,7 +3732,7 @@ msgstr "-mvsx nécessite une unité matérielle en virgule flottante"
 
 #: config/rs6000/rs6000.c:3979
 msgid "-mvsx and -mpaired are incompatible"
-msgstr "-mvsx et -mpaired -msystem-v sont incompatibles"
+msgstr "-mvsx et -mpaired sont incompatibles"
 
 #: config/rs6000/rs6000.c:3981
 msgid "-mvsx needs indexed addressing"
@@ -3814,29 +3752,27 @@ msgstr "-mquad-memory requiert le mode 64 bits"
 
 #: config/rs6000/rs6000.c:4132
 msgid "-mquad-memory-atomic requires 64-bit mode"
-msgstr ""
+msgstr "-mquad-memory-atomic requiert le mode 64 bits"
 
 #: config/rs6000/rs6000.c:4144
 msgid "-mquad-memory is not available in little endian mode"
-msgstr ""
+msgstr "-mquad-memory n'est pas disponible en mode petit-boutiste"
 
 #: config/rs6000/rs6000.c:4212
-#, fuzzy
-#| msgid "-mquad-memory requires 64-bit mode"
 msgid "-mtoc-fusion requires 64-bit"
-msgstr "-mquad-memory requiert le mode 64 bits"
+msgstr "-mtoc-fusion requiert le mode 64 bits"
 
 #: config/rs6000/rs6000.c:4219
 msgid "-mtoc-fusion requires medium/large code model"
-msgstr ""
+msgstr "-mtoc-fusion requiert le modèle de code moyen/large"
 
 #: config/rs6000/rs6000.c:9919
 msgid "bad move"
-msgstr "mauvais mouvement"
+msgstr "mauvais déplacement"
 
 #: config/rs6000/rs6000.c:20411
 msgid "Bad 128-bit move"
-msgstr ""
+msgstr "Mauvais déplacement sur 128 bits"
 
 #: config/rs6000/rs6000.c:20602
 #, c-format
@@ -3909,78 +3845,71 @@ msgid "invalid %%x value"
 msgstr "valeur %%x invalide"
 
 #: config/rs6000/rs6000.c:21099
-#, fuzzy, c-format
-#| msgid "invalid punctuation `%c' in constraint"
+#, c-format
 msgid "invalid %%y value, try using the 'Z' constraint"
-msgstr "ponctuation invalide « %c » dans la contrainte"
+msgstr "valeur %%y invalide, essayez d'utiliser la contrainte « Z »"
 
 #: config/rs6000/rs6000.c:21814
 msgid "__float128 and __ibm128 cannot be used in the same expression"
-msgstr ""
+msgstr "__float128 et __ibm128 ne peuvent pas être utilisés dans la même expression"
 
 #: config/rs6000/rs6000.c:21820
 msgid "__ibm128 and long double cannot be used in the same expression"
-msgstr ""
+msgstr "__ibm128 et long double ne peuvent pas être utilisés dans la même expression"
 
 #: config/rs6000/rs6000.c:21826
 msgid "__float128 and long double cannot be used in the same expression"
-msgstr ""
+msgstr "__float128 et long double ne peuvent pas être utilisés dans la même expression"
 
 #: config/rs6000/rs6000.c:35706
-#, fuzzy
-#| msgid "too few arguments to function"
 msgid "AltiVec argument passed to unprototyped function"
-msgstr "trop peu d'arguments pour la fonction"
+msgstr "Argument AltiVec passé à une fonction sans prototype"
 
 #: config/rs6000/rs6000.c:37429
-#, fuzzy
-#| msgid "Do not generate code for a Sun FPA"
 msgid "Could not generate addis value for fusion"
-msgstr "Ne pas générer le code pour un Sun FPA"
+msgstr "N'a pu générer de valeur addis pour la fusion"
 
 #: config/rs6000/rs6000.c:37501
-#, fuzzy
-#| msgid "unable to generate reloads for:"
 msgid "Unable to generate load/store offset for fusion"
-msgstr "incapable de générer des recharges pour:"
+msgstr "Impossible de générer un offset load/store pour la fusion"
 
 #: config/rs6000/rs6000.c:37605
 msgid "Bad GPR fusion"
-msgstr ""
+msgstr "Mauvaise fusion GPR"
 
 #: config/rs6000/rs6000.c:37823
 msgid "emit_fusion_p9_load, bad reg #1"
-msgstr ""
+msgstr "emit_fusion_p9_load, mauvais reg #1"
 
 #: config/rs6000/rs6000.c:37860
 msgid "emit_fusion_p9_load, bad reg #2"
-msgstr ""
+msgstr "emit_fusion_p9_load, mauvais reg #2"
 
 #: config/rs6000/rs6000.c:37863
 msgid "emit_fusion_p9_load not MEM"
-msgstr ""
+msgstr "emit_fusion_p9_load pas MEM"
 
 #: config/rs6000/rs6000.c:37901
 msgid "emit_fusion_p9_store, bad reg #1"
-msgstr ""
+msgstr "emit_fusion_p9_store, mauvais reg #1"
 
 #: config/rs6000/rs6000.c:37938
 msgid "emit_fusion_p9_store, bad reg #2"
-msgstr ""
+msgstr "emit_fusion_p9_store, mauvais reg #2"
 
 #: config/rs6000/rs6000.c:37941
 msgid "emit_fusion_p9_store not MEM"
-msgstr ""
+msgstr "emit_fusion_p9_store pas MEM"
 
 #: config/s390/s390.c:7168
 #, c-format
 msgid "symbolic memory references are only supported on z10 or later"
-msgstr ""
+msgstr "les références mémoire symboliques sont uniquement supportées sur z10 ou ultérieur"
 
 #: config/s390/s390.c:7179
 #, c-format
 msgid "cannot decompose address"
-msgstr "Adresse indécomposable"
+msgstr "adresse indécomposable"
 
 #: config/s390/s390.c:7248
 #, c-format
@@ -4005,7 +3934,7 @@ msgstr "adresse invalide pour le modificateur de sortie « R »"
 #: config/s390/s390.c:7329
 #, c-format
 msgid "memory reference expected for 'S' output modifier"
-msgstr ""
+msgstr "référence mémoire attendue pour le modificateur de sortie « S »"
 
 #: config/s390/s390.c:7339
 #, c-format
@@ -4015,12 +3944,12 @@ msgstr "adresse invalide pour le modificateur de sortie « S »"
 #: config/s390/s390.c:7360
 #, c-format
 msgid "register or memory expression expected for 'N' output modifier"
-msgstr ""
+msgstr "registre ou expression mémoire attendue pour le modificateur de sortie « N »"
 
 #: config/s390/s390.c:7371
 #, c-format
 msgid "register or memory expression expected for 'M' output modifier"
-msgstr ""
+msgstr "registre ou expression mémoire attendue pour le modificateur de sortie « M »"
 
 #: config/s390/s390.c:7456 config/s390/s390.c:7477
 #, c-format
@@ -4030,13 +3959,12 @@ msgstr "constante invalide pour le modificateur de sortie « %c »"
 #: config/s390/s390.c:7474
 #, c-format
 msgid "invalid constant - try using an output modifier"
-msgstr "opérande invalide - essayez un modificateur de sortie"
+msgstr "constante invalide - essayez un modificateur de sortie"
 
 #: config/s390/s390.c:7515
-#, fuzzy, c-format
-#| msgid "invalid constant for output modifier '%c'"
+#, c-format
 msgid "invalid constant vector for output modifier '%c'"
-msgstr "constante invalide pour le modificateur de sortie « %c »"
+msgstr "vecteur constant invalide pour le modificateur de sortie « %c »"
 
 #: config/s390/s390.c:7522
 #, c-format
@@ -4049,30 +3977,24 @@ msgid "invalid expression for output modifier '%c'"
 msgstr "expression invalide pour le modificateur de sortie « %c »"
 
 #: config/s390/s390.c:11377
-#, fuzzy
-#| msgid "too few arguments to function"
 msgid "Vector argument passed to unprototyped function"
-msgstr "trop peu d'arguments pour la fonction"
+msgstr "Vecteur passé en argument à une fonction sans prototype"
 
 #: config/s390/s390.c:15036
-#, fuzzy
-#| msgid "pointer targets in %s differ in signedness"
 msgid "types differ in signess"
-msgstr "les cibles des pointeurs dans %s n'ont pas toutes de signe"
+msgstr "les types diffèrent sur le type signé/non-signé"
 
 #: config/s390/s390.c:15046
 msgid "binary operator does not support two vector bool operands"
-msgstr ""
+msgstr "l'opérateur binaire ne supporte pas deux opérandes booléens vectoriels"
 
 #: config/s390/s390.c:15049
-#, fuzzy
-#| msgid "target format does not support infinity"
 msgid "binary operator does not support vector bool operand"
-msgstr "le format cible ne supporte pas l'infini"
+msgstr "l'opérateur binaire ne supporte pas l'opérande booléen vectoriel"
 
 #: config/s390/s390.c:15057
 msgid "binary operator does not support mixing vector bool with floating point vector operands"
-msgstr ""
+msgstr "l'opérateur binaire ne supporte pas le mélange d'un booléen vectoriel avec un vecteur en virgule flottante"
 
 #: config/sh/sh.c:1313
 #, c-format
@@ -4109,7 +4031,7 @@ msgstr "opérande %%A invalide"
 #: config/sparc/sparc.c:8844
 #, c-format
 msgid "invalid %%B operand"
-msgstr "Opérande %%B invalide"
+msgstr "opérande %%B invalide"
 
 #: config/sparc/sparc.c:8873 config/tilegx/tilegx.c:5095
 #: config/tilepro/tilepro.c:4510
@@ -4133,25 +4055,24 @@ msgid "invalid %%s operand"
 msgstr "opérande %%s invalide"
 
 #: config/sparc/sparc.c:8963
-#, fuzzy, c-format
-#| msgid "floating point constant not a valid immediate operand"
+#, c-format
 msgid "floating-point constant not a valid immediate operand"
-msgstr "constante en virgule flottante n'est pas une opérande immédiate valide"
+msgstr "la constante en virgule flottante n'est pas un opérande immédiat valide"
 
 #: config/stormy16/stormy16.c:1733 config/stormy16/stormy16.c:1804
 #, c-format
 msgid "'B' operand is not constant"
-msgstr "opérande « B » n'est pas une constante"
+msgstr "l'opérande « B » n'est pas une constante"
 
 #: config/stormy16/stormy16.c:1760
 #, c-format
 msgid "'B' operand has multiple bits set"
-msgstr "l'opérande « B » a de multiples jeux de bits"
+msgstr "l'opérande « B » a plusieurs bits activés"
 
 #: config/stormy16/stormy16.c:1786
 #, c-format
 msgid "'o' operand is not constant"
-msgstr "opérande « o » n'est pas une constante"
+msgstr "l'opérande « o » n'est pas une constante"
 
 #: config/stormy16/stormy16.c:1818
 #, c-format
@@ -4206,12 +4127,12 @@ msgstr "opérande %%N invalide"
 #: config/tilegx/tilegx.c:5385
 #, c-format
 msgid "invalid operand for 'r' specifier"
-msgstr "opérande invalide pour le modificateur « r »"
+msgstr "opérande invalide pour le spécificateur « r »"
 
 #: config/tilegx/tilegx.c:5409 config/tilepro/tilepro.c:4816
 #, c-format
 msgid "unable to print out operand yet; code == %d (%c)"
-msgstr ""
+msgstr "impossible d'imprimer l'opérande pour l'instant; code == %d (%c)"
 
 #: config/tilepro/tilepro.c:4560
 #, c-format
@@ -4245,7 +4166,7 @@ msgstr "opérande %%r invalide"
 
 #: config/v850/v850.c:293
 msgid "const_double_split got a bad insn:"
-msgstr "const_double_split a reçu un insn erroné :"
+msgstr "const_double_split a reçu une mauvaise insn :"
 
 #: config/v850/v850.c:899
 msgid "output_move_single:"
@@ -4254,27 +4175,25 @@ msgstr "output_move_single :"
 #: config/vax/vax.c:453
 #, c-format
 msgid "symbol used with both base and indexed registers"
-msgstr ""
+msgstr "symbole utilisé conjointement avec des registres de base et d'index"
 
 #: config/vax/vax.c:462
-#, fuzzy, c-format
-#| msgid "code model %s not supported in PIC mode"
+#, c-format
 msgid "symbol with offset used in PIC mode"
-msgstr "model de code %s n'est pas supporté en mode PIC"
+msgstr "symbole avec offset utilisé en mode PIC"
 
 #: config/vax/vax.c:550
-#, fuzzy, c-format
-#| msgid "long long constant not a valid immediate operand"
+#, c-format
 msgid "symbol used as immediate operand"
-msgstr "constante long long n'est pas une opérande immédiate valide"
+msgstr "symbole utilisé comme opérande immédiat"
 
 #: config/vax/vax.c:1577
 msgid "illegal operand detected"
-msgstr "opérande illégale détectée"
+msgstr "opérande illégal détecté"
 
 #: config/visium/visium.c:3255
 msgid "illegal operand "
-msgstr "opérande illégale "
+msgstr "opérande illégal "
 
 #: config/visium/visium.c:3306
 msgid "illegal operand address (1)"
@@ -4295,7 +4214,7 @@ msgstr "adresse d'opérande illégale (4)"
 #: config/xtensa/xtensa.c:768 config/xtensa/xtensa.c:800
 #: config/xtensa/xtensa.c:809
 msgid "bad test"
-msgstr "test erroné"
+msgstr "mauvais test"
 
 #: config/xtensa/xtensa.c:2301
 #, c-format
@@ -4322,7 +4241,7 @@ msgstr "pas de registre dans l'adresse"
 
 #: config/xtensa/xtensa.c:2487
 msgid "address offset not a constant"
-msgstr "décalage d'adresse n'est pas une constante"
+msgstr "le décalage d'adresse n'est pas une constante"
 
 #: c/c-objc-common.c:160
 msgid "aka"
@@ -4406,7 +4325,7 @@ msgstr "%<]%> attendu"
 
 #: c/c-parser.c:3759
 msgid "expected %<;%>, %<,%> or %<)%>"
-msgstr "%<;%>, %<,%> or %<)%> attendu"
+msgstr "%<;%>, %<,%> ou %<)%> attendu"
 
 #: c/c-parser.c:4372 c/c-parser.c:14517 cp/parser.c:26967 cp/parser.c:28889
 #, gcc-internal-format
@@ -4430,19 +4349,19 @@ msgstr "%<:%> attendu"
 
 #: c/c-parser.c:5185 cp/semantics.c:613
 msgid "Cilk array notation cannot be used as a computed goto expression"
-msgstr ""
+msgstr "La notation Cilk d'un tableau ne peut pas être utilisée comme expression d'un goto calculé"
 
 #: c/c-parser.c:5244
 msgid "Cilk array notation cannot be used for a throw expression"
-msgstr ""
+msgstr "La notation Cilk d'un tableau ne peut pas être utilisée comme expression pour déclencher une exception"
 
 #: c/c-parser.c:5556 cp/semantics.c:1136
 msgid "Cilk array notation cannot be used as a condition for switch statement"
-msgstr ""
+msgstr "La notation Cilk d'un tableau ne peut pas être utilisée comme condition d'un switch"
 
 #: c/c-parser.c:5605 cp/semantics.c:791
 msgid "Cilk array notation cannot be used as a condition for while statement"
-msgstr ""
+msgstr "La notation Cilk d'un tableau ne peut pas être utilisée comme condition d'un while"
 
 #: c/c-parser.c:5656 cp/parser.c:26897
 #, gcc-internal-format
@@ -4451,11 +4370,11 @@ msgstr "%<while%> attendu"
 
 #: c/c-parser.c:5663 cp/semantics.c:850
 msgid "Cilk array notation cannot be used as a condition for a do-while statement"
-msgstr ""
+msgstr "La notation Cilk d'un tableau ne peut pas être utilisée comme condition d'un do-while"
 
 #: c/c-parser.c:5866 cp/semantics.c:969
 msgid "Cilk array notation cannot be used in a condition for a for-loop"
-msgstr ""
+msgstr "La notation Cilk d'un tableau ne peut pas être utilisée comme condition d'une boucle for"
 
 #: c/c-parser.c:7497
 msgid "expected %<.%>"
@@ -4475,7 +4394,7 @@ msgstr "%<>%> attendu"
 #: c/c-parser.c:12116 c/c-parser.c:12880 cp/parser.c:27012
 #, gcc-internal-format
 msgid "expected %<,%> or %<)%>"
-msgstr "%<,%> or %<)%> attendu"
+msgstr "%<,%> ou %<)%> attendu"
 
 #: c/c-parser.c:14229 c/c-parser.c:14273 c/c-parser.c:14501 c/c-parser.c:14736
 #: c/c-parser.c:16891 c/c-parser.c:17513 c/c-parser.c:4573 cp/parser.c:26991
@@ -4486,7 +4405,7 @@ msgstr "%<=%> attendu"
 #: c/c-parser.c:15280 c/c-parser.c:15270 cp/parser.c:34132
 #, gcc-internal-format
 msgid "expected %<#pragma omp section%> or %<}%>"
-msgstr "%<#pragma omp section%> or %<}%> attendu"
+msgstr "%<#pragma omp section%> ou %<}%> attendu"
 
 #: c/c-parser.c:17675 c/c-parser.c:10602 cp/parser.c:26976 cp/parser.c:30031
 #, gcc-internal-format
@@ -4507,23 +4426,23 @@ msgstr "candidat 2 :"
 
 #: cp/decl2.c:778
 msgid "candidates are: %+#D"
-msgstr "candidats sont : %+#D"
+msgstr "les candidats sont : %+#D"
 
 #: cp/decl2.c:780
 msgid "candidate is: %+#D"
-msgstr "candidat est : %+#D"
+msgstr "le candidat est : %+#D"
 
 #: cp/error.c:317
 msgid "<missing>"
-msgstr "<champ manquant>"
+msgstr "<manquant>"
 
 #: cp/error.c:417
 msgid "<brace-enclosed initializer list>"
-msgstr ""
+msgstr "<liste d'initialisation entre accolades>"
 
 #: cp/error.c:419
 msgid "<unresolved overloaded function type>"
-msgstr ""
+msgstr "<type de fonction surchargée non résolu>"
 
 #: cp/error.c:581
 msgid "<type error>"
@@ -4532,7 +4451,7 @@ msgstr "<erreur de type>"
 #: cp/error.c:684
 #, c-format
 msgid "<anonymous %s>"
-msgstr "<anonymous %s>"
+msgstr "<%s anonyme>"
 
 #. A lambda's "type" is essentially its signature.
 #: cp/error.c:689
@@ -4541,29 +4460,29 @@ msgstr "<lambda"
 
 #: cp/error.c:819
 msgid "<typeprefixerror>"
-msgstr "<typeprefixerror>"
+msgstr "<erreurprefixtype>"
 
 #: cp/error.c:948
 #, c-format
 msgid "(static initializers for %s)"
-msgstr "(initialiseur static pour « %s »)"
+msgstr "(initialiseurs statiques pour %s)"
 
 #: cp/error.c:950
 #, c-format
 msgid "(static destructors for %s)"
-msgstr "(destructeur static pour %s)"
+msgstr "(destructeurs statiques pour %s)"
 
 #: cp/error.c:1063
 msgid "vtable for "
-msgstr ""
+msgstr "vtable pour "
 
 #: cp/error.c:1087
 msgid "<return value> "
-msgstr ""
+msgstr "<valeur de retour> "
 
 #: cp/error.c:1102
 msgid "{anonymous}"
-msgstr "{anonymous}"
+msgstr "{anonyme}"
 
 #: cp/error.c:1104
 msgid "(anonymous namespace)"
@@ -4571,7 +4490,7 @@ msgstr "(espace de nom anonyme)"
 
 #: cp/error.c:1220
 msgid "<template arguments error>"
-msgstr "<erreur argument du patron>"
+msgstr "<erreur dans les arguments du patron>"
 
 #: cp/error.c:1242
 msgid "<enumerator>"
@@ -4583,7 +4502,7 @@ msgstr "<erreur de déclaration>"
 
 #: cp/error.c:1819 cp/error.c:1839
 msgid "<template parameter error>"
-msgstr "<erreur de patron de paramètre>"
+msgstr "<erreur de paramètre du patron>"
 
 #: cp/error.c:1969
 msgid "<statement>"
@@ -4598,11 +4517,11 @@ msgstr "<inconnu>"
 #. __cxa_allocate_exception, __cxa_throw, and the like.
 #: cp/error.c:2014
 msgid "<throw-expression>"
-msgstr "<levé d'exception>"
+msgstr "<levée d'exception>"
 
 #: cp/error.c:2115
 msgid "<ubsan routine call>"
-msgstr ""
+msgstr "<appel de routine UBSan>"
 
 #: cp/error.c:2572
 msgid "<unparsed>"
@@ -4630,7 +4549,7 @@ msgstr "{inconnu}"
 
 #: cp/error.c:3199
 msgid "At global scope:"
-msgstr ""
+msgstr "Au niveau global :"
 
 #: cp/error.c:3305
 #, c-format
@@ -4659,11 +4578,11 @@ msgstr "Dans la fonction lambda"
 #: cp/error.c:3333
 #, c-format
 msgid "%s: In substitution of %qS:\n"
-msgstr "%s : dans la substitution de %qS :\n"
+msgstr "%s: Dans la substitution de %qS :\n"
 
 #: cp/error.c:3334
 msgid "%s: In instantiation of %q#D:\n"
-msgstr "%s : dans l'instantiation de %q#D :\n"
+msgstr "%s: Dans l'instanciation de %q#D :\n"
 
 #: cp/error.c:3359
 msgid "%r%s:%d:%d:%R   "
@@ -4676,110 +4595,94 @@ msgstr "%r%s:%d:%R   "
 #: cp/error.c:3370
 #, c-format
 msgid "recursively required by substitution of %qS\n"
-msgstr ""
+msgstr "requis récursivement par la substitution de %qS\n"
 
 #: cp/error.c:3371
 #, c-format
 msgid "required by substitution of %qS\n"
-msgstr ""
+msgstr "requis par la substitution de %qS\n"
 
 #: cp/error.c:3376
 msgid "recursively required from %q#D\n"
-msgstr ""
+msgstr "requis récursivement par %q#D\n"
 
 #: cp/error.c:3377
 msgid "required from %q#D\n"
-msgstr ""
+msgstr "requis par %q#D\n"
 
 #: cp/error.c:3384
 msgid "recursively required from here\n"
-msgstr ""
+msgstr "requis récursivement depuis ici\n"
 
 #: cp/error.c:3385
-#, fuzzy
-#| msgid "called from here"
 msgid "required from here\n"
-msgstr "appelé d'ici"
+msgstr "requis depuis ici\n"
 
 #: cp/error.c:3437
 msgid "%r%s:%d:%d:%R   [ skipping %d instantiation contexts, use -ftemplate-backtrace-limit=0 to disable ]\n"
-msgstr ""
+msgstr "%r%s:%d:%d:%R   [ passe outre %d contextes d'instanciation, utilisez -ftemplate-backtrace-limit=0 pour désactiver ]\n"
 
 #: cp/error.c:3443
 msgid "%r%s:%d:%R   [ skipping %d instantiation contexts, use -ftemplate-backtrace-limit=0 to disable ]\n"
-msgstr ""
+msgstr "%r%s:%d:%R   [ passe outre %d contextes d'instanciation, utilisez -ftemplate-backtrace-limit=0 pour désactiver ]\n"
 
 #: cp/error.c:3497
 msgid "%r%s:%d:%d:%R   in constexpr expansion of %qs"
-msgstr ""
+msgstr "%r%s:%d:%d:%R   dans l'expansion de constexpr de %qs"
 
 #: cp/error.c:3501
 msgid "%r%s:%d:%R   in constexpr expansion of %qs"
-msgstr ""
+msgstr "%r%s:%d:%R   dans l'expansion de constexpr de %qs"
 
 #: cp/pt.c:1945 cp/semantics.c:5217
 msgid "candidates are:"
-msgstr "candidats sont :"
+msgstr "les candidats sont :"
 
 #: cp/pt.c:21086
 msgid "candidate is:"
 msgid_plural "candidates are:"
-msgstr[0] "candidat est :"
-msgstr[1] "candidats sont :"
+msgstr[0] "le candidat est :"
+msgstr[1] "les candidats sont :"
 
 #: cp/rtti.c:545
-#, fuzzy
-#| msgid "  overriding `%#D' (must be pointer or reference to class)"
 msgid "target is not pointer or reference to class"
-msgstr "doit être un pointeur ou une référence vers un classe"
+msgstr "la cible n'est pas un pointeur ni une référence à une classe"
 
 #: cp/rtti.c:550
-#, fuzzy
-#| msgid "arithmetic on pointer to an incomplete type"
 msgid "target is not pointer or reference to complete type"
-msgstr "arithmétique sur un pointeur vers un type incomplet"
+msgstr "la cible n'est pas un pointeur ni une référence à un type complet"
 
 #: cp/rtti.c:556
-#, fuzzy
-#| msgid "cannot declare pointers to references"
 msgid "target is not pointer or reference"
-msgstr "ne peut déclarer des pointeurs vers des références"
+msgstr "la cible n'est pas un pointeur ni une référence"
 
 #: cp/rtti.c:572
-#, fuzzy
-#| msgid "base operand of `->' is not a pointer"
 msgid "source is not a pointer"
-msgstr "l'opérande de base de «->» n'est pas un pointeur"
+msgstr "la source n'est pas un pointeur"
 
 #: cp/rtti.c:577
-#, fuzzy
-#| msgid "Assume that pointers not aliased"
 msgid "source is not a pointer to class"
-msgstr "Présumer que les pointeurs ne peuvent pas être aliasés"
+msgstr "la source n'est pas un pointeur vers une classe"
 
 #: cp/rtti.c:582
-#, fuzzy
-#| msgid "cannot %s a pointer to incomplete type `%T'"
 msgid "source is a pointer to incomplete type"
-msgstr "ne peut utiliser %s comme pointeur sur un type incomplet « %T »"
+msgstr "la source est un pointeur vers un type incomplet"
 
 #: cp/rtti.c:597
 msgid "source is not of class type"
-msgstr "la source n'est pas de type classe"
+msgstr "la source n'est pas de type « class »"
 
 #: cp/rtti.c:602
-#, fuzzy
-#| msgid "return type is an incomplete type"
 msgid "source is of incomplete class type"
-msgstr "le type du retour est incomplet"
+msgstr "la source est d'un type « class » incomplet"
 
 #: cp/rtti.c:611
 msgid "conversion casts away constness"
-msgstr ""
+msgstr "la conversion supprime l'attribut « const »"
 
 #: cp/rtti.c:767
 msgid "source type is not polymorphic"
-msgstr ""
+msgstr "le type source n'est pas polymorphique"
 
 #: cp/typeck.c:5820 c/c-typeck.c:4098
 #, gcc-internal-format
@@ -4794,7 +4697,7 @@ msgstr "type d'argument erroné pour le plus unaire"
 #: cp/typeck.c:5848 c/c-typeck.c:4124
 #, gcc-internal-format
 msgid "wrong type argument to bit-complement"
-msgstr "type d'argument erroné pour un complément de bit"
+msgstr "type d'argument erroné pour le complément de bit"
 
 #: cp/typeck.c:5855 c/c-typeck.c:4132
 #, gcc-internal-format
@@ -4808,23 +4711,23 @@ msgstr "type d'argument erroné pour la conjugaison"
 
 #: cp/typeck.c:5885
 msgid "in argument to unary !"
-msgstr ""
+msgstr "dans l'argument d'un unaire !"
 
 #: cp/typeck.c:5931
 msgid "no pre-increment operator for type"
-msgstr ""
+msgstr "pas d'opérateur de pré-incrémentation pour le type"
 
 #: cp/typeck.c:5933
 msgid "no post-increment operator for type"
-msgstr ""
+msgstr "pas d'opérateur de post-incrémentation pour le type"
 
 #: cp/typeck.c:5935
 msgid "no pre-decrement operator for type"
-msgstr ""
+msgstr "pas d'opérateur de pré-décrémentation pour le type"
 
 #: cp/typeck.c:5937
 msgid "no post-decrement operator for type"
-msgstr ""
+msgstr "pas d'opérateur de post-décrémentation pour le type"
 
 #: fortran/arith.c:95
 msgid "Arithmetic OK at %L"
@@ -4836,7 +4739,7 @@ msgstr "Débordement arithmétique à %L"
 
 #: fortran/arith.c:101
 msgid "Arithmetic underflow at %L"
-msgstr ""
+msgstr "Dépassement de capacité arithmétique par le bas à %L"
 
 #: fortran/arith.c:104
 msgid "Arithmetic NaN at %L"
@@ -4848,35 +4751,30 @@ msgstr "Division par zéro à %L"
 
 #: fortran/arith.c:110
 msgid "Array operands are incommensurate at %L"
-msgstr ""
+msgstr "Les opérandes de tableau ne sont pas comparables à %L"
 
 #: fortran/arith.c:114
 msgid "Integer outside symmetric range implied by Standard Fortran at %L"
-msgstr ""
+msgstr "L'entier est hors de la plage symétrique impliquée par le standard Fortran à %L"
 
 #: fortran/arith.c:1370
-#, fuzzy
-#| msgid "missing binary operator"
 msgid "elemental binary operation"
-msgstr "opérateur binaire manquant"
+msgstr "opération binaire élémentaire"
 
 #: fortran/check.c:2124 fortran/check.c:3115 fortran/check.c:3169
-#, fuzzy, c-format
-#| msgid "Too few arguments for intrinsic `%A' at %0"
+#, c-format
 msgid "arguments '%s' and '%s' for intrinsic %s"
-msgstr "trop peu d'arguments passés à l'intrinsèque «%A» à %0"
+msgstr "arguments « %s » et « %s » pour l'intrinsèque %s"
 
 #: fortran/check.c:2921
-#, fuzzy, c-format
-#| msgid "Too few arguments for intrinsic `%A' at %0"
+#, c-format
 msgid "arguments 'a%d' and 'a%d' for intrinsic '%s'"
-msgstr "trop peu d'arguments passés à l'intrinsèque «%A» à %0"
+msgstr "arguments « a%d » et « a%d » pour l'intrinsèque « %s »"
 
 #: fortran/check.c:3444 fortran/intrinsic.c:4290
-#, fuzzy, c-format
-#| msgid "Too few arguments for intrinsic `%A' at %0"
+#, c-format
 msgid "arguments '%s' and '%s' for intrinsic '%s'"
-msgstr "trop peu d'arguments passés à l'intrinsèque «%A» à %0"
+msgstr "arguments « %s » et « %s » pour l'intrinsèque « %s »"
 
 #: fortran/error.c:871
 msgid "Fortran 2008 obsolescent feature:"
@@ -4899,22 +4797,19 @@ msgid "Deleted feature:"
 msgstr "Fonctionnalité supprimée :"
 
 #: fortran/expr.c:622
-#, fuzzy, c-format
-#| msgid "non-constant expression for BIN"
+#, c-format
 msgid "Constant expression required at %C"
-msgstr "expression n'est pas un constante pour BIN"
+msgstr "Expression constante requise à %C"
 
 #: fortran/expr.c:625
-#, fuzzy, c-format
-#| msgid "untyped expression as argument %d"
+#, c-format
 msgid "Integer expression required at %C"
-msgstr "expression sans type comme argument %d"
+msgstr "Expression entière requise à %C"
 
 #: fortran/expr.c:630
-#, fuzzy, c-format
-#| msgid "integer overflow in expression"
+#, c-format
 msgid "Integer value too large in expression at %C"
-msgstr "débordement d'entier dans l'expression"
+msgstr "Valeur entière trop grande dans l'expression à %C"
 
 #: fortran/expr.c:3242
 msgid "array assignment"
@@ -4923,113 +4818,93 @@ msgstr "affectation de tableau"
 #: fortran/gfortranspec.c:425
 #, c-format
 msgid "Driving:"
-msgstr ""
+msgstr "Pilotage :"
 
 #: fortran/interface.c:3048 fortran/intrinsic.c:3994
 msgid "actual argument to INTENT = OUT/INOUT"
-msgstr ""
+msgstr "argument effectif de INTENT = OUT/INOUT"
 
 #: fortran/io.c:550
-#, fuzzy
-#| msgid "  but %d required"
 msgid "Positive width required"
-msgstr "mais %d son requis"
+msgstr "Largeur positive requise"
 
 #: fortran/io.c:551
-#, fuzzy
-#| msgid "negative width in bit-field `%s'"
 msgid "Nonnegative width required"
-msgstr "largeur négative du champ de bits"
+msgstr "Largeur non négative requise"
 
 #: fortran/io.c:552
-#, fuzzy
-#| msgid "unterminated format string"
 msgid "Unexpected element %qc in format string at %L"
-msgstr "chaîne de format non terminée"
+msgstr "Élément %qc inattendu dans la chaîne de format à %L"
 
 #: fortran/io.c:554
-#, fuzzy
-#| msgid "unterminated format string"
 msgid "Unexpected end of format string"
-msgstr "chaîne de format non terminée"
+msgstr "Fin inattendue de la chaîne de format"
 
 #: fortran/io.c:555
-#, fuzzy
-#| msgid "zero width in %s format"
 msgid "Zero width in format descriptor"
-msgstr "largeur zéro dans le format %s"
+msgstr "Largeur zéro dans le descripteur de format"
 
 #: fortran/io.c:575
-#, fuzzy
-#| msgid "missing open paren"
 msgid "Missing leading left parenthesis"
-msgstr "parenthèse ouvrante manquante"
+msgstr "Parenthèse gauche d'ouverture manquante"
 
 #: fortran/io.c:604
-#, fuzzy
-#| msgid "template argument required for `%s %T'"
 msgid "Left parenthesis required after %<*%>"
-msgstr "argument du patron est requis pour « %s %T »"
+msgstr "Parenthèse gauche requise après %<*%>"
 
 #: fortran/io.c:635
 msgid "Expected P edit descriptor"
-msgstr ""
+msgstr "Descripteur d'édition P attendu"
 
 #. P requires a prior number.
 #: fortran/io.c:643
 msgid "P descriptor requires leading scale factor"
-msgstr ""
+msgstr "Le descripteur P doit être annoncé par un facteur d'échelle"
 
 #: fortran/io.c:736 fortran/io.c:750
 msgid "Comma required after P descriptor"
-msgstr ""
+msgstr "Virgule requise après le descripteur P"
 
 #: fortran/io.c:764
 msgid "Positive width required with T descriptor"
-msgstr ""
+msgstr "Largeur positive requise avec le descripteur T"
 
 #: fortran/io.c:843
-#, fuzzy
-#| msgid "type specifier `%s' not allowed after struct or class"
 msgid "E specifier not allowed with g0 descriptor"
-msgstr "spécificateur de type « %s » n'est pas permis après struct ou class"
+msgstr "Spécificateur E pas permis avec le descripteur g0"
 
 #: fortran/io.c:913
-#, fuzzy
-#| msgid "no exponent width (offset %d)"
 msgid "Positive exponent width required"
-msgstr "pas de largeur d'exposant (décalage %d)"
+msgstr "Largeur d'exposant positive requise"
 
 #: fortran/io.c:943
-#, fuzzy
-#| msgid "unrecognized format specifier"
 msgid "Period required in format specifier"
-msgstr "spécificateur de format non reconnu"
+msgstr "Point requis dans le spécificateur de format"
 
 #: fortran/io.c:1570
 #, c-format
 msgid "%s tag"
-msgstr "tag %s"
+msgstr "étiquette %s"
 
 #: fortran/io.c:2966
 msgid "internal unit in WRITE"
-msgstr ""
+msgstr "unité interne dans WRITE"
 
 #. For INQUIRE, all tags except FILE, ID and UNIT are variable definition
 #. contexts.  Thus, use an extended RESOLVE_TAG macro for that.
 #: fortran/io.c:4185
 #, c-format
 msgid "%s tag with INQUIRE"
-msgstr ""
+msgstr "étiquette %s avec INQUIRE"
 
 #: fortran/matchexp.c:28
 #, c-format
 msgid "Syntax error in expression at %C"
-msgstr "erreur de syntaxe dans l'exception à %C"
+msgstr "Erreur de syntaxe dans l'expression à %C"
 
 #: fortran/module.c:1204
 msgid "Unexpected EOF"
-msgstr "fin de fichier inattendue"
+msgstr "Fin de fichier inattendue"
 
 #: fortran/module.c:1288
 msgid "Integer overflow"
@@ -5049,7 +4924,7 @@ msgstr "Nom attendu"
 
 #: fortran/module.c:1550
 msgid "Expected left parenthesis"
-msgstr "Parenthèse droite attendu"
+msgstr "Parenthèse gauche attendue"
 
 #: fortran/module.c:1553
 msgid "Expected right parenthesis"
@@ -5065,41 +4940,35 @@ msgstr "Chaîne attendue"
 
 #: fortran/module.c:1584
 msgid "find_enum(): Enum not found"
-msgstr "find_enum() : Énumération introuvable"
+msgstr "find_enum(): Énumération introuvable"
 
 #: fortran/module.c:2265
-#, fuzzy
-#| msgid "packed attribute is unnecessary"
 msgid "Expected attribute bit name"
-msgstr "attribut empaqueté n'est pas nécessaire"
+msgstr "Nom de bit d'attribut attendu"
 
 #: fortran/module.c:3151
 msgid "Expected integer string"
-msgstr "Chaîne d'entiers attendue"
+msgstr "Chaîne contenant un entier attendue"
 
 #: fortran/module.c:3155
 msgid "Error converting integer"
-msgstr "Erreur de conversion d'entier"
+msgstr "Erreur de conversion de l'entier"
 
 #: fortran/module.c:3177
 msgid "Expected real string"
-msgstr "Chaîne de réels attendue"
+msgstr "Chaîne contenant un réel attendue"
 
 #: fortran/module.c:3401
-#, fuzzy
-#| msgid "unexpected address expression"
 msgid "Expected expression type"
-msgstr "expression d'adresse inattendue"
+msgstr "Type d'expression attendu"
 
 #: fortran/module.c:3481
 msgid "Bad operator"
-msgstr "Mauvais opérande"
+msgstr "Mauvais opérateur"
 
 #: fortran/module.c:3596
-#, fuzzy
-#| msgid "overflow in constant expression"
 msgid "Bad type in constant expression"
-msgstr "débordement dans l'expression de la constante"
+msgstr "Mauvais type dans l'expression constante"
 
 #: fortran/module.c:6946
 msgid "Unexpected end of module"
@@ -5111,7 +4980,7 @@ msgstr "IF arithmétique"
 
 #: fortran/parse.c:1680
 msgid "attribute declaration"
-msgstr "Déclaration d'attribut"
+msgstr "déclaration d'attribut"
 
 #: fortran/parse.c:1716
 msgid "data declaration"
@@ -5119,7 +4988,7 @@ msgstr "déclaration de données"
 
 #: fortran/parse.c:1734
 msgid "derived type declaration"
-msgstr "déclaration vide"
+msgstr "déclaration de type dérivé"
 
 #: fortran/parse.c:1846
 msgid "block IF"
@@ -5127,7 +4996,7 @@ msgstr "bloc IF"
 
 #: fortran/parse.c:1855
 msgid "implied END DO"
-msgstr "implique END DO"
+msgstr "END DO implicite"
 
 #: fortran/parse.c:1949 fortran/resolve.c:10537
 msgid "assignment"
@@ -5143,41 +5012,31 @@ msgstr "IF simple"
 
 #: fortran/resolve.c:606
 msgid "module procedure"
-msgstr ""
+msgstr "procédure de module"
 
 #: fortran/resolve.c:607
 msgid "internal function"
 msgstr "fonction interne"
 
 #: fortran/resolve.c:2157 fortran/resolve.c:2351
-#, fuzzy
-#| msgid "too few arguments to procedure"
 msgid "elemental procedure"
-msgstr "trop peu d'arguments pour la procédure"
+msgstr "procédure élémentaire"
 
 #: fortran/resolve.c:2254
-#, fuzzy
-#| msgid "invalid type argument"
 msgid "allocatable argument"
-msgstr "type d'argument invalide"
+msgstr "argument qui peut être alloué"
 
 #: fortran/resolve.c:2259
-#, fuzzy
-#| msgid "no arguments"
 msgid "asynchronous argument"
-msgstr "pas d'argument"
+msgstr "argument asynchrone"
 
 #: fortran/resolve.c:2264
-#, fuzzy
-#| msgid "no arguments"
 msgid "optional argument"
-msgstr "pas d'argument"
+msgstr "argument optionnel"
 
 #: fortran/resolve.c:2269
-#, fuzzy
-#| msgid "no arguments"
 msgid "pointer argument"
-msgstr "pas d'argument"
+msgstr "argument pointeur"
 
 #: fortran/resolve.c:2274
 msgid "target argument"
@@ -5192,26 +5051,20 @@ msgid "volatile argument"
 msgstr "argument volatile"
 
 #: fortran/resolve.c:2289
-#, fuzzy
-#| msgid "invalid type argument"
 msgid "assumed-shape argument"
-msgstr "type d'argument invalide"
+msgstr "argument de forme tacite (assumed-shape)"
 
 #: fortran/resolve.c:2294
-#, fuzzy
-#| msgid "no arguments"
 msgid "assumed-rank argument"
-msgstr "pas d'argument"
+msgstr "argument de rang tacite (assumed-rank)"
 
 #: fortran/resolve.c:2299
 msgid "coarray argument"
 msgstr "argument co-tableau"
 
 #: fortran/resolve.c:2304
-#, fuzzy
-#| msgid "parameterized structures not implemented"
 msgid "parametrized derived type argument"
-msgstr "structure paramétrisée ne sont pas implantées"
+msgstr "argument de type dérivé paramétrisé"
 
 #: fortran/resolve.c:2309
 msgid "polymorphic argument"
@@ -5219,106 +5072,93 @@ msgstr "argument polymorphique"
 
 #: fortran/resolve.c:2314
 msgid "NO_ARG_CHECK attribute"
-msgstr ""
+msgstr "attribut NO_ARG_CHECK"
 
 #. As assumed-type is unlimited polymorphic (cf. above).
 #. See also TS 29113, Note 6.1.
 #: fortran/resolve.c:2321
 msgid "assumed-type argument"
-msgstr "argument assumed-type"
+msgstr "argument de type tacite (assumed-type)"
 
 #: fortran/resolve.c:2332
 msgid "array result"
-msgstr ""
+msgstr "résultat tableau"
 
 #: fortran/resolve.c:2337
 msgid "pointer or allocatable result"
-msgstr ""
+msgstr "résultat pointeur ou allouable"
 
 #: fortran/resolve.c:2344
-#, fuzzy
-#| msgid "bit array slice with non-constant length"
 msgid "result with non-constant character length"
-msgstr "découpage de tableau de bits avec une longueur non constante"
+msgstr "résultat avec une longueur de caractères non constante"
 
 #: fortran/resolve.c:2356
 msgid "bind(c) procedure"
-msgstr ""
+msgstr "procédure bind(c)"
 
 #: fortran/resolve.c:3558
-#, fuzzy, c-format
-#| msgid "Invalid continuation line at %0"
+#, c-format
 msgid "Invalid context for NULL() pointer at %%L"
-msgstr "ligne de continuation invalide à %0"
+msgstr "Contexte invalide pour un pointeur NULL() à %%L"
 
 #: fortran/resolve.c:3574
-#, fuzzy, c-format
-#| msgid "Invalid operand at %1 for concatenation operator at %0"
+#, c-format
 msgid "Operand of unary numeric operator %%<%s%%> at %%L is %s"
-msgstr "Opérande invalide à %1 pour l'opérateur de concaténation à %0"
+msgstr "L'opérande de l'opérateur numérique unaire %%<%s%%> à %%L est %s"
 
 #: fortran/resolve.c:3590
-#, fuzzy, c-format
-#| msgid "Invalid operand at %1 for concatenation operator at %0"
+#, c-format
 msgid "Operands of binary numeric operator %%<%s%%> at %%L are %s/%s"
-msgstr "Opérande invalide à %1 pour l'opérateur de concaténation à %0"
+msgstr "Les opérandes de l'opérateur numérique binaire %%<%s%%> à %%L sont %s/%s"
 
 #: fortran/resolve.c:3605
-#, fuzzy, c-format
-#| msgid "Invalid operand at %1 for concatenation operator at %0"
+#, c-format
 msgid "Operands of string concatenation operator at %%L are %s/%s"
-msgstr "Opérande invalide à %1 pour l'opérateur de concaténation à %0"
+msgstr "Les opérandes de l'opérateur de concaténation de chaînes à %%L sont %s/%s"
 
 #: fortran/resolve.c:3624
-#, fuzzy, c-format
-#| msgid "Invalid operand at %1 for concatenation operator at %0"
+#, c-format
 msgid "Operands of logical operator %%<%s%%> at %%L are %s/%s"
-msgstr "Opérande invalide à %1 pour l'opérateur de concaténation à %0"
+msgstr "Les opérandes de l'opérateur logique %%<%s%%> à %%L sont %s/%s"
 
 #: fortran/resolve.c:3638
 #, c-format
 msgid "Operand of .not. operator at %%L is %s"
-msgstr ""
+msgstr "L'opérande de l'opérateur .not. à %%L est %s"
 
 #: fortran/resolve.c:3652
-#, fuzzy
-#| msgid "`%V' qualifiers cannot be applied to `%T'"
 msgid "COMPLEX quantities cannot be compared at %L"
-msgstr "qualificateur « %V » ne peut pas être appliqué à « %T »"
+msgstr "Les quantités COMPLEX ne peuvent pas être comparées à %L"
 
 #: fortran/resolve.c:3703
 #, c-format
 msgid "Logicals at %%L must be compared with %s instead of %s"
-msgstr ""
+msgstr "Les valeurs logiques à %%L doivent être comparées avec %s au lieu de %s"
 
 #: fortran/resolve.c:3709
-#, fuzzy, c-format
-#| msgid "Invalid operand at %1 for concatenation operator at %0"
+#, c-format
 msgid "Operands of comparison operator %%<%s%%> at %%L are %s/%s"
-msgstr "Opérande invalide à %1 pour l'opérateur de concaténation à %0"
+msgstr "Les opérandes de l'opérateur de comparaison %%<%s%%> à %%L sont %s/%s"
 
 #: fortran/resolve.c:3717
-#, fuzzy, c-format
-#| msgid "Unknown operator '%s' at %%L"
+#, c-format
 msgid "Unknown operator %%<%s%%> at %%L"
-msgstr "Opérateur inconnu « %s » à %%L"
+msgstr "Opérateur %%<%s%%> inconnu à %%L"
 
 #: fortran/resolve.c:3720
-#, fuzzy, c-format
-#| msgid "Invalid operand at %1 for concatenation operator at %0"
+#, c-format
 msgid "Operand of user operator %%<%s%%> at %%L is %s"
-msgstr "Opérande invalide à %1 pour l'opérateur de concaténation à %0"
+msgstr "L'opérande de l'opérateur utilisateur %%<%s%%> à %%L est %s"
 
 #: fortran/resolve.c:3724
-#, fuzzy, c-format
-#| msgid "Invalid operand at %1 for concatenation operator at %0"
+#, c-format
 msgid "Operands of user operator %%<%s%%> at %%L are %s/%s"
-msgstr "Opérande invalide à %1 pour l'opérateur de concaténation à %0"
+msgstr "Les opérandes de l'opérateur utilisateur %%<%s%%> à %%L sont %s/%s"
 
 #: fortran/resolve.c:3812
 #, c-format
 msgid "Inconsistent ranks for operator at %%L and %%L"
-msgstr ""
+msgstr "Rangs incohérents pour l'opérateur à %%L et %%L"
 
 #: fortran/resolve.c:6476
 msgid "Loop variable"
@@ -5326,25 +5166,19 @@ msgstr "Variable de boucle"
 
 #: fortran/resolve.c:6480
 msgid "iterator variable"
-msgstr "Variable d'itérateur"
+msgstr "variable d'itérateur"
 
 #: fortran/resolve.c:6484
-#, fuzzy
-#| msgid "SET expression not a location"
 msgid "Start expression in DO loop"
-msgstr "expression de l'ENSEMBLE n'est pas une localisation"
+msgstr "Expression de départ dans la boucle DO"
 
 #: fortran/resolve.c:6488
-#, fuzzy
-#| msgid "SET expression not a location"
 msgid "End expression in DO loop"
-msgstr "expression de l'ENSEMBLE n'est pas une localisation"
+msgstr "Expression de fin dans la boucle DO"
 
 #: fortran/resolve.c:6492
-#, fuzzy
-#| msgid "SET expression not a location"
 msgid "Step expression in DO loop"
-msgstr "expression de l'ENSEMBLE n'est pas une localisation"
+msgstr "Expression de pas dans la boucle DO"
 
 #: fortran/resolve.c:6749 fortran/resolve.c:6752
 msgid "DEALLOCATE object"
@@ -5352,11 +5186,11 @@ msgstr "objet DEALLOCATE"
 
 #: fortran/resolve.c:7119 fortran/resolve.c:7122
 msgid "ALLOCATE object"
-msgstr "object ALLOCATE"
+msgstr "objet ALLOCATE"
 
 #: fortran/resolve.c:7351 fortran/resolve.c:8798
 msgid "STAT variable"
-msgstr "object STAT"
+msgstr "variable STAT"
 
 #: fortran/resolve.c:7395 fortran/resolve.c:8810
 msgid "ERRMSG variable"
@@ -5373,109 +5207,99 @@ msgstr "variable ACQUIRED_LOCK"
 #: fortran/trans-array.c:1428
 #, c-format
 msgid "Different CHARACTER lengths (%ld/%ld) in array constructor"
-msgstr ""
+msgstr "Longueurs de CHARACTER différentes (%ld/%ld) dans le constructeur de tableau"
 
 #: fortran/trans-array.c:5491
 msgid "Integer overflow when calculating the amount of memory to allocate"
-msgstr ""
+msgstr "Débordement d'entier en calculant la quantité de mémoire à allouer"
 
 #: fortran/trans-decl.c:5503
 #, c-format
 msgid "Actual string length does not match the declared one for dummy argument '%s' (%ld/%ld)"
-msgstr ""
+msgstr "La longueur réelle de la chaîne ne correspond pas à celle déclarée pour l'argument fictif « %s » (%ld/%ld)"
 
 #: fortran/trans-decl.c:5511
 #, c-format
 msgid "Actual string length is shorter than the declared one for dummy argument '%s' (%ld/%ld)"
-msgstr ""
+msgstr "La longueur réelle de la chaîne est plus courte que celle déclarée pour l'argument fictif « %s » (%ld/%ld)"
 
 #: fortran/trans-expr.c:8170
 #, c-format
 msgid "Target of rank remapping is too small (%ld < %ld)"
-msgstr ""
+msgstr "La cible du remappage de rang est trop petite (%ld < %ld)"
 
 #: fortran/trans-expr.c:9375
 msgid "Assignment of scalar to unallocated array"
-msgstr ""
+msgstr "Assignation d'un scalaire à un tableau non alloué"
 
 #: fortran/trans-intrinsic.c:897
 #, c-format
 msgid "Unequal character lengths (%ld/%ld) in %s"
-msgstr ""
+msgstr "Longueurs de caractères inégales (%ld/%ld) dans %s"
 
 #: fortran/trans-intrinsic.c:7002
 #, c-format
 msgid "Argument NCOPIES of REPEAT intrinsic is negative (its value is %ld)"
-msgstr ""
+msgstr "L'argument NCOPIES de l'intrinsèque REPEAT est négatif (sa valeur est %ld)"
 
 #: fortran/trans-intrinsic.c:7034
 msgid "Argument NCOPIES of REPEAT intrinsic is too large"
-msgstr ""
+msgstr "L'argument NCOPIES de l'intrinsèque REPEAT est trop grand"
 
 #: fortran/trans-io.c:560
-#, fuzzy
-#| msgid "Spurious number in FORMAT statement at %0"
 msgid "Unit number in I/O statement too small"
-msgstr "faux nombre dans la déclaration de FORMAT à %0"
+msgstr "Le numéro d'unité dans l'instruction E/S est trop petit"
 
 #: fortran/trans-io.c:569
-#, fuzzy
-#| msgid "Spurious number in FORMAT statement at %0"
 msgid "Unit number in I/O statement too large"
-msgstr "faux nombre dans la déclaration de FORMAT à %0"
+msgstr "Le numéro d'unité dans l'instruction E/S est trop grand"
 
 #: fortran/trans-stmt.c:156
-#, fuzzy
-#| msgid "mode in label is not discrete"
 msgid "Assigned label is not a target label"
-msgstr "mode dans l'étiquette n'est pas discret"
+msgstr "L'étiquette assignée n'est pas une étiquette cible"
 
 #: fortran/trans-stmt.c:1101
 #, c-format
 msgid "Invalid image number %d in SYNC IMAGES"
-msgstr ""
+msgstr "Numéro d'image %d invalide dans SYNC IMAGES"
 
 #: fortran/trans-stmt.c:1898 fortran/trans-stmt.c:2183
 msgid "Loop variable has been modified"
-msgstr ""
+msgstr "La variable de boucle a été modifiée"
 
 #: fortran/trans-stmt.c:2038
 msgid "DO step value is zero"
-msgstr ""
+msgstr "La valeur de pas de DO est zéro"
 
 #: fortran/trans.c:47
-#, fuzzy
-#| msgid "forming reference to void"
 msgid "Array reference out of bounds"
-msgstr "formation d'une référence en void"
+msgstr "Référence de tableau hors limites"
 
 #: fortran/trans.c:48
-#, fuzzy
-#| msgid "`noreturn' function returns non-void value"
 msgid "Incorrect function return value"
-msgstr "fonction marquée « noreturn » retourne une valeur n'étant pas de type « void »"
+msgstr "Valeur de retour de fonction incorrecte"
 
 #: fortran/trans.c:607
 msgid "Memory allocation failed"
-msgstr ""
+msgstr "L'allocation de mémoire a échoué"
 
 #: fortran/trans.c:688 fortran/trans.c:1527
 msgid "Allocation would exceed memory limit"
-msgstr ""
+msgstr "L'allocation dépasserait la limite de la mémoire"
 
 #: fortran/trans.c:858
 #, c-format
 msgid "Attempting to allocate already allocated variable '%s'"
-msgstr ""
+msgstr "Tentative d'allouer la variable « %s » qui est déjà allouée"
 
 #: fortran/trans.c:864
 msgid "Attempting to allocate already allocated variable"
-msgstr ""
+msgstr "Tentative d'allouer une variable déjà allouée"
 
 #: fortran/trans.c:1274 fortran/trans.c:1428
 #, c-format
 msgid "Attempt to DEALLOCATE unallocated '%s'"
-msgstr ""
+msgstr "Tentative d'utiliser DEALLOCATE sur « %s » qui n'est pas alloué"
 
 #. The remainder are real diagnostic types.
 #: fortran/gfc-diagnostic.def:33
@@ -5485,10 +5309,8 @@ msgstr "Erreur fatale"
 #. This is like DK_ICE, but backtrace is not printed.  Used in the driver
 #. when reporting fatal signal in the compiler.
 #: fortran/gfc-diagnostic.def:34 fortran/gfc-diagnostic.def:50
-#, fuzzy
-#| msgid "internal compiler error: "
 msgid "internal compiler error"
-msgstr "erreur interne du compilateur: "
+msgstr "erreur interne du compilateur"
 
 #: fortran/gfc-diagnostic.def:35
 msgid "Error"
@@ -5504,7 +5326,7 @@ msgstr "Avertissement"
 
 #: fortran/gfc-diagnostic.def:38
 msgid "anachronism"
-msgstr "Anachronisme"
+msgstr "anachronisme"
 
 #: fortran/gfc-diagnostic.def:39
 msgid "note"
@@ -5532,29 +5354,29 @@ msgstr "erreur"
 
 #: go/go-backend.c:171
 msgid "lseek failed while reading export data"
-msgstr ""
+msgstr "lseek a échoué en lisant les données d'export"
 
 #: go/go-backend.c:178
 msgid "memory allocation failed while reading export data"
-msgstr ""
+msgstr "l'allocation de mémoire a échoué en lisant les données d'export"
 
 #: go/go-backend.c:186
 msgid "read failed while reading export data"
-msgstr ""
+msgstr "la lecture a échoué en lisant les données d'export"
 
 #: go/go-backend.c:192
 msgid "short read while reading export data"
-msgstr ""
+msgstr "trop peu de données lues en lisant les données d'export"
 
 #: java/jcf-dump.c:1127
 #, c-format
 msgid "Not a valid Java .class file.\n"
-msgstr "n'est pas un fichier Java .class valide.\n"
+msgstr "N'est pas un fichier Java .class valide.\n"
 
 #: java/jcf-dump.c:1133
 #, c-format
 msgid "error while parsing constant pool\n"
-msgstr "error lors de l'analyse syntaxique du lots de constantes\n"
+msgstr "erreur lors de l'analyse syntaxique du lot de constantes\n"
 
 #: java/jcf-dump.c:1139 java/jcf-parse.c:1429
 #, gcc-internal-format, gfc-internal-format
@@ -5574,7 +5396,7 @@ msgstr "erreur lors de l'analyse syntaxique des méthodes\n"
 #: java/jcf-dump.c:1161
 #, c-format
 msgid "error while parsing final attributes\n"
-msgstr "erreur lors de l'analyse syntaxique des attributs\n"
+msgstr "erreur lors de l'analyse syntaxique des attributs finaux\n"
 
 #: java/jcf-dump.c:1198
 #, c-format
@@ -5596,63 +5418,58 @@ msgid ""
 "Display contents of a class file in readable form.\n"
 "\n"
 msgstr ""
+"Afficher le contenu d'un fichier de classe dans une forme lisible.\n"
+"\n"
 
 #: java/jcf-dump.c:1207
-#, fuzzy, c-format
-#| msgid "  -W                      Enable extra warnings\n"
+#, c-format
 msgid "  -c                      Disassemble method bodies\n"
-msgstr "  -c                      autoriser les avertissements additionnels\n"
+msgstr "  -c                      Désassembler les corps des méthodes\n"
 
 #: java/jcf-dump.c:1208
-#, fuzzy, c-format
-#| msgid "  --help                   Display this information\n"
+#, c-format
 msgid "  --javap                 Generate output in 'javap' format\n"
-msgstr "  --javap                 afficher l'aide mémoire\n"
+msgstr "  --javap                 Générer la sortie au format « javap »\n"
 
 #: java/jcf-dump.c:1210
 #, c-format
 msgid "  --classpath PATH        Set path to find .class files\n"
-msgstr ""
+msgstr "  --classpath CHEMIN      Fixer le chemin pour trouver les fichiers .class\n"
 
 #: java/jcf-dump.c:1211
-#, fuzzy, c-format
-#| msgid "  -B <directory>           Add <directory> to the compiler's search paths\n"
+#, c-format
 msgid "  -IDIR                   Append directory to class path\n"
-msgstr "  -B <répertoire>          ajouter le <répertoire> aux chemins de recherche du compilateur\n"
+msgstr "  -IDIR                   Ajouter le répertoire au chemin des classes\n"
 
 #: java/jcf-dump.c:1212
 #, c-format
 msgid "  --bootclasspath PATH    Override built-in class path\n"
-msgstr ""
+msgstr "  --bootclasspath CHEMIN  Écraser le chemin de classe intégré\n"
 
 #: java/jcf-dump.c:1213
 #, c-format
 msgid "  --extdirs PATH          Set extensions directory path\n"
-msgstr ""
+msgstr "  --extdirs CHEMIN        Fixer le chemin du répertoire des extensions\n"
 
 #: java/jcf-dump.c:1214
-#, fuzzy, c-format
-#| msgid "  -o <file>                Place the output into <file>\n"
+#, c-format
 msgid "  -o FILE                 Set output file name\n"
-msgstr "  -o FICHIER             placer la sortie dans le <fichier>\n"
+msgstr "  -o FICHIER              Choisir le nom du fichier de sortie\n"
 
 #: java/jcf-dump.c:1216
-#, fuzzy, c-format
-#| msgid "  -h, --help                      Print this help, then exit\n"
+#, c-format
 msgid "  --help                  Print this help, then exit\n"
-msgstr "  -h, --help               afficher l'aide mémoire\n"
+msgstr "  --help                  Afficher cette aide puis quitter\n"
 
 #: java/jcf-dump.c:1217
-#, fuzzy, c-format
-#| msgid "  -v, --version                   Print version number, then exit\n"
+#, c-format
 msgid "  --version               Print version number, then exit\n"
-msgstr "  -v, --version            exécuter le numéro de <version> de gcc, si installée\n"
+msgstr "  --version               Afficher le numéro de version puis quitter\n"
 
 #: java/jcf-dump.c:1218
-#, fuzzy, c-format
-#| msgid "  -v, --version                   Print version number, then exit\n"
+#, c-format
 msgid "  -v, --verbose           Print extra information while running\n"
-msgstr "  -v, --version            exécuter le numéro de <version> de gcc, si installée\n"
+msgstr "  -v, --verbose           Afficher des informations supplémentaires pendant l'exécution\n"
 
 #: java/jcf-dump.c:1220
 #, c-format
@@ -5660,13 +5477,13 @@ msgid ""
 "For bug reporting instructions, please see:\n"
 "%s.\n"
 msgstr ""
-"Pour les instructions de rapport des anomales, veuillez consulter :\n"
+"Pour les instructions de rapport des anomalies, veuillez consulter :\n"
 "%s.\n"
 
 #: java/jcf-dump.c:1258 java/jcf-dump.c:1326
 #, c-format
 msgid "jcf-dump: no classes specified\n"
-msgstr "jcf-dump : pas de classes spécifiées\n"
+msgstr "jcf-dump: pas de classes spécifiées\n"
 
 #: java/jcf-dump.c:1346
 #, c-format
@@ -5676,7 +5493,7 @@ msgstr "Impossible d'ouvrir « %s » pour écrire la sortie.\n"
 #: java/jcf-dump.c:1391
 #, c-format
 msgid "bad format of .zip/.jar archive\n"
-msgstr "format d'archive .zip/.jar incorrect\n"
+msgstr "mauvais format d'archive .zip/.jar\n"
 
 #: java/jcf-dump.c:1509
 #, c-format
@@ -5686,17 +5503,17 @@ msgstr "Mauvais codes d'octet.\n"
 #: java/jvgenmain.c:44
 #, c-format
 msgid "Usage: %s [OPTIONS]... CLASSNAMEmain [OUTFILE]\n"
-msgstr "Usage : %s [OPTION]… NOMCLASSprincipale [FICHIERSORTIE]\n"
+msgstr "Usage : %s [OPTIONS]… NOMCLASSprincipale [FICHIERSORTIE]\n"
 
 #: java/jvgenmain.c:117
 #, c-format
 msgid "%s: Cannot open output file: %s\n"
-msgstr "%s : impossible d'ouvrir le fichier : %s\n"
+msgstr "%s: Impossible d'ouvrir le fichier de sortie : %s\n"
 
 #: java/jvgenmain.c:167
 #, c-format
 msgid "%s: Failed to close output file %s\n"
-msgstr "%s : Impossible de fermer le fichier de sortie %s\n"
+msgstr "%s: Impossible de fermer le fichier de sortie %s\n"
 
 #: objc/objc-act.c:6179 cp/cxx-pretty-print.c:159
 #, gcc-internal-format
@@ -5717,11 +5534,11 @@ msgstr "-fuse-linker-plugin n'est pas supporté par cette configuration"
 
 #: gcc.c:976
 msgid "cannot specify -static with -fsanitize=address"
-msgstr "Impossible de spécifier -static avec -fsanitize=adresse"
+msgstr "impossible de spécifier -static avec -fsanitize=address"
 
 #: gcc.c:978
 msgid "cannot specify -static with -fsanitize=thread"
-msgstr ""
+msgstr "impossible de spécifier -static avec -fsanitize=thread"
 
 #: gcc.c:1122 ada/gcc-interface/lang-specs.h:33 java/jvspec.c:79
 msgid "-pg and -fomit-frame-pointer are incompatible"
@@ -5733,7 +5550,7 @@ msgstr "GNU C ne supporte plus -traditional sans -E"
 
 #: gcc.c:1307
 msgid "-E or -x required when input is from standard input"
-msgstr "-E ou -x est requis lorsque l'entrée est faite à partir de l'entrée standard"
+msgstr "-E ou -x est requis lorsque l'entrée vient de l'entrée standard"
 
 #: config/darwin.h:252
 msgid "-current_version only allowed with -dynamiclib"
@@ -5741,7 +5558,7 @@ msgstr "-current_version permis seulement avec -dynamiclib"
 
 #: config/darwin.h:254
 msgid "-install_name only allowed with -dynamiclib"
-msgstr "-install_name permis seulement avec with -dynamiclib"
+msgstr "-install_name permis seulement avec -dynamiclib"
 
 #: config/darwin.h:259
 msgid "-bundle not allowed with -dynamiclib"
@@ -5771,31 +5588,27 @@ msgstr "-private_bundle n'est pas permis avec -dynamiclib"
 #: config/i386/freebsd64.h:35 config/ia64/freebsd.h:26
 #: config/rs6000/sysv4.h:731 config/sparc/freebsd.h:45
 msgid "consider using '-pg' instead of '-p' with gprof(1)"
-msgstr ""
+msgstr "envisagez d'utiliser « -pg » au lieu de « -p » avec gprof(1)"
 
 #: config/lynx.h:69
 msgid "cannot use mthreads and mlegacy-threads together"
-msgstr ""
+msgstr "impossible d'utiliser simultanément mthreads et mlegacy-threads"
 
 #: config/lynx.h:94
 msgid "cannot use mshared and static together"
-msgstr ""
+msgstr "impossible d'utiliser simultanément mshared et static"
 
 #: config/sol2.h:181
-#, fuzzy
-#| msgid "-m%s not supported in this configuration"
 msgid "-fvtable-verify is not supported in this configuration"
-msgstr "-m%s n'est pas supporté par cette configuration"
+msgstr "-fvtable-verify n'est pas supporté par cette configuration"
 
 #: config/sol2.h:268 config/sol2.h:273
 msgid "does not support multilib"
 msgstr "ne supporte pas multilib"
 
 #: config/sol2.h:365
-#, fuzzy
-#| msgid "-gz is not supported in this configuration"
 msgid "-pie is not supported in this configuration"
-msgstr "-gz n'est pas supporté par cette configuration"
+msgstr "-pie n'est pas supporté par cette configuration"
 
 #: config/vxworks.h:70
 msgid "-Xbind-now and -Xbind-lazy are incompatible"
@@ -5803,11 +5616,11 @@ msgstr "-Xbind-now et -Xbind-lazy sont incompatibles"
 
 #: config/arc/arc.h:142 config/mips/mips.h:1358
 msgid "may not use both -EB and -EL"
-msgstr "ne peut utiliser ensemble -EB et -EL"
+msgstr "ne peut utiliser -EB et -EL ensemble"
 
 #: config/arm/arm.h:100
 msgid "-mfloat-abi=soft and -mfloat-abi=hard may not be used together"
-msgstr "-mfloat-abi=soft et -mfloat-abi=hard ne peuvent être utilisées ensembles"
+msgstr "-mfloat-abi=soft et -mfloat-abi=hard ne peuvent pas être utilisés ensemble"
 
 #: config/arm/arm.h:102 config/tilegx/tilegx.h:546 config/tilegx/tilegx.h:551
 msgid "-mbig-endian and -mlittle-endian may not be used together"
@@ -5815,21 +5628,19 @@ msgstr "-mbig-endian et -mlittle-endian ne peuvent être utilisés ensemble"
 
 #: config/arm/freebsd.h:49
 msgid "consider using `-pg' instead of `-p' with gprof (1) "
-msgstr ""
+msgstr "envisagez d'utiliser « -pg » au lieu de « -p » avec gprof (1)"
 
 #: config/avr/specs.h:68
 msgid "shared is not supported"
 msgstr "shared n'est pas supporté"
 
 #: config/bfin/elf.h:55
-#, fuzzy
-#| msgid "spec file has no spec for linking"
 msgid "no processor type specified for linking"
-msgstr "fichier de specs n'a pas de spécification pour l'édition de liens"
+msgstr "pas de type de processeur spécifié pour l'édition de liens"
 
 #: config/cris/cris.h:184
 msgid "do not specify both -march=... and -mcpu=..."
-msgstr "ne peut spécifier à la fois -march=… et -mcpu=…"
+msgstr "ne spécifiez pas à la fois -march=… et -mcpu=…"
 
 #: config/i386/cygwin-w64.h:64 config/i386/cygwin.h:130
 #: config/i386/mingw-w64.h:87 config/i386/mingw32.h:117
@@ -5838,27 +5649,23 @@ msgstr "shared et mdll ne sont pas compatibles"
 
 #: config/mcore/mcore.h:53
 msgid "the m210 does not have little endian support"
-msgstr "Le m210 ne supporte pas le code pour système à octets de poids faible"
+msgstr "le m210 ne supporte pas les systèmes à octets de poids faible"
 
 #: config/mips/r3900.h:37
 msgid "-mhard-float not supported"
 msgstr "-mhard-float n'est pas supporté"
 
 #: config/mips/r3900.h:39
-#, fuzzy
-#| msgid "-msingle-float and -msoft-float can not both be specified"
 msgid "-msingle-float and -msoft-float cannot both be specified"
-msgstr "-msingle-float et -msoft-float ne peuvent être spécifiés ensembles"
+msgstr "-msingle-float et -msoft-float ne peuvent être spécifiés ensemble"
 
 #: config/moxie/moxiebox.h:43
-#, fuzzy
-#| msgid "Generate code for little endian"
 msgid "this target is little-endian"
-msgstr "Générer du code pour un système à octets de poids faible"
+msgstr "cette cible est petit-boutiste"
 
 #: config/nios2/elf.h:44
 msgid "You need a C startup file for -msys-crt0="
-msgstr ""
+msgstr "Vous avez besoin d'un fichier de démarrage C pour -msys-crt0="
 
 #: config/pa/pa-hpux10.h:102 config/pa/pa-hpux10.h:105
 #: config/pa/pa-hpux10.h:113 config/pa/pa-hpux10.h:116
@@ -5866,7 +5673,7 @@ msgstr ""
 #: config/pa/pa64-hpux.h:29 config/pa/pa64-hpux.h:32 config/pa/pa64-hpux.h:41
 #: config/pa/pa64-hpux.h:44
 msgid "warning: consider linking with '-static' as system libraries with"
-msgstr ""
+msgstr "attention: envisagez de lier avec « -static » car les bibliothèques système avec"
 
 #: config/pa/pa-hpux10.h:103 config/pa/pa-hpux10.h:106
 #: config/pa/pa-hpux10.h:114 config/pa/pa-hpux10.h:117
@@ -5874,15 +5681,15 @@ msgstr ""
 #: config/pa/pa64-hpux.h:30 config/pa/pa64-hpux.h:33 config/pa/pa64-hpux.h:42
 #: config/pa/pa64-hpux.h:45
 msgid "  profiling support are only provided in archive format"
-msgstr ""
+msgstr "  support pour le profilage sont uniquement fournies au format archive"
 
 #: config/rs6000/darwin.h:95
 msgid " conflicting code gen style switches are used"
-msgstr " code de génération de style de switches utilisées est en conflit"
+msgstr " les sélecteurs de style de génération de code utilisés sont en conflit"
 
 #: config/rs6000/freebsd64.h:171
 msgid "consider using `-pg' instead of `-p' with gprof(1)"
-msgstr ""
+msgstr "envisagez d'utiliser « -pg » au lieu de « -p » avec gprof(1)"
 
 #: config/rx/rx.h:80
 msgid "-mas100-syntax is incompatible with -gdwarf"
@@ -5890,37 +5697,33 @@ msgstr "-mas100-syntax incompatible avec -gdwarf"
 
 #: config/rx/rx.h:81
 msgid "rx100 cpu does not have FPU hardware"
+msgstr "le processeur rx100 n'a pas d'unité FPU matérielle"
+msgstr "le processeur rx100 n'a pas de d'unité FPU matérielle"
 
 #: config/rx/rx.h:82
 msgid "rx200 cpu does not have FPU hardware"
+msgstr "le processeur rx200 n'a pas d'unité FPU matérielle"
+msgstr "le processeur rx200 n'a pas de d'unité FPU matérielle"
 
 #: config/s390/tpf.h:110
 msgid "static is not supported on TPF-OS"
 msgstr "static n'est pas supporté sur TPF-OS"
 
 #: config/sh/sh.h:375 config/sh/sh.h:378
-#, fuzzy
-#| msgid "does not support multilib"
 msgid "SH2a does not support little-endian"
-msgstr "ne supporte pas multilib"
+msgstr "SH2a ne supporte pas les petits-boutistes"
 
 #: config/sparc/linux64.h:149 config/sparc/linux64.h:156
 #: config/sparc/netbsd-elf.h:108 config/sparc/netbsd-elf.h:117
 #: config/sparc/sol2.h:228 config/sparc/sol2.h:234
 msgid "may not use both -m32 and -m64"
-msgstr "ne peut utiliser ensemble -m32 et -m64"
+msgstr "ne peut utiliser -m32 et -m64 ensemble"
 
 #: config/vax/netbsd-elf.h:51
-#, fuzzy
-#| msgid "The -shared option is not currently supported for VAX ELF."
 msgid "the -shared option is not currently supported for VAX ELF"
-msgstr "L'option -shared n'est pas couramment supportée pour VAS ELF."
+msgstr "l'option -shared n'est actuellement pas supportée pour un ELF VAX"
 
 #: config/vax/vax.h:46 config/vax/vax.h:47
 msgid "profiling not supported with -mg"
-msgstr "profilage n'est pas supporté avec -mg"
+msgstr "le profilage n'est pas supporté avec -mg"
 
 #: ada/gcc-interface/lang-specs.h:34
 msgid "-c or -S required for Ada"
@@ -5948,23 +5751,23 @@ msgstr "-fjni et -femit-class-file sont incompatibles"
 
 #: java/lang-specs.h:34 java/lang-specs.h:35
 msgid "-femit-class-file should used along with -fsyntax-only"
-msgstr "-femit-class-file dervait être utilisé avec -fsyntax-only"
+msgstr "-femit-class-file devrait être utilisé avec -fsyntax-only"
 
 #: objc/lang-specs.h:30 objc/lang-specs.h:41
 msgid "GNU Objective C no longer supports traditional compilation"
-msgstr ""
+msgstr "GNU Objective C ne supporte plus la compilation traditionnelle"
 
 #: objc/lang-specs.h:55
 msgid "objc-cpp-output is deprecated; please use objective-c-cpp-output instead"
-msgstr ""
+msgstr "objc-cpp-output est déprécié; utilisez plutôt objective-c-cpp-output"
 
 #: objcp/lang-specs.h:58
 msgid "objc++-cpp-output is deprecated; please use objective-c++-cpp-output instead"
-msgstr ""
+msgstr "objc++-cpp-output est déprécié; utilisez plutôt objective-c++-cpp-output"
 
 #: fortran/lang.opt:146
 msgid "-J<directory>\tPut MODULE files in 'directory'."
-msgstr ""
+msgstr "-J<répertoire>\tPlace les fichiers de MODULE dans « répertoire »."
 
 #: fortran/lang.opt:198
 #, fuzzy
@@ -51220,13 +51023,13 @@ msgstr "attribut « %s » peut seulement être appliqué aux définitions de cla
 #, fuzzy, gcc-internal-format
 #| msgid "storage class `%D' applied to template instantiation"
 msgid "ignoring %qE attribute applied to template instantiation %qT"
-msgstr "classe de stockage « %D » appliqué à l'instantiation du patron"
+msgstr "classe de stockage « %D » appliqué à l'instanciation du patron"
 
 #: cp/tree.c:3815
 #, fuzzy, gcc-internal-format
 #| msgid "storage class `%D' applied to template instantiation"
 msgid "ignoring %qE attribute applied to template specialization %qT"
-msgstr "classe de stockage « %D » appliqué à l'instantiation du patron"
+msgstr "classe de stockage « %D » appliqué à l'instanciation du patron"
 
 #: cp/tree.c:3837
 #, fuzzy, gcc-internal-format
-- 
cgit v1.1


From 2d170acb265d0e9e97baf7c06f3efad981077b21 Mon Sep 17 00:00:00 2001
From: GCC Administrator <gccadmin@gcc.gnu.org>
Date: Tue, 6 Dec 2016 00:16:18 +0000
Subject: Daily bump.

From-SVN: r243277
---
 gcc/DATESTAMP | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP
index 1c033b9..1d0b8c53 100644
--- a/gcc/DATESTAMP
+++ b/gcc/DATESTAMP
@@ -1 +1 @@
-20161205
+20161206
-- 
cgit v1.1


From 202687fb7c65fa951f7ffd39c0a4a651ab753daf Mon Sep 17 00:00:00 2001
From: Michael Meissner <meissner@linux.vnet.ibm.com>
Date: Tue, 6 Dec 2016 00:58:40 +0000
Subject: re PR target/78688 (PowerPC fails bootstrap)

2016-12-05  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/78688
	* config/rs6000/rs6000.h (FUNCTION_VALUE_REGNO_P): Use IN_RANGE
	instead of ((N) >= (X) && (N) <= (Y-X)) to silence warnings about
	comparing signed to unsigned values.
	(FUNCTION_ARG_REGNO_P): Likewise.

From-SVN: r243278
---
 gcc/ChangeLog              |  8 ++++++++
 gcc/config/rs6000/rs6000.h | 10 +++++-----
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index beef921..672b604 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2016-12-05  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	PR target/78688
+	* config/rs6000/rs6000.h (FUNCTION_VALUE_REGNO_P): Use IN_RANGE
+	instead of ((N) >= (X) && (N) <= (Y-X)) to silence warnings about
+	comparing signed to unsigned values.
+	(FUNCTION_ARG_REGNO_P): Likewise.
+
 2016-12-05  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
 	    Stefan Freudenberger  <stefan@reservoir.com>
 
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index d1e36d9..5d56927 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1866,19 +1866,19 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
    On RS/6000, this is r3, fp1, and v2 (for AltiVec).  */
 #define FUNCTION_VALUE_REGNO_P(N)					\
   ((N) == GP_ARG_RETURN							\
-   || ((N) >= FP_ARG_RETURN && (N) <= FP_ARG_MAX_RETURN			\
+   || (IN_RANGE ((N), FP_ARG_RETURN, FP_ARG_MAX_RETURN)			\
        && TARGET_HARD_FLOAT && TARGET_FPRS)				\
-   || ((N) >= ALTIVEC_ARG_RETURN && (N) <= ALTIVEC_ARG_MAX_RETURN	\
+   || (IN_RANGE ((N), ALTIVEC_ARG_RETURN, ALTIVEC_ARG_MAX_RETURN)	\
        && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI))
 
 /* 1 if N is a possible register number for function argument passing.
    On RS/6000, these are r3-r10 and fp1-fp13.
    On AltiVec, v2 - v13 are used for passing vectors.  */
 #define FUNCTION_ARG_REGNO_P(N)						\
-  ((unsigned) (N) - GP_ARG_MIN_REG < GP_ARG_NUM_REG			\
-   || ((unsigned) (N) - ALTIVEC_ARG_MIN_REG < ALTIVEC_ARG_NUM_REG	\
+  (IN_RANGE ((N), GP_ARG_MIN_REG, GP_ARG_MAX_REG)			\
+   || (IN_RANGE ((N), ALTIVEC_ARG_MIN_REG, ALTIVEC_ARG_MAX_REG)		\
        && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)				\
-   || ((unsigned) (N) - FP_ARG_MIN_REG < FP_ARG_NUM_REG			\
+   || (IN_RANGE ((N), FP_ARG_MIN_REG, FP_ARG_MAX_REG)			\
        && TARGET_HARD_FLOAT && TARGET_FPRS))
 
 /* Define a data type for recording info about an argument list
-- 
cgit v1.1


From 2207ff5dd1758a989e2af452eca89e0bb73da2a5 Mon Sep 17 00:00:00 2001
From: DJ Delorie <dj@redhat.com>
Date: Tue, 6 Dec 2016 01:38:23 -0500
Subject: * argv.c (expandargv): Check for directories passed as @-files.

From-SVN: r243280
---
 libiberty/ChangeLog |  4 ++++
 libiberty/argv.c    | 19 +++++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/libiberty/ChangeLog b/libiberty/ChangeLog
index f0959c9..e4e765a 100644
--- a/libiberty/ChangeLog
+++ b/libiberty/ChangeLog
@@ -1,3 +1,7 @@
+2016-11-30  DJ Delorie  <dj@redhat.com>
+
+	* argv.c (expandargv): Check for directories passed as @-files.
+
 2016-11-30  David Malcolm  <dmalcolm@redhat.com>
 
 	PR c/78498
diff --git a/libiberty/argv.c b/libiberty/argv.c
index 994dd35..fd43e78 100644
--- a/libiberty/argv.c
+++ b/libiberty/argv.c
@@ -35,6 +35,13 @@ Boston, MA 02110-1301, USA.  */
 #include <string.h>
 #include <stdlib.h>
 #include <stdio.h>
+#include <sys/types.h>
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#if HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
 
 #ifndef NULL
 #define NULL 0
@@ -387,6 +394,9 @@ expandargv (int *argcp, char ***argvp)
       char **file_argv;
       /* The number of options read from the response file, if any.  */
       size_t file_argc;
+#ifdef S_ISDIR
+      struct stat sb;
+#endif
       /* We are only interested in options of the form "@file".  */
       filename = (*argvp)[i];
       if (filename[0] != '@')
@@ -397,6 +407,15 @@ expandargv (int *argcp, char ***argvp)
 	  fprintf (stderr, "%s: error: too many @-files encountered\n", (*argvp)[0]);
 	  xexit (1);
 	}
+#ifdef S_ISDIR
+      if (stat (filename+1, &sb) < 0)
+	continue;
+      if (S_ISDIR(sb.st_mode))
+	{
+	  fprintf (stderr, "%s: error: @-file refers to a directory\n", (*argvp)[0]);
+	  xexit (1);
+	}
+#endif
       /* Read the contents of the file.  */
       f = fopen (++filename, "r");
       if (!f)
-- 
cgit v1.1


From 1d2ccddf2ea8b648b8520c7a844d6259b0f31ff7 Mon Sep 17 00:00:00 2001
From: DJ Delorie <dj@gcc.gnu.org>
Date: Tue, 6 Dec 2016 01:40:07 -0500
Subject: Oops, fix date

From-SVN: r243281
---
 libiberty/ChangeLog | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libiberty/ChangeLog b/libiberty/ChangeLog
index e4e765a..1e9e706 100644
--- a/libiberty/ChangeLog
+++ b/libiberty/ChangeLog
@@ -1,4 +1,4 @@
-2016-11-30  DJ Delorie  <dj@redhat.com>
+2016-12-06  DJ Delorie  <dj@redhat.com>
 
 	* argv.c (expandargv): Check for directories passed as @-files.
 
-- 
cgit v1.1


From d7ae3739a200ea1c90ca20afbebfc627ee32cf49 Mon Sep 17 00:00:00 2001
From: Eric Botcazou <ebotcazou@adacore.com>
Date: Tue, 6 Dec 2016 07:03:04 +0000
Subject: re PR middle-end/78642 (invalid rtl sharing found in the insn)

	PR middle-end/78642
	* emit-rtl.c (verify_rtx_sharing) <CLOBBER>: Relax condition.
	(copy_rtx_if_shared_1) <CLOBBER>: Likewise.
	(copy_insn_1) <CLOBBER>: Likewise.

From-SVN: r243282
---
 gcc/ChangeLog  |  7 +++++++
 gcc/emit-rtl.c | 15 +++++++++------
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 672b604..38e86cf 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2016-12-06  Eric Botcazou  <ebotcazou@adacore.com>
+
+	PR middle-end/78642
+	* emit-rtl.c (verify_rtx_sharing) <CLOBBER>: Relax condition.
+	(copy_rtx_if_shared_1) <CLOBBER>: Likewise.
+	(copy_insn_1) <CLOBBER>: Likewise.
+
 2016-12-05  Michael Meissner  <meissner@linux.vnet.ibm.com>
 
 	PR target/78688
diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
index 4650540..5201bd0 100644
--- a/gcc/emit-rtl.c
+++ b/gcc/emit-rtl.c
@@ -2718,8 +2718,9 @@ verify_rtx_sharing (rtx orig, rtx insn)
       /* Share clobbers of hard registers (like cc0), but do not share pseudo reg
          clobbers or clobbers of hard registers that originated as pseudos.
          This is needed to allow safe register renaming.  */
-      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
-	  && ORIGINAL_REGNO (XEXP (x, 0)) == REGNO (XEXP (x, 0)))
+      if (REG_P (XEXP (x, 0))
+	  && HARD_REGISTER_NUM_P (REGNO (XEXP (x, 0)))
+	  && HARD_REGISTER_NUM_P (ORIGINAL_REGNO (XEXP (x, 0))))
 	return;
       break;
 
@@ -2970,8 +2971,9 @@ repeat:
       /* Share clobbers of hard registers (like cc0), but do not share pseudo reg
          clobbers or clobbers of hard registers that originated as pseudos.
          This is needed to allow safe register renaming.  */
-      if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
-	  && ORIGINAL_REGNO (XEXP (x, 0)) == REGNO (XEXP (x, 0)))
+      if (REG_P (XEXP (x, 0))
+	  && HARD_REGISTER_NUM_P (REGNO (XEXP (x, 0)))
+	  && HARD_REGISTER_NUM_P (ORIGINAL_REGNO (XEXP (x, 0))))
 	return;
       break;
 
@@ -5521,8 +5523,9 @@ copy_insn_1 (rtx orig)
       /* Share clobbers of hard registers (like cc0), but do not share pseudo reg
          clobbers or clobbers of hard registers that originated as pseudos.
          This is needed to allow safe register renaming.  */
-      if (REG_P (XEXP (orig, 0)) && REGNO (XEXP (orig, 0)) < FIRST_PSEUDO_REGISTER
-	  && ORIGINAL_REGNO (XEXP (orig, 0)) == REGNO (XEXP (orig, 0)))
+      if (REG_P (XEXP (orig, 0))
+	  && HARD_REGISTER_NUM_P (REGNO (XEXP (orig, 0)))
+	  && HARD_REGISTER_NUM_P (ORIGINAL_REGNO (XEXP (orig, 0))))
 	return orig;
       break;
 
-- 
cgit v1.1


From 0d3ce69b79ab7d7ea4a2fc4ed5e983ea6efcfa69 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Tue, 6 Dec 2016 10:21:13 +0100
Subject: re PR tree-optimization/78675 (ICE: verify_gimple failed (error:
 integral result type precision does not match field size of BIT_FIELD_REF))

2016-12-06  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/78675
	* tree-vect-loop.c (vectorizable_live_operation): For
	VECTOR_BOOLEAN_TYPE_P vectype use integral type with bitsize precision
	instead of TREE_TYPE (vectype) for the BIT_FIELD_REF.

	* gcc.c-torture/execute/pr78675.c: New test.
	* gcc.target/i386/pr78675-1.c: New test.
	* gcc.target/i386/pr78675-2.c: New test.

From-SVN: r243283
---
 gcc/ChangeLog                                 |  7 +++++
 gcc/testsuite/ChangeLog                       |  7 +++++
 gcc/testsuite/gcc.c-torture/execute/pr78675.c | 38 +++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr78675-1.c     |  5 ++++
 gcc/testsuite/gcc.target/i386/pr78675-2.c     | 15 +++++++++++
 gcc/tree-vect-loop.c                          |  6 +++--
 6 files changed, 76 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr78675.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr78675-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr78675-2.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 38e86cf..4e70e27 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2016-12-06  Jakub Jelinek  <jakub@redhat.com>
+
+	PR tree-optimization/78675
+	* tree-vect-loop.c (vectorizable_live_operation): For
+	VECTOR_BOOLEAN_TYPE_P vectype use integral type with bitsize precision
+	instead of TREE_TYPE (vectype) for the BIT_FIELD_REF.
+
 2016-12-06  Eric Botcazou  <ebotcazou@adacore.com>
 
 	PR middle-end/78642
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 3b0a8fa..35f10fe 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2016-12-06  Jakub Jelinek  <jakub@redhat.com>
+
+	PR tree-optimization/78675
+	* gcc.c-torture/execute/pr78675.c: New test.
+	* gcc.target/i386/pr78675-1.c: New test.
+	* gcc.target/i386/pr78675-2.c: New test.
+
 2016-12-05  Andrew Senkevich  <andrew.senkevich@intel.com>
 
 	* gcc.target/i386/avx512bw-kandd-1.c: New.
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr78675.c b/gcc/testsuite/gcc.c-torture/execute/pr78675.c
new file mode 100644
index 0000000..7cef342
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr78675.c
@@ -0,0 +1,38 @@
+/* PR tree-optimization/78675 */
+
+long int a;
+
+__attribute__((noinline, noclone)) long int
+foo (long int x)
+{
+  long int b;
+  while (a < 1)
+    {
+      b = a && x;
+      ++a;
+    }
+  return b;
+}
+
+int
+main ()
+{
+  if (foo (0) != 0)
+    __builtin_abort ();
+  a = 0;
+  if (foo (1) != 0)
+    __builtin_abort ();
+  a = 0;
+  if (foo (25) != 0)
+    __builtin_abort ();
+  a = -64;
+  if (foo (0) != 0)
+    __builtin_abort ();
+  a = -64;
+  if (foo (1) != 0)
+    __builtin_abort ();
+  a = -64;
+  if (foo (25) != 0)
+    __builtin_abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr78675-1.c b/gcc/testsuite/gcc.target/i386/pr78675-1.c
new file mode 100644
index 0000000..68435b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr78675-1.c
@@ -0,0 +1,5 @@
+/* PR tree-optimization/78675 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512f" } */
+
+#include "../../gcc.c-torture/execute/pr78675.c"
diff --git a/gcc/testsuite/gcc.target/i386/pr78675-2.c b/gcc/testsuite/gcc.target/i386/pr78675-2.c
new file mode 100644
index 0000000..8f5ef87
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr78675-2.c
@@ -0,0 +1,15 @@
+/* { dg-do run } */
+/* { dg-options "-O3 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+#define main do_main
+
+#include "../../gcc.c-torture/execute/pr78675.c"
+
+static void
+avx512f_test (void)
+{
+  do_main ();
+}
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 4150b0d..6e8b89c 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -6601,8 +6601,10 @@ vectorizable_live_operation (gimple *stmt,
   /* Create a new vectorized stmt for the uses of STMT and insert outside the
      loop.  */
   gimple_seq stmts = NULL;
-  tree new_tree = build3 (BIT_FIELD_REF, TREE_TYPE (vectype), vec_lhs, bitsize,
-			  bitstart);
+  tree bftype = TREE_TYPE (vectype);
+  if (VECTOR_BOOLEAN_TYPE_P (vectype))
+    bftype = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 1);
+  tree new_tree = build3 (BIT_FIELD_REF, bftype, vec_lhs, bitsize, bitstart);
   new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree), &stmts,
 				   true, NULL_TREE);
   if (stmts)
-- 
cgit v1.1


From bf2d0849a360376182b796042fedaa018b87d605 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Tue, 6 Dec 2016 10:22:36 +0100
Subject: re PR c++/71537 (GCC rejects consetxpr boolean conversions and
 comparisons on the result of pointer arithmetic.)

	PR c++/71537
	* fold-const-call.c (fold_const_call): Handle
	CFN_BUILT_IN_{INDEX,STRCHR,RINDEX,STRRCHR}.

	* g++.dg/cpp0x/constexpr-strchr.C: New test.

From-SVN: r243284
---
 gcc/ChangeLog                                 |  4 ++++
 gcc/fold-const-call.c                         | 25 +++++++++++++++++++++++++
 gcc/testsuite/ChangeLog                       |  3 +++
 gcc/testsuite/g++.dg/cpp0x/constexpr-strchr.C | 27 +++++++++++++++++++++++++++
 4 files changed, 59 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-strchr.C

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 4e70e27..029dbdc 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,9 @@
 2016-12-06  Jakub Jelinek  <jakub@redhat.com>
 
+	PR c++/71537
+	* fold-const-call.c (fold_const_call): Handle
+	CFN_BUILT_IN_{INDEX,STRCHR,RINDEX,STRRCHR}.
+
 	PR tree-optimization/78675
 	* tree-vect-loop.c (vectorizable_live_operation): For
 	VECTOR_BOOLEAN_TYPE_P vectype use integral type with bitsize precision
diff --git a/gcc/fold-const-call.c b/gcc/fold-const-call.c
index e2d0eaf..439988d 100644
--- a/gcc/fold-const-call.c
+++ b/gcc/fold-const-call.c
@@ -1383,6 +1383,7 @@ tree
 fold_const_call (combined_fn fn, tree type, tree arg0, tree arg1)
 {
   const char *p0, *p1;
+  char c;
   switch (fn)
     {
     case CFN_BUILT_IN_STRSPN:
@@ -1409,6 +1410,30 @@ fold_const_call (combined_fn fn, tree type, tree arg0, tree arg1)
 	}
       return NULL_TREE;
 
+    case CFN_BUILT_IN_INDEX:
+    case CFN_BUILT_IN_STRCHR:
+      if ((p0 = c_getstr (arg0)) && target_char_cst_p (arg1, &c))
+	{
+	  const char *r = strchr (p0, c);
+	  if (r == NULL)
+	    return build_int_cst (type, 0);
+	  return fold_convert (type,
+			       fold_build_pointer_plus_hwi (arg0, r - p0));
+	}
+      return NULL_TREE;
+
+    case CFN_BUILT_IN_RINDEX:
+    case CFN_BUILT_IN_STRRCHR:
+      if ((p0 = c_getstr (arg0)) && target_char_cst_p (arg1, &c))
+	{
+	  const char *r = strrchr (p0, c);
+	  if (r == NULL)
+	    return build_int_cst (type, 0);
+	  return fold_convert (type,
+			       fold_build_pointer_plus_hwi (arg0, r - p0));
+	}
+      return NULL_TREE;
+
     default:
       return fold_const_call_1 (fn, type, arg0, arg1);
     }
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 35f10fe..e6dfcdc 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,8 @@
 2016-12-06  Jakub Jelinek  <jakub@redhat.com>
 
+	PR c++/71537
+	* g++.dg/cpp0x/constexpr-strchr.C: New test.
+
 	PR tree-optimization/78675
 	* gcc.c-torture/execute/pr78675.c: New test.
 	* gcc.target/i386/pr78675-1.c: New test.
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-strchr.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-strchr.C
new file mode 100644
index 0000000..c44d8a2
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-strchr.C
@@ -0,0 +1,27 @@
+// { dg-do compile { target c++11 } }
+
+constexpr const char *f1 (const char *p, int q) { return __builtin_strchr (p, q); }
+constexpr const char *f2 (const char *p, int q) { return __builtin_index (p, q); }
+constexpr const char *f3 (const char *p, int q) { return __builtin_strrchr (p, q); }
+constexpr const char *f4 (const char *p, int q) { return __builtin_rindex (p, q); }
+constexpr const char a[] = "abcdefedcba";
+static_assert (f1 ("abcde", 'f') == nullptr, "");
+static_assert (f1 (a, 'g') == nullptr, "");
+static_assert (f1 (a, 'f') == a + 5, "");
+static_assert (f1 (a, 'c') == a + 2, "");
+static_assert (f1 (a, '\0') == a + 11, "");
+static_assert (f2 ("abcde", 'f') == nullptr, "");
+static_assert (f2 (a, 'g') == nullptr, "");
+static_assert (f2 (a, 'f') == a + 5, "");
+static_assert (f2 (a, 'c') == a + 2, "");
+static_assert (f2 (a, '\0') == a + 11, "");
+static_assert (f3 ("abcde", 'f') == nullptr, "");
+static_assert (f3 (a, 'g') == nullptr, "");
+static_assert (f3 (a, 'f') == a + 5, "");
+static_assert (f3 (a, 'c') == a + 8, "");
+static_assert (f3 (a, '\0') == a + 11, "");
+static_assert (f4 ("abcde", 'f') == nullptr, "");
+static_assert (f4 (a, 'g') == nullptr, "");
+static_assert (f4 (a, 'f') == a + 5, "");
+static_assert (f4 (a, 'c') == a + 8, "");
+static_assert (f4 (a, '\0') == a + 11, "");
-- 
cgit v1.1


From 66972191ee05c5dd2d02e5d062fe1c942e42fcd7 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Tue, 6 Dec 2016 10:23:51 +0100
Subject: re PR c++/71537 (GCC rejects constexpr boolean conversions and
 comparisons on the result of pointer arithmetic.)

2016-12-06  Jakub Jelinek  <jakub@redhat.com>

	PR c++/71537
	* fold-const-call.c (fold_const_call_1): Remove memchr handling here.
	(fold_const_call) <case CFN_BUILT_IN_STRNCMP,
	case CFN_BUILT_IN_STRNCASECMP>: Formatting improvements.
	(fold_const_call) <case CFN_BUILT_IN_MEMCMP>: Likewise.  If s2 is 0
	and arguments have no side-effects, return 0.
	(fold_const_call): Handle CFN_BUILT_IN_MEMCHR.

	* g++.dg/cpp0x/constexpr-memchr.C: New test.

From-SVN: r243285
---
 gcc/ChangeLog                                 |   8 +++
 gcc/fold-const-call.c                         | 100 ++++++++++++--------------
 gcc/testsuite/ChangeLog                       |   3 +
 gcc/testsuite/g++.dg/cpp0x/constexpr-memchr.C |  24 +++++++
 4 files changed, 81 insertions(+), 54 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-memchr.C

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 029dbdc..9ed8f5f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,6 +1,14 @@
 2016-12-06  Jakub Jelinek  <jakub@redhat.com>
 
 	PR c++/71537
+	* fold-const-call.c (fold_const_call_1): Remove memchr handling here.
+	(fold_const_call) <case CFN_BUILT_IN_STRNCMP,
+	case CFN_BUILT_IN_STRNCASECMP>: Formatting improvements.
+	(fold_const_call) <case CFN_BUILT_IN_MEMCMP>: Likewise.  If s2 is 0
+	and arguments have no side-effects, return 0.
+	(fold_const_call): Handle CFN_BUILT_IN_MEMCHR.
+
+	PR c++/71537
 	* fold-const-call.c (fold_const_call): Handle
 	CFN_BUILT_IN_{INDEX,STRCHR,RINDEX,STRRCHR}.
 
diff --git a/gcc/fold-const-call.c b/gcc/fold-const-call.c
index 439988d..c85fb41 100644
--- a/gcc/fold-const-call.c
+++ b/gcc/fold-const-call.c
@@ -1491,36 +1491,6 @@ fold_const_call_1 (combined_fn fn, tree type, tree arg0, tree arg1, tree arg2)
       return NULL_TREE;
     }
 
-  switch (fn)
-    {
-    case CFN_BUILT_IN_MEMCHR:
-      {
-	char c;
-	if (integer_zerop (arg2)
-	    && !TREE_SIDE_EFFECTS (arg0)
-	    && !TREE_SIDE_EFFECTS (arg1))
-	  return build_int_cst (type, 0);
-
-	if (!tree_fits_uhwi_p (arg2) || !target_char_cst_p (arg1, &c))
-	  return NULL_TREE;
-
-	unsigned HOST_WIDE_INT length = tree_to_uhwi (arg2);
-	unsigned HOST_WIDE_INT string_length;
-	const char *p1 = c_getstr (arg0, &string_length);
-	if (p1)
-	  {
-	    const char *r
-	      = (const char *)memchr (p1, c, MIN (length, string_length));
-	    if (r == NULL && length <= string_length)
-	      return build_int_cst (type, 0);
-	  }
-
-	break;
-      }
-    default:
-      break;
-    }
-
   return NULL_TREE;
 }
 
@@ -1531,47 +1501,69 @@ tree
 fold_const_call (combined_fn fn, tree type, tree arg0, tree arg1, tree arg2)
 {
   const char *p0, *p1;
+  char c;
   unsigned HOST_WIDE_INT s0, s1;
   size_t s2 = 0;
   switch (fn)
     {
     case CFN_BUILT_IN_STRNCMP:
-      {
-	bool const_size_p = host_size_t_cst_p (arg2, &s2);
-	if (const_size_p && s2 == 0
-	    && !TREE_SIDE_EFFECTS (arg0)
-	    && !TREE_SIDE_EFFECTS (arg1))
-	  return build_int_cst (type, 0);
-	else if (const_size_p
-		 && (p0 = c_getstr (arg0))
-		 && (p1 = c_getstr (arg1)))
-	  return build_int_cst (type, strncmp (p0, p1, s2));
+      if (!host_size_t_cst_p (arg2, &s2))
 	return NULL_TREE;
-      }
+      if (s2 == 0
+	  && !TREE_SIDE_EFFECTS (arg0)
+	  && !TREE_SIDE_EFFECTS (arg1))
+	return build_int_cst (type, 0);
+      else if ((p0 = c_getstr (arg0)) && (p1 = c_getstr (arg1)))
+	return build_int_cst (type, strncmp (p0, p1, s2));
+      return NULL_TREE;
+
     case CFN_BUILT_IN_STRNCASECMP:
-      {
-	bool const_size_p = host_size_t_cst_p (arg2, &s2);
-	if (const_size_p && s2 == 0
-	    && !TREE_SIDE_EFFECTS (arg0)
-	    && !TREE_SIDE_EFFECTS (arg1))
-	  return build_int_cst (type, 0);
-	else if (const_size_p
-		 && (p0 = c_getstr (arg0))
-		 && (p1 = c_getstr (arg1))
-		 && strncmp (p0, p1, s2) == 0)
-	  return build_int_cst (type, 0);
+      if (!host_size_t_cst_p (arg2, &s2))
 	return NULL_TREE;
-      }
+      if (s2 == 0
+	  && !TREE_SIDE_EFFECTS (arg0)
+	  && !TREE_SIDE_EFFECTS (arg1))
+	return build_int_cst (type, 0);
+      else if ((p0 = c_getstr (arg0))
+	       && (p1 = c_getstr (arg1))
+	       && strncmp (p0, p1, s2) == 0)
+	return build_int_cst (type, 0);
+      return NULL_TREE;
+
     case CFN_BUILT_IN_BCMP:
     case CFN_BUILT_IN_MEMCMP:
+      if (!host_size_t_cst_p (arg2, &s2))
+	return NULL_TREE;
+      if (s2 == 0
+	  && !TREE_SIDE_EFFECTS (arg0)
+	  && !TREE_SIDE_EFFECTS (arg1))
+	return build_int_cst (type, 0);
       if ((p0 = c_getstr (arg0, &s0))
 	  && (p1 = c_getstr (arg1, &s1))
-	  && host_size_t_cst_p (arg2, &s2)
 	  && s2 <= s0
 	  && s2 <= s1)
 	return build_cmp_result (type, memcmp (p0, p1, s2));
       return NULL_TREE;
 
+    case CFN_BUILT_IN_MEMCHR:
+      if (!host_size_t_cst_p (arg2, &s2))
+	return NULL_TREE;
+      if (s2 == 0
+	  && !TREE_SIDE_EFFECTS (arg0)
+	  && !TREE_SIDE_EFFECTS (arg1))
+	return build_int_cst (type, 0);
+      if ((p0 = c_getstr (arg0, &s0))
+	  && s2 <= s0
+	  && target_char_cst_p (arg1, &c))
+	{
+	  const char *r = (const char *) memchr (p0, c, s2);
+	  if (r == NULL)
+	    return build_int_cst (type, 0);
+	  return fold_convert (type,
+			       fold_build_pointer_plus_hwi (arg0, r - p0));
+	}
+      return NULL_TREE;
+
     default:
       return fold_const_call_1 (fn, type, arg0, arg1, arg2);
     }
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index e6dfcdc..b44993b 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,6 +1,9 @@
 2016-12-06  Jakub Jelinek  <jakub@redhat.com>
 
 	PR c++/71537
+	* g++.dg/cpp0x/constexpr-memchr.C: New test.
+
+	PR c++/71537
 	* g++.dg/cpp0x/constexpr-strchr.C: New test.
 
 	PR tree-optimization/78675
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-memchr.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-memchr.C
new file mode 100644
index 0000000..e5c07f1
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-memchr.C
@@ -0,0 +1,24 @@
+// { dg-do compile { target c++11 } }
+
+typedef decltype (sizeof (0)) size_t;
+constexpr const void *f1 (const char *p, int q) { return __builtin_memchr (p, q, __builtin_strlen (p) + 1); }
+constexpr const void *f2 (const char *p, int q, size_t r) { return __builtin_memchr (p, q, r); }
+constexpr const char a[] = "abcdefedcba";
+static_assert (f1 ("abcde", 'f') == nullptr, "");
+static_assert (f1 (a, 'g') == nullptr, "");
+static_assert (f1 (a, 'f') == a + 5, "");
+static_assert (f1 (a, 'c') == a + 2, "");
+static_assert (f1 (a, '\0') == a + 11, "");
+static_assert (f2 ("abcde", 'f', 6) == nullptr, "");
+static_assert (f2 ("abcde", 'f', 1) == nullptr, "");
+static_assert (f2 ("abcde", 'f', 0) == nullptr, "");
+static_assert (f2 (a, 'g', 7) == nullptr, "");
+static_assert (f2 (a, 'g', 0) == nullptr, "");
+static_assert (f2 (a, 'f', 6) == a + 5, "");
+static_assert (f2 (a, 'f', 5) == nullptr, "");
+static_assert (f2 (a, 'c', 12) == a + 2, "");
+static_assert (f2 (a, 'c', 3) == a + 2, "");
+static_assert (f2 (a, 'c', 2) == nullptr, "");
+static_assert (f2 (a, '\0', 12) == a + 11, "");
+static_assert (f2 (a, '\0', 11) == nullptr, "");
+static_assert (f2 (a, '\0', 0) == nullptr, "");
-- 
cgit v1.1


From 0f57194bfccc05e07b48e8ccd60d3653c6d65c9a Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Tue, 6 Dec 2016 10:24:36 +0100
Subject: re PR c++/71537 (GCC rejects constexpr boolean conversions and
 comparisons on the result of pointer arithmetic.)

	PR c++/71537
	* fold-const.c (fold_comparison): Assume CONSTANT_CLASS_P (base0)
	plus offset is non-zero.  For maybe_nonzero_address decl base0,
	require indirect_base0.

	* g++.dg/cpp0x/constexpr-71537.C: New test.

From-SVN: r243286
---
 gcc/ChangeLog                                |  5 +++++
 gcc/fold-const.c                             | 18 ++++++++++--------
 gcc/testsuite/ChangeLog                      |  3 +++
 gcc/testsuite/g++.dg/cpp0x/constexpr-71537.C | 18 ++++++++++++++++++
 4 files changed, 36 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-71537.C

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 9ed8f5f..aef86e3 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,6 +1,11 @@
 2016-12-06  Jakub Jelinek  <jakub@redhat.com>
 
 	PR c++/71537
+	* fold-const.c (fold_comparison): Assume CONSTANT_CLASS_P (base0)
+	plus offset is non-zero.  For maybe_nonzero_address decl base0,
+	require indirect_base0.
+
+	PR c++/71537
 	* fold-const-call.c (fold_const_call_1): Remove memchr handling here.
 	(fold_const_call) <case CFN_BUILT_IN_STRNCMP,
 	case CFN_BUILT_IN_STRNCASECMP>: Formatting improvements.
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 6517188..c649e54 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -8419,14 +8419,16 @@ fold_comparison (location_t loc, enum tree_code code, tree type,
 	 below follow the C++ rules with the additional property that
 	 every object pointer compares greater than a null pointer.
       */
-      else if (DECL_P (base0)
-	       && maybe_nonzero_address (base0) > 0
-	       /* Avoid folding references to struct members at offset 0 to
-		  prevent tests like '&ptr->firstmember == 0' from getting
-		  eliminated.  When ptr is null, although the -> expression
-		  is strictly speaking invalid, GCC retains it as a matter
-		  of QoI.  See PR c/44555. */
-	       && (offset0 == NULL_TREE && bitpos0 != 0)
+      else if (((DECL_P (base0)
+		 && maybe_nonzero_address (base0) > 0
+		 /* Avoid folding references to struct members at offset 0 to
+		    prevent tests like '&ptr->firstmember == 0' from getting
+		    eliminated.  When ptr is null, although the -> expression
+		    is strictly speaking invalid, GCC retains it as a matter
+		    of QoI.  See PR c/44555. */
+		 && (offset0 == NULL_TREE && bitpos0 != 0))
+		|| CONSTANT_CLASS_P (base0))
+	       && indirect_base0
 	       /* The caller guarantees that when one of the arguments is
 		  constant (i.e., null in this case) it is second.  */
 	       && integer_zerop (arg1))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index b44993b..7aa429f 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,6 +1,9 @@
 2016-12-06  Jakub Jelinek  <jakub@redhat.com>
 
 	PR c++/71537
+	* g++.dg/cpp0x/constexpr-71537.C: New test.
+
+	PR c++/71537
 	* g++.dg/cpp0x/constexpr-memchr.C: New test.
 
 	PR c++/71537
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-71537.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-71537.C
new file mode 100644
index 0000000..3d5ac34
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-71537.C
@@ -0,0 +1,18 @@
+// PR c++/71537
+// { dg-do compile { target c++11 } }
+
+constexpr int n[42] = {1};
+constexpr int x1 = n ? 1 : 0;
+constexpr int x2 = n + 1 ? 1 : 0;
+constexpr int x3 = "abc" ? 1 : 0;
+constexpr int x4 = "abc" + 1 ? 1 : 0;
+constexpr bool x5 = "abc" + 1;
+constexpr bool x6 = "abc" + 4;
+constexpr bool x7 = n + 42;
+static_assert (x1 == 1, "");
+static_assert (x2 == 1, "");
+static_assert (x3 == 1, "");
+static_assert (x4 == 1, "");
+static_assert (x5, "");
+static_assert (x6, "");
+static_assert (x7, "");
-- 
cgit v1.1


From 99b7a28da867b09d219a71cd5c29eff45c6effa4 Mon Sep 17 00:00:00 2001
From: Tamar Christina <tamar.christina@arm.com>
Date: Tue, 6 Dec 2016 09:56:52 +0000
Subject: 2016-12-06  Tamar Christina  <tamar.christina@arm.com>

	* gcc/config/aarch64/arm_neon.h
	(vreinterpretq_p8_p128, vreinterpretq_p16_p128): Added.
	(vreinterpret_p64_p16, vreinterpretq_p64_p128): Likewise.
	(vreinterpretq_p64_p16, vreinterpretq_p128_p8): Likewise.
	(vreinterpretq_p128_p16, vreinterpretq_p128_f16): Likewise.
	(vreinterpretq_p128_f32, vreinterpretq_p128_p64): Likewise.
	(vreinterpretq_p128_s64, vreinterpretq_p128_u64): Likewise.
	(vreinterpretq_p128_s8, vreinterpretq_p128_s16): Likewise.
	(vreinterpretq_p128_s32, vreinterpretq_p128_u8): Likewise.
	(vreinterpretq_p128_u16, vreinterpretq_p128_u32): Likewise.
	(vreinterpretq_f16_p128, vreinterpretq_f32_p128): Likewise.
	(vreinterpretq_s64_p128, vreinterpretq_u64_p128): Likewise.
	(vreinterpretq_s8_p128, vreinterpretq_s16_p128): Likewise.
	(vreinterpretq_s32_p128, vreinterpretq_u8_p128): Likewise.
	(vreinterpretq_u16_p128, vreinterpretq_u32_p128): Likewise.

From-SVN: r243287
---
 gcc/ChangeLog                 |  18 ++++
 gcc/config/aarch64/arm_neon.h | 197 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 215 insertions(+)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index aef86e3..bd74061 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,21 @@
+2016-12-06  Tamar Christina  <tamar.christina@arm.com>
+
+	* gcc/config/aarch64/arm_neon.h
+	(vreinterpretq_p8_p128, vreinterpretq_p16_p128): Added.
+	(vreinterpret_p64_p16, vreinterpretq_p64_p128): Likewise.
+	(vreinterpretq_p64_p16, vreinterpretq_p128_p8): Likewise.
+	(vreinterpretq_p128_p16, vreinterpretq_p128_f16): Likewise.
+	(vreinterpretq_p128_f32, vreinterpretq_p128_p64): Likewise.
+	(vreinterpretq_p128_s64, vreinterpretq_p128_u64): Likewise.
+	(vreinterpretq_p128_s8, vreinterpretq_p128_s16): Likewise.
+	(vreinterpretq_p128_s32, vreinterpretq_p128_u8): Likewise.
+	(vreinterpretq_p128_u16, vreinterpretq_p128_u32): Likewise.
+	(vreinterpretq_f16_p128, vreinterpretq_f32_p128): Likewise.
+	(vreinterpretq_s64_p128, vreinterpretq_u64_p128): Likewise.
+	(vreinterpretq_s8_p128, vreinterpretq_s16_p128): Likewise.
+	(vreinterpretq_s32_p128, vreinterpretq_u8_p128): Likewise.
+	(vreinterpretq_u16_p128, vreinterpretq_u32_p128): Likewise.
+
 2016-12-06  Jakub Jelinek  <jakub@redhat.com>
 
 	PR c++/71537
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 7d4d2a2..b846644 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -3506,6 +3506,13 @@ vreinterpretq_p8_p64 (poly64x2_t __a)
   return (poly8x16_t) __a;
 }
 
+__extension__ extern __inline poly8x16_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p8_p128 (poly128_t __a)
+{
+  return (poly8x16_t)__a;
+}
+
 __extension__ extern __inline poly16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vreinterpret_p16_f16 (float16x4_t __a)
@@ -3688,6 +3695,13 @@ vreinterpretq_p16_p64 (poly64x2_t __a)
   return (poly16x8_t) __a;
 }
 
+__extension__ extern __inline poly16x8_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p16_p128 (poly128_t __a)
+{
+  return (poly16x8_t)__a;
+}
+
 __extension__ extern __inline poly64x1_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vreinterpret_p64_f16 (float16x4_t __a)
@@ -3772,6 +3786,13 @@ vreinterpret_p64_p8 (poly8x8_t __a)
   return (poly64x1_t) __a;
 }
 
+__extension__ extern __inline poly64x1_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpret_p64_p16 (poly16x4_t __a)
+{
+  return (poly64x1_t)__a;
+}
+
 __extension__ extern __inline poly64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vreinterpretq_p64_f64 (float64x2_t __a)
@@ -3823,6 +3844,13 @@ vreinterpretq_p64_f32 (float32x4_t __a)
 
 __extension__ extern __inline poly64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p64_p128 (poly128_t __a)
+{
+  return (poly64x2_t)__a;
+}
+
+__extension__ extern __inline poly64x2_t
+__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vreinterpretq_p64_u8 (uint8x16_t __a)
 {
   return (poly64x2_t) __a;
@@ -3836,6 +3864,13 @@ vreinterpretq_p64_u16 (uint16x8_t __a)
 }
 
 __extension__ extern __inline poly64x2_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p64_p16 (poly16x8_t __a)
+{
+  return (poly64x2_t)__a;
+}
+
+__extension__ extern __inline poly64x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vreinterpretq_p64_u32 (uint32x4_t __a)
 {
@@ -3856,6 +3891,97 @@ vreinterpretq_p64_p8 (poly8x16_t __a)
   return (poly64x2_t) __a;
 }
 
+__extension__ extern __inline poly128_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_p8 (poly8x16_t __a)
+{
+  return (poly128_t)__a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_p16 (poly16x8_t __a)
+{
+  return (poly128_t)__a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_f16 (float16x8_t __a)
+{
+  return (poly128_t) __a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_f32 (float32x4_t __a)
+{
+  return (poly128_t)__a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_p64 (poly64x2_t __a)
+{
+  return (poly128_t)__a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_s64 (int64x2_t __a)
+{
+  return (poly128_t)__a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_u64 (uint64x2_t __a)
+{
+  return (poly128_t)__a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_s8 (int8x16_t __a)
+{
+  return (poly128_t)__a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_s16 (int16x8_t __a)
+{
+  return (poly128_t)__a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_s32 (int32x4_t __a)
+{
+  return (poly128_t)__a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_u8 (uint8x16_t __a)
+{
+  return (poly128_t)__a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_u16 (uint16x8_t __a)
+{
+  return (poly128_t)__a;
+}
+
+__extension__ extern __inline poly128_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_p128_u32 (uint32x4_t __a)
+{
+  return (poly128_t)__a;
+}
+
 __extension__ extern __inline float16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vreinterpret_f16_f64 (float64x1_t __a)
@@ -4025,6 +4151,13 @@ vreinterpretq_f16_p8 (poly8x16_t __a)
 }
 
 __extension__ extern __inline float16x8_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_f16_p128 (poly128_t __a)
+{
+  return (float16x8_t) __a;
+}
+
+__extension__ extern __inline float16x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vreinterpretq_f16_p16 (poly16x8_t __a)
 {
@@ -4220,6 +4353,14 @@ vreinterpretq_f32_p64 (poly64x2_t __a)
   return (float32x4_t) __a;
 }
 
+__extension__ extern __inline float32x4_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_f32_p128 (poly128_t __a)
+{
+  return (float32x4_t)__a;
+}
+
+
 __extension__ extern __inline float64x1_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vreinterpret_f64_f16 (float16x4_t __a)
@@ -4584,6 +4725,13 @@ vreinterpretq_s64_p64 (poly64x2_t __a)
   return (int64x2_t) __a;
 }
 
+__extension__ extern __inline int64x2_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_s64_p128 (poly128_t __a)
+{
+  return (int64x2_t)__a;
+}
+
 __extension__ extern __inline uint64x1_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vreinterpret_u64_f16 (float16x4_t __a)
@@ -4766,6 +4914,13 @@ vreinterpretq_u64_p64 (poly64x2_t __a)
   return (uint64x2_t) __a;
 }
 
+__extension__ extern __inline uint64x2_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_u64_p128 (poly128_t __a)
+{
+  return (uint64x2_t)__a;
+}
+
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vreinterpret_s8_f16 (float16x4_t __a)
@@ -4948,6 +5103,13 @@ vreinterpretq_s8_p64 (poly64x2_t __a)
   return (int8x16_t) __a;
 }
 
+__extension__ extern __inline int8x16_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_s8_p128 (poly128_t __a)
+{
+  return (int8x16_t)__a;
+}
+
 __extension__ extern __inline int16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vreinterpret_s16_f16 (float16x4_t __a)
@@ -5130,6 +5292,13 @@ vreinterpretq_s16_p64 (poly64x2_t __a)
   return (int16x8_t) __a;
 }
 
+__extension__ extern __inline int16x8_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_s16_p128 (poly128_t __a)
+{
+  return (int16x8_t)__a;
+}
+
 __extension__ extern __inline int32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vreinterpret_s32_f16 (float16x4_t __a)
@@ -5312,6 +5481,13 @@ vreinterpretq_s32_p64 (poly64x2_t __a)
   return (int32x4_t) __a;
 }
 
+__extension__ extern __inline int32x4_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_s32_p128 (poly128_t __a)
+{
+  return (int32x4_t)__a;
+}
+
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vreinterpret_u8_f16 (float16x4_t __a)
@@ -5494,6 +5670,13 @@ vreinterpretq_u8_p64 (poly64x2_t __a)
   return (uint8x16_t) __a;
 }
 
+__extension__ extern __inline uint8x16_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_u8_p128 (poly128_t __a)
+{
+  return (uint8x16_t)__a;
+}
+
 __extension__ extern __inline uint16x4_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vreinterpret_u16_f16 (float16x4_t __a)
@@ -5676,6 +5859,13 @@ vreinterpretq_u16_p64 (poly64x2_t __a)
   return (uint16x8_t) __a;
 }
 
+__extension__ extern __inline uint16x8_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_u16_p128 (poly128_t __a)
+{
+  return (uint16x8_t)__a;
+}
+
 __extension__ extern __inline uint32x2_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vreinterpret_u32_f16 (float16x4_t __a)
@@ -5858,6 +6048,13 @@ vreinterpretq_u32_p64 (poly64x2_t __a)
   return (uint32x4_t) __a;
 }
 
+__extension__ extern __inline uint32x4_t
+__attribute__  ((__always_inline__, __gnu_inline__, __artificial__))
+vreinterpretq_u32_p128 (poly128_t __a)
+{
+  return (uint32x4_t)__a;
+}
+
 /* vset_lane  */
 
 __extension__ extern __inline float16x4_t
-- 
cgit v1.1


From 95ac78ce0efbc5d8396a9effa99e25e4ca50d8a9 Mon Sep 17 00:00:00 2001
From: Aldy Hernandez <aldyh@redhat.com>
Date: Tue, 6 Dec 2016 10:33:41 +0000
Subject: re PR middle-end/78566 (gcc.dg/uninit-pred-6_[abc]*.c regressions on
 some non-x86 platforms)

	PR middle-end/78566
	* tree-ssa-uninit.c (can_one_predicate_be_invalidated_p): Change
	argument type to a pred_chain.
	(can_chain_union_be_invalidated_p): Use pred_chain instead of a
	worklist.
	(flatten_out_predicate_chains): Remove.
	(uninit_uses_cannot_happen): Rename from
	uninit_ops_invalidate_phi_use.
	Change logic so that we are checking that the PHI use will
	invalidate _ALL_ possibly uninitialized operands.
	(is_use_properly_guarded): Rename call to
	uninit_ops_invalidate_phi_use into uninit_uses_cannot_happen.

From-SVN: r243288
---
 gcc/ChangeLog         |  15 +++++
 gcc/tree-ssa-uninit.c | 160 +++++++++++++++++---------------------------------
 2 files changed, 68 insertions(+), 107 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index bd74061..8141125 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2016-12-06  Aldy Hernandez  <aldyh@redhat.com>
+
+	PR middle-end/78566
+	* tree-ssa-uninit.c (can_one_predicate_be_invalidated_p): Change
+	argument type to a pred_chain.
+	(can_chain_union_be_invalidated_p): Use pred_chain instead of a
+	worklist.
+	(flatten_out_predicate_chains): Remove.
+	(uninit_uses_cannot_happen): Rename from
+	uninit_ops_invalidate_phi_use.
+	Change logic so that we are checking that the PHI use will
+	invalidate _ALL_ possibly uninitialized operands.
+	(is_use_properly_guarded): Rename call to
+	uninit_ops_invalidate_phi_use into uninit_uses_cannot_happen.
+
 2016-12-06  Tamar Christina  <tamar.christina@arm.com>
 
 	* gcc/config/aarch64/arm_neon.h
diff --git a/gcc/tree-ssa-uninit.c b/gcc/tree-ssa-uninit.c
index 4557403..a648995 100644
--- a/gcc/tree-ssa-uninit.c
+++ b/gcc/tree-ssa-uninit.c
@@ -2155,115 +2155,66 @@ normalize_preds (pred_chain_union preds, gimple *use_or_def, bool is_use)
 
 static bool
 can_one_predicate_be_invalidated_p (pred_info predicate,
-				    vec<pred_info *> worklist)
+				    pred_chain use_guard)
 {
-  for (size_t i = 0; i < worklist.length (); ++i)
+  for (size_t i = 0; i < use_guard.length (); ++i)
     {
-      pred_info *p = worklist[i];
-
       /* NOTE: This is a very simple check, and only understands an
 	 exact opposite.  So, [i == 0] is currently only invalidated
 	 by [.NOT. i == 0] or [i != 0].  Ideally we should also
 	 invalidate with say [i > 5] or [i == 8].  There is certainly
 	 room for improvement here.  */
-      if (pred_neg_p (predicate, *p))
+      if (pred_neg_p (predicate, use_guard[i]))
 	return true;
     }
   return false;
 }
 
-/* Return TRUE if all USE_PREDS can be invalidated by some predicate
-   in WORKLIST.  */
+/* Return TRUE if all predicates in UNINIT_PRED are invalidated by
+   USE_GUARD being true.  */
 
 static bool
-can_chain_union_be_invalidated_p (pred_chain_union use_preds,
-				  vec<pred_info *> worklist)
+can_chain_union_be_invalidated_p (pred_chain_union uninit_pred,
+				  pred_chain use_guard)
 {
-  /* Remember:
-       PRED_CHAIN_UNION = PRED_CHAIN1 || PRED_CHAIN2 || PRED_CHAIN3
-       PRED_CHAIN = PRED_INFO1 && PRED_INFO2 && PRED_INFO3, etc.
-
-       We need to invalidate the entire PRED_CHAIN_UNION, which means,
-       invalidating every PRED_CHAIN in this union.  But to invalidate
-       an individual PRED_CHAIN, all we need to invalidate is _any_ one
-       PRED_INFO, by boolean algebra !PRED_INFO1 || !PRED_INFO2...  */
-  for (size_t i = 0; i < use_preds.length (); ++i)
+  if (uninit_pred.is_empty ())
+    return false;
+  for (size_t i = 0; i < uninit_pred.length (); ++i)
     {
-      pred_chain c = use_preds[i];
-      bool entire_pred_chain_invalidated = false;
+      pred_chain c = uninit_pred[i];
       for (size_t j = 0; j < c.length (); ++j)
-	if (can_one_predicate_be_invalidated_p (c[j], worklist))
-	  {
-	    entire_pred_chain_invalidated = true;
-	    break;
-	  }
-      if (!entire_pred_chain_invalidated)
-	return false;
+	if (!can_one_predicate_be_invalidated_p (c[j], use_guard))
+	  return false;
     }
   return true;
 }
 
-/* Flatten out all the factors in all the pred_chain_union's in PREDS
-   into a WORKLIST of individual PRED_INFO's.
+/* Return TRUE if none of the uninitialized operands in UNINIT_OPNDS
+   can actually happen if we arrived at a use for PHI.
 
-   N is the number of pred_chain_union's in PREDS.
+   PHI_USE_GUARDS are the guard conditions for the use of the PHI.  */
 
-   Since we are interested in the inverse of the PRED_CHAIN's, by
-   boolean algebra, an inverse turns those PRED_CHAINS into unions,
-   which means we can flatten all the factors out for easy access.  */
-
-static void
-flatten_out_predicate_chains (pred_chain_union preds[], size_t n,
-			      vec<pred_info *> *worklist)
+static bool
+uninit_uses_cannot_happen (gphi *phi, unsigned uninit_opnds,
+			   pred_chain_union phi_use_guards)
 {
-  for (size_t i = 0; i < n; ++i)
-    {
-      pred_chain_union u = preds[i];
-      for (size_t j = 0; j < u.length (); ++j)
-	{
-	  pred_chain c = u[j];
-	  for (size_t k = 0; k < c.length (); ++k)
-	    worklist->safe_push (&c[k]);
-	}
-    }
-}
-
-/* Return TRUE if executing the path to some uninitialized operands in
-   a PHI will invalidate the use of the PHI result later on.
-
-   UNINIT_OPNDS is a bit vector specifying which PHI arguments have
-   arguments which are considered uninitialized.
-
-   USE_PREDS is the pred_chain_union specifying the guard conditions
-   for the use of the PHI result.
-
-   What we want to do is disprove each of the guards in the factors of
-   the USE_PREDS.  So if we have:
-
-   # USE_PREDS guards of:
-   #	1. i > 5 && i < 100
-   #	2. j > 10 && j < 88
-
-   Then proving that the control dependenies for the UNINIT_OPNDS are:
-
-   #      [i <= 5]
-   # .OR. [i >= 100]
-   #
+  unsigned phi_args = gimple_phi_num_args (phi);
+  if (phi_args > max_phi_args)
+    return false;
 
-   ...we can prove that the 1st guard above in USE_PREDS is invalid.
-   Similarly for the 2nd guard.  We return TRUE if we can disprove
-   both of the guards in USE_PREDS above.  */
+  /* PHI_USE_GUARDS are OR'ed together.  If we have more than one
+     possible guard, there's no way of knowing which guard was true.
+     Since we need to be absolutely sure that the uninitialized
+     operands will be invalidated, bail.  */
+  if (phi_use_guards.length () != 1)
+    return false;
 
-static bool
-uninit_ops_invalidate_phi_use (gphi *phi, unsigned uninit_opnds,
-			       pred_chain_union use_preds)
-{
   /* Look for the control dependencies of all the uninitialized
-     operands and build predicates describing them.  */
+     operands and build guard predicates describing them.  */
   unsigned i;
   pred_chain_union uninit_preds[max_phi_args];
-  memset (uninit_preds, 0, sizeof (pred_chain_union) * max_phi_args);
-  for (i = 0; i < MIN (max_phi_args, gimple_phi_num_args (phi)); i++)
+  memset (uninit_preds, 0, sizeof (pred_chain_union) * phi_args);
+  for (i = 0; i < phi_args; ++i)
     {
       if (!MASK_TEST_BIT (uninit_opnds, i))
 	continue;
@@ -2274,32 +2225,27 @@ uninit_ops_invalidate_phi_use (gphi *phi, unsigned uninit_opnds,
       size_t num_chains = 0;
       int num_calls = 0;
 
-      /* Build the control dependency chain for `i'...  */
-      if (compute_control_dep_chain (find_dom (e->src),
-				     e->src,
-				     dep_chains,
-				     &num_chains,
-				     &cur_chain,
-				     &num_calls))
-	{
-	  /* ...and convert it into a set of predicates.  */
-	  convert_control_dep_chain_into_preds (dep_chains, num_chains,
-						&uninit_preds[i]);
-	  for (size_t j = 0; j < num_chains; ++j)
-	    dep_chains[j].release ();
-	  simplify_preds (&uninit_preds[i], NULL, false);
-	  uninit_preds[i]
-	    = normalize_preds (uninit_preds[i], NULL, false);
-	}
+      /* Build the control dependency chain for uninit operand `i'...  */
+      if (!compute_control_dep_chain (find_dom (e->src),
+				      e->src, dep_chains, &num_chains,
+				      &cur_chain, &num_calls))
+	return false;
+      /* ...and convert it into a set of predicates.  */
+      convert_control_dep_chain_into_preds (dep_chains, num_chains,
+					    &uninit_preds[i]);
+      for (size_t j = 0; j < num_chains; ++j)
+	dep_chains[j].release ();
+      simplify_preds (&uninit_preds[i], NULL, false);
+      uninit_preds[i]
+	= normalize_preds (uninit_preds[i], NULL, false);
+
+      /* Can the guard for this uninitialized operand be invalidated
+	 by the PHI use?  */
+      if (!can_chain_union_be_invalidated_p (uninit_preds[i],
+					     phi_use_guards[0]))
+	return false;
     }
-
-  /* Munge all the predicates into one worklist, and see if we can
-     invalidate all the chains in USE_PREDs with the predicates in
-     WORKLIST.  */
-  auto_vec<pred_info *> worklist;
-  flatten_out_predicate_chains (uninit_preds, i, &worklist);
-  bool ret = can_chain_union_be_invalidated_p (use_preds, worklist);
-  return ret;
+  return true;
 }
 
 /* Computes the predicates that guard the use and checks
@@ -2361,8 +2307,8 @@ is_use_properly_guarded (gimple *use_stmt,
      for UNINIT_OPNDS are true, that the control dependencies for
      USE_STMT can never be true.  */
   if (!is_properly_guarded)
-    is_properly_guarded |= uninit_ops_invalidate_phi_use (phi, uninit_opnds,
-							  preds);
+    is_properly_guarded |= uninit_uses_cannot_happen (phi, uninit_opnds,
+						      preds);
 
   if (is_properly_guarded)
     {
-- 
cgit v1.1


From 3703d0958bac9f21b75d91bc1e87c922d86434a3 Mon Sep 17 00:00:00 2001
From: Aldy Hernandez <aldyh@redhat.com>
Date: Tue, 6 Dec 2016 10:35:37 +0000
Subject: re PR middle-end/78548 (ICE on valid C code on x86_64-linux-gnu at
 -O2 and -O3 in 64-bit mode with -Wall (*** Error in
 `/usr/local/gcc-trunk/libexec/gcc/x86_64-pc-linux-gnu/7.0.0/cc1': double free
 or corruption (fasttop): 0x0000000003c15810 ***))

	PR middle-end/78548
	* tree-ssa-uninit.c (simplify_preds_4): Call release() instead of
	destroy_predicate_vecs.
	(uninit_uses_cannot_happen): Make uninit_preds a scalar.

From-SVN: r243289
---
 gcc/ChangeLog                         |  7 +++++++
 gcc/testsuite/gcc.dg/uninit-pr78548.c | 24 ++++++++++++++++++++++++
 gcc/tree-ssa-uninit.c                 | 33 +++++++++++++++++++--------------
 3 files changed, 50 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/uninit-pr78548.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8141125..3c842b6 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,12 @@
 2016-12-06  Aldy Hernandez  <aldyh@redhat.com>
 
+	PR middle-end/78548
+	* tree-ssa-uninit.c (simplify_preds_4): Call release() instead of
+	destroy_predicate_vecs.
+	(uninit_uses_cannot_happen): Make uninit_preds a scalar.
+
+2016-12-06  Aldy Hernandez  <aldyh@redhat.com>
+
 	PR middle-end/78566
 	* tree-ssa-uninit.c (can_one_predicate_be_invalidated_p): Change
 	argument type to a pred_chain.
diff --git a/gcc/testsuite/gcc.dg/uninit-pr78548.c b/gcc/testsuite/gcc.dg/uninit-pr78548.c
new file mode 100644
index 0000000..12e06dd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/uninit-pr78548.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-Wall -w -O2" } */
+
+char a;
+int b;
+unsigned c, d;
+short e;
+int main_f;
+int main (  ) {
+L0:
+    if ( e )     goto L1;
+    b = c & d || a;
+    if ( !c )     printf ( "", ( long long ) main_f );
+    if ( d || !c )     {
+        printf ( "%llu\n", ( long long ) main );
+        goto L2;
+    }
+    unsigned g = b;
+L1:
+    b = g;
+L2:
+    if ( b )     goto L0;
+  return 0;
+}
diff --git a/gcc/tree-ssa-uninit.c b/gcc/tree-ssa-uninit.c
index a648995..b4892c7 100644
--- a/gcc/tree-ssa-uninit.c
+++ b/gcc/tree-ssa-uninit.c
@@ -1774,7 +1774,7 @@ simplify_preds_4 (pred_chain_union *preds)
 	  s_preds.safe_push ((*preds)[i]);
 	}
 
-      destroy_predicate_vecs (preds);
+      preds->release ();
       (*preds) = s_preds;
       s_preds = vNULL;
     }
@@ -2211,10 +2211,9 @@ uninit_uses_cannot_happen (gphi *phi, unsigned uninit_opnds,
 
   /* Look for the control dependencies of all the uninitialized
      operands and build guard predicates describing them.  */
-  unsigned i;
-  pred_chain_union uninit_preds[max_phi_args];
-  memset (uninit_preds, 0, sizeof (pred_chain_union) * phi_args);
-  for (i = 0; i < phi_args; ++i)
+  pred_chain_union uninit_preds;
+  bool ret = true;
+  for (unsigned i = 0; i < phi_args; ++i)
     {
       if (!MASK_TEST_BIT (uninit_opnds, i))
 	continue;
@@ -2226,26 +2225,32 @@ uninit_uses_cannot_happen (gphi *phi, unsigned uninit_opnds,
       int num_calls = 0;
 
       /* Build the control dependency chain for uninit operand `i'...  */
+      uninit_preds = vNULL;
       if (!compute_control_dep_chain (find_dom (e->src),
 				      e->src, dep_chains, &num_chains,
 				      &cur_chain, &num_calls))
-	return false;
+	{
+	  ret = false;
+	  break;
+	}
       /* ...and convert it into a set of predicates.  */
       convert_control_dep_chain_into_preds (dep_chains, num_chains,
-					    &uninit_preds[i]);
+					    &uninit_preds);
       for (size_t j = 0; j < num_chains; ++j)
 	dep_chains[j].release ();
-      simplify_preds (&uninit_preds[i], NULL, false);
-      uninit_preds[i]
-	= normalize_preds (uninit_preds[i], NULL, false);
+      simplify_preds (&uninit_preds, NULL, false);
+      uninit_preds = normalize_preds (uninit_preds, NULL, false);
 
       /* Can the guard for this uninitialized operand be invalidated
 	 by the PHI use?  */
-      if (!can_chain_union_be_invalidated_p (uninit_preds[i],
-					     phi_use_guards[0]))
-	return false;
+      if (!can_chain_union_be_invalidated_p (uninit_preds, phi_use_guards[0]))
+	{
+	  ret = false;
+	  break;
+	}
     }
-  return true;
+  destroy_predicate_vecs (&uninit_preds);
+  return ret;
 }
 
 /* Computes the predicates that guard the use and checks
-- 
cgit v1.1


From 39a0325104227d48df4d49dcb47f38b44e8da732 Mon Sep 17 00:00:00 2001
From: Jonathan Wakely <jwakely@redhat.com>
Date: Tue, 6 Dec 2016 10:43:42 +0000
Subject: Add noexcept to various basic_string string operations

	* include/bits/basic_string.h (basic_string::find, basic_string::rfind)
	(basic_string::find_first_of, basic_string::find_last_of)
	(basic_string::find_first_not_of, basic_string::find_last_not_of):
	Make all overloads noexcept.
	(basic_string::compare(const _CharT*)): Make noexcept.

From-SVN: r243290
---
 libstdc++-v3/ChangeLog                     |  8 +++
 libstdc++-v3/include/bits/basic_string.h   | 82 ++++++++++++++++++------------
 libstdc++-v3/include/bits/basic_string.tcc |  8 ++-
 3 files changed, 64 insertions(+), 34 deletions(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 451b6ee..1b52efe 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,11 @@
+2016-12-06  Jonathan Wakely  <jwakely@redhat.com>
+
+	* include/bits/basic_string.h (basic_string::find, basic_string::rfind)
+	(basic_string::find_first_of, basic_string::find_last_of)
+	(basic_string::find_first_not_of, basic_string::find_last_not_of):
+	Make all overloads noexcept.
+	(basic_string::compare(const _CharT*)): Make noexcept.
+
 2016-12-03  John David Anglin  <danglin@gcc.gnu.org>
 
 	* config/abi/post/hppa-linux-gnu/baseline_symbols.txt: Regenerate.
diff --git a/libstdc++-v3/include/bits/basic_string.h b/libstdc++-v3/include/bits/basic_string.h
index 9af7bfb..1e096ec 100644
--- a/libstdc++-v3/include/bits/basic_string.h
+++ b/libstdc++-v3/include/bits/basic_string.h
@@ -2251,7 +2251,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
        *  npos.
       */
       size_type
-      find(const _CharT* __s, size_type __pos, size_type __n) const;
+      find(const _CharT* __s, size_type __pos, size_type __n) const
+      _GLIBCXX_NOEXCEPT;
 
       /**
        *  @brief  Find position of a string.
@@ -2265,7 +2266,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       */
       size_type
       find(const basic_string& __str, size_type __pos = 0) const
-	_GLIBCXX_NOEXCEPT
+      _GLIBCXX_NOEXCEPT
       { return this->find(__str.data(), __pos, __str.size()); }
 
 #if __cplusplus > 201402L
@@ -2291,7 +2292,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
        *  it begins.  If not found, returns npos.
       */
       size_type
-      find(const _CharT* __s, size_type __pos = 0) const
+      find(const _CharT* __s, size_type __pos = 0) const _GLIBCXX_NOEXCEPT
       {
 	__glibcxx_requires_string(__s);
 	return this->find(__s, __pos, traits_type::length(__s));
@@ -2322,7 +2323,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       */
       size_type
       rfind(const basic_string& __str, size_type __pos = npos) const
-	_GLIBCXX_NOEXCEPT
+      _GLIBCXX_NOEXCEPT
       { return this->rfind(__str.data(), __pos, __str.size()); }
 
 #if __cplusplus > 201402L
@@ -2350,7 +2351,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
        *  npos.
       */
       size_type
-      rfind(const _CharT* __s, size_type __pos, size_type __n) const;
+      rfind(const _CharT* __s, size_type __pos, size_type __n) const
+      _GLIBCXX_NOEXCEPT;
 
       /**
        *  @brief  Find last position of a C string.
@@ -2395,7 +2397,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       */
       size_type
       find_first_of(const basic_string& __str, size_type __pos = 0) const
-	_GLIBCXX_NOEXCEPT
+      _GLIBCXX_NOEXCEPT
       { return this->find_first_of(__str.data(), __pos, __str.size()); }
 
 #if __cplusplus > 201402L
@@ -2423,7 +2425,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
        *  returns npos.
       */
       size_type
-      find_first_of(const _CharT* __s, size_type __pos, size_type __n) const;
+      find_first_of(const _CharT* __s, size_type __pos, size_type __n) const
+      _GLIBCXX_NOEXCEPT;
 
       /**
        *  @brief  Find position of a character of C string.
@@ -2437,6 +2440,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       */
       size_type
       find_first_of(const _CharT* __s, size_type __pos = 0) const
+      _GLIBCXX_NOEXCEPT
       {
 	__glibcxx_requires_string(__s);
 	return this->find_first_of(__s, __pos, traits_type::length(__s));
@@ -2471,7 +2475,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       */
       size_type
       find_last_of(const basic_string& __str, size_type __pos = npos) const
-	_GLIBCXX_NOEXCEPT
+      _GLIBCXX_NOEXCEPT
       { return this->find_last_of(__str.data(), __pos, __str.size()); }
 
 #if __cplusplus > 201402L
@@ -2499,7 +2503,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
        *  returns npos.
       */
       size_type
-      find_last_of(const _CharT* __s, size_type __pos, size_type __n) const;
+      find_last_of(const _CharT* __s, size_type __pos, size_type __n) const
+      _GLIBCXX_NOEXCEPT;
 
       /**
        *  @brief  Find last position of a character of C string.
@@ -2513,6 +2518,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       */
       size_type
       find_last_of(const _CharT* __s, size_type __pos = npos) const
+      _GLIBCXX_NOEXCEPT
       {
 	__glibcxx_requires_string(__s);
 	return this->find_last_of(__s, __pos, traits_type::length(__s));
@@ -2546,7 +2552,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       */
       size_type
       find_first_not_of(const basic_string& __str, size_type __pos = 0) const
-	_GLIBCXX_NOEXCEPT
+      _GLIBCXX_NOEXCEPT
       { return this->find_first_not_of(__str.data(), __pos, __str.size()); }
 
 #if __cplusplus > 201402L
@@ -2575,7 +2581,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       */
       size_type
       find_first_not_of(const _CharT* __s, size_type __pos,
-			size_type __n) const;
+			size_type __n) const _GLIBCXX_NOEXCEPT;
 
       /**
        *  @brief  Find position of a character not in C string.
@@ -2589,6 +2595,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       */
       size_type
       find_first_not_of(const _CharT* __s, size_type __pos = 0) const
+      _GLIBCXX_NOEXCEPT
       {
 	__glibcxx_requires_string(__s);
 	return this->find_first_not_of(__s, __pos, traits_type::length(__s));
@@ -2606,7 +2613,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       */
       size_type
       find_first_not_of(_CharT __c, size_type __pos = 0) const
-	_GLIBCXX_NOEXCEPT;
+      _GLIBCXX_NOEXCEPT;
 
       /**
        *  @brief  Find last position of a character not in string.
@@ -2621,7 +2628,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       */
       size_type
       find_last_not_of(const basic_string& __str, size_type __pos = npos) const
-	_GLIBCXX_NOEXCEPT
+      _GLIBCXX_NOEXCEPT
       { return this->find_last_not_of(__str.data(), __pos, __str.size()); }
 
 #if __cplusplus > 201402L
@@ -2650,7 +2657,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       */
       size_type
       find_last_not_of(const _CharT* __s, size_type __pos,
-		       size_type __n) const;
+		       size_type __n) const _GLIBCXX_NOEXCEPT;
       /**
        *  @brief  Find last position of a character not in C string.
        *  @param __s  C string containing characters to avoid.
@@ -2664,6 +2671,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       */
       size_type
       find_last_not_of(const _CharT* __s, size_type __pos = npos) const
+      _GLIBCXX_NOEXCEPT
       {
 	__glibcxx_requires_string(__s);
 	return this->find_last_not_of(__s, __pos, traits_type::length(__s));
@@ -2681,7 +2689,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       */
       size_type
       find_last_not_of(_CharT __c, size_type __pos = npos) const
-	_GLIBCXX_NOEXCEPT;
+      _GLIBCXX_NOEXCEPT;
 
       /**
        *  @brief  Get a substring.
@@ -2841,7 +2849,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
        *  ordered first.
       */
       int
-      compare(const _CharT* __s) const;
+      compare(const _CharT* __s) const _GLIBCXX_NOEXCEPT;
 
       // _GLIBCXX_RESOLVE_LIB_DEFECTS
       // 5 String::compare specification questionable
@@ -4787,7 +4795,8 @@ _GLIBCXX_END_NAMESPACE_CXX11
        *  npos.
       */
       size_type
-      find(const _CharT* __s, size_type __pos, size_type __n) const;
+      find(const _CharT* __s, size_type __pos, size_type __n) const
+      _GLIBCXX_NOEXCEPT;
 
       /**
        *  @brief  Find position of a string.
@@ -4801,7 +4810,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
       */
       size_type
       find(const basic_string& __str, size_type __pos = 0) const
-	_GLIBCXX_NOEXCEPT
+      _GLIBCXX_NOEXCEPT
       { return this->find(__str.data(), __pos, __str.size()); }
 
       /**
@@ -4815,7 +4824,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
        *  it begins.  If not found, returns npos.
       */
       size_type
-      find(const _CharT* __s, size_type __pos = 0) const
+      find(const _CharT* __s, size_type __pos = 0) const _GLIBCXX_NOEXCEPT
       {
 	__glibcxx_requires_string(__s);
 	return this->find(__s, __pos, traits_type::length(__s));
@@ -4846,7 +4855,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
       */
       size_type
       rfind(const basic_string& __str, size_type __pos = npos) const
-	_GLIBCXX_NOEXCEPT
+      _GLIBCXX_NOEXCEPT
       { return this->rfind(__str.data(), __pos, __str.size()); }
 
       /**
@@ -4862,7 +4871,8 @@ _GLIBCXX_END_NAMESPACE_CXX11
        *  npos.
       */
       size_type
-      rfind(const _CharT* __s, size_type __pos, size_type __n) const;
+      rfind(const _CharT* __s, size_type __pos, size_type __n) const
+      _GLIBCXX_NOEXCEPT;
 
       /**
        *  @brief  Find last position of a C string.
@@ -4875,7 +4885,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
        *  where it begins.  If not found, returns npos.
       */
       size_type
-      rfind(const _CharT* __s, size_type __pos = npos) const
+      rfind(const _CharT* __s, size_type __pos = npos) const _GLIBCXX_NOEXCEPT
       {
 	__glibcxx_requires_string(__s);
 	return this->rfind(__s, __pos, traits_type::length(__s));
@@ -4907,7 +4917,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
       */
       size_type
       find_first_of(const basic_string& __str, size_type __pos = 0) const
-	_GLIBCXX_NOEXCEPT
+      _GLIBCXX_NOEXCEPT
       { return this->find_first_of(__str.data(), __pos, __str.size()); }
 
       /**
@@ -4923,7 +4933,8 @@ _GLIBCXX_END_NAMESPACE_CXX11
        *  returns npos.
       */
       size_type
-      find_first_of(const _CharT* __s, size_type __pos, size_type __n) const;
+      find_first_of(const _CharT* __s, size_type __pos, size_type __n) const
+      _GLIBCXX_NOEXCEPT;
 
       /**
        *  @brief  Find position of a character of C string.
@@ -4937,6 +4948,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
       */
       size_type
       find_first_of(const _CharT* __s, size_type __pos = 0) const
+      _GLIBCXX_NOEXCEPT
       {
 	__glibcxx_requires_string(__s);
 	return this->find_first_of(__s, __pos, traits_type::length(__s));
@@ -4971,7 +4983,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
       */
       size_type
       find_last_of(const basic_string& __str, size_type __pos = npos) const
-	_GLIBCXX_NOEXCEPT
+      _GLIBCXX_NOEXCEPT
       { return this->find_last_of(__str.data(), __pos, __str.size()); }
 
       /**
@@ -4987,7 +4999,8 @@ _GLIBCXX_END_NAMESPACE_CXX11
        *  returns npos.
       */
       size_type
-      find_last_of(const _CharT* __s, size_type __pos, size_type __n) const;
+      find_last_of(const _CharT* __s, size_type __pos, size_type __n) const
+      _GLIBCXX_NOEXCEPT;
 
       /**
        *  @brief  Find last position of a character of C string.
@@ -5001,6 +5014,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
       */
       size_type
       find_last_of(const _CharT* __s, size_type __pos = npos) const
+      _GLIBCXX_NOEXCEPT
       {
 	__glibcxx_requires_string(__s);
 	return this->find_last_of(__s, __pos, traits_type::length(__s));
@@ -5034,7 +5048,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
       */
       size_type
       find_first_not_of(const basic_string& __str, size_type __pos = 0) const
-	_GLIBCXX_NOEXCEPT
+      _GLIBCXX_NOEXCEPT
       { return this->find_first_not_of(__str.data(), __pos, __str.size()); }
 
       /**
@@ -5051,7 +5065,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
       */
       size_type
       find_first_not_of(const _CharT* __s, size_type __pos,
-			size_type __n) const;
+			size_type __n) const _GLIBCXX_NOEXCEPT;
 
       /**
        *  @brief  Find position of a character not in C string.
@@ -5065,6 +5079,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
       */
       size_type
       find_first_not_of(const _CharT* __s, size_type __pos = 0) const
+      _GLIBCXX_NOEXCEPT
       {
 	__glibcxx_requires_string(__s);
 	return this->find_first_not_of(__s, __pos, traits_type::length(__s));
@@ -5082,7 +5097,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
       */
       size_type
       find_first_not_of(_CharT __c, size_type __pos = 0) const
-	_GLIBCXX_NOEXCEPT;
+      _GLIBCXX_NOEXCEPT;
 
       /**
        *  @brief  Find last position of a character not in string.
@@ -5097,7 +5112,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
       */
       size_type
       find_last_not_of(const basic_string& __str, size_type __pos = npos) const
-	_GLIBCXX_NOEXCEPT
+      _GLIBCXX_NOEXCEPT
       { return this->find_last_not_of(__str.data(), __pos, __str.size()); }
 
       /**
@@ -5114,7 +5129,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
       */
       size_type
       find_last_not_of(const _CharT* __s, size_type __pos,
-		       size_type __n) const;
+		       size_type __n) const _GLIBCXX_NOEXCEPT;
       /**
        *  @brief  Find last position of a character not in C string.
        *  @param __s  C string containing characters to avoid.
@@ -5128,6 +5143,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
       */
       size_type
       find_last_not_of(const _CharT* __s, size_type __pos = npos) const
+      _GLIBCXX_NOEXCEPT
       {
 	__glibcxx_requires_string(__s);
 	return this->find_last_not_of(__s, __pos, traits_type::length(__s));
@@ -5145,7 +5161,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
       */
       size_type
       find_last_not_of(_CharT __c, size_type __pos = npos) const
-	_GLIBCXX_NOEXCEPT;
+      _GLIBCXX_NOEXCEPT;
 
       /**
        *  @brief  Get a substring.
@@ -5255,7 +5271,7 @@ _GLIBCXX_END_NAMESPACE_CXX11
        *  ordered first.
       */
       int
-      compare(const _CharT* __s) const;
+      compare(const _CharT* __s) const _GLIBCXX_NOEXCEPT;
 
       // _GLIBCXX_RESOLVE_LIB_DEFECTS
       // 5 String::compare specification questionable
diff --git a/libstdc++-v3/include/bits/basic_string.tcc b/libstdc++-v3/include/bits/basic_string.tcc
index df1e8dd..21238d6 100644
--- a/libstdc++-v3/include/bits/basic_string.tcc
+++ b/libstdc++-v3/include/bits/basic_string.tcc
@@ -1186,6 +1186,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     typename basic_string<_CharT, _Traits, _Alloc>::size_type
     basic_string<_CharT, _Traits, _Alloc>::
     find(const _CharT* __s, size_type __pos, size_type __n) const
+    _GLIBCXX_NOEXCEPT
     {
       __glibcxx_requires_string_len(__s, __n);
       const size_type __size = this->size();
@@ -1227,6 +1228,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     typename basic_string<_CharT, _Traits, _Alloc>::size_type
     basic_string<_CharT, _Traits, _Alloc>::
     rfind(const _CharT* __s, size_type __pos, size_type __n) const
+    _GLIBCXX_NOEXCEPT
     {
       __glibcxx_requires_string_len(__s, __n);
       const size_type __size = this->size();
@@ -1265,6 +1267,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     typename basic_string<_CharT, _Traits, _Alloc>::size_type
     basic_string<_CharT, _Traits, _Alloc>::
     find_first_of(const _CharT* __s, size_type __pos, size_type __n) const
+    _GLIBCXX_NOEXCEPT
     {
       __glibcxx_requires_string_len(__s, __n);
       for (; __n && __pos < this->size(); ++__pos)
@@ -1280,6 +1283,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     typename basic_string<_CharT, _Traits, _Alloc>::size_type
     basic_string<_CharT, _Traits, _Alloc>::
     find_last_of(const _CharT* __s, size_type __pos, size_type __n) const
+    _GLIBCXX_NOEXCEPT
     {
       __glibcxx_requires_string_len(__s, __n);
       size_type __size = this->size();
@@ -1301,6 +1305,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     typename basic_string<_CharT, _Traits, _Alloc>::size_type
     basic_string<_CharT, _Traits, _Alloc>::
     find_first_not_of(const _CharT* __s, size_type __pos, size_type __n) const
+    _GLIBCXX_NOEXCEPT
     {
       __glibcxx_requires_string_len(__s, __n);
       for (; __pos < this->size(); ++__pos)
@@ -1324,6 +1329,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     typename basic_string<_CharT, _Traits, _Alloc>::size_type
     basic_string<_CharT, _Traits, _Alloc>::
     find_last_not_of(const _CharT* __s, size_type __pos, size_type __n) const
+    _GLIBCXX_NOEXCEPT
     {
       __glibcxx_requires_string_len(__s, __n);
       size_type __size = this->size();
@@ -1397,7 +1403,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename _CharT, typename _Traits, typename _Alloc>
     int
     basic_string<_CharT, _Traits, _Alloc>::
-    compare(const _CharT* __s) const
+    compare(const _CharT* __s) const _GLIBCXX_NOEXCEPT
     {
       __glibcxx_requires_string(__s);
       const size_type __size = this->size();
-- 
cgit v1.1


From b6f684ae25e66e3f9ad40c29af8b42f67421652e Mon Sep 17 00:00:00 2001
From: Aditya Kumar <hiraditya@msn.com>
Date: Tue, 6 Dec 2016 10:43:49 +0000
Subject: Add missing noexcept on std::_Sp_locker constructors

2016-12-06  Aditya Kumar  <hiraditya@msn.com>

	* src/c++11/shared_ptr.cc (_Sp_locker::_Sp_locker(const void* p)): Add
	noexcept on constructor.
	(_Sp_locker::_Sp_locker(const void* p1, const void* p2)): Same.

From-SVN: r243291
---
 libstdc++-v3/ChangeLog               | 6 ++++++
 libstdc++-v3/src/c++11/shared_ptr.cc | 4 ++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 1b52efe..5dd7504 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-06  Aditya Kumar  <hiraditya@msn.com>
+
+	* src/c++11/shared_ptr.cc (_Sp_locker::_Sp_locker(const void* p)): Add
+	noexcept on constructor.
+	(_Sp_locker::_Sp_locker(const void* p1, const void* p2)): Same.
+
 2016-12-06  Jonathan Wakely  <jwakely@redhat.com>
 
 	* include/bits/basic_string.h (basic_string::find, basic_string::rfind)
diff --git a/libstdc++-v3/src/c++11/shared_ptr.cc b/libstdc++-v3/src/c++11/shared_ptr.cc
index 9028040..b4addd0 100644
--- a/libstdc++-v3/src/c++11/shared_ptr.cc
+++ b/libstdc++-v3/src/c++11/shared_ptr.cc
@@ -56,7 +56,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     { return _Hash_impl::hash(addr) & __gnu_internal::mask; }
   }
 
-  _Sp_locker::_Sp_locker(const void* p)
+  _Sp_locker::_Sp_locker(const void* p) noexcept
   {
     if (__gthread_active_p())
       {
@@ -67,7 +67,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       _M_key1 = _M_key2 = __gnu_internal::invalid;
   }
 
-  _Sp_locker::_Sp_locker(const void* p1, const void* p2)
+  _Sp_locker::_Sp_locker(const void* p1, const void* p2) noexcept
   {
     if (__gthread_active_p())
       {
-- 
cgit v1.1


From 44f46885959d501929eeca3d3618db6db6d50c77 Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Tue, 6 Dec 2016 11:17:56 +0000
Subject: variant (__erased_use_alloc_ctor, [...]): Remove uses-allocator
 related functions.

	* include/std/variant (__erased_use_alloc_ctor,
	_Variant_base::_Variant_base, variant::variant): Remove uses-allocator
	related functions.
	* testsuite/20_util/variant/compile.cc: Remove related tests.
	* testsuite/20_util/variant/run.cc: Remove related tests.

From-SVN: r243292
---
 libstdc++-v3/ChangeLog                            |   8 ++
 libstdc++-v3/include/std/variant                  | 132 ----------------------
 libstdc++-v3/testsuite/20_util/variant/compile.cc |  32 ------
 libstdc++-v3/testsuite/20_util/variant/run.cc     |  43 -------
 4 files changed, 8 insertions(+), 207 deletions(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 5dd7504..ecda026 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,11 @@
+2016-12-07  Tim Shen  <timshen@google.com>
+
+	* include/std/variant (__erased_use_alloc_ctor,
+	_Variant_base::_Variant_base, variant::variant): Remove uses-allocator
+	related functions.
+	* testsuite/20_util/variant/compile.cc: Remove related tests.
+	* testsuite/20_util/variant/run.cc: Remove related tests.
+
 2016-12-06  Aditya Kumar  <hiraditya@msn.com>
 
 	* src/c++11/shared_ptr.cc (_Sp_locker::_Sp_locker(const void* p)): Add
diff --git a/libstdc++-v3/include/std/variant b/libstdc++-v3/include/std/variant
index 89ca979..32c0dc3 100644
--- a/libstdc++-v3/include/std/variant
+++ b/libstdc++-v3/include/std/variant
@@ -40,7 +40,6 @@
 #include <bits/enable_special_members.h>
 #include <bits/functexcept.h>
 #include <bits/move.h>
-#include <bits/uses_allocator.h>
 #include <bits/functional_hash.h>
 
 namespace std _GLIBCXX_VISIBILITY(default)
@@ -202,14 +201,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     __erased_ctor(void* __lhs, void* __rhs)
     { ::new (__lhs) decay_t<_Lhs>(__get_alternative<_Rhs>(__rhs)); }
 
-  template<typename _Alloc, typename _Lhs, typename _Rhs>
-    constexpr void
-    __erased_use_alloc_ctor(const _Alloc& __a, void* __lhs, void* __rhs)
-    {
-      __uses_allocator_construct(__a, static_cast<decay_t<_Lhs>*>(__lhs),
-				 __get_alternative<_Rhs>(__rhs));
-    }
-
   // TODO: Find a potential chance to reuse this accross the project.
   template<typename _Tp>
     constexpr void
@@ -353,47 +344,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	: _Storage(__i, std::forward<_Args>(__args)...), _M_index(_Np)
 	{ }
 
-      template<typename _Alloc>
-	_Variant_base(const _Alloc& __a, const _Variant_base& __rhs)
-	: _Storage(), _M_index(__rhs._M_index)
-	{
-	  if (__rhs._M_valid())
-	    {
-	      static constexpr void
-	      (*_S_vtable[])(const _Alloc&, void*, void*) =
-		{ &__erased_use_alloc_ctor<_Alloc, __storage<_Types>&,
-					   const __storage<_Types>&>... };
-	      _S_vtable[__rhs._M_index](__a, _M_storage(), __rhs._M_storage());
-	    }
-	}
-
-      template<typename _Alloc>
-	_Variant_base(const _Alloc& __a, _Variant_base&& __rhs)
-	: _Storage(), _M_index(__rhs._M_index)
-	{
-	  if (__rhs._M_valid())
-	    {
-	      static constexpr void
-	      (*_S_vtable[])(const _Alloc&, void*, void*) =
-		{ &__erased_use_alloc_ctor<_Alloc, __storage<_Types>&,
-					   __storage<_Types>&&>... };
-	      _S_vtable[__rhs._M_index](__a, _M_storage(), __rhs._M_storage());
-	    }
-	}
-
-      template<typename _Alloc, size_t _Np, typename... _Args>
-	constexpr explicit
-	_Variant_base(const _Alloc& __a, in_place_index_t<_Np>,
-		      _Args&&... __args)
-	: _Storage(), _M_index(_Np)
-	{
-	  using _Storage =
-	    __storage<variant_alternative_t<_Np, variant<_Types...>>>;
-	  __uses_allocator_construct(__a, static_cast<_Storage*>(_M_storage()),
-				     std::forward<_Args>(__args)...);
-	  __glibcxx_assert(_M_index == _Np);
-	}
-
       _Variant_base&
       operator=(const _Variant_base& __rhs)
       {
@@ -1033,84 +983,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	_Default_ctor_enabler(_Enable_default_constructor_tag{})
 	{ __glibcxx_assert(index() == _Np); }
 
-      template<typename _Alloc,
-	       typename = enable_if_t<
-		 __is_uses_allocator_constructible_v<__to_type<0>, _Alloc>>>
-	variant(allocator_arg_t, const _Alloc& __a)
-	: variant(allocator_arg, __a, in_place_index<0>)
-	{ }
-
-      template<typename _Alloc,
-	       typename = enable_if_t<__and_<__is_uses_allocator_constructible<
-		 _Types, _Alloc,
-		 add_lvalue_reference_t<add_const_t<_Types>>>...>::value>>
-	variant(allocator_arg_t, const _Alloc& __a, const variant& __rhs)
-	: _Base(__a, __rhs),
-	_Default_ctor_enabler(_Enable_default_constructor_tag{})
-	{ }
-
-      template<typename _Alloc,
-	       typename = enable_if_t<__and_<
-		 __is_uses_allocator_constructible<
-		   _Types, _Alloc, add_rvalue_reference_t<_Types>>...>::value>>
-	variant(allocator_arg_t, const _Alloc& __a, variant&& __rhs)
-	: _Base(__a, std::move(__rhs)),
-	_Default_ctor_enabler(_Enable_default_constructor_tag{})
-	{ }
-
-      template<typename _Alloc, typename _Tp,
-	       typename = enable_if_t<
-		 __exactly_once<__accepted_type<_Tp&&>>
-		 && __is_uses_allocator_constructible_v<
-		   __accepted_type<_Tp&&>, _Alloc, _Tp&&>
-		 && !is_same_v<decay_t<_Tp>, variant>, variant&>>
-	variant(allocator_arg_t, const _Alloc& __a, _Tp&& __t)
-	: variant(allocator_arg, __a, in_place_index<__accepted_index<_Tp&&>>,
-		  std::forward<_Tp>(__t))
-	{ __glibcxx_assert(holds_alternative<__accepted_type<_Tp&&>>(*this)); }
-
-      template<typename _Alloc, typename _Tp, typename... _Args,
-	       typename = enable_if_t<
-		 __exactly_once<_Tp>
-		 && __is_uses_allocator_constructible_v<
-		   _Tp, _Alloc, _Args&&...>>>
-	variant(allocator_arg_t, const _Alloc& __a, in_place_type_t<_Tp>,
-		_Args&&... __args)
-	: variant(allocator_arg, __a, in_place_index<__index_of<_Tp>>,
-		  std::forward<_Args>(__args)...)
-	{ __glibcxx_assert(holds_alternative<_Tp>(*this)); }
-
-      template<typename _Alloc, typename _Tp, typename _Up, typename... _Args,
-	       typename = enable_if_t<
-		 __exactly_once<_Tp>
-		 && __is_uses_allocator_constructible_v<
-		   _Tp, _Alloc, initializer_list<_Up>&, _Args&&...>>>
-	variant(allocator_arg_t, const _Alloc& __a, in_place_type_t<_Tp>,
-		initializer_list<_Up> __il, _Args&&... __args)
-	: variant(allocator_arg, __a, in_place_index<__index_of<_Tp>>, __il,
-		  std::forward<_Args>(__args)...)
-	{ __glibcxx_assert(holds_alternative<_Tp>(*this)); }
-
-      template<typename _Alloc, size_t _Np, typename... _Args,
-	       typename = enable_if_t<
-		 __is_uses_allocator_constructible_v<
-		   __to_type<_Np>, _Alloc, _Args&&...>>>
-	variant(allocator_arg_t, const _Alloc& __a, in_place_index_t<_Np>,
-		_Args&&... __args)
-	: _Base(__a, in_place_index<_Np>, std::forward<_Args>(__args)...),
-	_Default_ctor_enabler(_Enable_default_constructor_tag{})
-	{ __glibcxx_assert(index() == _Np); }
-
-      template<typename _Alloc, size_t _Np, typename _Up, typename... _Args,
-	       typename = enable_if_t<
-		 __is_uses_allocator_constructible_v<
-		   __to_type<_Np>, _Alloc, initializer_list<_Up>&, _Args&&...>>>
-	variant(allocator_arg_t, const _Alloc& __a, in_place_index_t<_Np>,
-		initializer_list<_Up> __il, _Args&&... __args)
-	: _Base(__a, in_place_index<_Np>, __il, std::forward<_Args>(__args)...),
-	_Default_ctor_enabler(_Enable_default_constructor_tag{})
-	{ __glibcxx_assert(index() == _Np); }
-
       ~variant() = default;
 
       variant& operator=(const variant&) = default;
@@ -1300,10 +1172,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 			   __detail::__variant::__get_storage(__variants)...);
     }
 
-  template<typename... _Types, typename _Alloc>
-    struct uses_allocator<variant<_Types...>, _Alloc>
-    : true_type { };
-
   template<typename... _Types>
     struct hash<variant<_Types...>>
     : private __poison_hash<remove_const_t<_Types>>...
diff --git a/libstdc++-v3/testsuite/20_util/variant/compile.cc b/libstdc++-v3/testsuite/20_util/variant/compile.cc
index 8250a95..a67b651 100644
--- a/libstdc++-v3/testsuite/20_util/variant/compile.cc
+++ b/libstdc++-v3/testsuite/20_util/variant/compile.cc
@@ -117,31 +117,6 @@ void in_place_type_ctor()
   static_assert(!is_constructible_v<variant<string, string>, in_place_type_t<string>, const char*>, "");
 }
 
-void uses_alloc_ctors()
-{
-  std::allocator<char> alloc;
-  variant<int> a(allocator_arg, alloc);
-  static_assert(!is_constructible_v<variant<AllDeleted>, allocator_arg_t, std::allocator<char>>, "");
-  {
-    variant<string, int> b(allocator_arg, alloc, "a");
-    static_assert(!is_constructible_v<variant<string, string>, allocator_arg_t, std::allocator<char>, const char*>, "");
-  }
-  {
-    variant<string, int> b(allocator_arg, alloc, in_place_index<0>, "a");
-    variant<string, string> c(allocator_arg, alloc, in_place_index<1>, "a");
-  }
-  {
-    variant<string, int> b(allocator_arg, alloc, in_place_index<0>, {'a'});
-    variant<string, string> c(allocator_arg, alloc, in_place_index<1>, {'a'});
-  }
-  {
-    variant<int, string, int> b(allocator_arg, alloc, in_place_type<string>, "a");
-  }
-  {
-    variant<int, string, int> b(allocator_arg, alloc, in_place_type<string>, {'a'});
-  }
-}
-
 void dtor()
 {
   static_assert(is_destructible_v<variant<int, string>>, "");
@@ -324,9 +299,7 @@ namespace adl_trap
 void test_adl()
 {
    using adl_trap::X;
-   using std::allocator_arg;
    X x;
-   std::allocator<int> a;
    std::initializer_list<int> il;
    adl_trap::Visitor vis;
 
@@ -339,11 +312,6 @@ void test_adl()
    variant<X> v2{in_place_type<X>, x};
    variant<X> v3{in_place_index<0>, il, x};
    variant<X> v4{in_place_type<X>, il, x};
-   variant<X> v5{allocator_arg, a, in_place_index<0>, x};
-   variant<X> v6{allocator_arg, a, in_place_type<X>, x};
-   variant<X> v7{allocator_arg, a, in_place_index<0>, il, x};
-   variant<X> v8{allocator_arg, a, in_place_type<X>, il, x};
-   variant<X> v9{allocator_arg, a, in_place_type<X>, 1};
 }
 
 void test_variant_alternative() {
diff --git a/libstdc++-v3/testsuite/20_util/variant/run.cc b/libstdc++-v3/testsuite/20_util/variant/run.cc
index 71e0176..fb5d7c4 100644
--- a/libstdc++-v3/testsuite/20_util/variant/run.cc
+++ b/libstdc++-v3/testsuite/20_util/variant/run.cc
@@ -160,48 +160,6 @@ void in_place_type_ctor()
   }
 }
 
-struct UsesAllocatable
-{
-  template<typename Alloc>
-    UsesAllocatable(std::allocator_arg_t, const Alloc& a)
-    : d(0), a(static_cast<const void*>(&a)) { }
-
-  template<typename Alloc>
-    UsesAllocatable(std::allocator_arg_t, const Alloc& a, const UsesAllocatable&)
-    : d(1), a(static_cast<const void*>(&a)) { }
-
-  template<typename Alloc>
-    UsesAllocatable(std::allocator_arg_t, const Alloc& a, UsesAllocatable&&)
-    : d(2), a(static_cast<const void*>(&a)) { }
-
-  int d;
-  const void* a;
-};
-
-namespace std
-{
-  template<>
-    struct uses_allocator<UsesAllocatable, std::allocator<char>> : true_type { };
-}
-
-void uses_allocator_ctor()
-{
-  std::allocator<char> a;
-  variant<UsesAllocatable> v(std::allocator_arg, a);
-  VERIFY(get<0>(v).d == 0);
-  VERIFY(get<0>(v).a == &a);
-  {
-    variant<UsesAllocatable> u(std::allocator_arg, a, v);
-    VERIFY(get<0>(u).d == 1);
-    VERIFY(get<0>(u).a == &a);
-  }
-  {
-    variant<UsesAllocatable> u(std::allocator_arg, a, std::move(v));
-    VERIFY(get<0>(u).d == 2);
-    VERIFY(get<0>(u).a == &a);
-  }
-}
-
 void emplace()
 {
   variant<int, string> v;
@@ -450,7 +408,6 @@ int main()
   arbitrary_ctor();
   in_place_index_ctor();
   in_place_type_ctor();
-  uses_allocator_ctor();
   copy_assign();
   move_assign();
   arbitrary_assign();
-- 
cgit v1.1


From 9189f55908d6655e63fff8d9b9f87ec83d4891e1 Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Tue, 6 Dec 2016 11:20:13 +0000
Subject: variant (std::get, operator==): Implement constexpr comparison and
 get<>.

	* include/std/variant (std::get, operator==): Implement constexpr
	comparison and get<>.
	* testsuite/20_util/variant/compile.cc: Tests.

From-SVN: r243293
---
 libstdc++-v3/ChangeLog                            |   6 +
 libstdc++-v3/include/std/variant                  | 401 +++++++++++++---------
 libstdc++-v3/testsuite/20_util/variant/compile.cc | 107 +++++-
 3 files changed, 335 insertions(+), 179 deletions(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index ecda026..8a3ab43 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,5 +1,11 @@
 2016-12-07  Tim Shen  <timshen@google.com>
 
+	* include/std/variant (std::get, operator==): Implement constexpr
+	comparison and get<>.
+	* testsuite/20_util/variant/compile.cc: Tests.
+
+2016-12-07  Tim Shen  <timshen@google.com>
+
 	* include/std/variant (__erased_use_alloc_ctor,
 	_Variant_base::_Variant_base, variant::variant): Remove uses-allocator
 	related functions.
diff --git a/libstdc++-v3/include/std/variant b/libstdc++-v3/include/std/variant
index 32c0dc3..a961a05 100644
--- a/libstdc++-v3/include/std/variant
+++ b/libstdc++-v3/include/std/variant
@@ -41,6 +41,7 @@
 #include <bits/functexcept.h>
 #include <bits/move.h>
 #include <bits/functional_hash.h>
+#include <ext/aligned_buffer.h>
 
 namespace std _GLIBCXX_VISIBILITY(default)
 {
@@ -153,33 +154,60 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename _Alternative>
     using __storage = _Alternative;
 
-  template<typename _Type, bool __is_literal = std::is_literal_type_v<_Type>>
+  // _Uninitialized<T> is guaranteed to be a literal type, even if T is not.
+  // We have to do this, because [basic.types]p10.5.3 (n4606) is not implemented
+  // yet. When it's implemented, _Uninitialized<T> can be changed to the alias
+  // to T, therefore equivalent to being removed entirely.
+  //
+  // Another reason we may not want to remove _Uninitialized<T> may be that, we
+  // want _Uninitialized<T> to be trivially destructible, no matter whether T
+  // is; but we will see.
+  template<typename _Type, bool = std::is_literal_type_v<_Type>>
     struct _Uninitialized;
 
   template<typename _Type>
     struct _Uninitialized<_Type, true>
     {
-      constexpr _Uninitialized() = default;
-
       template<typename... _Args>
       constexpr _Uninitialized(in_place_index_t<0>, _Args&&... __args)
       : _M_storage(std::forward<_Args>(__args)...)
       { }
 
+      constexpr const _Type& _M_get() const &
+      { return _M_storage; }
+
+      constexpr _Type& _M_get() &
+      { return _M_storage; }
+
+      constexpr const _Type&& _M_get() const &&
+      { return std::move(_M_storage); }
+
+      constexpr _Type&& _M_get() &&
+      { return std::move(_M_storage); }
+
       _Type _M_storage;
     };
 
   template<typename _Type>
     struct _Uninitialized<_Type, false>
     {
-      constexpr _Uninitialized() = default;
-
       template<typename... _Args>
       constexpr _Uninitialized(in_place_index_t<0>, _Args&&... __args)
       { ::new (&_M_storage) _Type(std::forward<_Args>(__args)...); }
 
-      typename std::aligned_storage<sizeof(_Type), alignof(_Type)>::type
-	  _M_storage;
+      const _Type& _M_get() const &
+      { return *_M_storage._M_ptr(); }
+
+      _Type& _M_get() &
+      { return *_M_storage._M_ptr(); }
+
+      const _Type&& _M_get() const &&
+      { return std::move(*_M_storage._M_ptr()); }
+
+      _Type&& _M_get() &&
+      { return std::move(*_M_storage._M_ptr()); }
+
+      __gnu_cxx::__aligned_membuf<_Type> _M_storage;
     };
 
   // Given a qualified storage type, return the desired reference.
@@ -194,6 +222,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	*static_cast<_Storage*>(__ptr));
     }
 
+  template<typename _Union>
+    constexpr decltype(auto) __get(in_place_index_t<0>, _Union&& __u)
+    { return std::forward<_Union>(__u)._M_first._M_get(); }
+
+  template<size_t _Np, typename _Union>
+    constexpr decltype(auto) __get(in_place_index_t<_Np>, _Union&& __u)
+    { return __get(in_place_index<_Np-1>, std::forward<_Union>(__u)._M_rest); }
+
+  // Returns the typed storage for __v.
+  template<size_t _Np, typename _Variant>
+    constexpr decltype(auto) __get(_Variant&& __v)
+    {
+      return __get(std::in_place_index<_Np>, std::forward<_Variant>(__v)._M_u);
+    }
+
   // Various functions as "vtable" entries, where those vtables are used by
   // polymorphic operations.
   template<typename _Lhs, typename _Rhs>
@@ -201,13 +244,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     __erased_ctor(void* __lhs, void* __rhs)
     { ::new (__lhs) decay_t<_Lhs>(__get_alternative<_Rhs>(__rhs)); }
 
-  // TODO: Find a potential chance to reuse this accross the project.
-  template<typename _Tp>
+  template<typename _Variant, size_t _Np>
     constexpr void
-    __erased_dtor(void* __ptr)
+    __erased_dtor(_Variant&& __v)
     {
-      using _Storage = decay_t<_Tp>;
-      static_cast<_Storage*>(__ptr)->~_Storage();
+      auto&& __element = __get<_Np>(std::forward<_Variant>(__v));
+      using _Type = std::remove_reference_t<decltype(__element)>;
+      __element.~_Type();
     }
 
   template<typename _Lhs, typename _Rhs>
@@ -223,90 +266,108 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       swap(__get_alternative<_Lhs>(__lhs), __get_alternative<_Rhs>(__rhs));
     }
 
-  template<typename _Lhs, typename _Rhs>
+  template<typename _Variant, size_t _Np>
     constexpr bool
-    __erased_equal_to(void* __lhs, void* __rhs)
-    { return __get_alternative<_Lhs>(__lhs) == __get_alternative<_Rhs>(__rhs); }
+    __erased_equal_to(_Variant&& __lhs, _Variant&& __rhs)
+    {
+      return __get<_Np>(std::forward<_Variant>(__lhs))
+	  == __get<_Np>(std::forward<_Variant>(__rhs));
+    }
 
-  template<typename _Lhs, typename _Rhs>
+  template<typename _Variant, size_t _Np>
     constexpr bool
-    __erased_less_than(void* __lhs, void* __rhs)
-    { return __get_alternative<_Lhs>(__lhs) < __get_alternative<_Rhs>(__rhs); }
+    __erased_less_than(const _Variant& __lhs, const _Variant& __rhs)
+    {
+      return __get<_Np>(std::forward<_Variant>(__lhs))
+	  < __get<_Np>(std::forward<_Variant>(__rhs));
+    }
 
   template<typename _Tp>
     constexpr size_t
     __erased_hash(void* __t)
     { return std::hash<decay_t<_Tp>>{}(__get_alternative<_Tp>(__t)); }
 
+  // Defines members and ctors.
   template<typename... _Types>
-    struct _Variant_base;
+    union _Variadic_union { };
 
-  template<typename... _Types>
-    struct _Variant_storage
-    { constexpr _Variant_storage() = default; };
-
-  // Use recursive unions to implement a trivially destructible variant.
   template<typename _First, typename... _Rest>
-    struct _Variant_storage<_First, _Rest...>
+    union _Variadic_union<_First, _Rest...>
     {
-      constexpr _Variant_storage() = default;
+      constexpr _Variadic_union() : _M_rest() { }
+
+      template<typename... _Args>
+	constexpr _Variadic_union(in_place_index_t<0>, _Args&&... __args)
+	: _M_first(in_place_index<0>, std::forward<_Args>(__args)...)
+	{ }
+
+      template<size_t _Np, typename... _Args>
+	constexpr _Variadic_union(in_place_index_t<_Np>, _Args&&... __args)
+	: _M_rest(in_place_index<_Np-1>, std::forward<_Args>(__args)...)
+	{ }
+
+      _Uninitialized<_First> _M_first;
+      _Variadic_union<_Rest...> _M_rest;
+    };
+
+  // Defines index and the dtor, possibly trivial.
+  template<bool __trivially_destructible, typename... _Types>
+    struct _Variant_storage;
+
+  template<typename... _Types>
+    struct _Variant_storage<false, _Types...>
+    {
+      template<size_t... __indices>
+	static constexpr void (*_S_vtable[])(const _Variant_storage&) =
+	    { &__erased_dtor<const _Variant_storage&, __indices>... };
+
+      constexpr _Variant_storage() : _M_index(variant_npos) { }
 
       template<size_t _Np, typename... _Args>
 	constexpr _Variant_storage(in_place_index_t<_Np>, _Args&&... __args)
-	: _M_union(in_place_index<_Np>, std::forward<_Args>(__args)...)
+	: _M_u(in_place_index<_Np>, std::forward<_Args>(__args)...),
+	_M_index(_Np)
 	{ }
 
-      ~_Variant_storage() = default;
+      template<size_t... __indices>
+	constexpr void _M_destroy_impl(std::index_sequence<__indices...>)
+	{
+	  if (_M_index != variant_npos)
+	    _S_vtable<__indices...>[_M_index](*this);
+	}
 
-      constexpr void*
-      _M_storage() const
-      {
-	return const_cast<void*>(
-	  static_cast<const void*>(std::addressof(_M_union._M_first._M_storage)));
-      }
+      ~_Variant_storage()
+      { _M_destroy_impl(std::index_sequence_for<_Types...>{}); }
 
-      union _Union
-      {
-	constexpr _Union() {};
-
-	template<typename... _Args>
-	  constexpr _Union(in_place_index_t<0>, _Args&&... __args)
-	  : _M_first(in_place_index<0>, std::forward<_Args>(__args)...)
-	  { }
-
-	template<size_t _Np, typename... _Args,
-		 typename = enable_if_t<0 < _Np && _Np < sizeof...(_Rest) + 1>>
-	  constexpr _Union(in_place_index_t<_Np>, _Args&&... __args)
-	  : _M_rest(in_place_index<_Np - 1>, std::forward<_Args>(__args)...)
-	  { }
-
-	_Uninitialized<__storage<_First>> _M_first;
-	_Variant_storage<_Rest...> _M_rest;
-      } _M_union;
+      _Variadic_union<_Types...> _M_u;
+      size_t _M_index;
     };
 
-  template<typename _Derived, bool __is_trivially_destructible>
-    struct _Dtor_mixin
+  template<typename... _Types>
+    struct _Variant_storage<true, _Types...>
     {
-      ~_Dtor_mixin()
-      { static_cast<_Derived*>(this)->_M_destroy(); }
-    };
+      constexpr _Variant_storage() : _M_index(variant_npos) { }
 
-  template<typename _Derived>
-    struct _Dtor_mixin<_Derived, true>
-    {
-      ~_Dtor_mixin() = default;
+      template<size_t _Np, typename... _Args>
+	constexpr _Variant_storage(in_place_index_t<_Np>, _Args&&... __args)
+	: _M_u(in_place_index<_Np>, std::forward<_Args>(__args)...),
+	_M_index(_Np)
+	{ }
+
+      _Variadic_union<_Types...> _M_u;
+      size_t _M_index;
     };
 
   // Helps SFINAE on special member functions. Otherwise it can live in variant
   // class.
   template<typename... _Types>
     struct _Variant_base :
-      _Variant_storage<_Types...>,
-      _Dtor_mixin<_Variant_base<_Types...>,
-		  __and_<std::is_trivially_destructible<_Types>...>::value>
+      _Variant_storage<(std::is_trivially_destructible_v<_Types> && ...),
+			_Types...>
     {
-      using _Storage = _Variant_storage<_Types...>;
+      using _Storage =
+	  _Variant_storage<(std::is_trivially_destructible_v<_Types> && ...),
+			    _Types...>;
 
       constexpr
       _Variant_base()
@@ -315,7 +376,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       : _Variant_base(in_place_index<0>) { }
 
       _Variant_base(const _Variant_base& __rhs)
-      : _Storage(), _M_index(__rhs._M_index)
       {
 	if (__rhs._M_valid())
 	  {
@@ -323,31 +383,32 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	      { &__erased_ctor<__storage<_Types>&,
 			       const __storage<_Types>&>... };
 	    _S_vtable[__rhs._M_index](_M_storage(), __rhs._M_storage());
+	    this->_M_index = __rhs._M_index;
 	  }
       }
 
       _Variant_base(_Variant_base&& __rhs)
       noexcept(__and_<is_nothrow_move_constructible<_Types>...>::value)
-      : _Storage(), _M_index(__rhs._M_index)
       {
 	if (__rhs._M_valid())
 	  {
 	    static constexpr void (*_S_vtable[])(void*, void*) =
 	      { &__erased_ctor<__storage<_Types>&, __storage<_Types>&&>... };
 	    _S_vtable[__rhs._M_index](_M_storage(), __rhs._M_storage());
+	    this->_M_index = __rhs._M_index;
 	  }
       }
 
       template<size_t _Np, typename... _Args>
 	constexpr explicit
 	_Variant_base(in_place_index_t<_Np> __i, _Args&&... __args)
-	: _Storage(__i, std::forward<_Args>(__args)...), _M_index(_Np)
+	: _Storage(__i, std::forward<_Args>(__args)...)
 	{ }
 
       _Variant_base&
       operator=(const _Variant_base& __rhs)
       {
-	if (_M_index == __rhs._M_index)
+	if (this->_M_index == __rhs._M_index)
 	  {
 	    if (__rhs._M_valid())
 	      {
@@ -367,11 +428,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	      }
 	    __catch (...)
 	      {
-		_M_index = variant_npos;
+		this->_M_index = variant_npos;
 		__throw_exception_again;
 	      }
 	  }
-	__glibcxx_assert(_M_index == __rhs._M_index);
+	__glibcxx_assert(this->_M_index == __rhs._M_index);
 	return *this;
       }
 
@@ -380,7 +441,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       noexcept(__and_<is_nothrow_move_constructible<_Types>...,
 		      is_nothrow_move_assignable<_Types>...>::value)
       {
-	if (_M_index == __rhs._M_index)
+	if (this->_M_index == __rhs._M_index)
 	  {
 	    if (__rhs._M_valid())
 	      {
@@ -399,32 +460,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	      }
 	    __catch (...)
 	      {
-		_M_index = variant_npos;
+		this->_M_index = variant_npos;
 		__throw_exception_again;
 	      }
 	  }
 	return *this;
       }
 
-      void _M_destroy()
+      void*
+      _M_storage() const
       {
-	if (_M_valid())
-	  {
-	    static constexpr void (*_S_vtable[])(void*) =
-	      { &__erased_dtor<__storage<_Types>&>... };
-	    _S_vtable[this->_M_index](_M_storage());
-	  }
+	return const_cast<void*>(static_cast<const void*>(
+	    std::addressof(_Storage::_M_u)));
       }
 
-      constexpr void*
-      _M_storage() const
-      { return _Storage::_M_storage(); }
-
       constexpr bool
       _M_valid() const noexcept
-      { return _M_index != variant_npos; }
-
-      size_t _M_index;
+      { return this->_M_index != variant_npos; }
     };
 
   // For how many times does _Tp appear in _Tuple?
@@ -489,15 +541,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     void* __get_storage(_Variant&& __v)
     { return __v._M_storage(); }
 
-  // Returns the reference to the desired alternative.
-  // It is as unsafe as a reinterpret_cast.
-  template<typename _Tp, typename _Variant>
-    decltype(auto) __access(_Variant&& __v)
-    {
-      return __get_alternative<__reserved_type_map<_Variant&&, __storage<_Tp>>>(
-	__get_storage(std::forward<_Variant>(__v)));
-    }
-
   // A helper used to create variadic number of _To types.
   template<typename _From, typename _To>
     using _To_type = _To;
@@ -597,9 +640,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	_S_apply_all_alts(_Array_type& __vtable, index_sequence<__indices...>)
 	{ (_S_apply_single_alt<__indices>(__vtable._M_arr[__indices]), ...); }
 
-      template<size_t __index>
+      template<size_t __index, typename _Tp>
 	static constexpr void
-	_S_apply_single_alt(auto& __element)
+	_S_apply_single_alt(_Tp& __element)
 	{
 	  using _Alternative = variant_alternative_t<__index, decay_t<_First>>;
 	  using _Qualified_storage = __reserved_type_map<
@@ -655,23 +698,23 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     }
 
   template<size_t _Np, typename... _Types>
-    variant_alternative_t<_Np, variant<_Types...>>&
+    constexpr variant_alternative_t<_Np, variant<_Types...>>&
     get(variant<_Types...>&);
 
   template<size_t _Np, typename... _Types>
-    variant_alternative_t<_Np, variant<_Types...>>&&
+    constexpr variant_alternative_t<_Np, variant<_Types...>>&&
     get(variant<_Types...>&&);
 
   template<size_t _Np, typename... _Types>
-    variant_alternative_t<_Np, variant<_Types...>> const&
+    constexpr variant_alternative_t<_Np, variant<_Types...>> const&
     get(const variant<_Types...>&);
 
   template<size_t _Np, typename... _Types>
-    variant_alternative_t<_Np, variant<_Types...>> const&&
+    constexpr variant_alternative_t<_Np, variant<_Types...>> const&&
     get(const variant<_Types...>&&);
 
   template<typename _Tp, typename... _Types>
-    inline _Tp& get(variant<_Types...>& __v)
+    constexpr inline _Tp& get(variant<_Types...>& __v)
     {
       static_assert(__detail::__variant::__exactly_once<_Tp, _Types...>,
 		    "T should occur for exactly once in alternatives");
@@ -680,7 +723,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     }
 
   template<typename _Tp, typename... _Types>
-    inline _Tp&& get(variant<_Types...>&& __v)
+    constexpr inline _Tp&& get(variant<_Types...>&& __v)
     {
       static_assert(__detail::__variant::__exactly_once<_Tp, _Types...>,
 		    "T should occur for exactly once in alternatives");
@@ -690,7 +733,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     }
 
   template<typename _Tp, typename... _Types>
-    inline const _Tp& get(const variant<_Types...>& __v)
+    constexpr inline const _Tp& get(const variant<_Types...>& __v)
     {
       static_assert(__detail::__variant::__exactly_once<_Tp, _Types...>,
 		    "T should occur for exactly once in alternatives");
@@ -699,7 +742,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     }
 
   template<typename _Tp, typename... _Types>
-    inline const _Tp&& get(const variant<_Types...>&& __v)
+    constexpr inline const _Tp&& get(const variant<_Types...>&& __v)
     {
       static_assert(__detail::__variant::__exactly_once<_Tp, _Types...>,
 		    "T should occur for exactly once in alternatives");
@@ -709,7 +752,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     }
 
   template<size_t _Np, typename... _Types>
-    inline add_pointer_t<variant_alternative_t<_Np, variant<_Types...>>>
+    constexpr inline
+    add_pointer_t<variant_alternative_t<_Np, variant<_Types...>>>
     get_if(variant<_Types...>* __ptr) noexcept
     {
       using _Alternative_type = variant_alternative_t<_Np, variant<_Types...>>;
@@ -717,12 +761,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 		    "The index should be in [0, number of alternatives)");
       static_assert(!is_void_v<_Alternative_type>, "_Tp should not be void");
       if (__ptr && __ptr->index() == _Np)
-	return &__detail::__variant::__access<_Alternative_type>(*__ptr);
+	return &__detail::__variant::__get<_Np>(*__ptr);
       return nullptr;
     }
 
   template<size_t _Np, typename... _Types>
-    inline add_pointer_t<const variant_alternative_t<_Np, variant<_Types...>>>
+    constexpr inline
+    add_pointer_t<const variant_alternative_t<_Np, variant<_Types...>>>
     get_if(const variant<_Types...>* __ptr) noexcept
     {
       using _Alternative_type = variant_alternative_t<_Np, variant<_Types...>>;
@@ -730,12 +775,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 		    "The index should be in [0, number of alternatives)");
       static_assert(!is_void_v<_Alternative_type>, "_Tp should not be void");
       if (__ptr && __ptr->index() == _Np)
-	return &__detail::__variant::__access<_Alternative_type>(*__ptr);
+	return &__detail::__variant::__get<_Np>(*__ptr);
       return nullptr;
     }
 
   template<typename _Tp, typename... _Types>
-    inline add_pointer_t<_Tp> get_if(variant<_Types...>* __ptr) noexcept
+    constexpr inline add_pointer_t<_Tp>
+    get_if(variant<_Types...>* __ptr) noexcept
     {
       static_assert(__detail::__variant::__exactly_once<_Tp, _Types...>,
 		    "T should occur for exactly once in alternatives");
@@ -744,7 +790,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     }
 
   template<typename _Tp, typename... _Types>
-    inline add_pointer_t<const _Tp> get_if(const variant<_Types...>* __ptr)
+    constexpr inline add_pointer_t<const _Tp>
+    get_if(const variant<_Types...>* __ptr)
     noexcept
     {
       static_assert(__detail::__variant::__exactly_once<_Tp, _Types...>,
@@ -754,64 +801,36 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     }
 
   template<typename... _Types>
-    bool operator==(const variant<_Types...>& __lhs,
-		    const variant<_Types...>& __rhs)
+    constexpr bool operator==(const variant<_Types...>& __lhs,
+			      const variant<_Types...>& __rhs)
     {
-      if (__lhs.index() != __rhs.index())
-	return false;
-
-      if (__lhs.valueless_by_exception())
-	return true;
-
-      using __detail::__variant::__storage;
-      static constexpr bool (*_S_vtable[])(void*, void*) =
-	{ &__detail::__variant::__erased_equal_to<
-	  const __storage<_Types>&, const __storage<_Types>&>... };
-      return _S_vtable[__lhs.index()](
-	  __detail::__variant::__get_storage(__lhs),
-	  __detail::__variant::__get_storage(__rhs));
+      return __lhs._M_equal_to(__rhs, std::index_sequence_for<_Types...>{});
     }
 
   template<typename... _Types>
-    inline bool
+    constexpr inline bool
     operator!=(const variant<_Types...>& __lhs, const variant<_Types...>& __rhs)
     { return !(__lhs == __rhs); }
 
   template<typename... _Types>
-    inline bool
+    constexpr inline bool
     operator<(const variant<_Types...>& __lhs, const variant<_Types...>& __rhs)
     {
-      if (__lhs.index() < __rhs.index())
-	return true;
-
-      if (__lhs.index() > __rhs.index())
-	return false;
-
-      if (__lhs.valueless_by_exception())
-	return false;
-
-      using __detail::__variant::__storage;
-      static constexpr bool (*_S_vtable[])(void*, void*) =
-	{ &__detail::__variant::__erased_less_than<
-	    const __storage<_Types>&,
-	    const __storage<_Types>&>... };
-      return _S_vtable[__lhs.index()](
-	  __detail::__variant::__get_storage(__lhs),
-	  __detail::__variant::__get_storage(__rhs));
+      return __lhs._M_less_than(__rhs, std::index_sequence_for<_Types...>{});
     }
 
   template<typename... _Types>
-    inline bool
+    constexpr inline bool
     operator>(const variant<_Types...>& __lhs, const variant<_Types...>& __rhs)
     { return __rhs < __lhs; }
 
   template<typename... _Types>
-    inline bool
+    constexpr inline bool
     operator<=(const variant<_Types...>& __lhs, const variant<_Types...>& __rhs)
     { return !(__lhs > __rhs); }
 
   template<typename... _Types>
-    inline bool
+    constexpr inline bool
     operator>=(const variant<_Types...>& __lhs, const variant<_Types...>& __rhs)
     { return !(__lhs < __rhs); }
 
@@ -1102,60 +1121,120 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  }
       }
 
+    private:
+      template<size_t... __indices>
+	static constexpr bool
+	(*_S_equal_to_vtable[])(const variant&, const variant&) =
+	  { &__detail::__variant::__erased_equal_to<
+	    const variant&, __indices>... };
+
+      template<size_t... __indices>
+	static constexpr bool
+	(*_S_less_than_vtable[])(const variant&, const variant&) =
+	  { &__detail::__variant::__erased_less_than<
+	      const variant&, __indices>... };
+
+      template<size_t... __indices>
+	constexpr bool
+	_M_equal_to(const variant& __rhs,
+		    std::index_sequence<__indices...>) const
+	{
+	  if (this->index() != __rhs.index())
+	    return false;
+
+	  if (this->valueless_by_exception())
+	    return true;
+
+	  return _S_equal_to_vtable<__indices...>[this->index()](*this, __rhs);
+	}
+
+      template<size_t... __indices>
+	constexpr inline bool
+	_M_less_than(const variant& __rhs,
+		     std::index_sequence<__indices...>) const
+	{
+	  auto __lhs_index = this->index();
+	  auto __rhs_index = __rhs.index();
+
+	  if (__lhs_index < __rhs_index)
+	    return true;
+
+	  if (__lhs_index > __rhs_index)
+	    return false;
+
+	  if (this->valueless_by_exception())
+	    return false;
+
+	  return _S_less_than_vtable<__indices...>[__lhs_index](*this, __rhs);
+	}
+
+      template<size_t _Np, typename _Vp>
+	friend constexpr decltype(auto) __detail::__variant::
+#if _GLIBCXX_INLINE_VERSION
+        __7:: // Required due to PR c++/59256
+#endif
+	__get(_Vp&& __v);
+
       template<typename _Vp>
 	friend void* __detail::__variant::
 #if _GLIBCXX_INLINE_VERSION
         __7:: // Required due to PR c++/59256
 #endif
         __get_storage(_Vp&& __v);
+
+      template<typename... _Tp>
+	friend constexpr bool
+	operator==(const variant<_Tp...>& __lhs,
+		   const variant<_Tp...>& __rhs);
+
+      template<typename... _Tp>
+	friend constexpr bool
+	operator<(const variant<_Tp...>& __lhs,
+		  const variant<_Tp...>& __rhs);
     };
 
   template<size_t _Np, typename... _Types>
-    variant_alternative_t<_Np, variant<_Types...>>&
+    constexpr variant_alternative_t<_Np, variant<_Types...>>&
     get(variant<_Types...>& __v)
     {
       static_assert(_Np < sizeof...(_Types),
 		    "The index should be in [0, number of alternatives)");
       if (__v.index() != _Np)
 	__throw_bad_variant_access("Unexpected index");
-      return __detail::__variant::__access<
-	variant_alternative_t<_Np, variant<_Types...>>>(__v);
+      return __detail::__variant::__get<_Np>(__v);
     }
 
   template<size_t _Np, typename... _Types>
-    variant_alternative_t<_Np, variant<_Types...>>&&
+    constexpr variant_alternative_t<_Np, variant<_Types...>>&&
     get(variant<_Types...>&& __v)
     {
       static_assert(_Np < sizeof...(_Types),
 		    "The index should be in [0, number of alternatives)");
       if (__v.index() != _Np)
 	__throw_bad_variant_access("Unexpected index");
-      return __detail::__variant::__access<
-	variant_alternative_t<_Np, variant<_Types...>>>(std::move(__v));
+      return __detail::__variant::__get<_Np>(std::move(__v));
     }
 
   template<size_t _Np, typename... _Types>
-    const variant_alternative_t<_Np, variant<_Types...>>&
+    constexpr const variant_alternative_t<_Np, variant<_Types...>>&
     get(const variant<_Types...>& __v)
     {
       static_assert(_Np < sizeof...(_Types),
 		    "The index should be in [0, number of alternatives)");
       if (__v.index() != _Np)
 	__throw_bad_variant_access("Unexpected index");
-      return __detail::__variant::__access<
-	variant_alternative_t<_Np, variant<_Types...>>>(__v);
+      return __detail::__variant::__get<_Np>(__v);
     }
 
   template<size_t _Np, typename... _Types>
-    const variant_alternative_t<_Np, variant<_Types...>>&&
+    constexpr const variant_alternative_t<_Np, variant<_Types...>>&&
     get(const variant<_Types...>&& __v)
     {
       static_assert(_Np < sizeof...(_Types),
 		    "The index should be in [0, number of alternatives)");
       if (__v.index() != _Np)
 	__throw_bad_variant_access("Unexpected index");
-      return __detail::__variant::__access<
-	variant_alternative_t<_Np, variant<_Types...>>>(std::move(__v));
+      return __detail::__variant::__get<_Np>(std::move(__v));
     }
 
   template<typename _Visitor, typename... _Variants>
diff --git a/libstdc++-v3/testsuite/20_util/variant/compile.cc b/libstdc++-v3/testsuite/20_util/variant/compile.cc
index a67b651..ab8ada2 100644
--- a/libstdc++-v3/testsuite/20_util/variant/compile.cc
+++ b/libstdc++-v3/testsuite/20_util/variant/compile.cc
@@ -51,6 +51,14 @@ struct DefaultNoexcept
   DefaultNoexcept& operator=(DefaultNoexcept&&) noexcept = default;
 };
 
+struct nonliteral
+{
+  nonliteral() { }
+
+  bool operator<(const nonliteral&) const;
+  bool operator==(const nonliteral&) const;
+};
+
 void default_ctor()
 {
   static_assert(is_default_constructible_v<variant<int, string>>, "");
@@ -175,22 +183,40 @@ void test_get()
 void test_relational()
 {
   {
-    const variant<int, string> a, b;
-    (void)(a < b);
-    (void)(a > b);
-    (void)(a <= b);
-    (void)(a == b);
-    (void)(a != b);
-    (void)(a >= b);
+    constexpr variant<int, nonliteral> a(42), b(43);
+    static_assert((a < b), "");
+    static_assert(!(a > b), "");
+    static_assert((a <= b), "");
+    static_assert(!(a == b), "");
+    static_assert((a != b), "");
+    static_assert(!(a >= b), "");
   }
   {
-    const monostate a, b;
-    (void)(a < b);
-    (void)(a > b);
-    (void)(a <= b);
-    (void)(a == b);
-    (void)(a != b);
-    (void)(a >= b);
+    constexpr variant<int, nonliteral> a(42), b(42);
+    static_assert(!(a < b), "");
+    static_assert(!(a > b), "");
+    static_assert((a <= b), "");
+    static_assert((a == b), "");
+    static_assert(!(a != b), "");
+    static_assert((a >= b), "");
+  }
+  {
+    constexpr variant<int, nonliteral> a(43), b(42);
+    static_assert(!(a < b), "");
+    static_assert((a > b), "");
+    static_assert(!(a <= b), "");
+    static_assert(!(a == b), "");
+    static_assert((a != b), "");
+    static_assert((a >= b), "");
+  }
+  {
+    constexpr monostate a, b;
+    static_assert(!(a < b), "");
+    static_assert(!(a > b), "");
+    static_assert((a <= b), "");
+    static_assert((a == b), "");
+    static_assert(!(a != b), "");
+    static_assert((a >= b), "");
   }
 }
 
@@ -262,14 +288,59 @@ void test_constexpr()
 	constexpr literal() = default;
     };
 
-    struct nonliteral {
-	nonliteral() { }
-    };
-
     constexpr variant<literal, nonliteral> v{};
     constexpr variant<literal, nonliteral> v1{in_place_type<literal>};
     constexpr variant<literal, nonliteral> v2{in_place_index<0>};
   }
+
+  {
+    constexpr variant<int> a(42);
+    static_assert(get<0>(a) == 42, "");
+  }
+  {
+    constexpr variant<int, nonliteral> a(42);
+    static_assert(get<0>(a) == 42, "");
+  }
+  {
+    constexpr variant<nonliteral, int> a(42);
+    static_assert(get<1>(a) == 42, "");
+  }
+  {
+    constexpr variant<int> a(42);
+    static_assert(get<int>(a) == 42, "");
+  }
+  {
+    constexpr variant<int, nonliteral> a(42);
+    static_assert(get<int>(a) == 42, "");
+  }
+  {
+    constexpr variant<nonliteral, int> a(42);
+    static_assert(get<int>(a) == 42, "");
+  }
+  {
+    constexpr variant<int> a(42);
+    static_assert(get<0>(std::move(a)) == 42, "");
+  }
+  {
+    constexpr variant<int, nonliteral> a(42);
+    static_assert(get<0>(std::move(a)) == 42, "");
+  }
+  {
+    constexpr variant<nonliteral, int> a(42);
+    static_assert(get<1>(std::move(a)) == 42, "");
+  }
+  {
+    constexpr variant<int> a(42);
+    static_assert(get<int>(std::move(a)) == 42, "");
+  }
+  {
+    constexpr variant<int, nonliteral> a(42);
+    static_assert(get<int>(std::move(a)) == 42, "");
+  }
+  {
+    constexpr variant<nonliteral, int> a(42);
+    static_assert(get<int>(std::move(a)) == 42, "");
+  }
 }
 
 void test_pr77641()
-- 
cgit v1.1


From 458ef69052224b5d3d2c78cfbe0a0e0ec85a4193 Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Tue, 6 Dec 2016 11:26:48 +0000
Subject: enable_special_members.h: Make _Enable_default_constructor constexpr.

	* include/bits/enable_special_members.h: Make
	_Enable_default_constructor constexpr.
	* include/std/variant (variant::emplace, variant::swap, std::swap,
	std::hash): Apply SFINAE to emplace and std::swap; handle __poison_hash
	bases of duplicated types.
	* testsuite/20_util/variant/compile.cc: Add tests.
	* testsuite/20_util/variant/hash.cc: Add tests.

From-SVN: r243294
---
 libstdc++-v3/ChangeLog                             |  8 ++
 libstdc++-v3/include/bits/enable_special_members.h |  5 +-
 libstdc++-v3/include/std/variant                   | 89 +++++++++++++++-------
 libstdc++-v3/testsuite/20_util/variant/compile.cc  | 43 ++++++++++-
 libstdc++-v3/testsuite/20_util/variant/hash.cc     |  4 +
 5 files changed, 114 insertions(+), 35 deletions(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 8a3ab43..cdad972 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,5 +1,13 @@
 2016-12-07  Tim Shen  <timshen@google.com>
 
+	* include/bits/enable_special_members.h: Make
+	_Enable_default_constructor constexpr.
+	* include/std/variant (variant::emplace, variant::swap, std::swap,
+	std::hash): Sfinae on emplace and std::swap; handle __poison_hash bases
+	of duplicated types.
+
+2016-12-07  Tim Shen  <timshen@google.com>
+
 	* include/std/variant (std::get, operator==): Implement constexpr
 	comparison and get<>.
 	* testsuite/20_util/variant/compile.cc: Tests.
diff --git a/libstdc++-v3/include/bits/enable_special_members.h b/libstdc++-v3/include/bits/enable_special_members.h
index 07c6c99..4f4477b 100644
--- a/libstdc++-v3/include/bits/enable_special_members.h
+++ b/libstdc++-v3/include/bits/enable_special_members.h
@@ -38,7 +38,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   struct _Enable_default_constructor_tag
   {
-    explicit _Enable_default_constructor_tag() = default;
+    explicit constexpr _Enable_default_constructor_tag() = default;
   };
 
 /**
@@ -118,7 +118,8 @@ template<typename _Tag>
     operator=(_Enable_default_constructor&&) noexcept = default;
 
     // Can be used in other ctors.
-    explicit _Enable_default_constructor(_Enable_default_constructor_tag) { }
+    constexpr explicit
+    _Enable_default_constructor(_Enable_default_constructor_tag) { }
   };
 
 template<typename _Tag>
diff --git a/libstdc++-v3/include/std/variant b/libstdc++-v3/include/std/variant
index a961a05..fa1e654 100644
--- a/libstdc++-v3/include/std/variant
+++ b/libstdc++-v3/include/std/variant
@@ -330,14 +330,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	{ }
 
       template<size_t... __indices>
-	constexpr void _M_destroy_impl(std::index_sequence<__indices...>)
+	constexpr void _M_reset_impl(std::index_sequence<__indices...>)
 	{
 	  if (_M_index != variant_npos)
 	    _S_vtable<__indices...>[_M_index](*this);
 	}
 
+      void _M_reset()
+      {
+	_M_reset_impl(std::index_sequence_for<_Types...>{});
+	_M_index = variant_npos;
+      }
+
       ~_Variant_storage()
-      { _M_destroy_impl(std::index_sequence_for<_Types...>{}); }
+      { _M_reset(); }
 
       _Variadic_union<_Types...> _M_u;
       size_t _M_index;
@@ -354,6 +360,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	_M_index(_Np)
 	{ }
 
+      void _M_reset()
+      { _M_index = variant_npos; }
+
       _Variadic_union<_Types...> _M_u;
       size_t _M_index;
     };
@@ -436,6 +445,20 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	return *this;
       }
 
+      void _M_destructive_move(_Variant_base&& __rhs)
+      {
+	this->~_Variant_base();
+	__try
+	  {
+	    ::new (this) _Variant_base(std::move(__rhs));
+	  }
+	__catch (...)
+	  {
+	    this->_M_index = variant_npos;
+	    __throw_exception_again;
+	  }
+      }
+
       _Variant_base&
       operator=(_Variant_base&& __rhs)
       noexcept(__and_<is_nothrow_move_constructible<_Types>...,
@@ -453,16 +476,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  }
 	else
 	  {
-	    this->~_Variant_base();
-	    __try
-	      {
-		::new (this) _Variant_base(std::move(__rhs));
-	      }
-	    __catch (...)
-	      {
-		this->_M_index = variant_npos;
-		__throw_exception_again;
-	      }
+	    _M_destructive_move(std::move(__rhs));
 	  }
 	return *this;
       }
@@ -682,6 +696,17 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       }
     };
 
+  template<size_t _Np, typename _Tp>
+    struct _Base_dedup : public _Tp { };
+
+  template<typename _Variant, typename __indices>
+    struct _Variant_hash_base;
+
+  template<typename... _Types, size_t... __indices>
+    struct _Variant_hash_base<variant<_Types...>,
+			      std::index_sequence<__indices...>>
+    : _Base_dedup<__indices, __poison_hash<remove_const_t<_Types>>>... { };
+
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace __variant
 } // namespace __detail
@@ -858,8 +883,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { return false; }
 
   template<typename... _Types>
-    inline enable_if_t<__and_<is_move_constructible<_Types>...,
-			      is_swappable<_Types>...>::value>
+    inline enable_if_t<(is_move_constructible_v<_Types> && ...)
+			&& (is_swappable_v<_Types> && ...)>
     swap(variant<_Types...>& __lhs, variant<_Types...>& __rhs)
     noexcept(noexcept(__lhs.swap(__rhs)))
     { __lhs.swap(__rhs); }
@@ -1028,25 +1053,26 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	}
 
       template<typename _Tp, typename... _Args>
-	void emplace(_Args&&... __args)
+	enable_if_t<is_constructible_v<_Tp, _Args...> && __exactly_once<_Tp>>
+	emplace(_Args&&... __args)
 	{
-	  static_assert(__exactly_once<_Tp>,
-			"T should occur for exactly once in alternatives");
 	  this->emplace<__index_of<_Tp>>(std::forward<_Args>(__args)...);
 	  __glibcxx_assert(holds_alternative<_Tp>(*this));
 	}
 
       template<typename _Tp, typename _Up, typename... _Args>
-	void emplace(initializer_list<_Up> __il, _Args&&... __args)
+	enable_if_t<is_constructible_v<_Tp, initializer_list<_Up>&, _Args...>
+		    && __exactly_once<_Tp>>
+	emplace(initializer_list<_Up> __il, _Args&&... __args)
 	{
-	  static_assert(__exactly_once<_Tp>,
-			"T should occur for exactly once in alternatives");
 	  this->emplace<__index_of<_Tp>>(__il, std::forward<_Args>(__args)...);
 	  __glibcxx_assert(holds_alternative<_Tp>(*this));
 	}
 
       template<size_t _Np, typename... _Args>
-	void emplace(_Args&&... __args)
+	enable_if_t<is_constructible_v<variant_alternative_t<_Np, variant>,
+				       _Args...>>
+	emplace(_Args&&... __args)
 	{
 	  static_assert(_Np < sizeof...(_Types),
 			"The index should be in [0, number of alternatives)");
@@ -1065,7 +1091,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	}
 
       template<size_t _Np, typename _Up, typename... _Args>
-	void emplace(initializer_list<_Up> __il, _Args&&... __args)
+	enable_if_t<is_constructible_v<variant_alternative_t<_Np, variant>,
+				       initializer_list<_Up>&, _Args...>>
+	emplace(initializer_list<_Up> __il, _Args&&... __args)
 	{
 	  static_assert(_Np < sizeof...(_Types),
 			"The index should be in [0, number of alternatives)");
@@ -1092,7 +1120,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       void
       swap(variant& __rhs)
       noexcept(__and_<__is_nothrow_swappable<_Types>...>::value
-	       && is_nothrow_move_assignable_v<variant>)
+	       && is_nothrow_move_constructible_v<variant>)
       {
 	if (this->index() == __rhs.index())
 	  {
@@ -1107,17 +1135,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  }
 	else if (!this->_M_valid())
 	  {
-	    *this = std::move(__rhs);
+	    this->_M_destructive_move(std::move(__rhs));
+	    __rhs._M_reset();
 	  }
 	else if (!__rhs._M_valid())
 	  {
-	    __rhs = std::move(*this);
+	    __rhs._M_destructive_move(std::move(*this));
+	    this->_M_reset();
 	  }
 	else
 	  {
 	    auto __tmp = std::move(__rhs);
-	    __rhs = std::move(*this);
-	    *this = std::move(__tmp);
+	    __rhs._M_destructive_move(std::move(*this));
+	    this->_M_destructive_move(std::move(__tmp));
 	  }
       }
 
@@ -1253,14 +1283,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   template<typename... _Types>
     struct hash<variant<_Types...>>
-    : private __poison_hash<remove_const_t<_Types>>...
+    : private __detail::__variant::_Variant_hash_base<
+	variant<_Types...>, std::index_sequence_for<_Types...>>
     {
       using result_type = size_t;
       using argument_type = variant<_Types...>;
 
       size_t
       operator()(const variant<_Types...>& __t) const
-      noexcept((... && noexcept(hash<decay_t<_Types>>{}(std::declval<_Types>()))))
+      noexcept((is_nothrow_callable_v<hash<decay_t<_Types>>(_Types)> && ...))
       {
 	if (!__t.valueless_by_exception())
 	  {
diff --git a/libstdc++-v3/testsuite/20_util/variant/compile.cc b/libstdc++-v3/testsuite/20_util/variant/compile.cc
index ab8ada2..087a17c 100644
--- a/libstdc++-v3/testsuite/20_util/variant/compile.cc
+++ b/libstdc++-v3/testsuite/20_util/variant/compile.cc
@@ -51,6 +51,15 @@ struct DefaultNoexcept
   DefaultNoexcept& operator=(DefaultNoexcept&&) noexcept = default;
 };
 
+struct MoveCtorOnly
+{
+  MoveCtorOnly() noexcept = delete;
+  MoveCtorOnly(const DefaultNoexcept&) noexcept = delete;
+  MoveCtorOnly(DefaultNoexcept&&) noexcept { }
+  MoveCtorOnly& operator=(const DefaultNoexcept&) noexcept = delete;
+  MoveCtorOnly& operator=(DefaultNoexcept&&) noexcept = delete;
+};
+
 struct nonliteral
 {
   nonliteral() { }
@@ -237,9 +246,9 @@ static_assert( !std::is_swappable_v<variant<D, int>> );
 
 void test_swap()
 {
-  variant<int, string> a, b;
-  a.swap(b);
-  swap(a, b);
+  static_assert(is_swappable_v<variant<int, string>>, "");
+  static_assert(is_swappable_v<variant<MoveCtorOnly>>, "");
+  static_assert(!is_swappable_v<variant<AllDeleted>>, "");
 }
 
 void test_visit()
@@ -385,7 +394,8 @@ void test_adl()
    variant<X> v4{in_place_type<X>, il, x};
 }
 
-void test_variant_alternative() {
+void test_variant_alternative()
+{
   static_assert(is_same_v<variant_alternative_t<0, variant<int, string>>, int>, "");
   static_assert(is_same_v<variant_alternative_t<1, variant<int, string>>, string>, "");
 
@@ -393,3 +403,28 @@ void test_variant_alternative() {
   static_assert(is_same_v<variant_alternative_t<0, volatile variant<int>>, volatile int>, "");
   static_assert(is_same_v<variant_alternative_t<0, const volatile variant<int>>, const volatile int>, "");
 }
+
+template<typename V, typename T>
+  constexpr auto has_type_emplace(int) -> decltype((declval<V>().template emplace<T>(), true))
+  { return true; };
+
+template<typename V, typename T>
+  constexpr bool has_type_emplace(...)
+  { return false; };
+
+template<typename V, size_t N>
+  constexpr auto has_index_emplace(int) -> decltype((declval<V>().template emplace<N>(), true))
+  { return true; };
+
+template<typename V, size_t T>
+  constexpr bool has_index_emplace(...)
+  { return false; };
+
+void test_emplace()
+{
+  static_assert(has_type_emplace<variant<int>, int>(0), "");
+  static_assert(!has_type_emplace<variant<long>, int>(0), "");
+  static_assert(has_index_emplace<variant<int>, 0>(0), "");
+  static_assert(!has_type_emplace<variant<AllDeleted>, AllDeleted>(0), "");
+  static_assert(!has_index_emplace<variant<AllDeleted>, 0>(0), "");
+}
diff --git a/libstdc++-v3/testsuite/20_util/variant/hash.cc b/libstdc++-v3/testsuite/20_util/variant/hash.cc
index 38991ae..64d053f 100644
--- a/libstdc++-v3/testsuite/20_util/variant/hash.cc
+++ b/libstdc++-v3/testsuite/20_util/variant/hash.cc
@@ -29,6 +29,10 @@ template<class T>
 auto f(...) -> decltype(std::false_type());
 
 static_assert(!decltype(f<S>(0))::value, "");
+static_assert(!decltype(f<std::variant<S>>(0))::value, "");
+static_assert(!decltype(f<std::variant<S, S>>(0))::value, "");
+static_assert(decltype(f<std::variant<int>>(0))::value, "");
+static_assert(decltype(f<std::variant<int, int>>(0))::value, "");
 
 int main()
 {
-- 
cgit v1.1


From b01af236b7d0d92cf1e9d642495d9177347bd740 Mon Sep 17 00:00:00 2001
From: Tim Shen <timshen@google.com>
Date: Tue, 6 Dec 2016 11:28:09 +0000
Subject: variant (visit): Make visit constexpr.

	* include/std/variant (visit): Make visit constexpr. Also clean up
	__get_alternative and __storage, since we don't support reference/void
	alternatives any more.
	* testsuite/20_util/variant/compile.cc: Add tests.

From-SVN: r243295
---
 libstdc++-v3/ChangeLog                            |   7 +
 libstdc++-v3/include/std/variant                  | 287 ++++++++++------------
 libstdc++-v3/testsuite/20_util/variant/compile.cc |  16 ++
 3 files changed, 154 insertions(+), 156 deletions(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index cdad972..1d47e38 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,10 @@
+2016-11-27  Tim Shen  <timshen@google.com>
+
+	* include/std/variant (visit): Make visit constexpr. Also cleanup
+	__get_alternative and __storage, since we don't support reference/void
+	alternatives any more.
+	* testsuite/20_util/variant/compile.cc: Add tests.
+
 2016-12-07  Tim Shen  <timshen@google.com>
 
 	* include/bits/enable_special_members.h: Make
diff --git a/libstdc++-v3/include/std/variant b/libstdc++-v3/include/std/variant
index fa1e654..dd6109d 100644
--- a/libstdc++-v3/include/std/variant
+++ b/libstdc++-v3/include/std/variant
@@ -41,11 +41,34 @@
 #include <bits/functexcept.h>
 #include <bits/move.h>
 #include <bits/functional_hash.h>
+#include <bits/invoke.h>
 #include <ext/aligned_buffer.h>
 
 namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
+namespace __detail
+{
+namespace __variant
+{
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
+
+  template<size_t _Np, typename... _Types>
+    struct _Nth_type;
+
+  template<size_t _Np, typename _First, typename... _Rest>
+    struct _Nth_type<_Np, _First, _Rest...>
+    : _Nth_type<_Np-1, _Rest...> { };
+
+  template<typename _First, typename... _Rest>
+    struct _Nth_type<0, _First, _Rest...>
+    { using type = _First; };
+
+_GLIBCXX_END_NAMESPACE_VERSION
+} // namespace __variant
+} // namespace __detail
+
+_GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   template<typename... _Types> class tuple;
   template<typename... _Types> class variant;
@@ -99,6 +122,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   constexpr size_t variant_npos = -1;
 
+  template<size_t _Np, typename... _Types>
+    constexpr variant_alternative_t<_Np, variant<_Types...>>&
+    get(variant<_Types...>&);
+
+  template<size_t _Np, typename... _Types>
+    constexpr variant_alternative_t<_Np, variant<_Types...>>&&
+    get(variant<_Types...>&&);
+
+  template<size_t _Np, typename... _Types>
+    constexpr variant_alternative_t<_Np, variant<_Types...>> const&
+    get(const variant<_Types...>&);
+
+  template<size_t _Np, typename... _Types>
+    constexpr variant_alternative_t<_Np, variant<_Types...>> const&&
+    get(const variant<_Types...>&&);
+
 _GLIBCXX_END_NAMESPACE_VERSION
 
 namespace __detail
@@ -119,41 +158,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       std::integral_constant<size_t, is_same_v<_Tp, _First>
 	? 0 : __index_of_v<_Tp, _Rest...> + 1> {};
 
-  // Extract _From's qualifiers and references and apply it to _To.
-  // __reserved_type_map<const int&, char> is const char&.
-  template<typename _From, typename _To>
-    struct __reserved_type_map_impl
-    { using type = _To; };
-
-  template<typename _From, typename _To>
-    using __reserved_type_map =
-      typename __reserved_type_map_impl<_From, _To>::type;
-
-  template<typename _From, typename _To>
-    struct __reserved_type_map_impl<_From&, _To>
-    { using type = add_lvalue_reference_t<__reserved_type_map<_From, _To>>; };
-
-  template<typename _From, typename _To>
-    struct __reserved_type_map_impl<_From&&, _To>
-    { using type = add_rvalue_reference_t<__reserved_type_map<_From, _To>>; };
-
-  template<typename _From, typename _To>
-    struct __reserved_type_map_impl<const _From, _To>
-    { using type = add_const_t<__reserved_type_map<_From, _To>>; };
-
-  template<typename _From, typename _To>
-    struct __reserved_type_map_impl<volatile _From, _To>
-    { using type = add_volatile_t<__reserved_type_map<_From, _To>>; };
-
-  template<typename _From, typename _To>
-    struct __reserved_type_map_impl<const volatile _From, _To>
-    { using type = add_cv_t<__reserved_type_map<_From, _To>>; };
-
-  // This abstraction might be useful for future features,
-  // e.g. boost::recursive_wrapper.
-  template<typename _Alternative>
-    using __storage = _Alternative;
-
   // _Uninitialized<T> is guaranteed to be a literal type, even if T is not.
   // We have to do this, because [basic.types]p10.5.3 (n4606) is not implemented
   // yet. When it's implemented, _Uninitialized<T> can be changed to the alias
@@ -210,16 +214,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       __gnu_cxx::__aligned_membuf<_Type> _M_storage;
     };
 
-  // Given a qualified storage type, return the desired reference.
-  // For example, variant<int>&& stores the int as __storage<int>, and
-  // _Qualified_storage will be __storage<int>&&.
-  template<typename _Qualified_storage>
-    decltype(auto)
-    __get_alternative(void* __ptr)
+  template<typename _Ref>
+    _Ref __ref_cast(void* __ptr)
     {
-      using _Storage = decay_t<_Qualified_storage>;
-      return __reserved_type_map<_Qualified_storage, _Storage>(
-	*static_cast<_Storage*>(__ptr));
+      return static_cast<_Ref>(*static_cast<remove_reference_t<_Ref>*>(__ptr));
     }
 
   template<typename _Union>
@@ -242,7 +240,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename _Lhs, typename _Rhs>
     constexpr void
     __erased_ctor(void* __lhs, void* __rhs)
-    { ::new (__lhs) decay_t<_Lhs>(__get_alternative<_Rhs>(__rhs)); }
+    { ::new (__lhs) remove_reference_t<_Lhs>(__ref_cast<_Rhs>(__rhs)); }
 
   template<typename _Variant, size_t _Np>
     constexpr void
@@ -256,14 +254,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename _Lhs, typename _Rhs>
     constexpr void
     __erased_assign(void* __lhs, void* __rhs)
-    { __get_alternative<_Lhs>(__lhs) = __get_alternative<_Rhs>(__rhs); }
+    { __ref_cast<_Lhs>(__lhs) = __ref_cast<_Rhs>(__rhs); }
 
   template<typename _Lhs, typename _Rhs>
     constexpr void
     __erased_swap(void* __lhs, void* __rhs)
     {
       using std::swap;
-      swap(__get_alternative<_Lhs>(__lhs), __get_alternative<_Rhs>(__rhs));
+      swap(__ref_cast<_Lhs>(__lhs), __ref_cast<_Rhs>(__rhs));
     }
 
   template<typename _Variant, size_t _Np>
@@ -285,7 +283,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   template<typename _Tp>
     constexpr size_t
     __erased_hash(void* __t)
-    { return std::hash<decay_t<_Tp>>{}(__get_alternative<_Tp>(__t)); }
+    {
+      return std::hash<remove_cv_t<remove_reference_t<_Tp>>>{}(
+	  __ref_cast<_Tp>(__t));
+    }
 
   // Defines members and ctors.
   template<typename... _Types>
@@ -389,8 +390,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	if (__rhs._M_valid())
 	  {
 	    static constexpr void (*_S_vtable[])(void*, void*) =
-	      { &__erased_ctor<__storage<_Types>&,
-			       const __storage<_Types>&>... };
+	      { &__erased_ctor<_Types&, const _Types&>... };
 	    _S_vtable[__rhs._M_index](_M_storage(), __rhs._M_storage());
 	    this->_M_index = __rhs._M_index;
 	  }
@@ -402,7 +402,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	if (__rhs._M_valid())
 	  {
 	    static constexpr void (*_S_vtable[])(void*, void*) =
-	      { &__erased_ctor<__storage<_Types>&, __storage<_Types>&&>... };
+	      { &__erased_ctor<_Types&, _Types&&>... };
 	    _S_vtable[__rhs._M_index](_M_storage(), __rhs._M_storage());
 	    this->_M_index = __rhs._M_index;
 	  }
@@ -422,8 +422,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	    if (__rhs._M_valid())
 	      {
 		static constexpr void (*_S_vtable[])(void*, void*) =
-		  { &__erased_assign<__storage<_Types>&,
-				     const __storage<_Types>&>... };
+		  { &__erased_assign<_Types&, const _Types&>... };
 		_S_vtable[__rhs._M_index](_M_storage(), __rhs._M_storage());
 	      }
 	  }
@@ -469,8 +468,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	    if (__rhs._M_valid())
 	      {
 		static constexpr void (*_S_vtable[])(void*, void*) =
-		  { &__erased_assign<__storage<_Types>&,
-				     __storage<_Types>&&>... };
+		  { &__erased_assign<_Types&, _Types&&>... };
 		_S_vtable[__rhs._M_index](_M_storage(), __rhs._M_storage());
 	      }
 	  }
@@ -555,20 +553,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     void* __get_storage(_Variant&& __v)
     { return __v._M_storage(); }
 
-  // A helper used to create variadic number of _To types.
-  template<typename _From, typename _To>
-    using _To_type = _To;
-
-  // Call the actual visitor.
-  // _Args are qualified storage types.
-  template<typename _Visitor, typename... _Args>
-    decltype(auto)
-    __visit_invoke(_Visitor&& __visitor, _To_type<_Args, void*>... __ptrs)
-    {
-      return std::forward<_Visitor>(__visitor)(
-	  __get_alternative<_Args>(__ptrs)...);
-    }
-
   // Used for storing multi-dimensional vtable.
   template<typename _Tp, size_t... _Dimensions>
     struct _Multi_array
@@ -592,108 +576,115 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     };
 
   // Creates a multi-dimensional vtable recursively.
-  // _Variant_tuple is initially the input from visit(), and gets gradually
-  // consumed.
-  // _Arg_tuple is enumerated alternative sequence, represented by a
-  // qualified storage.
   //
   // For example,
   // visit([](auto, auto){},
-  //       variant<int, char>(),
-  //       variant<float, double, long double>())
+  //       variant<int, char>(),  // typedef'ed as V1
+  //       variant<float, double, long double>())  // typedef'ed as V2
   // will trigger instantiations of:
-  // __gen_vtable_impl<_Multi_array<void(*)(void*, void*), 2, 3>,
-  //                   tuple<variant<int, char>,
-  //                         variant<float, double, long double>>,
-  //                   tuple<>>
-  //   __gen_vtable_impl<_Multi_array<void(*)(void*, void*), 3>,
-  //                     tuple<variant<float, double, long double>>,
-  //                     tuple<int>>
-  //     __gen_vtable_impl<_Multi_array<void(*)(void*, void*)>,
-  //                       tuple<>,
-  //                       tuple<int, float>>
-  //     __gen_vtable_impl<_Multi_array<void(*)(void*, void*)>,
-  //                       tuple<>,
-  //                       tuple<int, double>>
-  //     __gen_vtable_impl<_Multi_array<void(*)(void*, void*)>,
-  //                       tuple<>,
-  //                       tuple<int, long double>>
-  //   __gen_vtable_impl<_Multi_array<void(*)(void*, void*), 3>,
-  //                     tuple<variant<float, double, long double>>,
-  //                     tuple<char>>
-  //     __gen_vtable_impl<_Multi_array<void(*)(void*, void*)>,
-  //                       tuple<>,
-  //                       tuple<char, float>>
-  //     __gen_vtable_impl<_Multi_array<void(*)(void*, void*)>,
-  //                       tuple<>,
-  //                       tuple<char, double>>
-  //     __gen_vtable_impl<_Multi_array<void(*)(void*, void*)>,
-  //                       tuple<>,
-  //                       tuple<char, long double>>
+  // __gen_vtable_impl<_Multi_array<void(*)(V1&&, V2&&), 2, 3>,
+  //                   tuple<V1&&, V2&&>, std::index_sequence<>>
+  //   __gen_vtable_impl<_Multi_array<void(*)(V1&&, V2&&), 3>,
+  //                     tuple<V1&&, V2&&>, std::index_sequence<0>>
+  //     __gen_vtable_impl<_Multi_array<void(*)(V1&&, V2&&)>,
+  //                       tuple<V1&&, V2&&>, std::index_sequence<0, 0>>
+  //     __gen_vtable_impl<_Multi_array<void(*)(V1&&, V2&&)>,
+  //                       tuple<V1&&, V2&&>, std::index_sequence<0, 1>>
+  //     __gen_vtable_impl<_Multi_array<void(*)(V1&&, V2&&)>,
+  //                       tuple<V1&&, V2&&>, std::index_sequence<0, 2>>
+  //   __gen_vtable_impl<_Multi_array<void(*)(V1&&, V2&&), 3>,
+  //                     tuple<V1&&, V2&&>, std::index_sequence<1>>
+  //     __gen_vtable_impl<_Multi_array<void(*)(V1&&, V2&&)>,
+  //                       tuple<V1&&, V2&&>, std::index_sequence<1, 0>>
+  //     __gen_vtable_impl<_Multi_array<void(*)(V1&&, V2&&)>,
+  //                       tuple<V1&&, V2&&>, std::index_sequence<1, 1>>
+  //     __gen_vtable_impl<_Multi_array<void(*)(V1&&, V2&&)>,
+  //                       tuple<V1&&, V2&&>, std::index_sequence<1, 2>>
   // The returned multi-dimensional vtable can be fast accessed by the visitor
   // using index calculation.
-  template<typename _Array_type, typename _Variant_tuple, typename _Arg_tuple>
+  template<typename _Array_type, typename _Variant_tuple, typename _Index_seq>
     struct __gen_vtable_impl;
 
-  template<typename _Array_type, typename _First, typename... _Rest,
-	   typename... _Args>
-    struct __gen_vtable_impl<_Array_type, tuple<_First, _Rest...>,
-			     tuple<_Args...>>
+  template<typename _Result_type, typename _Visitor, size_t... __unused,
+	   typename... _Variants, size_t... __indices>
+    struct __gen_vtable_impl<
+	_Multi_array<_Result_type (*)(_Visitor, _Variants...), __unused...>,
+	tuple<_Variants...>, std::index_sequence<__indices...>>
     {
+      using _Next =
+	  remove_reference_t<typename _Nth_type<sizeof...(__indices),
+			     _Variants...>::type>;
+      using _Array_type =
+	  _Multi_array<_Result_type (*)(_Visitor, _Variants...), __unused...>;
+
       static constexpr _Array_type
       _S_apply()
       {
 	_Array_type __vtable{};
 	_S_apply_all_alts(
-	  __vtable, make_index_sequence<variant_size_v<decay_t<_First>>>());
+	  __vtable, make_index_sequence<variant_size_v<_Next>>());
 	return __vtable;
       }
 
-      template<size_t... __indices>
+      template<size_t... __var_indices>
 	static constexpr void
-	_S_apply_all_alts(_Array_type& __vtable, index_sequence<__indices...>)
-	{ (_S_apply_single_alt<__indices>(__vtable._M_arr[__indices]), ...); }
+	_S_apply_all_alts(_Array_type& __vtable,
+			  std::index_sequence<__var_indices...>)
+	{
+	  (_S_apply_single_alt<__var_indices>(
+	     __vtable._M_arr[__var_indices]), ...);
+	}
 
       template<size_t __index, typename _Tp>
 	static constexpr void
 	_S_apply_single_alt(_Tp& __element)
 	{
-	  using _Alternative = variant_alternative_t<__index, decay_t<_First>>;
-	  using _Qualified_storage = __reserved_type_map<
-	    _First, __storage<_Alternative>>;
+	  using _Alternative = variant_alternative_t<__index, _Next>;
 	  __element = __gen_vtable_impl<
-	    decay_t<decltype(__element)>, tuple<_Rest...>,
-	    tuple<_Args..., _Qualified_storage>>::_S_apply();
+	    remove_reference_t<
+	      decltype(__element)>, tuple<_Variants...>,
+	      std::index_sequence<__indices..., __index>>::_S_apply();
 	}
     };
 
-  template<typename _Result_type, typename _Visitor, typename... _Args>
+  template<typename _Result_type, typename _Visitor, typename... _Variants,
+	   size_t... __indices>
     struct __gen_vtable_impl<
-      _Multi_array<_Result_type (*)(_Visitor, _To_type<_Args, void*>...)>,
-		   tuple<>, tuple<_Args...>>
+      _Multi_array<_Result_type (*)(_Visitor, _Variants...)>,
+		   tuple<_Variants...>, std::index_sequence<__indices...>>
     {
       using _Array_type =
-	_Multi_array<_Result_type (*)(_Visitor&&, _To_type<_Args, void*>...)>;
+	  _Multi_array<_Result_type (*)(_Visitor&&, _Variants...)>;
+
+      decltype(auto)
+      static constexpr __visit_invoke(_Visitor&& __visitor, _Variants... __vars)
+      {
+	return __invoke(std::forward<_Visitor>(__visitor),
+			std::get<__indices>(
+			    std::forward<_Variants>(__vars))...);
+      }
 
       static constexpr auto
       _S_apply()
-      { return _Array_type{&__visit_invoke<_Visitor, _Args...>}; }
+      { return _Array_type{&__visit_invoke}; }
     };
 
   template<typename _Result_type, typename _Visitor, typename... _Variants>
     struct __gen_vtable
     {
-      using _Func_ptr =
-	_Result_type (*)(_Visitor&&, _To_type<_Variants, void*>...);
+      using _Func_ptr = _Result_type (*)(_Visitor&&, _Variants...);
       using _Array_type =
-	_Multi_array<_Func_ptr, variant_size_v<decay_t<_Variants>>...>;
+	  _Multi_array<_Func_ptr,
+		       variant_size_v<remove_reference_t<_Variants>>...>;
 
       static constexpr _Array_type
       _S_apply()
       {
-	return __gen_vtable_impl<
-	  _Array_type, tuple<_Variants...>, tuple<>>::_S_apply();
+	return __gen_vtable_impl<_Array_type, tuple<_Variants...>,
+				 std::index_sequence<>>::_S_apply();
       }
+
+      static constexpr auto _S_vtable = _S_apply();
     };
 
   template<size_t _Np, typename _Tp>
@@ -722,22 +713,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       return __v.index() == __detail::__variant::__index_of_v<_Tp, _Types...>;
     }
 
-  template<size_t _Np, typename... _Types>
-    constexpr variant_alternative_t<_Np, variant<_Types...>>&
-    get(variant<_Types...>&);
-
-  template<size_t _Np, typename... _Types>
-    constexpr variant_alternative_t<_Np, variant<_Types...>>&&
-    get(variant<_Types...>&&);
-
-  template<size_t _Np, typename... _Types>
-    constexpr variant_alternative_t<_Np, variant<_Types...>> const&
-    get(const variant<_Types...>&);
-
-  template<size_t _Np, typename... _Types>
-    constexpr variant_alternative_t<_Np, variant<_Types...>> const&&
-    get(const variant<_Types...>&&);
-
   template<typename _Tp, typename... _Types>
     constexpr inline _Tp& get(variant<_Types...>& __v)
     {
@@ -860,7 +835,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     { return !(__lhs < __rhs); }
 
   template<typename _Visitor, typename... _Variants>
-    decltype(auto) visit(_Visitor&&, _Variants&&...);
+    constexpr decltype(auto) visit(_Visitor&&, _Variants&&...);
 
   struct monostate { };
 
@@ -965,9 +940,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	using __accepted_type = __to_type<__accepted_index<_Tp>>;
 
       template<typename _Tp>
-	using __storage = __detail::__variant::__storage<_Tp>;
-
-      template<typename _Tp>
 	static constexpr size_t __index_of =
 	  __detail::__variant::__index_of_v<_Tp, _Types...>;
 
@@ -1127,8 +1099,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	    if (this->_M_valid())
 	      {
 		static constexpr void (*_S_vtable[])(void*, void*) =
-		  { &__detail::__variant::__erased_swap<
-		      __storage<_Types>&, __storage<_Types>&>... };
+		  { &__detail::__variant::__erased_swap<_Types&, _Types&>... };
 		_S_vtable[__rhs._M_index](this->_M_storage(),
 					  __rhs._M_storage());
 	      }
@@ -1268,17 +1239,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     }
 
   template<typename _Visitor, typename... _Variants>
-    decltype(auto)
+    constexpr decltype(auto)
     visit(_Visitor&& __visitor, _Variants&&... __variants)
     {
+      if ((__variants.valueless_by_exception() || ...))
+	__throw_bad_variant_access("Unexpected index");
+
       using _Result_type =
 	decltype(std::forward<_Visitor>(__visitor)(get<0>(__variants)...));
-      static constexpr auto _S_vtable =
-	__detail::__variant::__gen_vtable<
-	  _Result_type, _Visitor&&, _Variants&&...>::_S_apply();
-      auto __func_ptr = _S_vtable._M_access(__variants.index()...);
+
+      constexpr auto& __vtable = __detail::__variant::__gen_vtable<
+	_Result_type, _Visitor&&, _Variants&&...>::_S_vtable;
+
+      auto __func_ptr = __vtable._M_access(__variants.index()...);
       return (*__func_ptr)(std::forward<_Visitor>(__visitor),
-			   __detail::__variant::__get_storage(__variants)...);
+			   std::forward<_Variants>(__variants)...);
     }
 
   template<typename... _Types>
@@ -1297,7 +1272,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  {
 	    namespace __edv = __detail::__variant;
 	    static constexpr size_t (*_S_vtable[])(void*) =
-	      { &__edv::__erased_hash<const __edv::__storage<_Types>&>... };
+	      { &__edv::__erased_hash<const _Types&>... };
 	    return hash<size_t>{}(__t.index())
 	      + _S_vtable[__t.index()](__edv::__get_storage(__t));
 	  }
diff --git a/libstdc++-v3/testsuite/20_util/variant/compile.cc b/libstdc++-v3/testsuite/20_util/variant/compile.cc
index 087a17c..a8ffaea 100644
--- a/libstdc++-v3/testsuite/20_util/variant/compile.cc
+++ b/libstdc++-v3/testsuite/20_util/variant/compile.cc
@@ -275,6 +275,22 @@ void test_visit()
     };
     visit(Visitor(), variant<int, char>(), variant<float, double>());
   }
+  {
+    struct Visitor
+    {
+      constexpr bool operator()(const int&) { return true; }
+      constexpr bool operator()(const nonliteral&) { return false; }
+    };
+    static_assert(visit(Visitor(), variant<int, nonliteral>(0)), "");
+  }
+  {
+    struct Visitor
+    {
+      constexpr bool operator()(const int&) { return true; }
+      constexpr bool operator()(const nonliteral&) { return false; }
+    };
+    static_assert(visit(Visitor(), variant<int, nonliteral>(0)), "");
+  }
 }
 
 void test_constexpr()
-- 
cgit v1.1


From 1637d42545cced0c58204ecb36499e512eeb3841 Mon Sep 17 00:00:00 2001
From: Ville Voutilainen <ville.voutilainen@gmail.com>
Date: Tue, 6 Dec 2016 14:47:54 +0200
Subject: Constrain optional's __constexpr_addressof in its return type and use
 a constexpr addressof for optional, if available.

Constrain optional's __constexpr_addressof in its return type
and use a constexpr addressof for optional, if available.
* include/experimental/optional (__constexpr_addressof):
Constrain in the return type instead of in a template parameter.
(_Has_addressof_mem)
(_Has_addressof_free, _Has_addressof, __constexpr_addressof):
Guard with #ifndef __cpp_lib_addressof_constexpr.
(operator->()): Use std::__addressof if it's constexpr.

From-SVN: r243298
---
 libstdc++-v3/ChangeLog                     | 11 +++++++++++
 libstdc++-v3/include/experimental/optional | 22 +++++++++++++++++-----
 2 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 1d47e38..47e9abf 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,14 @@
+2016-12-06  Ville Voutilainen  <ville.voutilainen@gmail.com>
+
+	Constrain optional's __constexpr_addressof in its return type
+	and use a constexpr addressof for optional, if available.
+	* include/experimental/optional (__constexpr_addressof):
+	Constrain in the return type instead of in a template parameter.
+	(_Has_addressof_mem)
+	(_Has_addressof_free, _Has_addressof, __constexpr_addressof):
+	Guard with #ifndef __cpp_lib_addressof_constexpr.
+	(operator->()): Use std::__addressof if it's constexpr.
+
 2016-11-27  Tim Shen  <timshen@google.com>
 
 	* include/std/variant (visit): Make visit constexpr. Also cleanup
diff --git a/libstdc++-v3/include/experimental/optional b/libstdc++-v3/include/experimental/optional
index a631158..6994e77 100644
--- a/libstdc++-v3/include/experimental/optional
+++ b/libstdc++-v3/include/experimental/optional
@@ -134,6 +134,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   __throw_bad_optional_access(const char* __s)
   { _GLIBCXX_THROW_OR_ABORT(bad_optional_access(__s)); }
 
+#ifndef __cpp_lib_addressof_constexpr
   template<typename _Tp, typename = void>
     struct _Has_addressof_mem : std::false_type { };
 
@@ -170,16 +171,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     * overloaded addressof operator (unary operator&), in which case the call
     * will not be a constant expression.
     */
-  template<typename _Tp, enable_if_t<!_Has_addressof<_Tp>::value, int>...>
-    constexpr _Tp* __constexpr_addressof(_Tp& __t)
+  template<typename _Tp>
+    constexpr
+    enable_if_t<!_Has_addressof<_Tp>::value, _Tp*>
+    __constexpr_addressof(_Tp& __t)
     { return &__t; }
 
   /**
     * @brief Fallback overload that defers to __addressof.
     */
-  template<typename _Tp, enable_if_t<_Has_addressof<_Tp>::value, int>...>
-    inline _Tp* __constexpr_addressof(_Tp& __t)
+  template<typename _Tp>
+    inline
+    enable_if_t<_Has_addressof<_Tp>::value, _Tp*>
+    __constexpr_addressof(_Tp& __t)
     { return std::__addressof(__t); }
+#endif // __cpp_lib_addressof_constexpr
 
   /**
     * @brief Class template that holds the necessary state for @ref optional
@@ -705,7 +711,13 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       // [X.Y.4.5] Observers.
       constexpr const _Tp*
       operator->() const
-      { return __constexpr_addressof(this->_M_get()); }
+      {
+#ifndef __cpp_lib_addressof_constexpr
+	return __constexpr_addressof(this->_M_get());
+#else
+	return std::__addressof(this->_M_get());
+#endif
+      }
 
       _Tp*
       operator->()
-- 
cgit v1.1


From 505326670f1d5fb161f1676c59462f3ca16754fd Mon Sep 17 00:00:00 2001
From: Jonathan Wakely <redi@gcc.gnu.org>
Date: Tue, 6 Dec 2016 12:48:54 +0000
Subject: Fix libstdc++-v3/ChangeLog dates

From-SVN: r243299
---
 libstdc++-v3/ChangeLog | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 47e9abf..b8edb7b 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -9,14 +9,14 @@
 	Guard with #ifndef __cpp_lib_addressof_constexpr.
 	(operator->()): Use std::__addressof if it's constexpr.
 
-2016-11-27  Tim Shen  <timshen@google.com>
+2016-12-06  Tim Shen  <timshen@google.com>
 
 	* include/std/variant (visit): Make visit constexpr. Also cleanup
 	__get_alternative and __storage, since we don't support reference/void
 	alternatives any more.
 	* testsuite/20_util/variant/compile.cc: Add tests.
 
-2016-12-07  Tim Shen  <timshen@google.com>
+2016-12-06  Tim Shen  <timshen@google.com>
 
 	* include/bits/enable_special_members.h: Make
 	_Enable_default_constructor constexpr.
@@ -24,13 +24,13 @@
 	std::hash): Sfinae on emplace and std::swap; handle __poison_hash bases
 	of duplicated types.
 
-2016-12-07  Tim Shen  <timshen@google.com>
+2016-12-06  Tim Shen  <timshen@google.com>
 
 	* include/std/variant (std::get, operator==): Implement constexpr
 	comparison and get<>.
 	* testsuite/20_util/variant/compile.cc: Tests.
 
-2016-12-07  Tim Shen  <timshen@google.com>
+2016-12-06  Tim Shen  <timshen@google.com>
 
 	* include/std/variant (__erased_use_alloc_ctor,
 	_Variant_base::_Variant_base, variant::variant): Remove uses-allocator
-- 
cgit v1.1


From 1011502b46e880679458a65ef87e3307076cfa57 Mon Sep 17 00:00:00 2001
From: Andre Vehreschild <vehre@gcc.gnu.org>
Date: Tue, 6 Dec 2016 14:41:46 +0100
Subject: re PR fortran/78226 (Fill out location information everywhere)

gcc/fortran/ChangeLog:

2016-12-06  Andre Vehreschild  <vehre@gcc.gnu.org>

	PR fortran/78226
	* class.c (finalize_component): Add missing locus information.
	(finalization_scalarizer): Likewise.
	(finalization_get_offset): Likewise.
	(finalizer_insert_packed_call): Likewise.
	(generate_finalization_wrapper): Likewise.

From-SVN: r243300
---
 gcc/fortran/ChangeLog |  9 +++++++++
 gcc/fortran/class.c   | 14 ++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index f1858ea..baef2d3 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,3 +1,12 @@
+2016-12-06  Andre Vehreschild  <vehre@gcc.gnu.org>
+
+	PR fortran/78226
+	* class.c (finalize_component): Add missing locus information.
+	(finalization_scalarizer): Likewise.
+	(finalization_get_offset): Likewise.
+	(finalizer_insert_packed_call): Likewise.
+	(generate_finalization_wrapper): Likewise.
+
 2016-12-05  Nathan Sidwell  <nathan@acm.org>
 
 	* error.c (gfc_warning_check): Call diagnostic_check_max_errors.
diff --git a/gcc/fortran/class.c b/gcc/fortran/class.c
index ba965c9..e59b87c 100644
--- a/gcc/fortran/class.c
+++ b/gcc/fortran/class.c
@@ -965,6 +965,7 @@ finalize_component (gfc_expr *expr, gfc_symbol *derived, gfc_component *comp,
       cond->block = gfc_get_code (EXEC_IF);
       cond->block->expr1 = gfc_get_expr ();
       cond->block->expr1->expr_type = EXPR_FUNCTION;
+      cond->block->expr1->where = gfc_current_locus;
       gfc_get_sym_tree ("associated", sub_ns, &cond->block->expr1->symtree, false);
       cond->block->expr1->symtree->n.sym->attr.flavor = FL_PROCEDURE;
       cond->block->expr1->symtree->n.sym->attr.intrinsic = 1;
@@ -1077,6 +1078,7 @@ finalization_scalarizer (gfc_symbol *array, gfc_symbol *ptr,
   gfc_commit_symbol (expr->symtree->n.sym);
   expr->ts.type = BT_INTEGER;
   expr->ts.kind = gfc_index_integer_kind;
+  expr->where = gfc_current_locus;
 
   /* TRANSFER.  */
   expr2 = gfc_build_intrinsic_call (sub_ns, GFC_ISYM_TRANSFER, "transfer",
@@ -1093,6 +1095,7 @@ finalization_scalarizer (gfc_symbol *array, gfc_symbol *ptr,
   block->ext.actual->expr->value.op.op1 = expr2;
   block->ext.actual->expr->value.op.op2 = offset;
   block->ext.actual->expr->ts = expr->ts;
+  block->ext.actual->expr->where = gfc_current_locus;
 
   /* C_F_POINTER's 2nd arg: ptr -- and its absent shape=.  */
   block->ext.actual->next = gfc_get_actual_arglist ();
@@ -1149,6 +1152,7 @@ finalization_get_offset (gfc_symbol *idx, gfc_symbol *idx2, gfc_symbol *offset,
   expr->ref->u.ar.dimen = 1;
   expr->ref->u.ar.dimen_type[0] = DIMEN_ELEMENT;
   expr->ref->u.ar.start[0] = gfc_lval_expr_from_sym (idx2);
+  expr->where = sizes->declared_at;
 
   expr = gfc_build_intrinsic_call (sub_ns, GFC_ISYM_MOD, "mod",
 				   gfc_current_locus, 2,
@@ -1169,6 +1173,7 @@ finalization_get_offset (gfc_symbol *idx, gfc_symbol *idx2, gfc_symbol *offset,
   expr2->value.op.op2->ref->u.ar.dimen_type[0] = DIMEN_ELEMENT;
   expr2->value.op.op2->ref->u.ar.start[0] = gfc_get_expr ();
   expr2->value.op.op2->ref->u.ar.start[0]->expr_type = EXPR_OP;
+  expr2->value.op.op2->ref->u.ar.start[0]->where = gfc_current_locus;
   expr2->value.op.op2->ref->u.ar.start[0]->value.op.op = INTRINSIC_MINUS;
   expr2->value.op.op2->ref->u.ar.start[0]->value.op.op1
 	= gfc_lval_expr_from_sym (idx2);
@@ -1177,6 +1182,7 @@ finalization_get_offset (gfc_symbol *idx, gfc_symbol *idx2, gfc_symbol *offset,
   expr2->value.op.op2->ref->u.ar.start[0]->ts
 	= expr2->value.op.op2->ref->u.ar.start[0]->value.op.op1->ts;
   expr2->ts = idx->ts;
+  expr2->where = gfc_current_locus;
 
   /* ... * strides(idx2).  */
   expr = gfc_get_expr ();
@@ -1192,6 +1198,7 @@ finalization_get_offset (gfc_symbol *idx, gfc_symbol *idx2, gfc_symbol *offset,
   expr->value.op.op2->ref->u.ar.start[0] = gfc_lval_expr_from_sym (idx2);
   expr->value.op.op2->ref->u.ar.as = strides->as;
   expr->ts = idx->ts;
+  expr->where = gfc_current_locus;
 
   /* offset = offset + ...  */
   block->block->next = gfc_get_code (EXEC_ASSIGN);
@@ -1202,6 +1209,7 @@ finalization_get_offset (gfc_symbol *idx, gfc_symbol *idx2, gfc_symbol *offset,
   block->block->next->expr2->value.op.op1 = gfc_lval_expr_from_sym (offset);
   block->block->next->expr2->value.op.op2 = expr;
   block->block->next->expr2->ts = idx->ts;
+  block->block->next->expr2->where = gfc_current_locus;
 
   /* After the loop:  offset = offset * byte_stride.  */
   block->next = gfc_get_code (EXEC_ASSIGN);
@@ -1213,6 +1221,7 @@ finalization_get_offset (gfc_symbol *idx, gfc_symbol *idx2, gfc_symbol *offset,
   block->expr2->value.op.op1 = gfc_lval_expr_from_sym (offset);
   block->expr2->value.op.op2 = gfc_lval_expr_from_sym (byte_stride);
   block->expr2->ts = block->expr2->value.op.op1->ts;
+  block->expr2->where = gfc_current_locus;
   return block;
 }
 
@@ -1422,6 +1431,7 @@ finalizer_insert_packed_call (gfc_code *block, gfc_finalizer *fini,
   /* Offset calculation for the new array: idx * size of type (in bytes).  */
   offset2 = gfc_get_expr ();
   offset2->expr_type = EXPR_OP;
+  offset2->where = gfc_current_locus;
   offset2->value.op.op = INTRINSIC_TIMES;
   offset2->value.op.op1 = gfc_lval_expr_from_sym (idx);
   offset2->value.op.op2 = gfc_copy_expr (size_expr);
@@ -1826,6 +1836,7 @@ generate_finalization_wrapper (gfc_symbol *derived, gfc_namespace *ns,
   block->expr2 = gfc_get_expr ();
   block->expr2->expr_type = EXPR_OP;
   block->expr2->value.op.op = INTRINSIC_TIMES;
+  block->expr2->where = gfc_current_locus;
 
   /* sizes(idx-1).  */
   block->expr2->value.op.op1 = gfc_lval_expr_from_sym (sizes);
@@ -1837,6 +1848,7 @@ generate_finalization_wrapper (gfc_symbol *derived, gfc_namespace *ns,
   block->expr2->value.op.op1->ref->u.ar.dimen_type[0] = DIMEN_ELEMENT;
   block->expr2->value.op.op1->ref->u.ar.start[0] = gfc_get_expr ();
   block->expr2->value.op.op1->ref->u.ar.start[0]->expr_type = EXPR_OP;
+  block->expr2->value.op.op1->ref->u.ar.start[0]->where = gfc_current_locus;
   block->expr2->value.op.op1->ref->u.ar.start[0]->value.op.op = INTRINSIC_MINUS;
   block->expr2->value.op.op1->ref->u.ar.start[0]->value.op.op1
 	= gfc_lval_expr_from_sym (idx);
@@ -1890,6 +1902,7 @@ generate_finalization_wrapper (gfc_symbol *derived, gfc_namespace *ns,
   block->expr1->value.op.op2->ref->u.ar.dimen_type[0] = DIMEN_ELEMENT;
   block->expr1->value.op.op2->ref->u.ar.start[0] = gfc_get_expr ();
   block->expr1->value.op.op2->ref->u.ar.start[0]->expr_type = EXPR_OP;
+  block->expr1->value.op.op2->ref->u.ar.start[0]->where = gfc_current_locus;
   block->expr1->value.op.op2->ref->u.ar.start[0]->value.op.op = INTRINSIC_MINUS;
   block->expr1->value.op.op2->ref->u.ar.start[0]->value.op.op1
 	= gfc_lval_expr_from_sym (idx);
@@ -1927,6 +1940,7 @@ generate_finalization_wrapper (gfc_symbol *derived, gfc_namespace *ns,
   last_code->expr2->value.op.op2
 	= gfc_get_int_expr (gfc_index_integer_kind, NULL, 1);
   last_code->expr2->ts = last_code->expr2->value.op.op2->ts;
+  last_code->expr2->where = gfc_current_locus;
 
   last_code->expr2->value.op.op1 = gfc_lval_expr_from_sym (sizes);
   last_code->expr2->value.op.op1->ref = gfc_get_ref ();
-- 
cgit v1.1


From fb7b736cbbeebcd61997a83b6715c4fed29375cf Mon Sep 17 00:00:00 2001
From: Robert Suchanek <robert.suchanek@imgtec.com>
Date: Tue, 6 Dec 2016 13:45:34 +0000
Subject: [MIPS][MSA] Fix builtins with literal integer arguments.

gcc/
	* config/mips/mips.c (mips_expand_builtin_insn): Check input
	ranges of literal integer arguments.

gcc/testsuite/

	* gcc.target/mips/msa-builtins-err.c: New test.

From-SVN: r243301
---
 gcc/ChangeLog                                    |   5 +
 gcc/config/mips/mips.c                           |  83 +++++---
 gcc/testsuite/ChangeLog                          |   4 +
 gcc/testsuite/gcc.target/mips/msa-builtins-err.c | 241 +++++++++++++++++++++++
 4 files changed, 311 insertions(+), 22 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/mips/msa-builtins-err.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 3c842b6..8d405cf 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-06  Robert Suchanek  <robert.suchanek@imgtec.com>
+
+	* config/mips/mips.c (mips_expand_builtin_insn): Check input
+	ranges of literal integer arguments.
+
 2016-12-06  Aldy Hernandez  <aldyh@redhat.com>
 
 	PR middle-end/78548
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 0e83cb4..c7eb2a8 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -16570,6 +16570,7 @@ mips_expand_builtin_insn (enum insn_code icode, unsigned int nops,
 			  struct expand_operand *ops, bool has_target_p)
 {
   machine_mode imode;
+  int rangelo = 0, rangehi = 0, error_opno = 0;
 
   switch (icode)
     {
@@ -16600,12 +16601,19 @@ mips_expand_builtin_insn (enum insn_code icode, unsigned int nops,
       gcc_assert (has_target_p && nops == 3);
       /* We only generate a vector of constants iff the second argument
 	 is an immediate.  We also validate the range of the immediate.  */
-      if (!CONST_INT_P (ops[2].value)
-	  || !IN_RANGE (INTVAL (ops[2].value), 0,  31))
-	break;
-      ops[2].mode = ops[0].mode;
-      ops[2].value = mips_gen_const_int_vector (ops[2].mode,
-						INTVAL (ops[2].value));
+      if (CONST_INT_P (ops[2].value))
+	{
+	  rangelo = 0;
+	  rangehi = 31;
+	  if (IN_RANGE (INTVAL (ops[2].value), rangelo, rangehi))
+	    {
+	      ops[2].mode = ops[0].mode;
+	      ops[2].value = mips_gen_const_int_vector (ops[2].mode,
+							INTVAL (ops[2].value));
+	    }
+	  else
+	    error_opno = 2;
+	}
       break;
 
     case CODE_FOR_msa_ceqi_b:
@@ -16631,12 +16639,19 @@ mips_expand_builtin_insn (enum insn_code icode, unsigned int nops,
       gcc_assert (has_target_p && nops == 3);
       /* We only generate a vector of constants iff the second argument
 	 is an immediate.  We also validate the range of the immediate.  */
-      if (!CONST_INT_P (ops[2].value)
-	  || !IN_RANGE (INTVAL (ops[2].value), -16,  15))
-	break;
-      ops[2].mode = ops[0].mode;
-      ops[2].value = mips_gen_const_int_vector (ops[2].mode,
-						INTVAL (ops[2].value));
+      if (CONST_INT_P (ops[2].value))
+	{
+	  rangelo = -16;
+	  rangehi = 15;
+	  if (IN_RANGE (INTVAL (ops[2].value), rangelo, rangehi))
+	    {
+	      ops[2].mode = ops[0].mode;
+	      ops[2].value = mips_gen_const_int_vector (ops[2].mode,
+							INTVAL (ops[2].value));
+	    }
+	  else
+	    error_opno = 2;
+	}
       break;
 
     case CODE_FOR_msa_andi_b:
@@ -16716,13 +16731,19 @@ mips_expand_builtin_insn (enum insn_code icode, unsigned int nops,
     case CODE_FOR_msa_srli_w:
     case CODE_FOR_msa_srli_d:
       gcc_assert (has_target_p && nops == 3);
-      if (!CONST_INT_P (ops[2].value)
-	  || !IN_RANGE (INTVAL (ops[2].value), 0,
-			GET_MODE_UNIT_PRECISION (ops[0].mode) - 1))
-	break;
-      ops[2].mode = ops[0].mode;
-      ops[2].value = mips_gen_const_int_vector (ops[2].mode,
-						INTVAL (ops[2].value));
+      if (CONST_INT_P (ops[2].value))
+	{
+	  rangelo = 0;
+	  rangehi = GET_MODE_UNIT_BITSIZE (ops[0].mode) - 1;
+	  if (IN_RANGE (INTVAL (ops[2].value), rangelo, rangehi))
+	    {
+	      ops[2].mode = ops[0].mode;
+	      ops[2].value = mips_gen_const_int_vector (ops[2].mode,
+							INTVAL (ops[2].value));
+	    }
+	  else
+	    error_opno = 2;
+	}
       break;
 
     case CODE_FOR_msa_insert_b:
@@ -16738,7 +16759,13 @@ mips_expand_builtin_insn (enum insn_code icode, unsigned int nops,
       imode = GET_MODE_INNER (ops[0].mode);
       ops[1].value = lowpart_subreg (imode, ops[1].value, ops[1].mode);
       ops[1].mode = imode;
-      ops[3].value = GEN_INT (1 << INTVAL (ops[3].value));
+      rangelo = 0;
+      rangehi = GET_MODE_NUNITS (ops[0].mode) - 1;
+      if (CONST_INT_P (ops[3].value)
+	  && IN_RANGE (INTVAL (ops[3].value), rangelo, rangehi))
+	ops[3].value = GEN_INT (1 << INTVAL (ops[3].value));
+      else
+	error_opno = 2;
       break;
 
     case CODE_FOR_msa_insve_b:
@@ -16750,7 +16777,13 @@ mips_expand_builtin_insn (enum insn_code icode, unsigned int nops,
       gcc_assert (has_target_p && nops == 4);
       std::swap (ops[1], ops[2]);
       std::swap (ops[1], ops[3]);
-      ops[3].value = GEN_INT (1 << INTVAL (ops[3].value));
+      rangelo = 0;
+      rangehi = GET_MODE_NUNITS (ops[0].mode) - 1;
+      if (CONST_INT_P (ops[3].value)
+	  && IN_RANGE (INTVAL (ops[3].value), rangelo, rangehi))
+	ops[3].value = GEN_INT (1 << INTVAL (ops[3].value));
+      else
+	error_opno = 2;
       break;
 
     case CODE_FOR_msa_shf_b:
@@ -16774,7 +16807,13 @@ mips_expand_builtin_insn (enum insn_code icode, unsigned int nops,
       break;
   }
 
-  if (!maybe_expand_insn (icode, nops, ops))
+  if (error_opno != 0)
+    {
+      error ("argument %d to the built-in must be a constant"
+	     " in range %d to %d", error_opno, rangelo, rangehi);
+      return has_target_p ? gen_reg_rtx (ops[0].mode) : const0_rtx;
+    }
+  else if (!maybe_expand_insn (icode, nops, ops))
     {
       error ("invalid argument to built-in function");
       return has_target_p ? gen_reg_rtx (ops[0].mode) : const0_rtx;
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 7aa429f..c7664e8 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2016-12-06  Robert Suchanek  <robert.suchanek@imgtec.com>
+
+	* gcc.target/mips/msa-builtins-err.c: New test.
+
 2016-12-06  Jakub Jelinek  <jakub@redhat.com>
 
 	PR c++/71537
diff --git a/gcc/testsuite/gcc.target/mips/msa-builtins-err.c b/gcc/testsuite/gcc.target/mips/msa-builtins-err.c
new file mode 100644
index 0000000..041b7f5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/mips/msa-builtins-err.c
@@ -0,0 +1,241 @@
+/* Test builtins for MIPS MSA ASE instructions */
+/* { dg-do compile } */
+/* { dg-options "-mfp64 -mhard-float -mmsa" } */
+
+#include <msa.h>
+
+v16i8 v16i8_x;
+v16u8 v16u8_x;
+v8i16 v8i16_x;
+v8u16 v8u16_x;
+v4i32 v4i32_x;
+v4u32 v4u32_x;
+v2i64 v2i64_x;
+v2u64 v2u64_x;
+
+volatile v16i8 v16i8_r;
+volatile v16u8 v16u8_r;
+volatile v8i16 v8i16_r;
+volatile v8u16 v8u16_r;
+volatile v4i32 v4i32_r;
+volatile v4u32 v4u32_r;
+volatile v2i64 v2i64_r;
+volatile v2u64 v2u64_r;
+
+/* MSA builtins with literal range of 0 to 31.  */
+
+void
+msa_add ()
+{
+ v16i8_r = __builtin_msa_addvi_b (v16i8_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v16i8_r = __builtin_msa_addvi_b (v16i8_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+ v8i16_r = __builtin_msa_addvi_h (v8i16_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v8i16_r = __builtin_msa_addvi_h (v8i16_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+ v4i32_r = __builtin_msa_addvi_w (v4i32_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v4i32_r = __builtin_msa_addvi_w (v4i32_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+ v2i64_r = __builtin_msa_addvi_d (v2i64_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v2i64_r = __builtin_msa_addvi_d (v2i64_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+}
+
+void
+msa_sub ()
+{
+ v16i8_r = __builtin_msa_subvi_b (v16i8_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v16i8_r = __builtin_msa_subvi_b (v16i8_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+ v8i16_r = __builtin_msa_subvi_h (v8i16_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v8i16_r = __builtin_msa_subvi_h (v8i16_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+ v4i32_r = __builtin_msa_subvi_w (v4i32_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v4i32_r = __builtin_msa_subvi_w (v4i32_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+ v2i64_r = __builtin_msa_subvi_d (v2i64_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v2i64_r = __builtin_msa_subvi_d (v2i64_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+}
+
+void
+msa_mini_u ()
+{
+ v16u8_r = __builtin_msa_mini_u_b (v16u8_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v16u8_r = __builtin_msa_mini_u_b (v16u8_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+ v8u16_r = __builtin_msa_mini_u_h (v8u16_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v8u16_r = __builtin_msa_mini_u_h (v8u16_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+ v4u32_r = __builtin_msa_mini_u_w (v4u32_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v4u32_r = __builtin_msa_mini_u_w (v4u32_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+ v2u64_r = __builtin_msa_mini_u_d (v2u64_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v2u64_r = __builtin_msa_mini_u_d (v2u64_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+}
+
+void
+msa_maxi_u ()
+{
+ v16u8_r = __builtin_msa_maxi_u_b (v16u8_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v16u8_r = __builtin_msa_maxi_u_b (v16u8_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+ v8u16_r = __builtin_msa_maxi_u_h (v8u16_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v8u16_r = __builtin_msa_maxi_u_h (v8u16_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+ v4u32_r = __builtin_msa_maxi_u_w (v4u32_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v4u32_r = __builtin_msa_maxi_u_w (v4u32_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+ v2u64_r = __builtin_msa_maxi_u_d (v2u64_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v2u64_r = __builtin_msa_maxi_u_d (v2u64_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+}
+
+void
+msa_clti_u ()
+{
+ v16i8_r = __builtin_msa_clti_u_b (v16u8_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v16i8_r = __builtin_msa_clti_u_b (v16u8_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+ v8i16_r = __builtin_msa_clti_u_h (v8u16_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v8i16_r = __builtin_msa_clti_u_h (v8u16_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+ v4i32_r = __builtin_msa_clti_u_w (v4u32_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v4i32_r = __builtin_msa_clti_u_w (v4u32_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+ v2i64_r = __builtin_msa_clti_u_d (v2u64_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v2i64_r = __builtin_msa_clti_u_d (v2u64_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+}
+
+void
+msa_clei_u ()
+{
+ v16i8_r = __builtin_msa_clei_u_b (v16u8_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v16i8_r = __builtin_msa_clei_u_b (v16u8_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+ v8i16_r = __builtin_msa_clei_u_h (v8u16_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v8i16_r = __builtin_msa_clei_u_h (v8u16_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+ v4i32_r = __builtin_msa_clei_u_w (v4u32_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v4i32_r = __builtin_msa_clei_u_w (v4u32_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+ v2i64_r = __builtin_msa_clei_u_d (v2u64_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v2i64_r = __builtin_msa_clei_u_d (v2u64_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+}
+
+/* MSA builtins with literal range of -16 to 15.  */
+
+void
+msa_mini_s ()
+{
+ v16i8_r = __builtin_msa_mini_s_b (v16i8_x, -17); /* { dg-error "must be a constant in range -16 to 15" } */
+ v16i8_r = __builtin_msa_mini_s_b (v16i8_x, 16); /* { dg-error "must be a constant in range -16 to 15" } */
+ v8i16_r = __builtin_msa_mini_s_h (v8i16_x, -17); /* { dg-error "must be a constant in range -16 to 15" } */
+ v8i16_r = __builtin_msa_mini_s_h (v8i16_x, 16); /* { dg-error "must be a constant in range -16 to 15" } */
+ v4i32_r = __builtin_msa_mini_s_w (v4i32_x, -17); /* { dg-error "must be a constant in range -16 to 15" } */
+ v4i32_r = __builtin_msa_mini_s_w (v4i32_x, 16); /* { dg-error "must be a constant in range -16 to 15" } */
+ v2i64_r = __builtin_msa_mini_s_d (v2i64_x, -17); /* { dg-error "must be a constant in range -16 to 15" } */
+ v2i64_r = __builtin_msa_mini_s_d (v2i64_x, 16); /* { dg-error "must be a constant in range -16 to 15" } */
+}
+
+void
+msa_maxi_s ()
+{
+ v16i8_r = __builtin_msa_maxi_s_b (v16i8_x, -17); /* { dg-error "must be a constant in range -16 to 15" } */
+ v16i8_r = __builtin_msa_maxi_s_b (v16i8_x, 16); /* { dg-error "must be a constant in range -16 to 15" } */
+ v8i16_r = __builtin_msa_maxi_s_h (v8i16_x, -17); /* { dg-error "must be a constant in range -16 to 15" } */
+ v8i16_r = __builtin_msa_maxi_s_h (v8i16_x, 16); /* { dg-error "must be a constant in range -16 to 15" } */
+ v4i32_r = __builtin_msa_maxi_s_w (v4i32_x, -17); /* { dg-error "must be a constant in range -16 to 15" } */
+ v4i32_r = __builtin_msa_maxi_s_w (v4i32_x, 16); /* { dg-error "must be a constant in range -16 to 15" } */
+ v2i64_r = __builtin_msa_maxi_s_d (v2i64_x, -17); /* { dg-error "must be a constant in range -16 to 15" } */
+ v2i64_r = __builtin_msa_maxi_s_d (v2i64_x, 16); /* { dg-error "must be a constant in range -16 to 15" } */
+}
+
+void
+msa_ceqi ()
+{
+ v16i8_r = __builtin_msa_ceqi_b (v16i8_x, -17); /* { dg-error "must be a constant in range -16 to 15" } */
+ v16i8_r = __builtin_msa_ceqi_b (v16i8_x, 16); /* { dg-error "must be a constant in range -16 to 15" } */
+ v8i16_r = __builtin_msa_ceqi_h (v8i16_x, -17); /* { dg-error "must be a constant in range -16 to 15" } */
+ v8i16_r = __builtin_msa_ceqi_h (v8i16_x, 16); /* { dg-error "must be a constant in range -16 to 15" } */
+ v4i32_r = __builtin_msa_ceqi_w (v4i32_x, -17); /* { dg-error "must be a constant in range -16 to 15" } */
+ v4i32_r = __builtin_msa_ceqi_w (v4i32_x, 16); /* { dg-error "must be a constant in range -16 to 15" } */
+ v2i64_r = __builtin_msa_ceqi_d (v2i64_x, -17); /* { dg-error "must be a constant in range -16 to 15" } */
+ v2i64_r = __builtin_msa_ceqi_d (v2i64_x, 16); /* { dg-error "must be a constant in range -16 to 15" } */
+}
+
+void
+msa_clti_s ()
+{
+ v16i8_r = __builtin_msa_clti_s_b (v16i8_x, -17); /* { dg-error "must be a constant in range -16 to 15" } */
+ v16i8_r = __builtin_msa_clti_s_b (v16i8_x, 16); /* { dg-error "must be a constant in range -16 to 15" } */
+ v8i16_r = __builtin_msa_clti_s_h (v8i16_x, -17); /* { dg-error "must be a constant in range -16 to 15" } */
+ v8i16_r = __builtin_msa_clti_s_h (v8i16_x, 16); /* { dg-error "must be a constant in range -16 to 15" } */
+ v4i32_r = __builtin_msa_clti_s_w (v4i32_x, -17); /* { dg-error "must be a constant in range -16 to 15" } */
+ v4i32_r = __builtin_msa_clti_s_w (v4i32_x, 16); /* { dg-error "must be a constant in range -16 to 15" } */
+ v2i64_r = __builtin_msa_clti_s_d (v2i64_x, -17); /* { dg-error "must be a constant in range -16 to 15" } */
+ v2i64_r = __builtin_msa_clti_s_d (v2i64_x, 16); /* { dg-error "must be a constant in range -16 to 15" } */
+}
+
+void
+msa_clei_s ()
+{
+ v16i8_r = __builtin_msa_clei_s_b (v16i8_x, -17); /* { dg-error "must be a constant in range -16 to 15" } */
+ v16i8_r = __builtin_msa_clei_s_b (v16i8_x, 16); /* { dg-error "must be a constant in range -16 to 15" } */
+ v8i16_r = __builtin_msa_clei_s_h (v8i16_x, -17); /* { dg-error "must be a constant in range -16 to 15" } */
+ v8i16_r = __builtin_msa_clei_s_h (v8i16_x, 16); /* { dg-error "must be a constant in range -16 to 15" } */
+ v4i32_r = __builtin_msa_clei_s_w (v4i32_x, -17); /* { dg-error "must be a constant in range -16 to 15" } */
+ v4i32_r = __builtin_msa_clei_s_w (v4i32_x, 16); /* { dg-error "must be a constant in range -16 to 15" } */
+ v2i64_r = __builtin_msa_clei_s_d (v2i64_x, -17); /* { dg-error "must be a constant in range -16 to 15" } */
+ v2i64_r = __builtin_msa_clei_s_d (v2i64_x, 16); /* { dg-error "must be a constant in range -16 to 15" } */
+}
+
+/* MSA builtins with literal range of 0 to 7/15/31/63 for
+   byte/halfwords/words/doublewords elements, respectively.  */
+
+void
+msa_slli ()
+{
+ v16i8_r = __builtin_msa_slli_b (v16i8_x, -1); /* { dg-error "must be a constant in range 0 to 7" } */
+ v16i8_r = __builtin_msa_slli_b (v16i8_x, 8); /* { dg-error "must be a constant in range 0 to 7" } */
+ v8i16_r = __builtin_msa_slli_h (v8i16_x, -1); /* { dg-error "must be a constant in range 0 to 15" } */
+ v8i16_r = __builtin_msa_slli_h (v8i16_x, 16); /* { dg-error "must be a constant in range 0 to 15" } */
+ v4i32_r = __builtin_msa_slli_w (v4i32_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v4i32_r = __builtin_msa_slli_w (v4i32_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+ v2i64_r = __builtin_msa_slli_d (v2i64_x, -1); /* { dg-error "must be a constant in range 0 to 63" } */
+ v2i64_r = __builtin_msa_slli_d (v2i64_x, 64); /* { dg-error "must be a constant in range 0 to 63" } */
+}
+
+void
+msa_srai ()
+{
+ v16i8_r = __builtin_msa_srai_b (v16i8_x, -1); /* { dg-error "must be a constant in range 0 to 7" } */
+ v16i8_r = __builtin_msa_srai_b (v16i8_x, 8); /* { dg-error "must be a constant in range 0 to 7" } */
+ v8i16_r = __builtin_msa_srai_h (v8i16_x, -1); /* { dg-error "must be a constant in range 0 to 15" } */
+ v8i16_r = __builtin_msa_srai_h (v8i16_x, 16); /* { dg-error "must be a constant in range 0 to 15" } */
+ v4i32_r = __builtin_msa_srai_w (v4i32_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v4i32_r = __builtin_msa_srai_w (v4i32_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+ v2i64_r = __builtin_msa_srai_d (v2i64_x, -1); /* { dg-error "must be a constant in range 0 to 63" } */
+ v2i64_r = __builtin_msa_srai_d (v2i64_x, 64); /* { dg-error "must be a constant in range 0 to 63" } */
+}
+
+void
+msa_srli ()
+{
+ v16i8_r = __builtin_msa_srli_b (v16i8_x, -1); /* { dg-error "must be a constant in range 0 to 7" } */
+ v16i8_r = __builtin_msa_srli_b (v16i8_x, 8); /* { dg-error "must be a constant in range 0 to 7" } */
+ v8i16_r = __builtin_msa_srli_h (v8i16_x, -1); /* { dg-error "must be a constant in range 0 to 15" } */
+ v8i16_r = __builtin_msa_srli_h (v8i16_x, 16); /* { dg-error "must be a constant in range 0 to 15" } */
+ v4i32_r = __builtin_msa_srli_w (v4i32_x, -1); /* { dg-error "must be a constant in range 0 to 31" } */
+ v4i32_r = __builtin_msa_srli_w (v4i32_x, 32); /* { dg-error "must be a constant in range 0 to 31" } */
+ v2i64_r = __builtin_msa_srli_d (v2i64_x, -1); /* { dg-error "must be a constant in range 0 to 63" } */
+ v2i64_r = __builtin_msa_srli_d (v2i64_x, 64); /* { dg-error "must be a constant in range 0 to 63" } */
+}
+
+/* MSA builtins with literal range of 0 to 15/7/3/1 for
+   byte/halfwords/words/doublewords elements, respectively.  */
+
+void
+msa_insert (int a)
+{
+ v16i8_r = __builtin_msa_insert_b (v16i8_x, -1, a); /* { dg-error "must be a constant in range 0 to 15" } */
+ v16i8_r = __builtin_msa_insert_b (v16i8_x, 16, a); /* { dg-error "must be a constant in range 0 to 15" } */
+ v8i16_r = __builtin_msa_insert_h (v8i16_x, -1, a); /* { dg-error "must be a constant in range 0 to 7" } */
+ v8i16_r = __builtin_msa_insert_h (v8i16_x, 8, a); /* { dg-error "must be a constant in range 0 to 7" } */
+ v4i32_r = __builtin_msa_insert_w (v4i32_x, -1, a); /* { dg-error "must be a constant in range 0 to 3" } */
+ v4i32_r = __builtin_msa_insert_w (v4i32_x, 4, a); /* { dg-error "must be a constant in range 0 to 3" } */
+ v2i64_r = __builtin_msa_insert_d (v2i64_x, -1, a); /* { dg-error "must be a constant in range 0 to 1" } */
+ v2i64_r = __builtin_msa_insert_d (v2i64_x, 2, a); /* { dg-error "must be a constant in range 0 to 1" } */
+}
+
+void
+msa_insve ()
+{
+ v16i8_r = __builtin_msa_insve_b (v16i8_x, -1, v16i8_x); /* { dg-error "must be a constant in range 0 to 15" } */
+ v16i8_r = __builtin_msa_insve_b (v16i8_x, 16, v16i8_x); /* { dg-error "must be a constant in range 0 to 15" } */
+ v8i16_r = __builtin_msa_insve_h (v8i16_x, -1, v8i16_x); /* { dg-error "must be a constant in range 0 to 7" } */
+ v8i16_r = __builtin_msa_insve_h (v8i16_x, 8, v8i16_x); /* { dg-error "must be a constant in range 0 to 7" } */
+ v4i32_r = __builtin_msa_insve_w (v4i32_x, -1, v4i32_x); /* { dg-error "must be a constant in range 0 to 3" } */
+ v4i32_r = __builtin_msa_insve_w (v4i32_x, 4, v4i32_x); /* { dg-error "must be a constant in range 0 to 3" } */
+ v2i64_r = __builtin_msa_insve_d (v2i64_x, -1, v2i64_x); /* { dg-error "must be a constant in range 0 to 1" } */
+ v2i64_r = __builtin_msa_insve_d (v2i64_x, 2, v2i64_x); /* { dg-error "must be a constant in range 0 to 1" } */
+}
-- 
cgit v1.1


From 55037a694f63196eb28dc7eb0cab0857fe7ab85c Mon Sep 17 00:00:00 2001
From: Jonathan Wakely <jwakely@redhat.com>
Date: Tue, 6 Dec 2016 14:36:00 +0000
Subject: Fix debug mode assertion for std::shared_ptr<void>

	* include/bits/shared_ptr_base.h
	(__shared_ptr_access<T, L, false, true>::operator->()): Fix assertion.

From-SVN: r243303
---
 libstdc++-v3/ChangeLog                      | 5 +++++
 libstdc++-v3/include/bits/shared_ptr_base.h | 5 +++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index b8edb7b..676d32b 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-06  Jonathan Wakely  <jwakely@redhat.com>
+
+	* include/bits/shared_ptr_base.h
+	(__shared_ptr_access<T, L, false, true>::operator->()): Fix assertion.
+
 2016-12-06  Ville Voutilainen  <ville.voutilainen@gmail.com>
 
 	Constrain optional's __constexpr_addressof in its return type
diff --git a/libstdc++-v3/include/bits/shared_ptr_base.h b/libstdc++-v3/include/bits/shared_ptr_base.h
index 2fb70b7..7e02043 100644
--- a/libstdc++-v3/include/bits/shared_ptr_base.h
+++ b/libstdc++-v3/include/bits/shared_ptr_base.h
@@ -983,8 +983,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       element_type*
       operator->() const noexcept
       {
-	_GLIBCXX_DEBUG_PEDASSERT(_M_get() != nullptr);
-	return static_cast<const __shared_ptr<_Tp, _Lp>*>(this)->get();
+	auto __ptr = static_cast<const __shared_ptr<_Tp, _Lp>*>(this)->get();
+	_GLIBCXX_DEBUG_PEDASSERT(__ptr != nullptr);
+	return __ptr;
       }
     };
 
-- 
cgit v1.1


From 449a432129c14002befb52eeabc0d9d6d4879a2f Mon Sep 17 00:00:00 2001
From: Jonathan Wakely <jwakely@redhat.com>
Date: Tue, 6 Dec 2016 14:36:07 +0000
Subject: Fix pretty-printer for std::variant

	* python/libstdcxx/v6/printers.py (StdVariantPrinter): Update for new
	data member name.
	* testsuite/libstdc++-prettyprinters/cxx17.cc: Remove redundant test.

From-SVN: r243304
---
 libstdc++-v3/ChangeLog                                   | 4 ++++
 libstdc++-v3/python/libstdcxx/v6/printers.py             | 2 +-
 libstdc++-v3/testsuite/libstdc++-prettyprinters/cxx17.cc | 2 --
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 676d32b..5662809 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,5 +1,9 @@
 2016-12-06  Jonathan Wakely  <jwakely@redhat.com>
 
+	* python/libstdcxx/v6/printers.py (StdVariantPrinter): Update for new
+	data member name.
+	* testsuite/libstdc++-prettyprinters/cxx17.cc: Remove redundant test.
+
 	* include/bits/shared_ptr_base.h
 	(__shared_ptr_access<T, L, false, true>::operator->()): Fix assertion.
 
diff --git a/libstdc++-v3/python/libstdcxx/v6/printers.py b/libstdc++-v3/python/libstdcxx/v6/printers.py
index bad42b4..ff428e8 100644
--- a/libstdc++-v3/python/libstdcxx/v6/printers.py
+++ b/libstdc++-v3/python/libstdcxx/v6/printers.py
@@ -1002,7 +1002,7 @@ class StdVariantPrinter(SingleObjContainerPrinter):
             visualizer = None
         else:
             self.contained_type = alternatives[int(self.index)]
-            addr = val['_M_union']['_M_first']['_M_storage'].address
+            addr = val['_M_u']['_M_first']['_M_storage'].address
             contained_value = addr.cast(self.contained_type.pointer()).dereference()
             visualizer = gdb.default_visualizer(contained_value)
         super (StdVariantPrinter, self).__init__(contained_value, visualizer, 'array')
diff --git a/libstdc++-v3/testsuite/libstdc++-prettyprinters/cxx17.cc b/libstdc++-v3/testsuite/libstdc++-prettyprinters/cxx17.cc
index 96be8c7..69c16c1 100644
--- a/libstdc++-v3/testsuite/libstdc++-prettyprinters/cxx17.cc
+++ b/libstdc++-v3/testsuite/libstdc++-prettyprinters/cxx17.cc
@@ -86,8 +86,6 @@ main()
 // { dg-final { note-test v3 {std::variant<float, int, std::string_view> [index 1] = {3}} } }
   variant<float, int, string_view> v4{ str };
 // { dg-final { note-test v4 {std::variant<float, int, std::string_view> [index 2] = {"string"}} } }
-  variant<string_view> vref{str};
-// { dg-final { note-test vref {std::variant<std::string_view> [index 0] = {"string"}} } }
 
   map<int, string_view> m{ {1, "one"} };
   map<int, string_view>::node_type n0;
-- 
cgit v1.1


From 0eada3216bc0eab65325bdcf0fee3584007830ff Mon Sep 17 00:00:00 2001
From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Tue, 6 Dec 2016 16:56:56 +0000
Subject: [ARM][committed] Sort ARMv8 processors by alphabetic order

	* config/arm/arm-cores.def (cortex-m23, cortex-m33): Move into
	alphabetical order with respect to other ARMv8 processors.
	* config/arm/arm-tables.opt: Regenerate.
	* config/arm/arm-tune.md: Likewise.

From-SVN: r243306
---
 gcc/ChangeLog                 |  7 +++++++
 gcc/config/arm/arm-cores.def  |  4 ++--
 gcc/config/arm/arm-tables.opt | 12 ++++++------
 gcc/config/arm/arm-tune.md    |  6 +++---
 4 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8d405cf..662f145 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2016-12-06  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
+
+	* config/arm/arm-cores.def (cortex-m23, cortex-m33): Move into
+	alphabetical order with respect to other ARMv8 processors.
+	* config/arm/arm-tables.opt: Regenerate.
+	* config/arm/arm-tune.md: Likewise.
+
 2016-12-06  Robert Suchanek  <robert.suchanek@imgtec.com>
 
 	* config/mips/mips.c (mips_expand_builtin_insn): Check input
diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def
index 3f77c71..fd96a41 100644
--- a/gcc/config/arm/arm-cores.def
+++ b/gcc/config/arm/arm-cores.def
@@ -166,14 +166,14 @@ ARM_CORE("cortex-a15.cortex-a7", cortexa15cortexa7, cortexa7,	7A,	ARM_FSET_MAKE_
 ARM_CORE("cortex-a17.cortex-a7", cortexa17cortexa7, cortexa7,	7A,	ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_THUMB_DIV | FL_ARM_DIV | FL_FOR_ARCH7A), cortex_a12)
 
 /* V8 Architecture Processors */
-ARM_CORE("cortex-m23",	cortexm23, cortexm23,	8M_BASE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH8M_BASE), v6m)
 ARM_CORE("cortex-a32",	cortexa32, cortexa53,	8A,	ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a35)
-ARM_CORE("cortex-m33",	cortexm33, cortexm33,	8M_MAIN, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_ARCH7EM | FL_FOR_ARCH8M_MAIN), v7m)
 ARM_CORE("cortex-a35",	cortexa35, cortexa53,	8A,	ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a35)
 ARM_CORE("cortex-a53",	cortexa53, cortexa53,	8A,	ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a53)
 ARM_CORE("cortex-a57",	cortexa57, cortexa57,	8A,	ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57)
 ARM_CORE("cortex-a72",	cortexa72, cortexa57,	8A,	ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57)
 ARM_CORE("cortex-a73",	cortexa73, cortexa57,	8A,	ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a73)
+ARM_CORE("cortex-m23",	cortexm23, cortexm23,	8M_BASE, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH8M_BASE), v6m)
+ARM_CORE("cortex-m33",	cortexm33, cortexm33,	8M_MAIN, ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_ARCH7EM | FL_FOR_ARCH8M_MAIN), v7m)
 ARM_CORE("exynos-m1",	exynosm1,  exynosm1,	8A,	ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), exynosm1)
 ARM_CORE("falkor",	falkor,    cortexa57,	8A,	ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), qdf24xx)
 ARM_CORE("qdf24xx",	qdf24xx,   cortexa57,	8A,	ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), qdf24xx)
diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt
index b12e458..b5e12dc 100644
--- a/gcc/config/arm/arm-tables.opt
+++ b/gcc/config/arm/arm-tables.opt
@@ -307,15 +307,9 @@ EnumValue
 Enum(processor_type) String(cortex-a17.cortex-a7) Value( TARGET_CPU_cortexa17cortexa7)
 
 EnumValue
-Enum(processor_type) String(cortex-m23) Value( TARGET_CPU_cortexm23)
-
-EnumValue
 Enum(processor_type) String(cortex-a32) Value( TARGET_CPU_cortexa32)
 
 EnumValue
-Enum(processor_type) String(cortex-m33) Value( TARGET_CPU_cortexm33)
-
-EnumValue
 Enum(processor_type) String(cortex-a35) Value( TARGET_CPU_cortexa35)
 
 EnumValue
@@ -331,6 +325,12 @@ EnumValue
 Enum(processor_type) String(cortex-a73) Value( TARGET_CPU_cortexa73)
 
 EnumValue
+Enum(processor_type) String(cortex-m23) Value( TARGET_CPU_cortexm23)
+
+EnumValue
+Enum(processor_type) String(cortex-m33) Value( TARGET_CPU_cortexm33)
+
+EnumValue
 Enum(processor_type) String(exynos-m1) Value( TARGET_CPU_exynosm1)
 
 EnumValue
diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md
index 42a6d7a..4c92927 100644
--- a/gcc/config/arm/arm-tune.md
+++ b/gcc/config/arm/arm-tune.md
@@ -32,9 +32,9 @@
 	cortexr4f,cortexr5,cortexr7,
 	cortexr8,cortexm7,cortexm4,
 	cortexm3,marvell_pj4,cortexa15cortexa7,
-	cortexa17cortexa7,cortexm23,cortexa32,
-	cortexm33,cortexa35,cortexa53,
-	cortexa57,cortexa72,cortexa73,
+	cortexa17cortexa7,cortexa32,cortexa35,
+	cortexa53,cortexa57,cortexa72,
+	cortexa73,cortexm23,cortexm33,
 	exynosm1,falkor,qdf24xx,
 	xgene1,cortexa57cortexa53,cortexa72cortexa53,
 	cortexa73cortexa35,cortexa73cortexa53"
-- 
cgit v1.1


From 34022d2b2e55c34e2a86af04d9bfe5548e71f63e Mon Sep 17 00:00:00 2001
From: Jerry DeLisle <jvdelisle@gcc.gnu.org>
Date: Tue, 6 Dec 2016 17:13:31 +0000
Subject: re PR fortran/78659 ([F03] Spurious "requires DTIO" reported against
 namelist statement)

2016-12-06  Jerry DeLisle  <jvdelisle@gcc.gnu.org>

	PR fortran/78659
	* resolve.c (resolve_fl_namelist): Remove unneeded error.
	PR fortran/78659
	* gfortran.dg/alloc_comp_constraint_1.f90: Update test.
	* gfortran.dg/alloc_comp_constraint_7.f90: New test.
	* gfortran.dg/namelist_34.f90: Update test.
	* gfortran.dg/namelist_63.f90: Update test.

From-SVN: r243308
---
 gcc/fortran/ChangeLog                                 |  5 +++++
 gcc/fortran/resolve.c                                 | 10 +---------
 gcc/testsuite/ChangeLog                               |  8 ++++++++
 gcc/testsuite/gfortran.dg/alloc_comp_constraint_1.f90 |  7 ++++---
 gcc/testsuite/gfortran.dg/alloc_comp_constraint_7.f90 | 19 +++++++++++++++++++
 gcc/testsuite/gfortran.dg/namelist_34.f90             | 10 +++++-----
 gcc/testsuite/gfortran.dg/namelist_63.f90             |  7 ++++---
 7 files changed, 46 insertions(+), 20 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/alloc_comp_constraint_7.f90

diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index baef2d3..14bf4f9 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-06  Jerry DeLisle  <jvdelisle@gcc.gnu.org>
+
+	PR fortran/78659
+	* resolve.c (resolve_fl_namelist): Remove unneeded error.
+
 2016-12-06  Andre Vehreschild  <vehre@gcc.gnu.org>
 
 	PR fortran/78226
diff --git a/gcc/fortran/resolve.c b/gcc/fortran/resolve.c
index 7bc9f5f..e4ea10f 100644
--- a/gcc/fortran/resolve.c
+++ b/gcc/fortran/resolve.c
@@ -13900,15 +13900,7 @@ resolve_fl_namelist (gfc_symbol *sym)
 			       "or POINTER components", nl->sym->name,
 			       sym->name, &sym->declared_at))
 	    return false;
-
-	  if (!dtio)
-	    {
-	      gfc_error ("NAMELIST object %qs in namelist %qs at %L has "
-			"ALLOCATABLE or POINTER components and thus requires "
-			"a defined input/output procedure", nl->sym->name,
-			sym->name, &sym->declared_at);
-	      return false;
-	    }
+	  return true;
 	}
     }
 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c7664e8..b293dcf 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,11 @@
+2016-12-06  Jerry DeLisle  <jvdelisle@gcc.gnu.org>
+
+	PR fortran/78659
+	* gfortran.dg/alloc_comp_constraint_1.f90: Update test.
+	* gfortran.dg/alloc_comp_constraint_7.f90: New test.
+	* gfortran.dg/namelist_34.f90: Update test.
+	* gfortran.dg/namelist_63.f90: Update test.
+
 2016-12-06  Robert Suchanek  <robert.suchanek@imgtec.com>
 
 	* gcc.target/mips/msa-builtins-err.c: New test.
diff --git a/gcc/testsuite/gfortran.dg/alloc_comp_constraint_1.f90 b/gcc/testsuite/gfortran.dg/alloc_comp_constraint_1.f90
index eb1b105..e1715256 100644
--- a/gcc/testsuite/gfortran.dg/alloc_comp_constraint_1.f90
+++ b/gcc/testsuite/gfortran.dg/alloc_comp_constraint_1.f90
@@ -1,5 +1,6 @@
 ! { dg-do compile }
-! Check that we don't allow IO or NAMELISTs with types with allocatable
+! { dg-options -std=f2003 }
+! Check that we don't allow IO of NAMELISTs with types with allocatable
 ! components (PR 20541)
 program main
 
@@ -13,8 +14,8 @@ program main
 
     type(foo) :: a
     type(bar) :: b
-    namelist /blah/ a ! { dg-error "has ALLOCATABLE or POINTER components and thus requires a defined input/output" }
-
+    namelist /blah/ a  ! This is allowed under F2003, but not F95
+    ! The following require User Defined Derived Type I/O procedures.
     write (*, *) a  ! { dg-error "cannot have ALLOCATABLE components" }
 
     read (*, *) b  ! { dg-error "cannot have ALLOCATABLE components" }
diff --git a/gcc/testsuite/gfortran.dg/alloc_comp_constraint_7.f90 b/gcc/testsuite/gfortran.dg/alloc_comp_constraint_7.f90
new file mode 100644
index 0000000..35b8e1f
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/alloc_comp_constraint_7.f90
@@ -0,0 +1,19 @@
+! { dg-do compile }
+! { dg-options -std=f95 }
+! Check that we don't allow types with allocatable components under -std=f95.
+program main
+
+    type :: foo
+        integer :: k
+        integer, allocatable :: x(:) ! { dg-error "Fortran 2003: ALLOCATABLE" }
+    end type foo
+
+    type :: bar
+        type(foo) :: x
+    end type bar
+
+    type(foo) :: a
+    type(bar) :: b
+    namelist /blah/ a
+
+end program main
diff --git a/gcc/testsuite/gfortran.dg/namelist_34.f90 b/gcc/testsuite/gfortran.dg/namelist_34.f90
index 9432771..be8f49f 100644
--- a/gcc/testsuite/gfortran.dg/namelist_34.f90
+++ b/gcc/testsuite/gfortran.dg/namelist_34.f90
@@ -1,7 +1,7 @@
 ! { dg-do compile }
-!
+! { dg-options -std=f95 }
 ! PR fortran/32905 - accepts types with ultimate POINTER components
-!
+! updated for PR78659
 MODULE types
   type :: tp3
     real :: x
@@ -22,7 +22,7 @@ MODULE nml
 USE types
    type(tp1) :: t1
    type(tp3) :: t3
-
-   namelist /a/ t1    ! { dg-error "has ALLOCATABLE or POINTER components and thus requires a defined input/output" }
-   namelist /b/ t3    ! { dg-error "has ALLOCATABLE or POINTER components and thus requires a defined input/output" }
+! The following are allowed under f2003.
+   namelist /a/ t1    ! { dg-error "with ALLOCATABLE or POINTER components" }
+   namelist /b/ t3    ! { dg-error "with ALLOCATABLE or POINTER components" }
 END MODULE
diff --git a/gcc/testsuite/gfortran.dg/namelist_63.f90 b/gcc/testsuite/gfortran.dg/namelist_63.f90
index 0210174..de27b82 100644
--- a/gcc/testsuite/gfortran.dg/namelist_63.f90
+++ b/gcc/testsuite/gfortran.dg/namelist_63.f90
@@ -1,6 +1,6 @@
 ! { dg-do compile }
-!
-! PR fortran/45530
+! { dg-options -std=f95 }
+! PR fortran/45530, updated for PR78659
 !
 ! Contributed by david.sagan@gmail.com
 !
@@ -24,5 +24,6 @@ type region_struct
 end type
 
 type (c_struct) curve(10)
-namelist / params / curve ! { dg-error "ALLOCATABLE or POINTER components and thus requires a defined input/output" }
+! The following is allowed with f2003.
+namelist / params / curve ! { dg-error "ALLOCATABLE or POINTER components" }
 end program
-- 
cgit v1.1


From 6649ad7efdd583d84099beb5ee2b03a0ed28b9ee Mon Sep 17 00:00:00 2001
From: Felipe Magno de Almeida <felipe@expertisesolutions.com.br>
Date: Tue, 6 Dec 2016 17:58:05 +0000
Subject: Enable libstdc++ compilation on AVR targets

Enable libstdc++ compilation for AVR targets that use AVR-Libc. Most
floating-point math functions are already defined in AVR-Libc, so the
corresponding HAVE_* macros are defined in crossconfig.m4 to avoid
multiple definitions of these functions.

2016-12-06  Felipe Magno de Almeida  <felipe@expertisesolutions.com.br>

	* crossconfig.m4: Add avr target for cross-compilation.
	* configure: Regenerate.

From-SVN: r243309
---
 libstdc++-v3/ChangeLog      |  5 +++++
 libstdc++-v3/configure      | 49 +++++++++++++++++++++++++++++++++++++++++++++
 libstdc++-v3/crossconfig.m4 | 26 ++++++++++++++++++++++++
 3 files changed, 80 insertions(+)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 5662809..face139 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-06  Felipe Magno de Almeida  <felipe@expertisesolutions.com.br>
+
+	* crossconfig.m4: Add avr target for cross-compilation.
+	* configure: Regenerate.
+
 2016-12-06  Jonathan Wakely  <jwakely@redhat.com>
 
 	* python/libstdcxx/v6/printers.py (StdVariantPrinter): Update for new
diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure
index 1f72e3f..cbd77f0 100755
--- a/libstdc++-v3/configure
+++ b/libstdc++-v3/configure
@@ -28902,6 +28902,55 @@ case "${host}" in
     # This is a freestanding configuration; there is nothing to do here.
     ;;
 
+  avr*-*-*)
+    $as_echo "#define HAVE_ACOSF 1" >>confdefs.h
+
+    $as_echo "#define HAVE_ASINF 1" >>confdefs.h
+
+    $as_echo "#define HAVE_ATAN2F 1" >>confdefs.h
+
+    $as_echo "#define HAVE_ATANF 1" >>confdefs.h
+
+    $as_echo "#define HAVE_CEILF 1" >>confdefs.h
+
+    $as_echo "#define HAVE_COSF 1" >>confdefs.h
+
+    $as_echo "#define HAVE_COSHF 1" >>confdefs.h
+
+    $as_echo "#define HAVE_EXPF 1" >>confdefs.h
+
+    $as_echo "#define HAVE_FABSF 1" >>confdefs.h
+
+    $as_echo "#define HAVE_FLOORF 1" >>confdefs.h
+
+    $as_echo "#define HAVE_FMODF 1" >>confdefs.h
+
+    $as_echo "#define HAVE_FREXPF 1" >>confdefs.h
+
+    $as_echo "#define HAVE_SQRTF 1" >>confdefs.h
+
+    $as_echo "#define HAVE_HYPOTF 1" >>confdefs.h
+
+    $as_echo "#define HAVE_LDEXPF 1" >>confdefs.h
+
+    $as_echo "#define HAVE_LOG10F 1" >>confdefs.h
+
+    $as_echo "#define HAVE_LOGF 1" >>confdefs.h
+
+    $as_echo "#define HAVE_MODFF 1" >>confdefs.h
+
+    $as_echo "#define HAVE_POWF 1" >>confdefs.h
+
+    $as_echo "#define HAVE_SINF 1" >>confdefs.h
+
+    $as_echo "#define HAVE_SINHF 1" >>confdefs.h
+
+    $as_echo "#define HAVE_TANF 1" >>confdefs.h
+
+    $as_echo "#define HAVE_TANHF 1" >>confdefs.h
+
+    ;;
+
   mips*-sde-elf*)
     # These definitions are for the SDE C library rather than newlib.
     SECTION_FLAGS='-ffunction-sections -fdata-sections'
diff --git a/libstdc++-v3/crossconfig.m4 b/libstdc++-v3/crossconfig.m4
index 4eaf208..8cc788c 100644
--- a/libstdc++-v3/crossconfig.m4
+++ b/libstdc++-v3/crossconfig.m4
@@ -9,6 +9,32 @@ case "${host}" in
     # This is a freestanding configuration; there is nothing to do here.
     ;;
 
+  avr*-*-*)
+    AC_DEFINE(HAVE_ACOSF)
+    AC_DEFINE(HAVE_ASINF)
+    AC_DEFINE(HAVE_ATAN2F)
+    AC_DEFINE(HAVE_ATANF)
+    AC_DEFINE(HAVE_CEILF)
+    AC_DEFINE(HAVE_COSF)
+    AC_DEFINE(HAVE_COSHF)
+    AC_DEFINE(HAVE_EXPF)
+    AC_DEFINE(HAVE_FABSF)
+    AC_DEFINE(HAVE_FLOORF)
+    AC_DEFINE(HAVE_FMODF)
+    AC_DEFINE(HAVE_FREXPF)
+    AC_DEFINE(HAVE_SQRTF)
+    AC_DEFINE(HAVE_HYPOTF)
+    AC_DEFINE(HAVE_LDEXPF)
+    AC_DEFINE(HAVE_LOG10F)
+    AC_DEFINE(HAVE_LOGF)
+    AC_DEFINE(HAVE_MODFF)
+    AC_DEFINE(HAVE_POWF)
+    AC_DEFINE(HAVE_SINF)
+    AC_DEFINE(HAVE_SINHF)
+    AC_DEFINE(HAVE_TANF)
+    AC_DEFINE(HAVE_TANHF)
+    ;;
+
   mips*-sde-elf*)
     # These definitions are for the SDE C library rather than newlib.
     SECTION_FLAGS='-ffunction-sections -fdata-sections'
-- 
cgit v1.1


From f68963c09232f56ee793ab12b902f16c45af0214 Mon Sep 17 00:00:00 2001
From: Felipe Magno de Almeida <felipe@expertisesolutions.com.br>
Date: Tue, 6 Dec 2016 17:58:10 +0000
Subject: Add #ifdef case for 16 bits in cow-stdexcept.cc

Add a preprocessor case for when void* is 16 bits wide so that
cow-stdexcept.cc compiles for the AVR target.

2016-12-06  Felipe Magno de Almeida  <felipe@expertisesolutions.com.br>

	* src/c++11/cow-stdexcept.cc: Add special case for 16 bit pointers.

From-SVN: r243310
---
 libstdc++-v3/ChangeLog                  |  2 ++
 libstdc++-v3/src/c++11/cow-stdexcept.cc | 11 ++++++++---
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index face139..138a020 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,5 +1,7 @@
 2016-12-06  Felipe Magno de Almeida  <felipe@expertisesolutions.com.br>
 
+	* src/c++11/cow-stdexcept.cc: Add special case for 16 bit pointers.
+
 	* crossconfig.m4: Add avr target for cross-compilation.
 	* configure: Regenerate.
 
diff --git a/libstdc++-v3/src/c++11/cow-stdexcept.cc b/libstdc++-v3/src/c++11/cow-stdexcept.cc
index 31a89df..641b372 100644
--- a/libstdc++-v3/src/c++11/cow-stdexcept.cc
+++ b/libstdc++-v3/src/c++11/cow-stdexcept.cc
@@ -208,6 +208,8 @@ extern void* _ZGTtnaX (size_t sz) __attribute__((weak));
 extern void _ZGTtdlPv (void* ptr) __attribute__((weak));
 extern uint8_t _ITM_RU1(const uint8_t *p)
   ITM_REGPARM __attribute__((weak));
+extern uint16_t _ITM_RU2(const uint16_t *p)
+  ITM_REGPARM __attribute__((weak));
 extern uint32_t _ITM_RU4(const uint32_t *p)
   ITM_REGPARM __attribute__((weak));
 extern uint64_t _ITM_RU8(const uint64_t *p)
@@ -272,12 +274,15 @@ _txnal_cow_string_C1_for_exceptions(void* that, const char* s,
 static void* txnal_read_ptr(void* const * ptr)
 {
   static_assert(sizeof(uint64_t) == sizeof(void*)
-		|| sizeof(uint32_t) == sizeof(void*),
-		"Pointers must be 32 bits or 64 bits wide");
+		|| sizeof(uint32_t) == sizeof(void*)
+		|| sizeof(uint16_t) == sizeof(void*),
+		"Pointers must be 16 bits, 32 bits or 64 bits wide");
 #if __UINTPTR_MAX__ == __UINT64_MAX__
   return (void*)_ITM_RU8((const uint64_t*)ptr);
-#else
+#elif __UINTPTR_MAX__ == __UINT32_MAX__
   return (void*)_ITM_RU4((const uint32_t*)ptr);
+#else
+  return (void*)_ITM_RU2((const uint16_t*)ptr);
 #endif
 }
 
-- 
cgit v1.1


From cd3fe55a185181aeb1a6b626cfbaa6e14839b7e9 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <uros@gcc.gnu.org>
Date: Tue, 6 Dec 2016 20:56:03 +0100
Subject: predicates.md (general_gr_operand): New predicate.

	* config/i386/predicates.md (general_gr_operand): New predicate.
	* config/i386/i386.md (TImode and DImode push_operand splitter):
	Use general_gr_operand.  Macroize using DWI mode macro.
	(TImode and DImode nonimmediate_operand splitter): Use
	nonimmediate_gr_operand and general_gr_operand.  Macroize using
	DWI mode macro.
	(TF/XF/DFmode push_operand splitter): Use general_gr_operand.
	(TFmode nonimmediate_operand splitter): Use nonimmediate_gr_operand
	and general_gr_operand.
	(XFmode nonimmediate_operand splitter): Ditto.
	(DFmode nonimmediate_operand splitter): Ditto.
	* config/i386/mmx.md (MMXMODE nonimmediate_operand splitter): Ditto.

From-SVN: r243315
---
 gcc/ChangeLog                 | 18 +++++++++++--
 gcc/config/i386/i386.md       | 61 ++++++++++++-------------------------------
 gcc/config/i386/mmx.md        |  8 +++---
 gcc/config/i386/predicates.md |  6 +++++
 4 files changed, 42 insertions(+), 51 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 662f145..49dc646 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2016-12-06  Uros Bizjak  <ubizjak@gmail.com>
+
+	* config/i386/predicates.md (general_gr_operand): New predicate.
+	* config/i386/i386.md (TImode and DImode push_operand splitter):
+	Use general_gr_operand.  Macroize using DWI mode macro.
+	(TImode and DImode nonimmediate_operand splitter): Use
+	nonimmediate_gr_operand and general_gr_operand.  Macroize using
+	DWI mode macro.
+	(TF/XF/DFmode push_operand splitter): Use general_gr_operand.
+	(TFmode nonimmediate_operand splitter): Use nonimmediate_gr_operand
+	and general_gr_operand.
+	(XFmode nonimmediate_operand splitter): Ditto.
+	(DFmode nonimmediate_operand splitter): Ditto.
+	* config/i386/mmx.md (MMXMODE nonimmediate_operand splitter): Ditto.
+
 2016-12-06  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
 
 	* config/arm/arm-cores.def (cortex-m23, cortex-m33): Move into
@@ -138,8 +153,7 @@
 	* diagnostic.c (diagnostic_check_max_errors): New, broken out of ...
 	(diagnostic_action_after_output): ... here.
 	(diagnostic_report_diagnostic): Call it for non-notes.
-	* diagnostic.h (struct diagnostic_context): Make max_errors signed
-	int.
+	* diagnostic.h (struct diagnostic_context): Make max_errors signed int.
 	(diagnostic_check_max_errors): Declare.
 
 2016-12-05  Cupertino Miranda  <cmiranda@synopsys.com>
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 773f29b..f2248a5d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1749,10 +1749,9 @@
    (set_attr "mode" "<MODE>")])
 
 (define_split
-  [(set (match_operand:TI 0 "push_operand")
-        (match_operand:TI 1 "general_operand"))]
-  "TARGET_64BIT && reload_completed
-   && !SSE_REG_P (operands[1])"
+  [(set (match_operand:DWI 0 "push_operand")
+        (match_operand:DWI 1 "general_gr_operand"))]
+  "reload_completed"
   [(const_int 0)]
   "ix86_split_long_move (operands); DONE;")
 
@@ -1814,14 +1813,6 @@
 						   GEN_INT (4)));
 })
 
-(define_split
-  [(set (match_operand:DI 0 "push_operand")
-        (match_operand:DI 1 "general_operand"))]
-  "!TARGET_64BIT && reload_completed
-   && !(MMX_REG_P (operands[1]) || SSE_REG_P (operands[1]))"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-
 (define_insn "*pushsi2"
   [(set (match_operand:SI 0 "push_operand" "=<")
 	(match_operand:SI 1 "general_no_elim_operand" "ri*m"))]
@@ -2150,14 +2141,6 @@
 	       ]
 	       (const_string "TI")))])
 
-(define_split
-  [(set (match_operand:TI 0 "nonimmediate_operand")
-	(match_operand:TI 1 "general_operand"))]
-  "reload_completed
-   && !SSE_REG_P (operands[0]) && !SSE_REG_P (operands[1])"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-
 (define_insn "*movdi_internal"
   [(set (match_operand:DI 0 "nonimmediate_operand"
     "=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,m,?r ,?r,?*Yi,?*Ym,?*Yi,*k,*k ,*r,*m")
@@ -2333,15 +2316,9 @@
 	   (const_string "*")))])
 
 (define_split
-  [(set (match_operand:DI 0 "nonimmediate_operand")
-        (match_operand:DI 1 "general_operand"))]
-  "!TARGET_64BIT && reload_completed
-   && !(MMX_REG_P (operands[0])
-	|| SSE_REG_P (operands[0])
-	|| MASK_REG_P (operands[0]))
-   && !(MMX_REG_P (operands[1])
-	|| SSE_REG_P (operands[1])
-	|| MASK_REG_P (operands[1]))"
+  [(set (match_operand:DWI 0 "nonimmediate_gr_operand")
+        (match_operand:DWI 1 "general_gr_operand"))]
+  "reload_completed"
   [(const_int 0)]
   "ix86_split_long_move (operands); DONE;")
 
@@ -3098,12 +3075,11 @@
 
 (define_split
   [(set (match_operand 0 "push_operand")
-	(match_operand 1 "general_operand"))]
+	(match_operand 1 "general_gr_operand"))]
   "reload_completed
    && (GET_MODE (operands[0]) == TFmode
        || GET_MODE (operands[0]) == XFmode
-       || GET_MODE (operands[0]) == DFmode)
-   && !ANY_FP_REG_P (operands[1])"
+       || GET_MODE (operands[0]) == DFmode)"
   [(const_int 0)]
   "ix86_split_long_move (operands); DONE;")
 
@@ -3197,10 +3173,9 @@
 	       (const_string "TI")))])
 
 (define_split
-  [(set (match_operand:TF 0 "nonimmediate_operand")
-        (match_operand:TF 1 "general_operand"))]
-  "reload_completed
-   && !(ANY_FP_REG_P (operands[0]) || ANY_FP_REG_P (operands[1]))"
+  [(set (match_operand:TF 0 "nonimmediate_gr_operand")
+        (match_operand:TF 1 "general_gr_operand"))]
+  "reload_completed"
   [(const_int 0)]
   "ix86_split_long_move (operands); DONE;")
 
@@ -3271,10 +3246,9 @@
 	   (const_string "*")))])
    
 (define_split
-  [(set (match_operand:XF 0 "nonimmediate_operand")
-        (match_operand:XF 1 "general_operand"))]
-  "reload_completed
-   && !(ANY_FP_REG_P (operands[0]) || ANY_FP_REG_P (operands[1]))"
+  [(set (match_operand:XF 0 "nonimmediate_gr_operand")
+        (match_operand:XF 1 "general_gr_operand"))]
+  "reload_completed"
   [(const_int 0)]
   "ix86_split_long_move (operands); DONE;")
 
@@ -3474,10 +3448,9 @@
 	   (const_string "*")))])
 
 (define_split
-  [(set (match_operand:DF 0 "nonimmediate_operand")
-        (match_operand:DF 1 "general_operand"))]
-  "!TARGET_64BIT && reload_completed
-   && !(ANY_FP_REG_P (operands[0]) || ANY_FP_REG_P (operands[1]))"
+  [(set (match_operand:DF 0 "nonimmediate_gr_operand")
+        (match_operand:DF 1 "general_gr_operand"))]
+  "!TARGET_64BIT && reload_completed"
   [(const_int 0)]
   "ix86_split_long_move (operands); DONE;")
 
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 9992233..37e6a28 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -205,11 +205,9 @@
 	   (const_string "DI")))])
 
 (define_split
-  [(set (match_operand:MMXMODE 0 "nonimmediate_operand")
-        (match_operand:MMXMODE 1 "general_operand"))]
-  "!TARGET_64BIT && reload_completed
-   && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0]))
-   && !(MMX_REG_P (operands[1]) || SSE_REG_P (operands[1]))"
+  [(set (match_operand:MMXMODE 0 "nonimmediate_gr_operand")
+        (match_operand:MMXMODE 1 "general_gr_operand"))]
+  "!TARGET_64BIT && reload_completed"
   [(const_int 0)]
   "ix86_split_long_move (operands); DONE;")
 
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 5956690..4c45df6 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -38,6 +38,12 @@
     (match_test "GENERAL_REGNO_P (REGNO (op))")
     (match_operand 0 "nonimmediate_operand")))
 
+;; True if the operand is a general operand with GENERAL class register.
+(define_predicate "general_gr_operand"
+  (if_then_else (match_code "reg")
+    (match_test "GENERAL_REGNO_P (REGNO (op))")
+    (match_operand 0 "general_operand")))
+
 ;; True if the operand is an MMX register.
 (define_predicate "mmx_reg_operand"
   (and (match_code "reg")
-- 
cgit v1.1


From 9a38b8b940299eeceb51652bc070834809bb126e Mon Sep 17 00:00:00 2001
From: Vladimir Makarov <vmakarov@redhat.com>
Date: Tue, 6 Dec 2016 20:02:03 +0000
Subject: lra-lives.c (process_bb_lives): Update biggest mode for implicitly
 used hard reg.

2016-12-06  Vladimir Makarov  <vmakarov@redhat.com>

	target/77761
	* lra-lives.c (process_bb_lives): Update biggest mode for
	implicitly used hard reg.

2016-12-06  Vladimir Makarov  <vmakarov@redhat.com>

	target/77761
	* testsuite/gcc.target/i386/pr77761.c: New.

From-SVN: r243316
---
 gcc/ChangeLog                           |  6 ++++
 gcc/lra-lives.c                         | 21 ++++++++++---
 gcc/testsuite/ChangeLog                 |  5 +++
 gcc/testsuite/gcc.target/i386/pr77761.c | 55 +++++++++++++++++++++++++++++++++
 4 files changed, 83 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr77761.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 49dc646..b57a189 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-06  Vladimir Makarov  <vmakarov@redhat.com>
+
+	target/77761
+	* lra-lives.c (process_bb_lives): Update biggest mode for
+	implicitly used hard reg.
+
 2016-12-06  Uros Bizjak  <ubizjak@gmail.com>
 
 	* config/i386/predicates.md (general_gr_operand): New predicate.
diff --git a/gcc/lra-lives.c b/gcc/lra-lives.c
index 5573ff8..3ffec90 100644
--- a/gcc/lra-lives.c
+++ b/gcc/lra-lives.c
@@ -702,11 +702,24 @@ process_bb_lives (basic_block bb, int &curr_point, bool dead_insn_p)
       /* Update max ref width and hard reg usage.  */
       for (reg = curr_id->regs; reg != NULL; reg = reg->next)
 	{
+	  int i, regno = reg->regno;
+	  
 	  if (GET_MODE_SIZE (reg->biggest_mode)
-	      > GET_MODE_SIZE (lra_reg_info[reg->regno].biggest_mode))
-	    lra_reg_info[reg->regno].biggest_mode = reg->biggest_mode;
-	  if (reg->regno < FIRST_PSEUDO_REGISTER)
-	    lra_hard_reg_usage[reg->regno] += freq;
+	      > GET_MODE_SIZE (lra_reg_info[regno].biggest_mode))
+	    lra_reg_info[regno].biggest_mode = reg->biggest_mode;
+	  if (regno < FIRST_PSEUDO_REGISTER)
+	    {
+	      lra_hard_reg_usage[regno] += freq;
+	      /* A hard register explicitly can be used in small mode,
+		 but implicitly it can be used in natural mode as a
+		 part of multi-register group.  Process this case
+		 here.  */
+	      for (i = 1; i < hard_regno_nregs[regno][reg->biggest_mode]; i++)
+		if (GET_MODE_SIZE (GET_MODE (regno_reg_rtx[regno + i]))
+		    > GET_MODE_SIZE (lra_reg_info[regno + i].biggest_mode))
+		  lra_reg_info[regno + i].biggest_mode
+		    = GET_MODE (regno_reg_rtx[regno + i]);
+	    }
 	}
 
       call_p = CALL_P (curr_insn);
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index b293dcf..95167b3 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-06  Vladimir Makarov  <vmakarov@redhat.com>
+
+	target/77761
+	* testsuite/gcc.target/i386/pr77761.c: New.
+
 2016-12-06  Jerry DeLisle  <jvdelisle@gcc.gnu.org>
 
 	PR fortran/78659
diff --git a/gcc/testsuite/gcc.target/i386/pr77761.c b/gcc/testsuite/gcc.target/i386/pr77761.c
new file mode 100644
index 0000000..a39b3af
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr77761.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -fno-guess-branch-probability -fschedule-insns -fno-tree-ter -mavx512f --param=max-pending-list-length=512" } */
+/* { dg-require-effective-target avx512f } */
+
+#include "avx512f-check.h"
+
+typedef unsigned char u8;
+typedef unsigned char v64u8 __attribute__ ((vector_size (64)));
+typedef unsigned short u16;
+typedef unsigned short v64u16 __attribute__ ((vector_size (64)));
+typedef unsigned int u32;
+typedef unsigned int v64u32 __attribute__ ((vector_size (64)));
+typedef unsigned long long u64;
+typedef unsigned long long v64u64 __attribute__ ((vector_size (64)));
+typedef unsigned __int128 u128;
+typedef unsigned __int128 v64u128 __attribute__ ((vector_size (64)));
+
+v64u128 __attribute__ ((noinline, noclone))
+foo(u8 x1, u16 x2, u32 x3, u64 x4, v64u8 x5, v64u16 x6, v64u32 x7, v64u64 x8, v64u128 x9)
+{
+  u8 *p = &x1;
+  x9[0] -= *p;
+  x5 %= (v64u8){ 1, -x4, 0, x3, x5[9], x7[1], 4, x6[1], 13 << 4} | 1;
+  x5[1] = x5[0];
+  x8 %= (v64u64){1, x1} | 1;
+  x9 /= x9 | 1;
+  x5 -= (v64u8){0, 0, 3, 0, 0, 0, 0, x4, x9[0], 0, 1};
+  return x1 + x2 + x3 + x4 + (v64u128) x5 + (v64u128) x6 + (v64u128) x7 +
+    (v64u128) x8 + x9;
+}
+
+int
+do_main ()
+{
+  v64u128 x = foo(1, 0, 0, 0, (v64u8){}, (v64u16){}, (v64u32){}, (v64u64){}, (v64u128){});
+
+  if ((u64)(x[0] >> 64) != 0x0000000000ff00ff)
+    __builtin_abort();
+  if ((u64)(x[0] >>  0) != 0x0000000000fd0002)
+    __builtin_abort();
+
+  if (x[1] != 1)
+    __builtin_abort();
+  if (x[2] != 1)
+    __builtin_abort();
+  if (x[3] != 1)
+    __builtin_abort();
+  return 0;
+}
+
+static void
+avx512f_test (void)
+{
+  do_main ();
+}
-- 
cgit v1.1


From 00439aef2dbc4f95ba6e4926d5f27d0bb28edcec Mon Sep 17 00:00:00 2001
From: David Malcolm <dmalcolm@redhat.com>
Date: Tue, 6 Dec 2016 21:03:33 +0000
Subject: print_rtx: implement support for reuse IDs (v2)

gcc/ChangeLog:
	* config/i386/i386.c: Include print-rtl.h.
	(selftest::ix86_test_dumping_memory_blockage): New function.
	(selftest::ix86_run_selftests): Call it.
	* print-rtl-function.c (print_rtx_function): Create an
	rtx_reuse_manager and use it.
	* print-rtl.c: Include "rtl-iter.h".
	(rtx_writer::rtx_writer): Add reuse_manager param.
	(rtx_reuse_manager::rtx_reuse_manager): New ctor.
	(uses_rtx_reuse_p): New function.
	(rtx_reuse_manager::preprocess): New function.
	(rtx_reuse_manager::has_reuse_id): New function.
	(rtx_reuse_manager::seen_def_p): New function.
	(rtx_reuse_manager::set_seen_def): New function.
	(rtx_writer::print_rtx): If "in_rtx" has a reuse ID, print it as a
	prefix the first time in_rtx is seen, and print reuse_rtx
	subsequently.
	(print_inline_rtx): Supply NULL for new reuse_manager param.
	(debug_rtx): Likewise.
	(print_rtl): Likewise.
	(print_rtl_single): Likewise.
	(rtx_writer::print_rtl_single_with_indent): Likewise.
	* print-rtl.h: Include bitmap.h when building for host.
	(rtx_writer::rtx_writer): Add reuse_manager param.
	(rtx_writer::m_rtx_reuse_manager): New field.
	(class rtx_reuse_manager): New class.
	* rtl-tests.c (selftest::assert_rtl_dump_eq): Add reuse_manager
	param and use it when constructing rtx_writer.
	(selftest::test_dumping_rtx_reuse): New function.
	(selftest::rtl_tests_c_tests): Call it.
	* selftest-rtl.h (class rtx_reuse_manager): New forward decl.
	(selftest::assert_rtl_dump_eq): Add reuse_manager param.
	(ASSERT_RTL_DUMP_EQ): Supply NULL for reuse_manager param.
	(ASSERT_RTL_DUMP_EQ_WITH_REUSE): New macro.

From-SVN: r243317
---
 gcc/ChangeLog            |  36 ++++++++++++
 gcc/config/i386/i386.c   |  24 ++++++++
 gcc/print-rtl-function.c |   7 ++-
 gcc/print-rtl.c          | 139 +++++++++++++++++++++++++++++++++++++++++++----
 gcc/print-rtl.h          |  81 ++++++++++++++++++++++++++-
 gcc/rtl-tests.c          |  53 +++++++++++++++++-
 gcc/selftest-rtl.h       |  13 ++++-
 7 files changed, 336 insertions(+), 17 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b57a189..85916c1 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,39 @@
+2016-12-06  David Malcolm  <dmalcolm@redhat.com>
+
+	* config/i386/i386.c: Include print-rtl.h.
+	(selftest::ix86_test_dumping_memory_blockage): New function.
+	(selftest::ix86_run_selftests): Call it.
+	* print-rtl-function.c (print_rtx_function): Create an
+	rtx_reuse_manager and use it.
+	* print-rtl.c: Include "rtl-iter.h".
+	(rtx_writer::rtx_writer): Add reuse_manager param.
+	(rtx_reuse_manager::rtx_reuse_manager): New ctor.
+	(uses_rtx_reuse_p): New function.
+	(rtx_reuse_manager::preprocess): New function.
+	(rtx_reuse_manager::has_reuse_id): New function.
+	(rtx_reuse_manager::seen_def_p): New function.
+	(rtx_reuse_manager::set_seen_def): New function.
+	(rtx_writer::print_rtx): If "in_rtx" has a reuse ID, print it as a
+	prefix the first time in_rtx is seen, and print reuse_rtx
+	subsequently.
+	(print_inline_rtx): Supply NULL for new reuse_manager param.
+	(debug_rtx): Likewise.
+	(print_rtl): Likewise.
+	(print_rtl_single): Likewise.
+	(rtx_writer::print_rtl_single_with_indent): Likewise.
+	* print-rtl.h: Include bitmap.h when building for host.
+	(rtx_writer::rtx_writer): Add reuse_manager param.
+	(rtx_writer::m_rtx_reuse_manager): New field.
+	(class rtx_reuse_manager): New class.
+	* rtl-tests.c (selftest::assert_rtl_dump_eq): Add reuse_manager
+	param and use it when constructing rtx_writer.
+	(selftest::test_dumping_rtx_reuse): New function.
+	(selftest::rtl_tests_c_tests): Call it.
+	* selftest-rtl.h (class rtx_reuse_manager): New forward decl.
+	(selftest::assert_rtl_dump_eq): Add reuse_manager param.
+	(ASSERT_RTL_DUMP_EQ): Supply NULL for reuse_manager param.
+	(ASSERT_RTL_DUMP_EQ_WITH_REUSE): New macro.
+
 2016-12-06  Vladimir Makarov  <vmakarov@redhat.com>
 
 	target/77761
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 003439f..2e6be02 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -82,6 +82,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssanames.h"
 #include "selftest.h"
 #include "selftest-rtl.h"
+#include "print-rtl.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -51175,12 +51176,35 @@ ix86_test_dumping_hard_regs ()
   ASSERT_RTL_DUMP_EQ ("(reg:SI dx)", gen_raw_REG (SImode, 1));
 }
 
+/* Test dumping an insn with repeated references to the same SCRATCH,
+   to verify the rtx_reuse code.  */
+
+static void
+ix86_test_dumping_memory_blockage ()
+{
+  set_new_first_and_last_insn (NULL, NULL);
+
+  rtx pat = gen_memory_blockage ();
+  rtx_reuse_manager r;
+  r.preprocess (pat);
+
+  /* Verify that the repeated references to the SCRATCH are dumped
+     using reuse IDs.  The first should be prefixed with a reuse ID,
+     and the second should be dumped as a "reuse_rtx" of that ID.  */
+  ASSERT_RTL_DUMP_EQ_WITH_REUSE
+    ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0  A8])\n"
+     "        (unspec:BLK [\n"
+     "                (mem/v:BLK (reuse_rtx 0) [0  A8])\n"
+     "            ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
+}
+
 /* Run all target-specific selftests.  */
 
 static void
 ix86_run_selftests (void)
 {
   ix86_test_dumping_hard_regs ();
+  ix86_test_dumping_memory_blockage ();
 }
 
 } // namespace selftest
diff --git a/gcc/print-rtl-function.c b/gcc/print-rtl-function.c
index 8842226..dea84fe 100644
--- a/gcc/print-rtl-function.c
+++ b/gcc/print-rtl-function.c
@@ -221,7 +221,12 @@ print_param (FILE *outfile, rtx_writer &w, tree arg)
 DEBUG_FUNCTION void
 print_rtx_function (FILE *outfile, function *fn, bool compact)
 {
-  rtx_writer w (outfile, 0, false, compact);
+  rtx_reuse_manager r;
+  rtx_writer w (outfile, 0, false, compact, &r);
+
+  /* Support "reuse_rtx" in the dump.  */
+  for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
+    r.preprocess (insn);
 
   tree fdecl = fn->decl;
 
diff --git a/gcc/print-rtl.c b/gcc/print-rtl.c
index e7368c7..3bbd395 100644
--- a/gcc/print-rtl.c
+++ b/gcc/print-rtl.c
@@ -51,6 +51,7 @@ along with GCC; see the file COPYING3.  If not see
 #endif
 
 #include "print-rtl.h"
+#include "rtl-iter.h"
 
 /* String printed at beginning of each RTL when it is dumped.
    This string is set to ASM_COMMENT_START when the RTL is dumped in
@@ -74,13 +75,103 @@ int flag_dump_unnumbered_links = 0;
 
 /* Constructor for rtx_writer.  */
 
-rtx_writer::rtx_writer (FILE *outf, int ind, bool simple, bool compact)
+rtx_writer::rtx_writer (FILE *outf, int ind, bool simple, bool compact,
+			rtx_reuse_manager *reuse_manager)
 : m_outfile (outf), m_sawclose (0), m_indent (ind),
-  m_in_call_function_usage (false), m_simple (simple), m_compact (compact)
+  m_in_call_function_usage (false), m_simple (simple), m_compact (compact),
+  m_rtx_reuse_manager (reuse_manager)
 {
 }
 
 #ifndef GENERATOR_FILE
+
+/* rtx_reuse_manager's ctor.  */
+
+rtx_reuse_manager::rtx_reuse_manager ()
+: m_next_id (0)
+{
+  bitmap_initialize (&m_defs_seen, NULL);
+}
+
+/* Determine if X is of a kind suitable for dumping via reuse_rtx.  */
+
+static bool
+uses_rtx_reuse_p (const_rtx x)
+{
+  if (x == NULL)
+    return false;
+
+  switch (GET_CODE (x))
+    {
+    case DEBUG_EXPR:
+    case VALUE:
+    case SCRATCH:
+      return true;
+
+    /* We don't use reuse_rtx for consts.  */
+    CASE_CONST_UNIQUE:
+    default:
+      return false;
+    }
+}
+
+/* Traverse X and its descendents, counting occurrences of each rtx that is
+   suitable for "reuse_rtx".  Such an rtx is assigned a reuse ID exactly
+   once, when it is seen for the second time.  */
+
+void
+rtx_reuse_manager::preprocess (const_rtx x)
+{
+  subrtx_iterator::array_type array;
+  FOR_EACH_SUBRTX (iter, array, x, NONCONST)
+    if (uses_rtx_reuse_p (*iter))
+      {
+	if (int *count = m_rtx_occurrence_count.get (*iter))
+	  {
+	    if ((*count)++ == 1)
+	      m_rtx_reuse_ids.put (*iter, m_next_id++);
+	  }
+	else
+	  m_rtx_occurrence_count.put (*iter, 1);
+      }
+}
+
+/* Return true iff X has been assigned a reuse ID.  If it has,
+   and OUT is non-NULL, then write the reuse ID to *OUT.  */
+
+bool
+rtx_reuse_manager::has_reuse_id (const_rtx x, int *out)
+{
+  int *id = m_rtx_reuse_ids.get (x);
+  if (id)
+    {
+      if (out)
+	*out = *id;
+      return true;
+    }
+  else
+    return false;
+}
+
+/* Determine if set_seen_def has been called for the given reuse ID.  */
+
+bool
+rtx_reuse_manager::seen_def_p (int reuse_id)
+{
+  return bitmap_bit_p (&m_defs_seen, reuse_id);
+}
+
+/* Record that the definition of the given reuse ID has been seen.  */
+
+void
+rtx_reuse_manager::set_seen_def (int reuse_id)
+{
+  bitmap_set_bit (&m_defs_seen, reuse_id);
+}
+
+#endif /* #ifndef GENERATOR_FILE */
+
+#ifndef GENERATOR_FILE
 void
 print_mem_expr (FILE *outfile, const_tree expr)
 {
@@ -631,8 +722,34 @@ rtx_writer::print_rtx (const_rtx in_rtx)
        return;
     }
 
+  fputc ('(', m_outfile);
+
   /* Print name of expression code.  */
 
+  /* Handle reuse.  */
+#ifndef GENERATOR_FILE
+  if (m_rtx_reuse_manager)
+    {
+      int reuse_id;
+      if (m_rtx_reuse_manager->has_reuse_id (in_rtx, &reuse_id))
+	{
+	  /* Have we already seen the defn of this rtx?  */
+	  if (m_rtx_reuse_manager->seen_def_p (reuse_id))
+	    {
+	      fprintf (m_outfile, "reuse_rtx %i)", reuse_id);
+	      m_sawclose = 1;
+	      return;
+	    }
+	  else
+	    {
+	      /* First time we've seen this reused-rtx.  */
+	      fprintf (m_outfile, "%i|", reuse_id);
+	      m_rtx_reuse_manager->set_seen_def (reuse_id);
+	    }
+	}
+    }
+#endif /* #ifndef GENERATOR_FILE */
+
   /* In compact mode, prefix the code of insns with "c",
      giving "cinsn", "cnote" etc.  */
   if (m_compact && is_a <const rtx_insn *, const struct rtx_def> (in_rtx))
@@ -641,14 +758,14 @@ rtx_writer::print_rtx (const_rtx in_rtx)
 	 just "clabel".  */
       rtx_code code = GET_CODE (in_rtx);
       if (code == CODE_LABEL)
-	fprintf (m_outfile, "(clabel");
+	fprintf (m_outfile, "clabel");
       else
-	fprintf (m_outfile, "(c%s", GET_RTX_NAME (code));
+	fprintf (m_outfile, "c%s", GET_RTX_NAME (code));
     }
   else if (m_simple && CONST_INT_P (in_rtx))
-    fputc ('(', m_outfile);
+    ; /* no code.  */
   else
-    fprintf (m_outfile, "(%s", GET_RTX_NAME (GET_CODE (in_rtx)));
+    fprintf (m_outfile, "%s", GET_RTX_NAME (GET_CODE (in_rtx)));
 
   if (! m_simple)
     {
@@ -819,7 +936,7 @@ rtx_writer::finish_directive ()
 void
 print_inline_rtx (FILE *outf, const_rtx x, int ind)
 {
-  rtx_writer w (outf, ind, false, false);
+  rtx_writer w (outf, ind, false, false, NULL);
   w.print_rtx (x);
 }
 
@@ -828,7 +945,7 @@ print_inline_rtx (FILE *outf, const_rtx x, int ind)
 DEBUG_FUNCTION void
 debug_rtx (const_rtx x)
 {
-  rtx_writer w (stderr, 0, false, false);
+  rtx_writer w (stderr, 0, false, false, NULL);
   w.print_rtx (x);
   fprintf (stderr, "\n");
 }
@@ -975,7 +1092,7 @@ rtx_writer::print_rtl (const_rtx rtx_first)
 void
 print_rtl (FILE *outf, const_rtx rtx_first)
 {
-  rtx_writer w (outf, 0, false, false);
+  rtx_writer w (outf, 0, false, false, NULL);
   w.print_rtl (rtx_first);
 }
 
@@ -985,7 +1102,7 @@ print_rtl (FILE *outf, const_rtx rtx_first)
 int
 print_rtl_single (FILE *outf, const_rtx x)
 {
-  rtx_writer w (outf, 0, false, false);
+  rtx_writer w (outf, 0, false, false, NULL);
   return w.print_rtl_single_with_indent (x, 0);
 }
 
@@ -1016,7 +1133,7 @@ rtx_writer::print_rtl_single_with_indent (const_rtx x, int ind)
 void
 print_simple_rtl (FILE *outf, const_rtx x)
 {
-  rtx_writer w (outf, 0, true, false);
+  rtx_writer w (outf, 0, true, false, NULL);
   w.print_rtl (x);
 }
 
diff --git a/gcc/print-rtl.h b/gcc/print-rtl.h
index e722038..5f7cefb 100644
--- a/gcc/print-rtl.h
+++ b/gcc/print-rtl.h
@@ -20,12 +20,19 @@ along with GCC; see the file COPYING3.  If not see
 #ifndef GCC_PRINT_RTL_H
 #define GCC_PRINT_RTL_H
 
+#ifndef GENERATOR_FILE
+#include "bitmap.h"
+#endif /* #ifndef GENERATOR_FILE */
+
+class rtx_reuse_manager;
+
 /* A class for writing rtx to a FILE *.  */
 
 class rtx_writer
 {
  public:
-  rtx_writer (FILE *outfile, int ind, bool simple, bool compact);
+  rtx_writer (FILE *outfile, int ind, bool simple, bool compact,
+	      rtx_reuse_manager *reuse_manager);
 
   void print_rtx (const_rtx in_rtx);
   void print_rtl (const_rtx rtx_first);
@@ -60,6 +67,9 @@ class rtx_writer
        printed with a '%' sigil e.g. "%0" for (LAST_VIRTUAL_REGISTER + 1),
      - insn names are prefixed with "c" (e.g. "cinsn", "cnote", etc).  */
   bool m_compact;
+
+  /* An optional instance of rtx_reuse_manager.  */
+  rtx_reuse_manager *m_rtx_reuse_manager;
 };
 
 #ifdef BUFSIZ
@@ -80,4 +90,73 @@ extern const char *str_pattern_slim (const_rtx);
 
 extern void print_rtx_function (FILE *file, function *fn, bool compact);
 
+#ifndef GENERATOR_FILE
+
+/* For some rtx codes (such as SCRATCH), instances are defined to only be
+   equal for pointer equality: two distinct SCRATCH instances are non-equal.
+   copy_rtx preserves this equality by reusing the SCRATCH instance.
+
+   For example, in this x86 instruction:
+
+      (cinsn (set (mem/v:BLK (scratch:DI) [0  A8])
+                    (unspec:BLK [
+                            (mem/v:BLK (scratch:DI) [0  A8])
+                        ] UNSPEC_MEMORY_BLOCKAGE)) "test.c":2
+                 (nil))
+
+   the two instances of "(scratch:DI)" are actually the same underlying
+   rtx pointer (and thus "equal"), and the insn will only be recognized
+   (as "*memory_blockage") if this pointer-equality is preserved.
+
+   To be able to preserve this pointer-equality when round-tripping
+   through dumping/loading the rtl, we need some syntax.  The first
+   time a reused rtx is encountered in the dump, we prefix it with
+   a reuse ID:
+
+      (0|scratch:DI)
+
+   Subsequent references to the rtx in the dump can be expressed using
+   "reuse_rtx" e.g.:
+
+      (reuse_rtx 0)
+
+   This class is responsible for tracking a set of reuse IDs during a dump.
+
+   Dumping with reuse-support is done in two passes:
+
+   (a) a first pass in which "preprocess" is called on each top-level rtx
+       to be seen in the dump.  This traverses the rtx and its descendents,
+       identifying rtx that will be seen more than once in the actual dump,
+       and assigning them reuse IDs.
+
+   (b) the actual dump, via print_rtx etc.  print_rtx detects the presence
+       of a live rtx_reuse_manager and uses it if there is one.  Any rtx
+       that were assigned reuse IDs will be printed with it the first time
+       that they are seen, and then printed as "(reuse_rtx ID)" subsequently.
+
+   The first phase is needed since otherwise there would be no way to tell
+   if an rtx will be reused when first encountering it.  */
+
+class rtx_reuse_manager
+{
+ public:
+  rtx_reuse_manager ();
+
+  /* The first pass.  */
+  void preprocess (const_rtx x);
+
+  /* The second pass (within print_rtx).  */
+  bool has_reuse_id (const_rtx x, int *out);
+  bool seen_def_p (int reuse_id);
+  void set_seen_def (int reuse_id);
+
+ private:
+  hash_map<const_rtx, int> m_rtx_occurrence_count;
+  hash_map<const_rtx, int> m_rtx_reuse_ids;
+  bitmap_head m_defs_seen;
+  int m_next_id;
+};
+
+#endif /* #ifndef GENERATOR_FILE */
+
 #endif  // GCC_PRINT_RTL_H
diff --git a/gcc/rtl-tests.c b/gcc/rtl-tests.c
index 228226b..8edddfb 100644
--- a/gcc/rtl-tests.c
+++ b/gcc/rtl-tests.c
@@ -62,11 +62,12 @@ verify_print_pattern (const char *expected, rtx pat)
    Use LOC as the effective location when reporting errors.  */
 
 void
-assert_rtl_dump_eq (const location &loc, const char *expected_dump, rtx x)
+assert_rtl_dump_eq (const location &loc, const char *expected_dump, rtx x,
+		    rtx_reuse_manager *reuse_manager)
 {
   named_temp_file tmp_out (".rtl");
   FILE *outfile = fopen (tmp_out.get_filename (), "w");
-  rtx_writer w (outfile, 0, false, true);
+  rtx_writer w (outfile, 0, false, true, reuse_manager);
   w.print_rtl (x);
   fclose (outfile);
 
@@ -128,6 +129,53 @@ test_dumping_insns ()
   ASSERT_RTL_DUMP_EQ ("(clabel 0 42 (\"some_label\"))\n", label);
 }
 
+/* Manually exercise the rtx_reuse_manager code.  */
+
+static void
+test_dumping_rtx_reuse ()
+{
+  rtx_reuse_manager r;
+
+  rtx x = rtx_alloc (SCRATCH);
+  rtx y = rtx_alloc (SCRATCH);
+  rtx z = rtx_alloc (SCRATCH);
+
+  /* x and y will be seen more than once.  */
+  r.preprocess (x);
+  r.preprocess (x);
+  r.preprocess (y);
+  r.preprocess (y);
+
+  /* z will be only seen once.  */
+  r.preprocess (z);
+
+  /* Verify that x and y have been assigned reuse IDs.  */
+  int reuse_id_for_x;
+  ASSERT_TRUE (r.has_reuse_id (x, &reuse_id_for_x));
+  ASSERT_EQ (0, reuse_id_for_x);
+
+  int reuse_id_for_y;
+  ASSERT_TRUE (r.has_reuse_id (y, &reuse_id_for_y));
+  ASSERT_EQ (1, reuse_id_for_y);
+
+  /* z is only seen once and thus shouldn't get a reuse ID.  */
+  ASSERT_FALSE (r.has_reuse_id (z, NULL));
+
+  /* The first dumps of x and y should be prefixed by reuse ID;
+     all subsequent dumps of them should show up as "reuse_rtx".  */
+  ASSERT_RTL_DUMP_EQ_WITH_REUSE ("(0|scratch)", x, &r);
+  ASSERT_RTL_DUMP_EQ_WITH_REUSE ("(reuse_rtx 0)", x, &r);
+  ASSERT_RTL_DUMP_EQ_WITH_REUSE ("(reuse_rtx 0)", x, &r);
+
+  ASSERT_RTL_DUMP_EQ_WITH_REUSE ("(1|scratch)", y, &r);
+  ASSERT_RTL_DUMP_EQ_WITH_REUSE ("(reuse_rtx 1)", y, &r);
+  ASSERT_RTL_DUMP_EQ_WITH_REUSE ("(reuse_rtx 1)", y, &r);
+
+  /* z only appears once and thus shouldn't be prefixed with a
+     reuse ID.  */
+  ASSERT_RTL_DUMP_EQ_WITH_REUSE ("(scratch)", z, &r);
+}
+
 /* Unit testing of "single_set".  */
 
 static void
@@ -187,6 +235,7 @@ rtl_tests_c_tests ()
 {
   test_dumping_regs ();
   test_dumping_insns ();
+  test_dumping_rtx_reuse ();
   test_single_set ();
   test_uncond_jump ();
 
diff --git a/gcc/selftest-rtl.h b/gcc/selftest-rtl.h
index 0f0e167..f505018 100644
--- a/gcc/selftest-rtl.h
+++ b/gcc/selftest-rtl.h
@@ -25,18 +25,27 @@ along with GCC; see the file COPYING3.  If not see
 
 #if CHECKING_P
 
+class rtx_reuse_manager;
+
 namespace selftest {
 
 /* Verify that X is dumped as EXPECTED_DUMP, using compact mode.
    Use LOC as the effective location when reporting errors.  */
 
 extern void
-assert_rtl_dump_eq (const location &loc, const char *expected_dump, rtx x);
+assert_rtl_dump_eq (const location &loc, const char *expected_dump, rtx x,
+		    rtx_reuse_manager *reuse_manager);
 
 /* Verify that RTX is dumped as EXPECTED_DUMP, using compact mode.  */
 
 #define ASSERT_RTL_DUMP_EQ(EXPECTED_DUMP, RTX) \
-  assert_rtl_dump_eq (SELFTEST_LOCATION, (EXPECTED_DUMP), (RTX))
+  assert_rtl_dump_eq (SELFTEST_LOCATION, (EXPECTED_DUMP), (RTX), NULL)
+
+/* As above, but using REUSE_MANAGER when dumping.  */
+
+#define ASSERT_RTL_DUMP_EQ_WITH_REUSE(EXPECTED_DUMP, RTX, REUSE_MANAGER) \
+  assert_rtl_dump_eq (SELFTEST_LOCATION, (EXPECTED_DUMP), (RTX), \
+		      (REUSE_MANAGER))
 
 } /* end of namespace selftest.  */
 
-- 
cgit v1.1


From dd5af1d61761e9bf466cd64360e6321cde0a2c5f Mon Sep 17 00:00:00 2001
From: Michael Meissner <meissner@linux.vnet.ibm.com>
Date: Tue, 6 Dec 2016 22:15:31 +0000
Subject: re PR target/78658 (powerpc64le: ICE with -mcpu=power9 -Og)

[gcc]
2016-12-06  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/78658
	* config/rs6000/rs6000.md (zero_extendqi<mode>2): Use ^ instead of
	?* constraints for the ISA 3.0 patterns, so the register allocator
	is more likely to allocate QImode/HImode to vector registers for
	conversion to floating point unless a reload is needed.
	(zero_extendhi<mode>2): Likewise.
	(float<QHI:mode><FP_ISA3:mode>2_internal): Properly deal with the
	first alternative which is converting QImode/HImode to floating
	point and the QImode/HImode value is in a vector register, and
	does not allocate the second pseudo register.  Remove zero
	extending into traditional floating point registers, since the
	instruction used only works on traditional altivec registers.
	(floatuns<QHI:mode><FP_ISA3:mode>2_internal): Likewise.

[gcc/testsuite]
2016-12-06  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/78658
	* gcc.target/powerpc/pr78658.c: New test.

From-SVN: r243320
---
 gcc/ChangeLog                              | 16 ++++++++++++++++
 gcc/config/rs6000/rs6000.md                | 21 ++++++++++++++-------
 gcc/testsuite/ChangeLog                    |  5 +++++
 gcc/testsuite/gcc.target/powerpc/pr78658.c | 14 ++++++++++++++
 4 files changed, 49 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr78658.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 85916c1..463dc1a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,19 @@
+2016-12-06  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	PR target/78658
+	* config/rs6000/rs6000.md (zero_extendqi<mode>2): Use ^ instead of
+	?* constraints for the ISA 3.0 patterns, so the register allocator
+	is more likely to allocate QImode/HImode to vector registers for
+	conversion to floating point unless a reload is needed.
+	(zero_extendhi<mode>2): Likewise.
+	(float<QHI:mode><FP_ISA3:mode>2_internal): Properly deal with the
+	first alternative which is converting QImode/HImode to floating
+	point and the QImode/HImode value is in a vector register, and
+	does not allocate the second pseudo register.  Remove zero
+	extending into traditional floating point registers, since the
+	instruction used only works on traditional altivec registers.
+	(floatuns<QHI:mode><FP_ISA3:mode>2_internal): Likewise.
+
 2016-12-06  David Malcolm  <dmalcolm@redhat.com>
 
 	* config/i386/i386.c: Include print-rtl.h.
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 5a453a0..4726d73 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -738,8 +738,8 @@
 ;; complex forms.  Basic data transfer is done later.
 
 (define_insn "zero_extendqi<mode>2"
-  [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,r,?*wJwK,?*wK")
-	(zero_extend:EXTQI (match_operand:QI 1 "reg_or_mem_operand" "m,r,Z,*wK")))]
+  [(set (match_operand:EXTQI 0 "gpc_reg_operand" "=r,r,^wJwK,^wK")
+	(zero_extend:EXTQI (match_operand:QI 1 "reg_or_mem_operand" "m,r,Z,wK")))]
   ""
   "@
    lbz%U1%X1 %0,%1
@@ -791,7 +791,7 @@
 
 
 (define_insn "zero_extendhi<mode>2"
-  [(set (match_operand:EXTHI 0 "gpc_reg_operand" "=r,r,?*wJwK,?*wK")
+  [(set (match_operand:EXTHI 0 "gpc_reg_operand" "=r,r,^wJwK,^wK")
 	(zero_extend:EXTHI (match_operand:HI 1 "reg_or_mem_operand" "m,r,Z,wK")))]
   ""
   "@
@@ -5413,11 +5413,13 @@
 
   if (!MEM_P (input))
     {
+      rtx tmp = operands[3];
       if (altivec_register_operand (input, <QHI:MODE>mode))
 	emit_insn (gen_extend<QHI:mode>di2 (di, input));
+      else if (GET_CODE (tmp) == SCRATCH)
+	emit_insn (gen_extend<QHI:mode>di2 (di, input));
       else
 	{
-	  rtx tmp = operands[3];
 	  emit_insn (gen_extend<QHI:mode>di2 (tmp, input));
 	  emit_move_insn (di, tmp);
 	}
@@ -5449,7 +5451,7 @@
 (define_insn_and_split "*floatuns<QHI:mode><FP_ISA3:mode>2_internal"
   [(set (match_operand:FP_ISA3 0 "vsx_register_operand" "=<Fv>,<Fv>,<Fv>")
 	(unsigned_float:FP_ISA3
-	 (match_operand:QHI 1 "reg_or_indexed_operand" "wJwK,r,Z")))
+	 (match_operand:QHI 1 "reg_or_indexed_operand" "wK,r,Z")))
    (clobber (match_scratch:DI 2 "=wK,wi,wJwK"))
    (clobber (match_scratch:DI 3 "=X,r,X"))]
   "TARGET_P9_VECTOR && TARGET_DIRECT_MOVE && TARGET_POWERPC64
@@ -5467,8 +5469,13 @@
   else
     {
       rtx tmp = operands[3];
-      emit_insn (gen_zero_extend<QHI:mode>di2 (tmp, input));
-      emit_move_insn (di, tmp);
+      if (GET_CODE (tmp) == SCRATCH)
+	emit_insn (gen_extend<QHI:mode>di2 (di, input));
+      else
+	{
+	  emit_insn (gen_zero_extend<QHI:mode>di2 (tmp, input));
+	  emit_move_insn (di, tmp);
+	}
     }
 
   emit_insn (gen_floatdi<FP_ISA3:mode>2 (result, di));
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 95167b3..5adcdd2 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-06  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	PR target/78658
+	* gcc.target/powerpc/pr78658.c: New test.
+
 2016-12-06  Vladimir Makarov  <vmakarov@redhat.com>
 
 	target/77761
diff --git a/gcc/testsuite/gcc.target/powerpc/pr78658.c b/gcc/testsuite/gcc.target/powerpc/pr78658.c
new file mode 100644
index 0000000..fcea632
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr78658.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target { powerpc64*-*-* && lp64 } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mcpu=power9 -O2" } */
+
+/* This caused an unrecognizable insn message on development versions of GCC 7.  */
+
+float a;
+char b;
+
+void c(void)
+{
+  a = b = a;
+}
-- 
cgit v1.1


From a5a56d88a1040a42a1bfc22587364f3bfb1efa76 Mon Sep 17 00:00:00 2001
From: Than McIntosh <thanm@google.com>
Date: Tue, 6 Dec 2016 22:31:25 +0000
Subject: compiler: pass lvalue/rvalue context to back end for var exprs

    Add a new flag on the Var_expression class that indicates
    whether the var reference appears in an "lvalue" context
    (for example, on the LHS of an assignment stmt) or an
    "rvalue" context (for example, as an argument of a call).

    Add a traversal pass that visits assignment stmt LHS subtrees
    so as to mark things prior to backend gen. Select the right
    context value in other places where Backend::var_expression is
    called.

    Reviewed-on: https://go-review.googlesource.com/33990

	* go-gcc.cc (Gcc_backend::var_expression): Add Varexpr_context
	parameter.

From-SVN: r243321
---
 gcc/go/ChangeLog                 |  5 +++
 gcc/go/go-gcc.cc                 |  4 +--
 gcc/go/gofrontend/MERGE          |  2 +-
 gcc/go/gofrontend/backend.h      |  2 +-
 gcc/go/gofrontend/expressions.cc | 44 +++++++++++++----------
 gcc/go/gofrontend/expressions.h  | 14 +++++++-
 gcc/go/gofrontend/gogo.cc        |  8 +++--
 gcc/go/gofrontend/operator.h     |  6 ++++
 gcc/go/gofrontend/statements.cc  | 77 ++++++++++++++++++++++++++++++++++++++++
 gcc/go/gofrontend/types.cc       |  5 +--
 10 files changed, 139 insertions(+), 28 deletions(-)

diff --git a/gcc/go/ChangeLog b/gcc/go/ChangeLog
index bbae1a9f..0b116eb 100644
--- a/gcc/go/ChangeLog
+++ b/gcc/go/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-06  Than McIntosh  <thanm@google.com>
+
+	* go-gcc.cc (Gcc_backend::var_expression): Add Varexpr_context
+	parameter.
+
 2016-11-22  Than McIntosh  <thanm@google.com>
 
 	* go-gcc.cc (char_needs_encoding): Remove.
diff --git a/gcc/go/go-gcc.cc b/gcc/go/go-gcc.cc
index dc00413..f1ac522 100644
--- a/gcc/go/go-gcc.cc
+++ b/gcc/go/go-gcc.cc
@@ -276,7 +276,7 @@ class Gcc_backend : public Backend
   { return this->make_expression(null_pointer_node); }
 
   Bexpression*
-  var_expression(Bvariable* var, Location);
+  var_expression(Bvariable* var, Varexpr_context, Location);
 
   Bexpression*
   indirect_expression(Btype*, Bexpression* expr, bool known_valid, Location);
@@ -1243,7 +1243,7 @@ Gcc_backend::zero_expression(Btype* btype)
 // An expression that references a variable.
 
 Bexpression*
-Gcc_backend::var_expression(Bvariable* var, Location location)
+Gcc_backend::var_expression(Bvariable* var, Varexpr_context, Location location)
 {
   tree ret = var->get_tree(location);
   if (ret == error_mark_node)
diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE
index 5529002..0cb0f9c 100644
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
@@ -1,4 +1,4 @@
-b7bad96ce0af50a1129eaab9aa110d68a601917b
+2102112e26a21589455f940ec6b409766d942c62
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
diff --git a/gcc/go/gofrontend/backend.h b/gcc/go/gofrontend/backend.h
index e93cdfe..e9a1912 100644
--- a/gcc/go/gofrontend/backend.h
+++ b/gcc/go/gofrontend/backend.h
@@ -254,7 +254,7 @@ class Backend
 
   // Create a reference to a variable.
   virtual Bexpression*
-  var_expression(Bvariable* var, Location) = 0;
+  var_expression(Bvariable* var, Varexpr_context in_lvalue_pos, Location) = 0;
 
   // Create an expression that indirects through the pointer expression EXPR
   // (i.e., return the expression for *EXPR). KNOWN_VALID is true if the pointer
diff --git a/gcc/go/gofrontend/expressions.cc b/gcc/go/gofrontend/expressions.cc
index 9740d32..24f6b12 100644
--- a/gcc/go/gofrontend/expressions.cc
+++ b/gcc/go/gofrontend/expressions.cc
@@ -760,7 +760,8 @@ Var_expression::do_get_backend(Translate_context* context)
   else
     go_unreachable();
 
-  Bexpression* ret = context->backend()->var_expression(bvar, loc);
+  Bexpression* ret =
+      context->backend()->var_expression(bvar, this->in_lvalue_pos_, loc);
   if (is_in_heap)
     ret = context->backend()->indirect_expression(btype, ret, true, loc);
   return ret;
@@ -887,7 +888,10 @@ Temporary_reference_expression::do_get_backend(Translate_context* context)
 {
   Gogo* gogo = context->gogo();
   Bvariable* bvar = this->statement_->get_backend_variable(context);
-  Bexpression* ret = gogo->backend()->var_expression(bvar, this->location());
+  Varexpr_context ve_ctxt = (this->is_lvalue_ ? VE_lvalue : VE_rvalue);
+
+  Bexpression* ret = gogo->backend()->var_expression(bvar, ve_ctxt,
+                                                     this->location());
 
   // The backend can't always represent the same set of recursive types
   // that the Go frontend can.  In some cases this means that a
@@ -958,11 +962,11 @@ Set_and_use_temporary_expression::do_get_backend(Translate_context* context)
   Location loc = this->location();
   Gogo* gogo = context->gogo();
   Bvariable* bvar = this->statement_->get_backend_variable(context);
-  Bexpression* var_ref = gogo->backend()->var_expression(bvar, loc);
+  Bexpression* lvar_ref = gogo->backend()->var_expression(bvar, VE_rvalue, loc);
 
   Bexpression* bexpr = this->expr_->get_backend(context);
-  Bstatement* set = gogo->backend()->assignment_statement(var_ref, bexpr, loc);
-  var_ref = gogo->backend()->var_expression(bvar, loc);
+  Bstatement* set = gogo->backend()->assignment_statement(lvar_ref, bexpr, loc);
+  Bexpression* var_ref = gogo->backend()->var_expression(bvar, VE_lvalue, loc);
   Bexpression* ret = gogo->backend()->compound_expression(set, var_ref, loc);
   return ret;
 }
@@ -1065,11 +1069,12 @@ Sink_expression::do_get_backend(Translate_context* context)
       this->bvar_ =
 	gogo->backend()->temporary_variable(fn_ctx, context->bblock(), bt, NULL,
 					    false, loc, &decl);
-      Bexpression* var_ref = gogo->backend()->var_expression(this->bvar_, loc);
+      Bexpression* var_ref =
+          gogo->backend()->var_expression(this->bvar_, VE_lvalue, loc);
       var_ref = gogo->backend()->compound_expression(decl, var_ref, loc);
       return var_ref;
     }
-  return gogo->backend()->var_expression(this->bvar_, loc);
+  return gogo->backend()->var_expression(this->bvar_, VE_lvalue, loc);
 }
 
 // Ast dump for sink expression.
@@ -1276,7 +1281,7 @@ Func_descriptor_expression::do_get_backend(Translate_context* context)
   Named_object* no = this->fn_;
   Location loc = no->location();
   if (this->dvar_ != NULL)
-    return context->backend()->var_expression(this->dvar_, loc);
+    return context->backend()->var_expression(this->dvar_, VE_rvalue, loc);
 
   Gogo* gogo = context->gogo();
   std::string var_name;
@@ -1330,7 +1335,7 @@ Func_descriptor_expression::do_get_backend(Translate_context* context)
     }
 
   this->dvar_ = bvar;
-  return gogo->backend()->var_expression(bvar, loc);
+  return gogo->backend()->var_expression(bvar, VE_rvalue, loc);
 }
 
 // Print a function descriptor expression.
@@ -4207,7 +4212,8 @@ Unary_expression::do_get_backend(Translate_context* context)
 	{
 	  Temporary_statement* temp = sut->temporary();
 	  Bvariable* bvar = temp->get_backend_variable(context);
-          Bexpression* bvar_expr = gogo->backend()->var_expression(bvar, loc);
+          Bexpression* bvar_expr =
+              gogo->backend()->var_expression(bvar, VE_lvalue, loc);
           Bexpression* bval = sut->expression()->get_backend(context);
 
           Bstatement* bassign =
@@ -4294,7 +4300,7 @@ Unary_expression::do_get_backend(Translate_context* context)
 	  gogo->backend()->implicit_variable_set_init(implicit, buf, btype,
 						      true, copy_to_heap, false,
 						      bexpr);
-	  bexpr = gogo->backend()->var_expression(implicit, loc);
+	  bexpr = gogo->backend()->var_expression(implicit, VE_lvalue, loc);
 
 	  // If we are not copying a slice initializer to the heap,
 	  // then it can be changed by the program, so if it can
@@ -4304,7 +4310,7 @@ Unary_expression::do_get_backend(Translate_context* context)
 	      && this->expr_->type()->has_pointer())
 	    {
 	      Bexpression* root =
-		gogo->backend()->var_expression(implicit, loc);
+                  gogo->backend()->var_expression(implicit, VE_lvalue, loc);
 	      root = gogo->backend()->address_expression(root, loc);
 	      Type* type = Type::make_pointer_type(this->expr_->type());
 	      gogo->add_gc_root(Expression::make_backend(root, type, loc));
@@ -4324,7 +4330,7 @@ Unary_expression::do_get_backend(Translate_context* context)
                                                 true, false, btype, loc);
           gogo->backend()->immutable_struct_set_init(decl, buf, true, false,
                                                      btype, loc, bexpr);
-          bexpr = gogo->backend()->var_expression(decl, loc);
+          bexpr = gogo->backend()->var_expression(decl, VE_lvalue, loc);
         }
 
       go_assert(!this->create_temp_ || this->expr_->is_variable());
@@ -14116,7 +14122,7 @@ Heap_expression::do_get_backend(Translate_context* context)
   Bvariable* space_temp =
     gogo->backend()->temporary_variable(fndecl, context->bblock(), btype,
 					space, true, loc, &decl);
-  space = gogo->backend()->var_expression(space_temp, loc);
+  space = gogo->backend()->var_expression(space_temp, VE_lvalue, loc);
   Btype* expr_btype = this->expr_->type()->get_backend(gogo);
   Bexpression* ref =
     gogo->backend()->indirect_expression(expr_btype, space, true, loc);
@@ -14124,7 +14130,7 @@ Heap_expression::do_get_backend(Translate_context* context)
   Bexpression* bexpr = this->expr_->get_backend(context);
   Bstatement* assn = gogo->backend()->assignment_statement(ref, bexpr, loc);
   decl = gogo->backend()->compound_statement(decl, assn);
-  space = gogo->backend()->var_expression(space_temp, loc);
+  space = gogo->backend()->var_expression(space_temp, VE_rvalue, loc);
   return gogo->backend()->compound_expression(decl, space, loc);
 }
 
@@ -15063,7 +15069,8 @@ Interface_mtable_expression::do_get_backend(Translate_context* context)
   Gogo* gogo = context->gogo();
   Location loc = Linemap::predeclared_location();
   if (this->bvar_ != NULL)
-    return gogo->backend()->var_expression(this->bvar_, this->location());
+    return gogo->backend()->var_expression(this->bvar_, VE_rvalue,
+                                           this->location());
 
   const Typed_identifier_list* interface_methods = this->itype_->methods();
   go_assert(!interface_methods->empty());
@@ -15099,7 +15106,8 @@ Interface_mtable_expression::do_get_backend(Translate_context* context)
       this->bvar_ =
           gogo->backend()->immutable_struct_reference(mangled_name, asm_name,
                                                       btype, loc);
-      return gogo->backend()->var_expression(this->bvar_, this->location());
+      return gogo->backend()->var_expression(this->bvar_, VE_rvalue,
+                                             this->location());
     }
 
   // The first element is the type descriptor.
@@ -15147,7 +15155,7 @@ Interface_mtable_expression::do_get_backend(Translate_context* context)
 						  !is_public, btype, loc);
   gogo->backend()->immutable_struct_set_init(this->bvar_, mangled_name, false,
                                              !is_public, btype, loc, ctor);
-  return gogo->backend()->var_expression(this->bvar_, loc);
+  return gogo->backend()->var_expression(this->bvar_, VE_lvalue, loc);
 }
 
 void
diff --git a/gcc/go/gofrontend/expressions.h b/gcc/go/gofrontend/expressions.h
index f31d4a6..98e2115 100644
--- a/gcc/go/gofrontend/expressions.h
+++ b/gcc/go/gofrontend/expressions.h
@@ -1282,7 +1282,7 @@ class Var_expression : public Expression
  public:
   Var_expression(Named_object* variable, Location location)
     : Expression(EXPRESSION_VAR_REFERENCE, location),
-      variable_(variable)
+      variable_(variable), in_lvalue_pos_(VE_rvalue)
   { }
 
   // Return the variable.
@@ -1290,6 +1290,16 @@ class Var_expression : public Expression
   named_object() const
   { return this->variable_; }
 
+  // Does this var expression appear in an lvalue (assigned-to) context?
+  bool
+  in_lvalue_pos() const
+  { return this->in_lvalue_pos_ == VE_lvalue; }
+
+  // Mark a var_expression as appearing in an lvalue context.
+  void
+  set_in_lvalue_pos()
+  { this->in_lvalue_pos_ = VE_lvalue; }
+
  protected:
   Expression*
   do_lower(Gogo*, Named_object*, Statement_inserter*, int);
@@ -1320,6 +1330,8 @@ class Var_expression : public Expression
  private:
   // The variable we are referencing.
   Named_object* variable_;
+  // Context (VE_lvalue or VE_rvalue) in which this var expression appears.
+  Varexpr_context in_lvalue_pos_;
 };
 
 // A reference to a variable within an enclosing function.
diff --git a/gcc/go/gofrontend/gogo.cc b/gcc/go/gofrontend/gogo.cc
index d685bca..e9cc6b4 100644
--- a/gcc/go/gofrontend/gogo.cc
+++ b/gcc/go/gofrontend/gogo.cc
@@ -1369,7 +1369,7 @@ Gogo::write_globals()
                 {
                   Location loc = var->location();
                   Bexpression* var_expr =
-                      this->backend()->var_expression(bvar, loc);
+                      this->backend()->var_expression(bvar, VE_lvalue, loc);
                   var_init_stmt =
                       this->backend()->assignment_statement(var_expr, var_binit,
                                                             loc);
@@ -5734,7 +5734,8 @@ Function::return_value(Gogo* gogo, Named_object* named_function,
     {
       Named_object* no = (*this->results_)[i];
       Bvariable* bvar = no->get_backend_variable(gogo, named_function);
-      Bexpression* val = gogo->backend()->var_expression(bvar, location);
+      Bexpression* val = gogo->backend()->var_expression(bvar, VE_rvalue,
+                                                         location);
       if (no->result_var_value()->is_in_heap())
 	{
 	  Btype* bt = no->result_var_value()->type()->get_backend(gogo);
@@ -6563,7 +6564,8 @@ Variable::get_init_block(Gogo* gogo, Named_object* function,
           Expression* val_expr =
               Expression::make_cast(this->type(), this->init_, loc);
           Bexpression* val = val_expr->get_backend(&context);
-          Bexpression* var_ref = gogo->backend()->var_expression(var_decl, loc);
+          Bexpression* var_ref =
+              gogo->backend()->var_expression(var_decl, VE_lvalue, loc);
           decl_init = gogo->backend()->assignment_statement(var_ref, val, loc);
 	}
     }
diff --git a/gcc/go/gofrontend/operator.h b/gcc/go/gofrontend/operator.h
index f3e0fd0..e0a97d0 100644
--- a/gcc/go/gofrontend/operator.h
+++ b/gcc/go/gofrontend/operator.h
@@ -63,4 +63,10 @@ enum Operator
   OPERATOR_RSQUARE	// ]
 };
 
+// Whether a variable expression appears in lvalue (assignment) context.
+enum Varexpr_context {
+  VE_rvalue,
+  VE_lvalue
+};
+
 #endif // !defined(GO_OPERATOR_H)
diff --git a/gcc/go/gofrontend/statements.cc b/gcc/go/gofrontend/statements.cc
index e25fd6b..c7b4fe8 100644
--- a/gcc/go/gofrontend/statements.cc
+++ b/gcc/go/gofrontend/statements.cc
@@ -825,6 +825,80 @@ Assignment_statement::do_flatten(Gogo*, Named_object*, Block*,
   return this;
 }
 
+
+// Helper class to locate a root Var_expression within an expression
+// tree and mark it as being in an "lvalue" or assignment
+// context. Examples:
+//
+//    x, y = 40, foo(w)
+//    x[2] = bar(v)
+//    x.z.w[blah(v + u)], y.another = 2, 3
+//
+// In the code above, vars "x" and "y" appear in lvalue / assignment
+// context, whereas the other vars "v", "u", etc are in rvalue context.
+//
+// Note: at the moment the Var_expression version of "do_copy()"
+// defaults to returning the original object, not a new object,
+// meaning that a given Var_expression can be referenced from more
+// than one place in the tree. This means that when we want to mark a
+// Var_expression as having lvalue semantics, we need to make a copy
+// of it. Example:
+//
+//    mystruct.myfield += 42
+//
+// When this is lowered to eliminate the += operator, we get a tree
+//
+//    mystruct.myfield = mystruct.myfield + 42
+//
+// in which the same "mystruct" Var_expression is referenced on both
+// LHS and RHS subtrees. This in turn means that if we try to mark the
+// LHS Var_expression the RHS Var_expression will also be marked.  To
+// address this issue, the code below clones any var_expression before
+// applying an lvalue marking.
+//
+
+class Mark_lvalue_varexprs : public Traverse
+{
+ public:
+  Mark_lvalue_varexprs()
+    : Traverse(traverse_expressions)
+  { }
+
+ protected:
+  int
+  expression(Expression**);
+
+ private:
+};
+
+int Mark_lvalue_varexprs::expression(Expression** ppexpr)
+{
+  Expression* e = *ppexpr;
+
+  Var_expression* ve = e->var_expression();
+  if (ve)
+    {
+      ve = new Var_expression(ve->named_object(), ve->location());
+      ve->set_in_lvalue_pos();
+      *ppexpr = ve;
+      return TRAVERSE_EXIT;
+    }
+
+  Field_reference_expression* fre = e->field_reference_expression();
+  if (fre != NULL)
+    return TRAVERSE_CONTINUE;
+
+  Array_index_expression* aie = e->array_index_expression();
+  if (aie != NULL)
+    {
+      Mark_lvalue_varexprs mlve;
+      aie->array()->traverse_subexpressions(&mlve);
+      return TRAVERSE_EXIT;
+    }
+
+  return TRAVERSE_EXIT;
+}
+
 // Convert an assignment statement to the backend representation.
 
 Bstatement*
@@ -836,6 +910,9 @@ Assignment_statement::do_get_backend(Translate_context* context)
       return context->backend()->expression_statement(rhs);
     }
 
+  Mark_lvalue_varexprs mlve;
+  Expression::traverse(&this->lhs_, &mlve);
+
   Bexpression* lhs = this->lhs_->get_backend(context);
   Expression* conv =
       Expression::convert_for_assignment(context->gogo(), this->lhs_->type(),
diff --git a/gcc/go/gofrontend/types.cc b/gcc/go/gofrontend/types.cc
index 33d3460..d540acb 100644
--- a/gcc/go/gofrontend/types.cc
+++ b/gcc/go/gofrontend/types.cc
@@ -1173,7 +1173,8 @@ Type::type_descriptor_pointer(Gogo* gogo, Location location)
       go_assert(t->type_descriptor_var_ != NULL);
     }
   Bexpression* var_expr =
-      gogo->backend()->var_expression(t->type_descriptor_var_, location);
+      gogo->backend()->var_expression(t->type_descriptor_var_,
+                                      VE_rvalue, location);
   return gogo->backend()->address_expression(var_expr, location);
 }
 
@@ -2146,7 +2147,7 @@ Type::gc_symbol_pointer(Gogo* gogo)
     }
   Location bloc = Linemap::predeclared_location();
   Bexpression* var_expr =
-      gogo->backend()->var_expression(t->gc_symbol_var_, bloc);
+      gogo->backend()->var_expression(t->gc_symbol_var_, VE_rvalue, bloc);
   return gogo->backend()->address_expression(var_expr, bloc);
 }
 
-- 
cgit v1.1


From 78bcf3dc722bce75c02eb05f351daf3447896c9d Mon Sep 17 00:00:00 2001
From: Eric Botcazou <ebotcazou@adacore.com>
Date: Tue, 6 Dec 2016 22:47:00 +0000
Subject: re PR middle-end/78700 (gccgo testcases stack.go, recover.go,
 crypto/tls fails)

	PR middle-end/78700
	* calls.c (expand_call): Move back call to prepare_call_address.

From-SVN: r243322
---
 gcc/ChangeLog |  5 +++++
 gcc/calls.c   | 10 +++-------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 463dc1a..61eeea3 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-06  Eric Botcazou  <ebotcazou@adacore.com>
+
+	PR middle-end/78700
+	* calls.c (expand_call): Move back call to prepare_call_address.
+
 2016-12-06  Michael Meissner  <meissner@linux.vnet.ibm.com>
 
 	PR target/78658
diff --git a/gcc/calls.c b/gcc/calls.c
index 21385ce..7941869 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -3427,13 +3427,6 @@ expand_call (tree exp, rtx target, int ignore)
       if (STRICT_ALIGNMENT)
 	store_unaligned_arguments_into_pseudos (args, num_actuals);
 
-      /* Prepare the address of the call.  This must be done before any
-	 register parameters is loaded for find_first_parameter_load to
-	 work properly in the presence of descriptors.  */
-      funexp = prepare_call_address (fndecl ? fndecl : fntype, funexp,
-				     static_chain_value, &call_fusage,
-				     reg_parm_seen, flags);
-
       /* Now store any partially-in-registers parm.
 	 This is the last place a block-move can happen.  */
       if (reg_parm_seen)
@@ -3544,6 +3537,9 @@ expand_call (tree exp, rtx target, int ignore)
 	}
 
       after_args = get_last_insn ();
+      funexp = prepare_call_address (fndecl ? fndecl : fntype, funexp,
+				     static_chain_value, &call_fusage,
+				     reg_parm_seen, flags);
       load_register_parameters (args, num_actuals, &call_fusage, flags,
 				pass == 0, &sibcall_failure);
 
-- 
cgit v1.1


From 6b8805cfce8847a0d944150845cb908b6ee0ba57 Mon Sep 17 00:00:00 2001
From: Eric Botcazou <ebotcazou@gcc.gnu.org>
Date: Tue, 6 Dec 2016 22:51:32 +0000
Subject: Put back blank line

From-SVN: r243324
---
 gcc/calls.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/calls.c b/gcc/calls.c
index 7941869..530e7bf 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -3540,6 +3540,7 @@ expand_call (tree exp, rtx target, int ignore)
       funexp = prepare_call_address (fndecl ? fndecl : fntype, funexp,
 				     static_chain_value, &call_fusage,
 				     reg_parm_seen, flags);
+
       load_register_parameters (args, num_actuals, &call_fusage, flags,
 				pass == 0, &sibcall_failure);
 
-- 
cgit v1.1


From 8194dcdd37de477dcb0be39d912a7eb1d465a2c4 Mon Sep 17 00:00:00 2001
From: Tom de Vries <tom@codesourcery.com>
Date: Tue, 6 Dec 2016 23:18:17 +0000
Subject: re PR tree-optimization/67955 (tree-dse does not use pointer info)

	PR tree-optimization/67955
	* tree-ssa-alias.c (same_addr_size_stores_p): New function.
	(stmt_kills_ref_p): Use it.

	PR tree-optimization/67955
	* gcc.dg/tree-ssa/dse-points-to.c: New test.

From-SVN: r243325
---
 gcc/ChangeLog                                 |  6 +++
 gcc/testsuite/ChangeLog                       |  5 ++
 gcc/testsuite/gcc.dg/tree-ssa/dse-points-to.c | 15 ++++++
 gcc/tree-ssa-alias.c                          | 77 +++++++++++++++++++++++++++
 4 files changed, 103 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/dse-points-to.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 61eeea3..797b711 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-06  Tom de Vries  <tom@codesourcery.com>
+
+	PR tree-optimization/67955
+	* tree-ssa-alias.c (same_addr_size_stores_p): New function.
+	(stmt_kills_ref_p): Use it.
+
 2016-12-06  Eric Botcazou  <ebotcazou@adacore.com>
 
 	PR middle-end/78700
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 5adcdd2..6090a96 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-06  Tom de Vries  <tom@codesourcery.com>
+
+	PR tree-optimization/67955
+	* gcc.dg/tree-ssa/dse-points-to.c: New test.
+
 2016-12-06  Michael Meissner  <meissner@linux.vnet.ibm.com>
 
 	PR target/78658
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/dse-points-to.c b/gcc/testsuite/gcc.dg/tree-ssa/dse-points-to.c
new file mode 100644
index 0000000..8003556
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/dse-points-to.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-tree-ccp -fno-tree-forwprop -fno-tree-fre -fno-tree-vrp" } */
+/* { dg-additional-options "-fdump-tree-dse1-details" } */
+
+int
+f ()
+{
+  int a;
+  int *p = &a;
+  *p = 1;
+  a = 2;
+  return a;
+}
+
+/* { dg-final { scan-tree-dump-times "Deleted dead store.*p_1" 1 "dse1"} } */
diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c
index 10f1677..37b581d 100644
--- a/gcc/tree-ssa-alias.c
+++ b/gcc/tree-ssa-alias.c
@@ -2316,6 +2316,78 @@ stmt_may_clobber_ref_p (gimple *stmt, tree ref)
   return stmt_may_clobber_ref_p_1 (stmt, &r);
 }
 
+/* Return true if store1 and store2 described by corresponding tuples
+   <BASE, OFFSET, SIZE, MAX_SIZE> have the same size and store to the same
+   address.  */
+
+static bool
+same_addr_size_stores_p (tree base1, HOST_WIDE_INT offset1, HOST_WIDE_INT size1,
+			 HOST_WIDE_INT max_size1,
+			 tree base2, HOST_WIDE_INT offset2, HOST_WIDE_INT size2,
+			 HOST_WIDE_INT max_size2)
+{
+  /* For now, just handle VAR_DECL.  */
+  bool base1_obj_p = VAR_P (base1);
+  bool base2_obj_p = VAR_P (base2);
+
+  /* We need one object.  */
+  if (base1_obj_p == base2_obj_p)
+    return false;
+  tree obj = base1_obj_p ? base1 : base2;
+
+  /* And we need one MEM_REF.  */
+  bool base1_memref_p = TREE_CODE (base1) == MEM_REF;
+  bool base2_memref_p = TREE_CODE (base2) == MEM_REF;
+  if (base1_memref_p == base2_memref_p)
+    return false;
+  tree memref = base1_memref_p ? base1 : base2;
+
+  /* Sizes need to be valid.  */
+  if (max_size1 == -1 || max_size2 == -1
+      || size1 == -1 || size2 == -1)
+    return false;
+
+  /* Max_size needs to match size.  */
+  if (max_size1 != size1
+      || max_size2 != size2)
+    return false;
+
+  /* Sizes need to match.  */
+  if (size1 != size2)
+    return false;
+
+  /* Offsets need to be 0.  */
+  if (offset1 != 0
+      || offset2 != 0)
+    return false;
+
+  /* Check that memref is a store to pointer with singleton points-to info.  */
+  if (!tree_int_cst_equal (TREE_OPERAND (memref, 1), integer_zero_node))
+    return false;
+  tree ptr = TREE_OPERAND (memref, 0);
+  if (TREE_CODE (ptr) != SSA_NAME)
+    return false;
+  struct ptr_info_def *pi = SSA_NAME_PTR_INFO (ptr);
+  unsigned int pt_uid;
+  if (pi == NULL
+      || !pt_solution_singleton_or_null_p (&pi->pt, &pt_uid))
+    return false;
+
+  /* Check that ptr points relative to obj.  */
+  unsigned int obj_uid = (DECL_PT_UID_SET_P (obj)
+			  ? DECL_PT_UID (obj)
+			  : DECL_UID (obj));
+  if (obj_uid != pt_uid)
+    return false;
+
+  /* Check that the object size is the same as the store size.  That ensures
+     that ptr points to the start of obj.  */
+  if (!tree_fits_shwi_p (DECL_SIZE (obj)))
+    return false;
+  HOST_WIDE_INT obj_size = tree_to_shwi (DECL_SIZE (obj));
+  return obj_size == size1;
+}
+
 /* If STMT kills the memory reference REF return true, otherwise
    return false.  */
 
@@ -2393,6 +2465,11 @@ stmt_kills_ref_p (gimple *stmt, ao_ref *ref)
 	 so base == ref->base does not always hold.  */
       if (base != ref->base)
 	{
+	  /* Try using points-to info.  */
+	  if (same_addr_size_stores_p (base, offset, size, max_size, ref->base,
+				       ref->offset, ref->size, ref->max_size))
+	    return true;
+
 	  /* If both base and ref->base are MEM_REFs, only compare the
 	     first operand, and if the second operand isn't equal constant,
 	     try to add the offsets into offset and ref_offset.  */
-- 
cgit v1.1


From b9e8329d65e1779dcb15412a8048c4ad54ec6cb7 Mon Sep 17 00:00:00 2001
From: GCC Administrator <gccadmin@gcc.gnu.org>
Date: Wed, 7 Dec 2016 00:16:19 +0000
Subject: Daily bump.

From-SVN: r243328
---
 gcc/DATESTAMP | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP
index 1d0b8c53..f7c6d5c 100644
--- a/gcc/DATESTAMP
+++ b/gcc/DATESTAMP
@@ -1 +1 @@
-20161206
+20161207
-- 
cgit v1.1


From fd1c9a07d46aeece59c452105e3f2ece665bd7b8 Mon Sep 17 00:00:00 2001
From: David Malcolm <dmalcolm@redhat.com>
Date: Wed, 7 Dec 2016 01:28:13 +0000
Subject: Fix x86_64-ism in selftest (PR bootstrap/78705)

In r243317 I accidentally introduced an assumption in a selftest
in i386.c that Pmode == DImode.  This patch conditionalizes the
overzealous string comparison to avoid breaking the build on
32-bit i386 targets.

gcc/ChangeLog:
	PR bootstrap/78705
	* config/i386/i386.c (selftest::ix86_test_dumping_memory_blockage):
	Conditionalize the string comparison on Pmode == DImode.

From-SVN: r243332
---
 gcc/ChangeLog          |  6 ++++++
 gcc/config/i386/i386.c | 14 ++++++++------
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 797b711..d30345f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-06  David Malcolm  <dmalcolm@redhat.com>
+
+	PR bootstrap/78705
+	* config/i386/i386.c (ix86_test_dumping_memory_blockage):
+	Conditionalize the string comparison on Pmode == DImode.
+
 2016-12-06  Tom de Vries  <tom@codesourcery.com>
 
 	PR tree-optimization/67955
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 2e6be02..1cd1cd8 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -51190,12 +51190,14 @@ ix86_test_dumping_memory_blockage ()
 
   /* Verify that the repeated references to the SCRATCH show use
      reuse IDS.  The first should be prefixed with a reuse ID,
-     and the second should be dumped as a "reuse_rtx" of that ID.  */
-  ASSERT_RTL_DUMP_EQ_WITH_REUSE
-    ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0  A8])\n"
-     "        (unspec:BLK [\n"
-     "                (mem/v:BLK (reuse_rtx 0) [0  A8])\n"
-     "            ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
+     and the second should be dumped as a "reuse_rtx" of that ID.
+     The expected string assumes Pmode == DImode.  */
+  if (Pmode == DImode)
+    ASSERT_RTL_DUMP_EQ_WITH_REUSE
+      ("(cinsn 1 (set (mem/v:BLK (0|scratch:DI) [0  A8])\n"
+       "        (unspec:BLK [\n"
+       "                (mem/v:BLK (reuse_rtx 0) [0  A8])\n"
+       "            ] UNSPEC_MEMORY_BLOCKAGE)))\n", pat, &r);
 }
 
 /* Run all target-specific selftests.  */
-- 
cgit v1.1


From 7df767471109ff30770dfb9aeea46f52d7c85d20 Mon Sep 17 00:00:00 2001
From: "Naveen H.S" <Naveen.Hurugalawadi@cavium.com>
Date: Wed, 7 Dec 2016 03:10:59 +0000
Subject: 2016-12-07  Naveen H.S  <Naveen.Hurugalawadi@cavium.com>

gcc
	* config/aarch64/aarch64.c
	(aarch64_builtin_support_vector_misalignment): New.
	(TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT): Define.
gcc/testsuite
	* gcc.target/aarch64/pr71727.c : New Testcase.

From-SVN: r243333
---
 gcc/ChangeLog                              |  6 +++++
 gcc/config/aarch64/aarch64.c               | 39 ++++++++++++++++++++++++++++++
 gcc/testsuite/ChangeLog                    |  4 +++
 gcc/testsuite/gcc.target/aarch64/pr71727.c | 33 +++++++++++++++++++++++++
 4 files changed, 82 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr71727.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d30345f..c1c148a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-07  Naveen H.S  <Naveen.Hurugalawadi@cavium.com>
+
+	* config/aarch64/aarch64.c
+	(aarch64_builtin_support_vector_misalignment): New.
+	(TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT): Define.
+
 2016-12-06  David Malcolm  <dmalcolm@redhat.com>
 
 	PR bootstrap/78705
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index af3aa0b..dab46b5 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -141,6 +141,10 @@ static bool aarch64_vector_mode_supported_p (machine_mode);
 static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
 						 const unsigned char *sel);
 static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
+static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
+							 const_tree type,
+							 int misalignment,
+							 bool is_packed);
 
 /* Major revision number of the ARM Architecture implemented by the target.  */
 unsigned aarch64_architecture_version;
@@ -11412,6 +11416,37 @@ aarch64_simd_vector_alignment_reachable (const_tree type, bool is_packed)
   return true;
 }
 
+/* Return true if the vector misalignment factor is supported by the
+   target.  */
+static bool
+aarch64_builtin_support_vector_misalignment (machine_mode mode,
+					     const_tree type, int misalignment,
+					     bool is_packed)
+{
+  if (TARGET_SIMD && STRICT_ALIGNMENT)
+    {
+      /* Return if movmisalign pattern is not supported for this mode.  */
+      if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
+        return false;
+
+      if (misalignment == -1)
+	{
+	  /* Misalignment factor is unknown at compile time but we know
+	     it's word aligned.  */
+	  if (aarch64_simd_vector_alignment_reachable (type, is_packed))
+            {
+              int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
+
+              if (element_size != 64)
+                return true;
+            }
+	  return false;
+	}
+    }
+  return default_builtin_support_vector_misalignment (mode, type, misalignment,
+						      is_packed);
+}
+
 /* If VALS is a vector constant that can be loaded into a register
    using DUP, generate instructions to do so and return an RTX to
    assign to the register.  Otherwise return NULL_RTX.  */
@@ -14824,6 +14859,10 @@ aarch64_libgcc_floating_mode_supported_p
 #undef TARGET_VECTOR_MODE_SUPPORTED_P
 #define TARGET_VECTOR_MODE_SUPPORTED_P aarch64_vector_mode_supported_p
 
+#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
+#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
+  aarch64_builtin_support_vector_misalignment
+
 #undef TARGET_ARRAY_MODE_SUPPORTED_P
 #define TARGET_ARRAY_MODE_SUPPORTED_P aarch64_array_mode_supported_p
 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 6090a96..60f239f 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2016-12-07  Naveen H.S  <Naveen.Hurugalawadi@cavium.com>
+
+	* gcc.target/aarch64/pr71727.c : New Testcase.
+
 2016-12-06  Tom de Vries  <tom@codesourcery.com>
 
 	PR tree-optimization/67955
diff --git a/gcc/testsuite/gcc.target/aarch64/pr71727.c b/gcc/testsuite/gcc.target/aarch64/pr71727.c
new file mode 100644
index 0000000..05eef3e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr71727.c
@@ -0,0 +1,33 @@
+/* { dg-do compile } */
+/* { dg-options "-mstrict-align -O3" } */
+
+struct test_struct_s
+{
+  long a;
+  long b;
+  long c;
+  long d;
+  unsigned long e;
+};
+
+
+char _a;
+struct test_struct_s xarray[128];
+
+void
+_start (void)
+{
+  struct test_struct_s *new_entry;
+
+  new_entry = &xarray[0];
+  new_entry->a = 1;
+  new_entry->b = 2;
+  new_entry->c = 3;
+  new_entry->d = 4;
+  new_entry->e = 5;
+
+  return;
+}
+
+/* { dg-final { scan-assembler-times "mov\tx" 5 {target lp64} } } */
+/* { dg-final { scan-assembler-not "add\tx0, x0, :" {target lp64} } } */
-- 
cgit v1.1


From 81a58ffbb7d6dd26bf13a09d293e6082aceda520 Mon Sep 17 00:00:00 2001
From: Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
Date: Wed, 7 Dec 2016 08:53:45 +0000
Subject: Compile gcc.target/i386/pr70322-?.c with -mno-stackrealign

	* gcc.target/i386/pr70322-1.c: Add -mno-stackrealign to dg-options.
	* gcc.target/i386/pr70322-2.c: Likewise.
	* gcc.target/i386/pr70322-3.c: Likewise.
	* gcc.target/i386/pr70322-4.c: Likewise.

From-SVN: r243334
---
 gcc/testsuite/ChangeLog                   | 7 +++++++
 gcc/testsuite/gcc.target/i386/pr70322-1.c | 2 +-
 gcc/testsuite/gcc.target/i386/pr70322-2.c | 2 +-
 gcc/testsuite/gcc.target/i386/pr70322-3.c | 2 +-
 gcc/testsuite/gcc.target/i386/pr70322-4.c | 2 +-
 5 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 60f239f..2b9aad0 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@
+2016-12-07  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>
+
+	* gcc.target/i386/pr70322-1.c: Add -mno-stackrealign to dg-options.
+	* gcc.target/i386/pr70322-2.c: Likewise.
+	* gcc.target/i386/pr70322-3.c: Likewise.
+	* gcc.target/i386/pr70322-4.c: Likewise.
+
 2016-12-07  Naveen H.S  <Naveen.Hurugalawadi@cavium.com>
 
 	* gcc.target/aarch64/pr71727.c : New Testcase.
diff --git a/gcc/testsuite/gcc.target/i386/pr70322-1.c b/gcc/testsuite/gcc.target/i386/pr70322-1.c
index bc10675..0bbd215 100644
--- a/gcc/testsuite/gcc.target/i386/pr70322-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr70322-1.c
@@ -1,6 +1,6 @@
 /* PR target/70322 */
 /* { dg-do compile { target ia32 } } */
-/* { dg-options "-O2 -msse2 -mstv -mbmi" } */
+/* { dg-options "-O2 -msse2 -mstv -mbmi -mno-stackrealign" } */
 /* { dg-final { scan-assembler "pandn" } } */
 
 extern long long z;
diff --git a/gcc/testsuite/gcc.target/i386/pr70322-2.c b/gcc/testsuite/gcc.target/i386/pr70322-2.c
index a683b6d..d2a764e 100644
--- a/gcc/testsuite/gcc.target/i386/pr70322-2.c
+++ b/gcc/testsuite/gcc.target/i386/pr70322-2.c
@@ -1,6 +1,6 @@
 /* PR target/70322 */
 /* { dg-do compile { target ia32 } } */
-/* { dg-options "-O2 -msse2 -mstv -mno-bmi" } */
+/* { dg-options "-O2 -msse2 -mstv -mno-bmi -mno-stackrealign" } */
 /* { dg-final { scan-assembler "pandn" } } */
 
 extern long long z;
diff --git a/gcc/testsuite/gcc.target/i386/pr70322-3.c b/gcc/testsuite/gcc.target/i386/pr70322-3.c
index 89a8da3..466708f 100644
--- a/gcc/testsuite/gcc.target/i386/pr70322-3.c
+++ b/gcc/testsuite/gcc.target/i386/pr70322-3.c
@@ -1,6 +1,6 @@
 /* PR target/70322 */
 /* { dg-do compile { target ia32 } } */
-/* { dg-options "-O2 -msse2 -mstv" } */
+/* { dg-options "-O2 -msse2 -mstv -mno-stackrealign" } */
 /* { dg-final { scan-assembler "pxor" } } */
 /* { dg-final { scan-assembler "por" } } */
 
diff --git a/gcc/testsuite/gcc.target/i386/pr70322-4.c b/gcc/testsuite/gcc.target/i386/pr70322-4.c
index 8a02b9b..32ee73e 100644
--- a/gcc/testsuite/gcc.target/i386/pr70322-4.c
+++ b/gcc/testsuite/gcc.target/i386/pr70322-4.c
@@ -1,6 +1,6 @@
 /* PR target/70322 */
 /* { dg-do compile { target ia32 } } */
-/* { dg-options "-O2 -msse2 -mstv" } */
+/* { dg-options "-O2 -msse2 -mstv -mno-stackrealign" } */
 /* { dg-final { scan-assembler "psub" } } */
 /* { dg-final { scan-assembler "por" } } */
 
-- 
cgit v1.1


From ebf417348808be4608fd44ba3b2863b735b4324e Mon Sep 17 00:00:00 2001
From: Bin Cheng <bin.cheng@arm.com>
Date: Wed, 7 Dec 2016 10:14:58 +0000
Subject: re PR middle-end/78691 (ICE compiling Linux boot code)

	PR tree-optimization/78691
	* match.pd ((convert1 (minmax ((convert2 (x) c)))) -> minmax (x c)):
	Require integral type for the outer expression.
	gcc/testsuite
	PR tree-optimization/78691
	* gcc.target/i386/pr78691-i386.c: New test.
	* gcc.target/powerpc/pr78691-ppc.c: New test.

From-SVN: r243335
---
 gcc/ChangeLog                                  |  6 ++++++
 gcc/match.pd                                   |  3 ++-
 gcc/testsuite/ChangeLog                        |  6 ++++++
 gcc/testsuite/gcc.target/i386/pr78691-i386.c   | 14 ++++++++++++++
 gcc/testsuite/gcc.target/powerpc/pr78691-ppc.c | 18 ++++++++++++++++++
 5 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr78691-i386.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr78691-ppc.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c1c148a..4f76570 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-07  Bin Cheng  <bin.cheng@arm.com>
+
+	PR tree-optimization/78691
+	* match.pd ((convert1 (minmax ((convert2 (x) c)))) -> minmax (x c)):
+	Require integral type for the outer expression.
+
 2016-12-07  Naveen H.S  <Naveen.Hurugalawadi@cavium.com>
 
 	* config/aarch64/aarch64.c
diff --git a/gcc/match.pd b/gcc/match.pd
index 1fe003b..feaa4a1 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1378,7 +1378,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (for minmax (min max)
  (simplify
   (convert (minmax@0 (convert @1) INTEGER_CST@2))
-  (if (types_match (@1, type) && int_fits_type_p (@2, type)
+  (if (INTEGRAL_TYPE_P (type)
+       && types_match (@1, type) && int_fits_type_p (@2, type)
        && TYPE_SIGN (TREE_TYPE (@0)) == TYPE_SIGN (type)
        && TYPE_PRECISION (TREE_TYPE (@0)) > TYPE_PRECISION (type))
    (minmax @1 (convert @2)))))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 2b9aad0..eeeae2e 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-07  Bin Cheng  <bin.cheng@arm.com>
+
+	PR tree-optimization/78691
+	* gcc.target/i386/pr78691-i386.c: New test.
+	* gcc.target/powerpc/pr78691-ppc.c: New test.
+
 2016-12-07  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>
 
 	* gcc.target/i386/pr70322-1.c: Add -mno-stackrealign to dg-options.
diff --git a/gcc/testsuite/gcc.target/i386/pr78691-i386.c b/gcc/testsuite/gcc.target/i386/pr78691-i386.c
new file mode 100644
index 0000000..f4b8855
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr78691-i386.c
@@ -0,0 +1,14 @@
+/* PR tree-optimization/78691 */
+/* { dg-options "-Os -m16" } */
+
+int fn1(char *p1, char *p2) {
+  int a;
+  for (;;)
+    switch (*p2) {
+    case 'c':
+      while (--a > 0)
+        *p1++ = ' ';
+      p1++;
+      a--;
+    }
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/pr78691-ppc.c b/gcc/testsuite/gcc.target/powerpc/pr78691-ppc.c
new file mode 100644
index 0000000..73e3c87
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr78691-ppc.c
@@ -0,0 +1,18 @@
+/* PR tree-optimization/78691 */
+/* { dg-options "-Os -m32" } */
+
+int *b;
+int fn1() {
+  char *c;
+  int a;
+  for (;;)
+    switch (*b) {
+    case 'c':
+      while (--a > 0)
+        *c++ = ' ';
+      c++;
+      if (a)
+        a = sizeof(void *);
+    }
+}
+
-- 
cgit v1.1


From 2c1f5c0a344e2f127decf13eb851794401b54e29 Mon Sep 17 00:00:00 2001
From: Eric Botcazou <ebotcazou@adacore.com>
Date: Wed, 7 Dec 2016 11:25:37 +0000
Subject: decl.c (gnat_to_gnu_entity): Also call finish_character_type on
 Character subtypes.

	* gcc-interface/decl.c (gnat_to_gnu_entity) <E_Enumeration_Subtype>:
	Also call finish_character_type on Character subtypes.
	* gcc-interface/utils.c (finish_character_type): Deal with subtypes.

From-SVN: r243336
---
 gcc/ada/ChangeLog             |  6 ++++++
 gcc/ada/gcc-interface/decl.c  | 10 ++++++++--
 gcc/ada/gcc-interface/utils.c | 28 +++++++++++++++++++++++-----
 3 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog
index 858f8cb..3087a1b 100644
--- a/gcc/ada/ChangeLog
+++ b/gcc/ada/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-07  Eric Botcazou  <ebotcazou@adacore.com>
+
+	* gcc-interface/decl.c (gnat_to_gnu_entity) <E_Enumeration_Subtype>:
+	Also call finish_character_type on Character subtypes.
+	* gcc-interface/utils.c (finish_character_type): Deal with subtypes.
+
 2016-12-05  Mikael Pettersson  <mikpe@it.uu.se>
 
 	PR ada/48835
diff --git a/gcc/ada/gcc-interface/decl.c b/gcc/ada/gcc-interface/decl.c
index 9de85ef..2412a36 100644
--- a/gcc/ada/gcc-interface/decl.c
+++ b/gcc/ada/gcc-interface/decl.c
@@ -1859,8 +1859,14 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition)
       TYPE_BIASED_REPRESENTATION_P (gnu_type)
 	= Has_Biased_Representation (gnat_entity);
 
-      /* Set TYPE_STRING_FLAG for Character and Wide_Character subtypes.  */
-      TYPE_STRING_FLAG (gnu_type) = TYPE_STRING_FLAG (TREE_TYPE (gnu_type));
+      /* Do the same processing for Character subtypes as for types.  */
+      if (TYPE_STRING_FLAG (TREE_TYPE (gnu_type)))
+	{
+	  TYPE_NAME (gnu_type) = gnu_entity_name;
+	  TYPE_STRING_FLAG (gnu_type) = 1;
+	  TYPE_ARTIFICIAL (gnu_type) = artificial_p;
+	  finish_character_type (gnu_type);
+	}
 
       /* Inherit our alias set from what we're a subtype of.  Subtypes
 	 are not different types and a pointer can designate any instance
diff --git a/gcc/ada/gcc-interface/utils.c b/gcc/ada/gcc-interface/utils.c
index cde17fe..c00d1fa 100644
--- a/gcc/ada/gcc-interface/utils.c
+++ b/gcc/ada/gcc-interface/utils.c
@@ -1641,10 +1641,7 @@ record_builtin_type (const char *name, tree type, bool artificial_p)
   character subtypes with RM_Size = Esize = CHAR_TYPE_SIZE into signed
   types.  The idea is to ensure that the bit pattern contained in the
   Esize'd objects is not changed, even though the numerical value will
-  be interpreted differently depending on the signedness.
-
-  For character types, the bounds are implicit and, therefore, need to
-  be adjusted.  Morever, the debug info needs the unsigned version.  */
+  be interpreted differently depending on the signedness.  */
 
 void
 finish_character_type (tree char_type)
@@ -1658,11 +1655,32 @@ finish_character_type (tree char_type)
        ? unsigned_char_type_node
        : copy_type (gnat_unsigned_type_for (char_type)));
 
+  /* Create an unsigned version of the type and set it as debug type.  */
   TYPE_NAME (unsigned_char_type) = TYPE_NAME (char_type);
   TYPE_STRING_FLAG (unsigned_char_type) = TYPE_STRING_FLAG (char_type);
   TYPE_ARTIFICIAL (unsigned_char_type) = TYPE_ARTIFICIAL (char_type);
-
   SET_TYPE_DEBUG_TYPE (char_type, unsigned_char_type);
+
+  /* If this is a subtype, make the debug type a subtype of the debug type
+     of the base type and convert literal bounds to unsigned.  */
+  if (TREE_TYPE (char_type))
+    {
+      tree base_unsigned_char_type = TYPE_DEBUG_TYPE (TREE_TYPE (char_type));
+      tree min_value = TYPE_MIN_VALUE (char_type);
+      tree max_value = TYPE_MAX_VALUE (char_type);
+
+      if (TREE_CODE (min_value) == INTEGER_CST)
+	min_value = fold_convert (base_unsigned_char_type, min_value);
+      if (TREE_CODE (max_value) == INTEGER_CST)
+	max_value = fold_convert (base_unsigned_char_type, max_value);
+
+      TREE_TYPE (unsigned_char_type) = base_unsigned_char_type;
+      SET_TYPE_RM_MIN_VALUE (unsigned_char_type, min_value);
+      SET_TYPE_RM_MAX_VALUE (unsigned_char_type, max_value);
+    }
+
+  /* Adjust the bounds of the original type to unsigned; that's especially
+     important for types since they are implicit in this case.  */
   SET_TYPE_RM_MIN_VALUE (char_type, TYPE_MIN_VALUE (unsigned_char_type));
   SET_TYPE_RM_MAX_VALUE (char_type, TYPE_MAX_VALUE (unsigned_char_type));
 }
-- 
cgit v1.1


From aa35e3ba8ef4cd36991a877063632edef552068b Mon Sep 17 00:00:00 2001
From: Eric Botcazou <ebotcazou@gcc.gnu.org>
Date: Wed, 7 Dec 2016 11:29:28 +0000
Subject: Adjust comment

From-SVN: r243338
---
 gcc/ada/gcc-interface/utils.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/ada/gcc-interface/utils.c b/gcc/ada/gcc-interface/utils.c
index c00d1fa..0ae381f 100644
--- a/gcc/ada/gcc-interface/utils.c
+++ b/gcc/ada/gcc-interface/utils.c
@@ -1662,12 +1662,12 @@ finish_character_type (tree char_type)
   SET_TYPE_DEBUG_TYPE (char_type, unsigned_char_type);
 
   /* If this is a subtype, make the debug type a subtype of the debug type
-     of the base type and convert literal bounds to unsigned.  */
+     of the base type and convert literal RM bounds to unsigned.  */
   if (TREE_TYPE (char_type))
     {
       tree base_unsigned_char_type = TYPE_DEBUG_TYPE (TREE_TYPE (char_type));
-      tree min_value = TYPE_MIN_VALUE (char_type);
-      tree max_value = TYPE_MAX_VALUE (char_type);
+      tree min_value = TYPE_RM_MIN_VALUE (char_type);
+      tree max_value = TYPE_RM_MAX_VALUE (char_type);
 
       if (TREE_CODE (min_value) == INTEGER_CST)
 	min_value = fold_convert (base_unsigned_char_type, min_value);
@@ -1679,7 +1679,7 @@ finish_character_type (tree char_type)
       SET_TYPE_RM_MAX_VALUE (unsigned_char_type, max_value);
     }
 
-  /* Adjust the bounds of the original type to unsigned; that's especially
+  /* Adjust the RM bounds of the original type to unsigned; that's especially
      important for types since they are implicit in this case.  */
   SET_TYPE_RM_MIN_VALUE (char_type, TYPE_MIN_VALUE (unsigned_char_type));
   SET_TYPE_RM_MAX_VALUE (char_type, TYPE_MAX_VALUE (unsigned_char_type));
-- 
cgit v1.1


From f2e04c79ac2c4ffff6bc527afdc1b0311b79e36a Mon Sep 17 00:00:00 2001
From: Eric Botcazou <ebotcazou@adacore.com>
Date: Wed, 7 Dec 2016 11:47:25 +0000
Subject: decl.c (gnat_to_gnu_entity): Translate System.Address into
 ptr_type_node for every foreign convention.

	* gcc-interface/decl.c (gnat_to_gnu_entity) <E_Variable>: Translate
	System.Address into ptr_type_node for every foreign convention.
	(gnat_to_gnu_subprog_type): Likewise for result and parameter types.
	(gnat_to_gnu_param): Do not do it here for GCC builtins.
	(intrin_return_compatible_p): Likewise.

From-SVN: r243340
---
 gcc/ada/ChangeLog            |  8 ++++++++
 gcc/ada/gcc-interface/decl.c | 20 +++++++-------------
 2 files changed, 15 insertions(+), 13 deletions(-)

diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog
index 3087a1b..2380adc 100644
--- a/gcc/ada/ChangeLog
+++ b/gcc/ada/ChangeLog
@@ -1,5 +1,13 @@
 2016-12-07  Eric Botcazou  <ebotcazou@adacore.com>
 
+	* gcc-interface/decl.c (gnat_to_gnu_entity) <E_Variable>: Translate
+	System.Address into ptr_type_node for every foreign convention.
+	(gnat_to_gnu_subprog_type): Likewise for result and parameter types.
+	(gnat_to_gnu_param): Do not do it here for GCC builtins.
+	(intrin_return_compatible_p): Likewise.
+
+2016-12-07  Eric Botcazou  <ebotcazou@adacore.com>
+
 	* gcc-interface/decl.c (gnat_to_gnu_entity) <E_Enumeration_Subtype>:
 	Also call finish_character_type on Character subtypes.
 	* gcc-interface/utils.c (finish_character_type): Deal with subtypes.
diff --git a/gcc/ada/gcc-interface/decl.c b/gcc/ada/gcc-interface/decl.c
index 2412a36..a6e8367 100644
--- a/gcc/ada/gcc-interface/decl.c
+++ b/gcc/ada/gcc-interface/decl.c
@@ -645,7 +645,7 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition)
 	  }
 
 	/* Get the type after elaborating the renamed object.  */
-	if (Convention (gnat_entity) == Convention_C
+	if (Has_Foreign_Convention (gnat_entity)
 	    && Is_Descendant_Of_Address (gnat_type))
 	  gnu_type = ptr_type_node;
 	else
@@ -5404,12 +5404,6 @@ gnat_to_gnu_param (Entity_Id gnat_param, tree gnu_param_type, bool first,
     gnu_param_type
       = TREE_TYPE (TREE_TYPE (TYPE_FIELDS (TREE_TYPE (gnu_param_type))));
 
-  /* For GCC builtins, pass Address integer types as (void *)  */
-  if (Convention (gnat_subprog) == Convention_Intrinsic
-      && Present (Interface_Name (gnat_subprog))
-      && Is_Descendant_Of_Address (gnat_param_type))
-    gnu_param_type = ptr_type_node;
-
   /* Arrays are passed as pointers to element type for foreign conventions.  */
   if (foreign && mech != By_Copy && TREE_CODE (gnu_param_type) == ARRAY_TYPE)
     {
@@ -5784,7 +5778,9 @@ gnat_to_gnu_subprog_type (Entity_Id gnat_subprog, bool definition,
 
   else
     {
-      if (Convention (gnat_subprog) == Convention_C
+      /* For foreign convention subprograms, return System.Address as void *
+	 or equivalent.  Note that this comprises GCC builtins.  */
+      if (Has_Foreign_Convention (gnat_subprog)
 	  && Is_Descendant_Of_Address (gnat_return_type))
 	gnu_return_type = ptr_type_node;
       else
@@ -5949,7 +5945,9 @@ gnat_to_gnu_subprog_type (Entity_Id gnat_subprog, bool definition,
 	{
 	  Entity_Id gnat_param_type = Etype (gnat_param);
 
-	  if (Convention (gnat_subprog) == Convention_C
+	  /* For foreign convention subprograms, pass System.Address as void *
+	     or equivalent.  Note that this comprises GCC builtins.  */
+	  if (Has_Foreign_Convention (gnat_subprog)
 	      && Is_Descendant_Of_Address (gnat_param_type))
 	    gnu_param_type = ptr_type_node;
 	  else
@@ -8910,10 +8908,6 @@ intrin_return_compatible_p (intrin_binding_t * inb)
       && !VOID_TYPE_P (btin_return_type))
     return true;
 
-  /* If return type is Address (integer type), map it to void *.  */
-  if (Is_Descendant_Of_Address (Etype (inb->gnat_entity)))
-    ada_return_type = ptr_type_node;
-
   /* Check return types compatibility otherwise.  Note that this
      handles void/void as well.  */
   if (intrin_types_incompatible_p (btin_return_type, ada_return_type))
-- 
cgit v1.1


From bbe9a71dff7c5684dbdca1c1befc93099b411636 Mon Sep 17 00:00:00 2001
From: Pierre-Marie de Rodat <derodat@adacore.com>
Date: Wed, 7 Dec 2016 12:04:01 +0000
Subject: decl.c (gnat_to_gnu_entity): When they are global...

	* gcc-interface/decl.c (gnat_to_gnu_entity): When they are global,
	consider ___XR GNAT encodings variables for renamings as static so
	they have a location in the debug info.

From-SVN: r243341
---
 gcc/ada/ChangeLog            | 6 ++++++
 gcc/ada/gcc-interface/decl.c | 1 +
 2 files changed, 7 insertions(+)

diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog
index 2380adc..0acae57 100644
--- a/gcc/ada/ChangeLog
+++ b/gcc/ada/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-07  Pierre-Marie de Rodat  <derodat@adacore.com>
+
+	* gcc-interface/decl.c (gnat_to_gnu_entity): When they are global,
+	consider ___XR GNAT encodings variables for renamings as static so
+	they have a location in the debug info.
+
 2016-12-07  Eric Botcazou  <ebotcazou@adacore.com>
 
 	* gcc-interface/decl.c (gnat_to_gnu_entity) <E_Variable>: Translate
diff --git a/gcc/ada/gcc-interface/decl.c b/gcc/ada/gcc-interface/decl.c
index a6e8367..18ec63d 100644
--- a/gcc/ada/gcc-interface/decl.c
+++ b/gcc/ada/gcc-interface/decl.c
@@ -672,6 +672,7 @@ gnat_to_gnu_entity (Entity_Id gnat_entity, tree gnu_expr, bool definition)
 				   VAR_DECL, gnu_entity_name, gnu_type);
 	    SET_DECL_VALUE_EXPR (gnu_decl, value);
 	    DECL_HAS_VALUE_EXPR_P (gnu_decl) = 1;
+	    TREE_STATIC (gnu_decl) = global_bindings_p ();
 	    gnat_pushdecl (gnu_decl, gnat_entity);
 	    break;
 	  }
-- 
cgit v1.1


From b3235e974fee0ce362afafed93a8e5916ae790e1 Mon Sep 17 00:00:00 2001
From: Nathan Sidwell <nathan@acm.org>
Date: Wed, 7 Dec 2016 12:52:39 +0000
Subject: cp-tree.h (enum cp_tree_index): Add CPTI_AUTO_IDENTIFIER &
 CPTI_DECLTYPE_AUTO_IDENTIFIER.

	* cp-tree.h (enum cp_tree_index): Add CPTI_AUTO_IDENTIFIER &
	CPTI_DECLTYPE_AUTO_IDENTIFIER.
	(auto_identifier, decltype_auto_identifier): New.
	* decl.c (initialize_predefined_identifiers): Add 'auto' and
	'decltype(auto)'.
	(grokdeclarator): Use cached identifier.
	* pt.c (make_decltype_auto, make_auto, make_constrained_auto,
	is_auto): Likewise.

From-SVN: r243342
---
 gcc/cp/ChangeLog | 11 +++++++++++
 gcc/cp/cp-tree.h |  5 +++++
 gcc/cp/decl.c    |  4 +++-
 gcc/cp/pt.c      | 10 +++++-----
 4 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog
index f16813d..3964cfd 100644
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,3 +1,14 @@
+2016-12-07  Nathan Sidwell  <nathan@acm.org>
+
+	* cp-tree.h (enum cp_tree_index): Add CPTI_AUTO_IDENTIFIER &
+	CPTI_DECLTYPE_AUTO_IDENTIFIER.
+	(auto_identifier, decltype_auto_identifier): New.
+	*decl.c (initialize_predefined_identifiers): Add 'auto' and
+	'decltype(auto)'.
+	(grokdeclarator): Use cached identifier.
+	* pt.c (make_decltype_auto, make_auto, make_constrained_auto,
+	is_auto): Likewise.
+
 2016-12-02  Jakub Jelinek  <jakub@redhat.com>
 
 	PR c++/78649
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 5674886..f7da76a 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -1117,6 +1117,8 @@ enum cp_tree_index
     CPTI_PFN_IDENTIFIER,
     CPTI_VPTR_IDENTIFIER,
     CPTI_STD_IDENTIFIER,
+    CPTI_AUTO_IDENTIFIER,
+    CPTI_DECLTYPE_AUTO_IDENTIFIER,
 
     CPTI_LANG_NAME_C,
     CPTI_LANG_NAME_CPLUSPLUS,
@@ -1200,6 +1202,9 @@ extern GTY(()) tree cp_global_trees[CPTI_MAX];
 #define vptr_identifier			cp_global_trees[CPTI_VPTR_IDENTIFIER]
 /* The name of the std namespace.  */
 #define std_identifier			cp_global_trees[CPTI_STD_IDENTIFIER]
+/* auto and decltype(auto) identifiers.  */
+#define auto_identifier			cp_global_trees[CPTI_AUTO_IDENTIFIER]
+#define decltype_auto_identifier	cp_global_trees[CPTI_DECLTYPE_AUTO_IDENTIFIER]
 /* The name of a C++17 deduction guide.  */
 #define lang_name_c			cp_global_trees[CPTI_LANG_NAME_C]
 #define lang_name_cplusplus		cp_global_trees[CPTI_LANG_NAME_CPLUSPLUS]
diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index f0850d7..d7dbf94 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -4034,6 +4034,8 @@ initialize_predefined_identifiers (void)
     { "__vtt_parm", &vtt_parm_identifier, 0 },
     { "::", &global_scope_name, 0 },
     { "std", &std_identifier, 0 },
+    { "auto", &auto_identifier, 0 },
+    { "decltype(auto)", &decltype_auto_identifier, 0 },
     { NULL, NULL, 0 }
   };
 
@@ -10600,7 +10602,7 @@ grokdeclarator (const cp_declarator *declarator,
 	  gcc_unreachable ();
 	}
       if (TREE_CODE (type) != TEMPLATE_TYPE_PARM
-	  || TYPE_IDENTIFIER (type) != get_identifier ("auto"))
+	  || TYPE_IDENTIFIER (type) != auto_identifier)
 	{
 	  if (type != error_mark_node)
 	    {
diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index b51e580..3b80ca4 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -24302,13 +24302,13 @@ make_auto_1 (tree name, bool set_canonical)
 tree
 make_decltype_auto (void)
 {
-  return make_auto_1 (get_identifier ("decltype(auto)"), true);
+  return make_auto_1 (decltype_auto_identifier, true);
 }
 
 tree
 make_auto (void)
 {
-  return make_auto_1 (get_identifier ("auto"), true);
+  return make_auto_1 (auto_identifier, true);
 }
 
 /* Return a C++17 deduction placeholder for class template TMPL.  */
@@ -24330,7 +24330,7 @@ make_template_placeholder (tree tmpl)
 tree
 make_constrained_auto (tree con, tree args)
 {
-  tree type = make_auto_1 (get_identifier ("auto"), false);
+  tree type = make_auto_1 (auto_identifier, false);
 
   /* Build the constraint. */
   tree tmpl = DECL_TI_TEMPLATE (con);
@@ -25016,8 +25016,8 @@ bool
 is_auto (const_tree type)
 {
   if (TREE_CODE (type) == TEMPLATE_TYPE_PARM
-      && (TYPE_IDENTIFIER (type) == get_identifier ("auto")
-	  || TYPE_IDENTIFIER (type) == get_identifier ("decltype(auto)")
+      && (TYPE_IDENTIFIER (type) == auto_identifier
+	  || TYPE_IDENTIFIER (type) == decltype_auto_identifier
 	  || CLASS_PLACEHOLDER_TEMPLATE (type)))
     return true;
   else
-- 
cgit v1.1


From 098eae0067631ae7a8d98ac19ff47cb89ec2ba25 Mon Sep 17 00:00:00 2001
From: Nathan Sidwell <nathan@acm.org>
Date: Wed, 7 Dec 2016 13:00:02 +0000
Subject: pt.c (tsubst <{NON,}TYPE_ARGUMENT_PACK>): Simplify control flow and
 avoid re-tsubsting type.

	* pt.c (tsubst <{NON,}TYPE_ARGUMENT_PACK>): Simplify control flow
	and avoid re-tsubsting type.

From-SVN: r243343
---
 gcc/cp/ChangeLog |  3 +++
 gcc/cp/pt.c      | 31 ++++++++++++++++---------------
 2 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog
index 3964cfd..c666f16 100644
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,5 +1,8 @@
 2016-12-07  Nathan Sidwell  <nathan@acm.org>
 
+	* pt.c (tsubst <{NON,}TYPE_ARGUMENT_PACK>: Simplify control flow
+	and avoid re-tsubsting type.
+
 	* cp-tree.h (enum cp_tree_index): Add CPTI_AUTO_IDENTIFIER &
 	CPTI_DECLTYPE_AUTO_IDENTIFIER.
 	(auto_identifier, decltype_auto_identifier): New.
diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 3b80ca4..97d0b48 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -13795,22 +13795,23 @@ tsubst (tree t, tree args, tsubst_flags_t complain, tree in_decl)
     case TYPE_ARGUMENT_PACK:
     case NONTYPE_ARGUMENT_PACK:
       {
-        tree r = TYPE_P (t) ? cxx_make_type (code) : make_node (code);
-        tree packed_out = 
-          tsubst_template_args (ARGUMENT_PACK_ARGS (t), 
-                                args,
-                                complain,
-                                in_decl);
-        SET_ARGUMENT_PACK_ARGS (r, packed_out);
-
-        /* For template nontype argument packs, also substitute into
-           the type.  */
-        if (code == NONTYPE_ARGUMENT_PACK)
-          TREE_TYPE (r) = tsubst (TREE_TYPE (t), args, complain, in_decl);
-
-        return r;
+        tree r;
+
+	if (code == NONTYPE_ARGUMENT_PACK)
+	  {
+	    r = make_node (code);
+	    /* Set the already-substituted type.  */
+	    TREE_TYPE (r) = type;
+	  }
+	else
+	  r = cxx_make_type (code);
+
+	tree pack_args = ARGUMENT_PACK_ARGS (t);
+	pack_args = tsubst_template_args (pack_args, args, complain, in_decl);
+	SET_ARGUMENT_PACK_ARGS (r, pack_args);
+
+	return r;
       }
-      break;
 
     case VOID_CST:
     case INTEGER_CST:
-- 
cgit v1.1


From 716c5aced1f3e998df4a5d78eba4e090a332acee Mon Sep 17 00:00:00 2001
From: Martin Jambor <mjambor@suse.cz>
Date: Wed, 7 Dec 2016 14:09:07 +0100
Subject: Use dump_function_name rather than emit <built-in>

2016-12-07  Martin Jambor  <mjambor@suse.cz>

	PR c++/78589
	* error.c (dump_decl): Use dump_function_name to dump
	!DECL_LANG_SPECIFIC function decls with no or self-referencing
	abstract origin.

From-SVN: r243344
---
 gcc/cp/ChangeLog | 7 +++++++
 gcc/cp/error.c   | 5 +++--
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog
index c666f16..a59d25b 100644
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,3 +1,10 @@
+2016-12-07  Martin Jambor  <mjambor@suse.cz>
+
+	PR c++/78589
+	* error.c (dump_decl): Use dump_function_name to dump
+	!DECL_LANG_SPECIFIC function decls with no or self-referencing
+	abstract origin.
+
 2016-12-07  Nathan Sidwell  <nathan@acm.org>
 
 	* pt.c (tsubst <{NON,}TYPE_ARGUMENT_PACK>: Simplify control flow
diff --git a/gcc/cp/error.c b/gcc/cp/error.c
index 7bf07c3..5f8fb2a 100644
--- a/gcc/cp/error.c
+++ b/gcc/cp/error.c
@@ -1216,10 +1216,11 @@ dump_decl (cxx_pretty_printer *pp, tree t, int flags)
     case FUNCTION_DECL:
       if (! DECL_LANG_SPECIFIC (t))
 	{
-	  if (DECL_ABSTRACT_ORIGIN (t))
+	  if (DECL_ABSTRACT_ORIGIN (t)
+	      && DECL_ABSTRACT_ORIGIN (t) != t)
 	    dump_decl (pp, DECL_ABSTRACT_ORIGIN (t), flags);
 	  else
-	    pp_string (pp, M_("<built-in>"));
+	    dump_function_name (pp, t, flags);
 	}
       else if (DECL_GLOBAL_CTOR_P (t) || DECL_GLOBAL_DTOR_P (t))
 	dump_global_iord (pp, t);
-- 
cgit v1.1


From da88ea0265af5c877429a0c096cfdc0d4b3fecaa Mon Sep 17 00:00:00 2001
From: James Greenhalgh <james.greenhalgh@arm.com>
Date: Wed, 7 Dec 2016 14:01:59 +0000
Subject: [Patch PR78561 PowerPC] Revert to old behaviour for counting constant
 pools

gcc/

	PR rtl-optimization/78561
	* config/rs6000/rs6000.c (rs6000_reg_live_or_pic_offset_p): Use
	constant_pool_empty_p in place of get_pool_size_upper_bound.
	(rs6000_stack_info): Likewise.
	(rs6000_emit_prologue): Likewise.
	(rs6000_elf_declare_function_name): Likewise.
	(rs6000_set_up_by_prologue): Likewise.
	(rs6000_can_eliminate): Likewise.
	* output.h (get_pool_size_upper_bound): Delete.
	(constant_pool_empty_p): New.
	* varasm.c (get_pool_size_upper_bound): Delete.
	(constant_pool_empty_p): New.

From-SVN: r243345
---
 gcc/ChangeLog              | 15 +++++++++++++++
 gcc/config/rs6000/rs6000.c | 14 +++++++-------
 gcc/output.h               |  9 ++++-----
 gcc/varasm.c               | 10 ++++++----
 4 files changed, 32 insertions(+), 16 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 4f76570..fbe5bbe 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,18 @@
+2016-12-07  James Greenhalgh  <james.greenhalgh@arm.com>
+
+	PR rtl-optimization/78561
+	* config/rs6000/rs6000.c (rs6000_reg_live_or_pic_offset_p) Use
+	constant_pool_empty_p in place of get_pool_size_upper_bound.
+	(rs6000_stack_info): Likewise.
+	(rs6000_emit_prologue): Likewise.
+	(rs6000_elf_declare_function_name): Likewise.
+	(rs6000_set_up_by_prologue): Likewise.
+	(rs6000_can_eliminate): Likewise.
+	* output.h (get_pool_size_upper_bound): Delete.
+	(constant_pool_empty_p): New.
+	* varasm.c (get_pool_size_upper_bound): Delete
+	(constant_pool_empty_p): New.
+
 2016-12-07  Bin Cheng  <bin.cheng@arm.com>
 
 	PR tree-optimization/78691
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 59bd3fe..b75a290 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -25460,7 +25460,7 @@ rs6000_reg_live_or_pic_offset_p (int reg)
       if (TARGET_TOC && TARGET_MINIMAL_TOC
 	  && (crtl->calls_eh_return
 	      || df_regs_ever_live_p (reg)
-	      || get_pool_size_upper_bound ()))
+	      || !constant_pool_empty_p ()))
 	return true;
 
       if ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
@@ -26266,7 +26266,7 @@ rs6000_stack_info (void)
 #ifdef TARGET_RELOCATABLE
       || (DEFAULT_ABI == ABI_V4
 	  && (TARGET_RELOCATABLE || flag_pic > 1)
-	  && get_pool_size_upper_bound () != 0)
+	  && !constant_pool_empty_p ())
 #endif
       || rs6000_ra_ever_killed ())
     info->lr_save_p = 1;
@@ -28044,7 +28044,7 @@ rs6000_emit_prologue (void)
 
       /* With -mminimal-toc we may generate an extra use of r2 below.  */
       if (TARGET_TOC && TARGET_MINIMAL_TOC
-	  && get_pool_size_upper_bound () != 0)
+	  && !constant_pool_empty_p ())
 	cfun->machine->r2_setup_needed = true;
     }
 
@@ -28900,7 +28900,7 @@ rs6000_emit_prologue (void)
   /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up.  */
   if (!TARGET_SINGLE_PIC_BASE
       && ((TARGET_TOC && TARGET_MINIMAL_TOC
-	   && get_pool_size_upper_bound () != 0)
+	   && !constant_pool_empty_p ())
 	  || (DEFAULT_ABI == ABI_V4
 	      && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
 	      && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
@@ -34967,7 +34967,7 @@ rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
   if (DEFAULT_ABI == ABI_V4
       && (TARGET_RELOCATABLE || flag_pic > 1)
       && !TARGET_SECURE_PLT
-      && (get_pool_size_upper_bound () != 0 || crtl->profile)
+      && (!constant_pool_empty_p () || crtl->profile)
       && uses_TOC ())
     {
       char buf[256];
@@ -37453,7 +37453,7 @@ rs6000_can_eliminate (const int from, const int to)
 	  ? ! frame_pointer_needed
 	  : from == RS6000_PIC_OFFSET_TABLE_REGNUM
 	    ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC
-		|| get_pool_size_upper_bound () == 0
+		|| constant_pool_empty_p ()
 	    : true);
 }
 
@@ -38990,7 +38990,7 @@ rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
   if (!TARGET_SINGLE_PIC_BASE
       && TARGET_TOC
       && TARGET_MINIMAL_TOC
-      && get_pool_size_upper_bound () != 0)
+      && !constant_pool_empty_p ())
     add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
   if (cfun->machine->split_stack_argp_used)
     add_to_hard_reg_set (&set->set, Pmode, 12);
diff --git a/gcc/output.h b/gcc/output.h
index 7186dc1..6c99381 100644
--- a/gcc/output.h
+++ b/gcc/output.h
@@ -287,11 +287,10 @@ extern void assemble_real (REAL_VALUE_TYPE, machine_mode, unsigned,
 /* Write the address of the entity given by SYMBOL to SEC.  */
 extern void assemble_addr_to_section (rtx, section *);
 
-/* Return the maximum size of the constant pool.  This may be larger
-   than the final size of the constant pool, as entries may be added to
-   the constant pool which become unreferenced, or otherwise not need
-   output by the time we actually emit the pool.  */
-extern int get_pool_size_upper_bound (void);
+/* Return TRUE if and only if the constant pool has no entries.  Note
+   that even entries we might end up choosing not to emit are counted
+   here, so there is the potential for missed optimizations.  */
+extern bool constant_pool_empty_p (void);
 
 extern rtx_insn *peephole (rtx_insn *);
 
diff --git a/gcc/varasm.c b/gcc/varasm.c
index f3cd70a..5b15847 100644
--- a/gcc/varasm.c
+++ b/gcc/varasm.c
@@ -3808,12 +3808,14 @@ get_pool_mode (const_rtx addr)
   return SYMBOL_REF_CONSTANT (addr)->mode;
 }
 
-/* Return the size of the constant pool.  */
+/* Return TRUE if and only if the constant pool has no entries.  Note
+   that even entries we might end up choosing not to emit are counted
+   here, so there is the potential for missed optimizations.  */
 
-int
-get_pool_size_upper_bound (void)
+bool
+constant_pool_empty_p (void)
 {
-  return crtl->varasm.pool->offset;
+  return crtl->varasm.pool->first == NULL;
 }
 
 /* Worker function for output_constant_pool_1.  Emit assembly for X
-- 
cgit v1.1


From 8734dfacbbd841ccf1fb9682a631896013442807 Mon Sep 17 00:00:00 2001
From: Wilco Dijkstra <wdijkstr@arm.com>
Date: Wed, 7 Dec 2016 14:44:45 +0000
Subject: Improve TI mode address offsets - these may either use LDP of 64-bit
 or LDR of 128-bit...

Improve TI mode address offsets - these may either use LDP of 64-bit or
LDR of 128-bit, so we need to use the correct intersection of offsets.
When splitting a large offset into base and offset, use a signed 9-bit
unscaled offset.

Remove the Ump constraint on movti and movtf instructions as this blocks
the reload optimizer from merging address CSEs (is this supposed to work
only on 'm' constraints?).  The result is improved codesize, especially
wrf and gamess in SPEC2006.

    gcc/
	* config/aarch64/aarch64.md (movti_aarch64): Change Ump to m.
	(movtf_aarch64): Likewise.
	* config/aarch64/aarch64.c (aarch64_classify_address):
	Use correct intersection of offsets.
	(aarch64_legitimize_address_displacement): Use 9-bit signed offsets.
	(aarch64_legitimize_address): Use 9-bit signed offsets for TI/TF mode.
	Use 7-bit signed scaled mode for modes > 16 bytes.

From-SVN: r243346
---
 gcc/ChangeLog                 | 10 ++++++++++
 gcc/config/aarch64/aarch64.c  | 28 +++++++++++++++-------------
 gcc/config/aarch64/aarch64.md |  8 ++++----
 3 files changed, 29 insertions(+), 17 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index fbe5bbe..c1b8784 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2016-12-07  Wilco Dijkstra  <wdijkstr@arm.com>
+
+	* config/aarch64/aarch64.md (movti_aarch64): Change Ump to m.
+	(movtf_aarch64): Likewise.
+	* config/aarch64/aarch64.c (aarch64_classify_address):
+	Use correct intersection of offsets.
+	(aarch64_legitimize_address_displacement): Use 9-bit signed offsets.
+	(aarch64_legitimize_address): Use 9-bit signed offsets for TI/TF mode.
+	Use 7-bit signed scaled mode for modes > 16 bytes.
+
 2016-12-07  James Greenhalgh  <james.greenhalgh@arm.com>
 
 	PR rtl-optimization/78561
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index dab46b5..128f32b 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4330,7 +4330,8 @@ aarch64_classify_address (struct aarch64_address_info *info,
 	     instruction memory accesses.  */
 	  if (mode == TImode || mode == TFmode)
 	    return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
-		    && offset_9bit_signed_unscaled_p (mode, offset));
+		    && (offset_9bit_signed_unscaled_p (mode, offset)
+			|| offset_12bit_unsigned_scaled_p (mode, offset)));
 
 	  /* A 7bit offset check because OImode will emit a ldp/stp
 	     instruction (only big endian will get here).
@@ -4534,18 +4535,19 @@ aarch64_legitimate_address_p (machine_mode mode, rtx x,
 /* Split an out-of-range address displacement into a base and offset.
    Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise
    to increase opportunities for sharing the base address of different sizes.
-   For TI/TFmode and unaligned accesses use a 256-byte range.  */
+   For unaligned accesses and TI/TF mode use the signed 9-bit range.  */
 static bool
 aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
 {
-  HOST_WIDE_INT mask = GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3fff;
+  HOST_WIDE_INT offset = INTVAL (*disp);
+  HOST_WIDE_INT base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3ffc);
 
-  if (mode == TImode || mode == TFmode ||
-      (INTVAL (*disp) & (GET_MODE_SIZE (mode) - 1)) != 0)
-    mask = 0xff;
+  if (mode == TImode || mode == TFmode
+      || (offset & (GET_MODE_SIZE (mode) - 1)) != 0)
+    base = (offset + 0x100) & ~0x1ff;
 
-  *off = GEN_INT (INTVAL (*disp) & ~mask);
-  *disp = GEN_INT (INTVAL (*disp) & mask);
+  *off = GEN_INT (base);
+  *disp = GEN_INT (offset - base);
   return true;
 }
 
@@ -5412,12 +5414,10 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x  */, machine_mode mode)
 	  x = gen_rtx_PLUS (Pmode, base, offset_rtx);
 	}
 
-      /* Does it look like we'll need a load/store-pair operation?  */
+      /* Does it look like we'll need a 16-byte load/store-pair operation?  */
       HOST_WIDE_INT base_offset;
-      if (GET_MODE_SIZE (mode) > 16
-	  || mode == TImode)
-	base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
-		       & ~((128 * GET_MODE_SIZE (mode)) - 1));
+      if (GET_MODE_SIZE (mode) > 16)
+	base_offset = (offset + 0x400) & ~0x7f0;
       /* For offsets aren't a multiple of the access size, the limit is
 	 -256...255.  */
       else if (offset & (GET_MODE_SIZE (mode) - 1))
@@ -5431,6 +5431,8 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x  */, machine_mode mode)
       /* Small negative offsets are supported.  */
       else if (IN_RANGE (offset, -256, 0))
 	base_offset = 0;
+      else if (mode == TImode || mode == TFmode)
+	base_offset = (offset + 0x100) & ~0x1ff;
       /* Use 12-bit offset by access size.  */
       else
 	base_offset = offset & (~0xfff * GET_MODE_SIZE (mode));
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 1e6b6f5..811a078 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1120,9 +1120,9 @@
 
 (define_insn "*movti_aarch64"
   [(set (match_operand:TI 0
-	 "nonimmediate_operand"  "=r, *w,r ,*w,r  ,Ump,Ump,*w,m")
+	 "nonimmediate_operand"  "=r, *w,r ,*w,r,m,m,*w,m")
 	(match_operand:TI 1
-	 "aarch64_movti_operand" " rn,r ,*w,*w,Ump,r  ,Z  , m,*w"))]
+	 "aarch64_movti_operand" " rn,r ,*w,*w,m,r,Z, m,*w"))]
   "(register_operand (operands[0], TImode)
     || aarch64_reg_or_zero (operands[1], TImode))"
   "@
@@ -1237,9 +1237,9 @@
 
 (define_insn "*movtf_aarch64"
   [(set (match_operand:TF 0
-	 "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r ,Ump,Ump")
+	 "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r,m ,m")
 	(match_operand:TF 1
-	 "general_operand"      " w,?r, ?r,w ,Y,Y ,m,w,Ump,?r ,Y"))]
+	 "general_operand"      " w,?r, ?r,w ,Y,Y ,m,w,m ,?r,Y"))]
   "TARGET_FLOAT && (register_operand (operands[0], TFmode)
     || aarch64_reg_or_fp_zero (operands[1], TFmode))"
   "@
-- 
cgit v1.1


From 0064f49e76154354fa41c13403cac1da9069093a Mon Sep 17 00:00:00 2001
From: Wilco Dijkstra <wdijkstr@arm.com>
Date: Wed, 7 Dec 2016 14:55:31 +0000
Subject: GCC caches whether a function is a leaf in crtl->is_leaf.

GCC caches whether a function is a leaf in crtl->is_leaf. Using this
in the backend is best as leaf_function_p may not work correctly (eg. while
emitting prolog or epilog code).  There are many reads of crtl->is_leaf
before it is initialized.  Many targets do so in targetm.frame_pointer_required
(eg. arm, aarch64, i386, mips, sparc), which is called before register
allocation by ira_setup_eliminable_regset and sched_init.

Additionally, SHRINK_WRAPPING_ENABLED calls targetm.have_simple_return,
which evaluates the condition of the simple_return instruction.  On ARM
this results in a call to use_simple_return_p which requires crtl->is_leaf
to be set correctly.

To fix this, initialize crtl->is_leaf in ira_setup_eliminable_regset and
early on in ira.  A bootstrap did not find any uninitialized reads of
crtl->is_leaf on Thumb-2.  A follow-up patch will remove incorrect uses
of leaf_function_p from the ARM backend.

    gcc/
	* gcc/ira.c (ira_setup_eliminable_regset): Initialize crtl->is_leaf.
	(ira): Move initialization of crtl->is_leaf earlier.

From-SVN: r243347
---
 gcc/ChangeLog |  5 +++++
 gcc/ira.c     | 16 +++++++++++-----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c1b8784..5556c78 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,10 @@
 2016-12-07  Wilco Dijkstra  <wdijkstr@arm.com>
 
+	* gcc/ira.c (ira_setup_eliminable_regset): Initialize crtl->is_leaf.
+	(ira): Move initialization of crtl->is_leaf earlier.
+
+2016-12-07  Wilco Dijkstra  <wdijkstr@arm.com>
+
 	* config/aarch64/aarch64.md (movti_aarch64): Change Ump to m.
 	(movtf_aarch64): Likewise.
 	* config/aarch64/aarch64.c (aarch64_classify_address):
diff --git a/gcc/ira.c b/gcc/ira.c
index ab32288..4a95e3d 100644
--- a/gcc/ira.c
+++ b/gcc/ira.c
@@ -2266,6 +2266,10 @@ ira_setup_eliminable_regset (void)
   int i;
   static const struct {const int from, to; } eliminables[] = ELIMINABLE_REGS;
 
+  /* Setup is_leaf as frame_pointer_required may use it.  This function
+     is called by sched_init before ira if scheduling is enabled.  */
+  crtl->is_leaf = leaf_function_p ();
+
   /* FIXME: If EXIT_IGNORE_STACK is set, we will not save and restore
      sp for alloca.  So we can't eliminate the frame pointer in that
      case.  At some point, we should improve this by emitting the
@@ -5079,6 +5083,13 @@ ira (FILE *f)
 
   clear_bb_flags ();
 
+  /* Determine if the current function is a leaf before running IRA
+     since this can impact optimizations done by the prologue and
+     epilogue thus changing register elimination offsets.
+     Other target callbacks may use crtl->is_leaf too, including
+     SHRINK_WRAPPING_ENABLED, so initialize as early as possible.  */
+  crtl->is_leaf = leaf_function_p ();
+
   /* Perform target specific PIC register initialization.  */
   targetm.init_pic_reg ();
 
@@ -5164,11 +5175,6 @@ ira (FILE *f)
   if (warn_clobbered)
     generate_setjmp_warnings ();
 
-  /* Determine if the current function is a leaf before running IRA
-     since this can impact optimizations done by the prologue and
-     epilogue thus changing register elimination offsets.  */
-  crtl->is_leaf = leaf_function_p ();
-
   if (resize_reg_info () && flag_ira_loop_pressure)
     ira_set_pseudo_classes (true, ira_dump_file);
 
-- 
cgit v1.1


From 63915a91ab7120c3d604fa0a90ca357875ec22a6 Mon Sep 17 00:00:00 2001
From: Jonathan Wakely <jwakely@redhat.com>
Date: Wed, 7 Dec 2016 15:22:24 +0000
Subject: Improve tests for contents of <new> header

	* testsuite/18_support/headers/new/synopsis.cc: Add C++14 and C++17
	declarations.
	* testsuite/18_support/headers/new/synopsis_cxx98.cc: New test.

From-SVN: r243349
---
 libstdc++-v3/ChangeLog                             |  6 +++
 .../testsuite/18_support/headers/new/synopsis.cc   | 44 ++++++++++++++++++++--
 .../18_support/headers/new/synopsis_cxx98.cc       | 43 +++++++++++++++++++++
 3 files changed, 89 insertions(+), 4 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/18_support/headers/new/synopsis_cxx98.cc

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 138a020..e92e6d8 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-07  Jonathan Wakely  <jwakely@redhat.com>
+
+	* testsuite/18_support/headers/new/synopsis.cc: Add C++14 and C++17
+	declarations.
+	* testsuite/18_support/headers/new/synopsis_cxx98.cc: New test.
+
 2016-12-06  Felipe Magno de Almeida  <felipe@expertisesolutions.com.br>
 
 	* src/c++11/cow-stdexcept.cc: Add special case for 16 bit pointers.
diff --git a/libstdc++-v3/testsuite/18_support/headers/new/synopsis.cc b/libstdc++-v3/testsuite/18_support/headers/new/synopsis.cc
index f193bf2..8013eea 100644
--- a/libstdc++-v3/testsuite/18_support/headers/new/synopsis.cc
+++ b/libstdc++-v3/testsuite/18_support/headers/new/synopsis.cc
@@ -21,18 +21,25 @@
 
 namespace std {
   class bad_alloc;
+  class bad_array_new_length;
   struct nothrow_t;
   extern const nothrow_t nothrow;
   typedef void (*new_handler)();
-  new_handler set_new_handler(new_handler new_p) throw();
   new_handler get_new_handler() noexcept;
+  new_handler set_new_handler(new_handler new_p) noexcept;
+
+#if __cplusplus > 201402L
+  enum class align_val_t : size_t;
+  // static constexpr size_t hardware_destructive_interference_size;
+  // static constexpr size_t hardware_constructive_interference_size;
+#endif
 }
 
-void* operator new(std::size_t size) throw(std::bad_alloc);
-void* operator new(std::size_t size, const std::nothrow_t&) throw();
+void* operator new(std::size_t size);
+void* operator new(std::size_t size, const std::nothrow_t&) noexcept;
 void  operator delete(void* ptr) throw();
 void  operator delete(void* ptr, const std::nothrow_t&) throw();
-void* operator new[](std::size_t size) throw(std::bad_alloc);
+void* operator new[](std::size_t size);
 void* operator new[](std::size_t size, const std::nothrow_t&) throw();
 void  operator delete[](void* ptr) throw();
 void  operator delete[](void* ptr, const std::nothrow_t&) throw();
@@ -41,3 +48,32 @@ void* operator new  (std::size_t size, void* ptr) throw();
 void* operator new[](std::size_t size, void* ptr) throw();
 void  operator delete  (void* ptr, void*) throw();
 void  operator delete[](void* ptr, void*) throw();
+
+#if __cplusplus >= 201402L
+// C++14 sized deallocation functions
+void  operator delete(void* ptr, std::size_t size) noexcept;
+void  operator delete(void* ptr, std::size_t size,
+                      const std::nothrow_t&) noexcept;
+void  operator delete[](void* ptr, std::size_t size) noexcept;
+void  operator delete[](void* ptr, std::size_t size,
+                        const std::nothrow_t&) noexcept;
+#endif
+
+#if __cplusplus > 201402L
+// C++17 (de)allocation functions for types with new-extended alignment
+void* operator new(std::size_t, std::align_val_t);
+void* operator new(std::size_t, std::align_val_t,
+                   const std::nothrow_t&) noexcept;
+void  operator delete(void*, std::align_val_t) noexcept;
+void  operator delete(void*, std::size_t, std::align_val_t) noexcept;
+void  operator delete(void*, std::align_val_t,
+                      const std::nothrow_t&) noexcept;
+
+void* operator new[](std::size_t, std::align_val_t);
+void* operator new[](std::size_t, std::align_val_t,
+                     const std::nothrow_t&) noexcept;
+void  operator delete[](void*, std::align_val_t) noexcept;
+void  operator delete[](void*, std::size_t, std::align_val_t) noexcept;
+void  operator delete[](void*, std::align_val_t,
+                        const std::nothrow_t&) noexcept;
+#endif
diff --git a/libstdc++-v3/testsuite/18_support/headers/new/synopsis_cxx98.cc b/libstdc++-v3/testsuite/18_support/headers/new/synopsis_cxx98.cc
new file mode 100644
index 0000000..2b2abd2
--- /dev/null
+++ b/libstdc++-v3/testsuite/18_support/headers/new/synopsis_cxx98.cc
@@ -0,0 +1,43 @@
+// { dg-options "-std=gnu++98" }
+// { dg-do compile }
+
+// Copyright (C) 2007-2016 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+#include <new>
+
+namespace std {
+  class bad_alloc;
+  struct nothrow_t;
+  extern const nothrow_t nothrow;
+  typedef void (*new_handler)();
+  new_handler set_new_handler(new_handler new_p) throw();
+}
+
+void* operator new(std::size_t size) throw(std::bad_alloc);
+void* operator new(std::size_t size, const std::nothrow_t&) throw();
+void  operator delete(void* ptr) throw();
+void  operator delete(void* ptr, const std::nothrow_t&) throw();
+void* operator new[](std::size_t size) throw(std::bad_alloc);
+void* operator new[](std::size_t size, const std::nothrow_t&) throw();
+void  operator delete[](void* ptr) throw();
+void  operator delete[](void* ptr, const std::nothrow_t&) throw();
+
+void* operator new  (std::size_t size, void* ptr) throw();
+void* operator new[](std::size_t size, void* ptr) throw();
+void  operator delete  (void* ptr, void*) throw();
+void  operator delete[](void* ptr, void*) throw();
-- 
cgit v1.1


From 101ee3cc73dc50f11233b857614b786fcab06c98 Mon Sep 17 00:00:00 2001
From: Jonathan Wakely <jwakely@redhat.com>
Date: Wed, 7 Dec 2016 15:22:37 +0000
Subject: Disable test using std::set_unexpected for C++17

	* testsuite/18_support/bad_exception/59392.cc: Disable for C++17.

From-SVN: r243351
---
 libstdc++-v3/ChangeLog                                   | 2 ++
 libstdc++-v3/testsuite/18_support/bad_exception/59392.cc | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index e92e6d8..c9cf692 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,5 +1,7 @@
 2016-12-07  Jonathan Wakely  <jwakely@redhat.com>
 
+	* testsuite/18_support/bad_exception/59392.cc: Disable for C++17.
+
 	* testsuite/18_support/headers/new/synopsis.cc: Add C++14 and C++17
 	declarations.
 	* testsuite/18_support/headers/new/synopsis_cxx98.cc: New test.
diff --git a/libstdc++-v3/testsuite/18_support/bad_exception/59392.cc b/libstdc++-v3/testsuite/18_support/bad_exception/59392.cc
index 1f11e7a..717f229 100644
--- a/libstdc++-v3/testsuite/18_support/bad_exception/59392.cc
+++ b/libstdc++-v3/testsuite/18_support/bad_exception/59392.cc
@@ -15,6 +15,9 @@
 // with this library; see the file COPYING3.  If not see
 // <http://www.gnu.org/licenses/>.
 
+// { dg-options "-Wno-deprecated" }
+// { dg-do run { target c++14_down } }
+
 #include <exception>
 #include <cstdlib>
 
-- 
cgit v1.1


From ba454dfbbec81fafed8f14b2fe59d895bd0255ac Mon Sep 17 00:00:00 2001
From: Jonathan Wakely <jwakely@redhat.com>
Date: Wed, 7 Dec 2016 15:22:44 +0000
Subject: Use _GLIBCXX_THROW macro in bitmap_allocator

	* include/ext/bitmap_allocator.h (bitmap_allocator::_S_refill_pool)
	(bitmap_allocator::_M_allocate_single_object)
	(bitmap_allocator::_M_get): Use _GLIBCXX_THROW macro.

From-SVN: r243352
---
 libstdc++-v3/ChangeLog                      | 4 ++++
 libstdc++-v3/include/ext/bitmap_allocator.h | 6 +++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index c9cf692..53b4511 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,5 +1,9 @@
 2016-12-07  Jonathan Wakely  <jwakely@redhat.com>
 
+	* include/ext/bitmap_allocator.h (bitmap_allocator::_S_refill_pool)
+	(bitmap_allocator::_M_allocate_single_object)
+	(bitmap_allocator::_M_get): Use _GLIBCXX_THROW macro.
+
 	* testsuite/18_support/bad_exception/59392.cc: Disable for C++17.
 
 	* testsuite/18_support/headers/new/synopsis.cc: Add C++14 and C++17
diff --git a/libstdc++-v3/include/ext/bitmap_allocator.h b/libstdc++-v3/include/ext/bitmap_allocator.h
index 836abc8..80ea64f 100644
--- a/libstdc++-v3/include/ext/bitmap_allocator.h
+++ b/libstdc++-v3/include/ext/bitmap_allocator.h
@@ -648,7 +648,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
      *  equal to that requested.
      */
     size_t*
-    _M_get(size_t __sz) throw(std::bad_alloc);
+    _M_get(size_t __sz) _GLIBCXX_THROW(std::bad_alloc);
 
     /** @brief  This function just clears the internal Free List, and
      *  gives back all the memory to the OS.
@@ -766,7 +766,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        *  the newly acquired block. Having a tight bound.
        */
       void 
-      _S_refill_pool() throw(std::bad_alloc)
+      _S_refill_pool() _GLIBCXX_THROW(std::bad_alloc)
       {
 #if defined _GLIBCXX_DEBUG
 	_S_check_for_free_blocks();
@@ -824,7 +824,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
        *  Amortized Constant time.
        */
       pointer 
-      _M_allocate_single_object() throw(std::bad_alloc)
+      _M_allocate_single_object() _GLIBCXX_THROW(std::bad_alloc)
       {
 #if defined __GTHREADS
 	__scoped_lock __bit_lock(_S_mut);
-- 
cgit v1.1


From 1f153a1d2a97feaca914787f3d08b7d952b4bfe0 Mon Sep 17 00:00:00 2001
From: Jonathan Wakely <jwakely@redhat.com>
Date: Wed, 7 Dec 2016 15:22:51 +0000
Subject: Replace dynamic exception specifications in testsuite

	* testsuite/util/testsuite_hooks.h (THROW): Define.
	* testsuite/util/replacement_memory_operators.h: Include
	testsuite_hooks.h and use THROW macro.
	* testsuite/util/testsuite_tr1.h: Likewise.
	* testsuite/20_util/allocator/1.cc: Use THROW macro.
	* testsuite/22_locale/locale/cons/12352.cc: Likewise.
	* testsuite/23_containers/vector/zero_sized_allocations.cc: Likewise.
	* testsuite/30_threads/lock_guard/cons/1.cc: Replace dynamic exception
	specification with noexcept-specifier.
	* testsuite/ext/pool_allocator/allocate_chunk.cc: Include
	testsuite_hooks.h and use THROW macro.
	* testsuite/ext/profile/replace_new.cc: Likewise.

From-SVN: r243353
---
 libstdc++-v3/ChangeLog                                    | 13 +++++++++++++
 libstdc++-v3/testsuite/20_util/allocator/1.cc             |  2 +-
 libstdc++-v3/testsuite/22_locale/locale/cons/12352.cc     |  4 ++--
 .../23_containers/vector/zero_sized_allocations.cc        |  2 +-
 libstdc++-v3/testsuite/30_threads/lock_guard/cons/1.cc    |  2 +-
 .../testsuite/ext/pool_allocator/allocate_chunk.cc        |  3 ++-
 libstdc++-v3/testsuite/ext/profile/replace_new.cc         |  3 ++-
 .../testsuite/util/replacement_memory_operators.h         |  5 +++--
 libstdc++-v3/testsuite/util/testsuite_hooks.h             |  6 ++++++
 libstdc++-v3/testsuite/util/testsuite_tr1.h               | 15 ++++++++-------
 10 files changed, 39 insertions(+), 16 deletions(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 53b4511..c102e60 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,5 +1,18 @@
 2016-12-07  Jonathan Wakely  <jwakely@redhat.com>
 
+	* testsuite/util/testsuite_hooks.h (THROW): Define.
+	* testsuite/util/replacement_memory_operators.h: Include
+	testsuite_hooks.h and use THROW macro.
+	* testsuite/util/testsuite_tr1.h: Likewise.
+	* testsuite/20_util/allocator/1.cc: Use THROW macro.
+	* testsuite/22_locale/locale/cons/12352.cc: Likewise.
+	* testsuite/23_containers/vector/zero_sized_allocations.cc: Likewise.
+	* testsuite/30_threads/lock_guard/cons/1.cc: Replace dynamic exception
+	specification with noexcept-specifier.
+	* testsuite/ext/pool_allocator/allocate_chunk.cc: Include
+	testsuite_hooks.h and use THROW macro.
+	* testsuite/ext/profile/replace_new.cc: Likewise.
+
 	* include/ext/bitmap_allocator.h (bitmap_allocator::_S_refill_pool)
 	(bitmap_allocator::_M_allocate_single_object)
 	(bitmap_allocator::_M_get): Use _GLIBCXX_THROW macro.
diff --git a/libstdc++-v3/testsuite/20_util/allocator/1.cc b/libstdc++-v3/testsuite/20_util/allocator/1.cc
index 6905c86..0c06839 100644
--- a/libstdc++-v3/testsuite/20_util/allocator/1.cc
+++ b/libstdc++-v3/testsuite/20_util/allocator/1.cc
@@ -30,7 +30,7 @@ bool check_new = false;
 bool check_delete = false;
 
 void* 
-operator new(std::size_t n) throw(std::bad_alloc)
+operator new(std::size_t n) THROW(std::bad_alloc)
 {
   check_new = true;
   return std::malloc(n);
diff --git a/libstdc++-v3/testsuite/22_locale/locale/cons/12352.cc b/libstdc++-v3/testsuite/22_locale/locale/cons/12352.cc
index 8faf714..d514b2a 100644
--- a/libstdc++-v3/testsuite/22_locale/locale/cons/12352.cc
+++ b/libstdc++-v3/testsuite/22_locale/locale/cons/12352.cc
@@ -45,7 +45,7 @@ void deallocate(void* p)
     std::free(p);
 }
 
-void* operator new(std::size_t n) throw (std::bad_alloc)
+void* operator new(std::size_t n) THROW (std::bad_alloc)
 {
   void* ret = allocate(n);
   if (!ret)
@@ -53,7 +53,7 @@ void* operator new(std::size_t n) throw (std::bad_alloc)
   return ret;
 }
 
-void* operator new[](std::size_t n) throw (std::bad_alloc)
+void* operator new[](std::size_t n) THROW (std::bad_alloc)
 {
   void* ret = allocate(n);
   if (!ret)
diff --git a/libstdc++-v3/testsuite/23_containers/vector/zero_sized_allocations.cc b/libstdc++-v3/testsuite/23_containers/vector/zero_sized_allocations.cc
index 272caf3..524241b 100644
--- a/libstdc++-v3/testsuite/23_containers/vector/zero_sized_allocations.cc
+++ b/libstdc++-v3/testsuite/23_containers/vector/zero_sized_allocations.cc
@@ -22,7 +22,7 @@
 
 unsigned int zero_sized_news = 0;
 
-void *operator new(std::size_t size) throw (std::bad_alloc)
+void *operator new(std::size_t size) THROW (std::bad_alloc)
 {
   /* malloc(0) is unpredictable; avoid it.  */
   if (size == 0)
diff --git a/libstdc++-v3/testsuite/30_threads/lock_guard/cons/1.cc b/libstdc++-v3/testsuite/30_threads/lock_guard/cons/1.cc
index 1296760..414b678 100644
--- a/libstdc++-v3/testsuite/30_threads/lock_guard/cons/1.cc
+++ b/libstdc++-v3/testsuite/30_threads/lock_guard/cons/1.cc
@@ -26,7 +26,7 @@ struct Mutex
 {
   Mutex() : locked(false) { }
 
-  ~Mutex() throw(int)
+  ~Mutex() noexcept(false)
   {
     if (locked)
       throw 0;
diff --git a/libstdc++-v3/testsuite/ext/pool_allocator/allocate_chunk.cc b/libstdc++-v3/testsuite/ext/pool_allocator/allocate_chunk.cc
index c751739..7838039 100644
--- a/libstdc++-v3/testsuite/ext/pool_allocator/allocate_chunk.cc
+++ b/libstdc++-v3/testsuite/ext/pool_allocator/allocate_chunk.cc
@@ -20,6 +20,7 @@
 // 20.4.1.1 allocator members
 
 #include <ext/pool_allocator.h>
+#include <testsuite_hooks.h>
 
 struct small
 {
@@ -32,7 +33,7 @@ struct big
 };
 
 void*
-operator new(size_t n) throw(std::bad_alloc)
+operator new(size_t n) THROW(std::bad_alloc)
 {
   static bool first = true;
   if (!first)
diff --git a/libstdc++-v3/testsuite/ext/profile/replace_new.cc b/libstdc++-v3/testsuite/ext/profile/replace_new.cc
index be5ec09..9c94594 100644
--- a/libstdc++-v3/testsuite/ext/profile/replace_new.cc
+++ b/libstdc++-v3/testsuite/ext/profile/replace_new.cc
@@ -20,10 +20,11 @@
 // { dg-require-profile-mode "" }
 
 #include <vector>
+#include <testsuite_hooks.h>
 
 using std::vector;
 
-void* operator new(std::size_t size) throw(std::bad_alloc)
+void* operator new(std::size_t size) THROW(std::bad_alloc)
 {
   void* p = std::malloc(size);
   if (!p)
diff --git a/libstdc++-v3/testsuite/util/replacement_memory_operators.h b/libstdc++-v3/testsuite/util/replacement_memory_operators.h
index d063edf..5ea6753 100644
--- a/libstdc++-v3/testsuite/util/replacement_memory_operators.h
+++ b/libstdc++-v3/testsuite/util/replacement_memory_operators.h
@@ -20,6 +20,7 @@
 #include <stdexcept>
 #include <cstdlib>
 #include <cstdio>
+#include <testsuite_hooks.h>
 
 namespace __gnu_test
 {
@@ -32,7 +33,7 @@ namespace __gnu_test
 
     counter() : _M_count(0), _M_throw(true) { }
 
-    ~counter() throw (counter_error)
+    ~counter() THROW (counter_error)
     {
       if (_M_throw && _M_count != 0)
 	throw counter_error();
@@ -86,7 +87,7 @@ namespace __gnu_test
     }
 } // namespace __gnu_test
 
-void* operator new(std::size_t size) throw(std::bad_alloc)
+void* operator new(std::size_t size) THROW(std::bad_alloc)
 {
   std::printf("operator new is called \n");
   void* p = std::malloc(size);
diff --git a/libstdc++-v3/testsuite/util/testsuite_hooks.h b/libstdc++-v3/testsuite/util/testsuite_hooks.h
index e4c4866..9974faa 100644
--- a/libstdc++-v3/testsuite/util/testsuite_hooks.h
+++ b/libstdc++-v3/testsuite/util/testsuite_hooks.h
@@ -75,6 +75,12 @@
          #langTERR ".ISO8859-" #part "@euro" : #langTERR ".ISO8859-" #part)
 #endif
 
+#if __cplusplus < 201103L
+# define THROW(X) throw(X)
+#else
+# define THROW(X) noexcept(false)
+#endif
+
 namespace __gnu_test
 {
   // All macros are defined in GLIBCXX_CONFIGURE_TESTSUITE and imported
diff --git a/libstdc++-v3/testsuite/util/testsuite_tr1.h b/libstdc++-v3/testsuite/util/testsuite_tr1.h
index 9f2c632..c6a4986 100644
--- a/libstdc++-v3/testsuite/util/testsuite_tr1.h
+++ b/libstdc++-v3/testsuite/util/testsuite_tr1.h
@@ -23,6 +23,7 @@
 #define _GLIBCXX_TESTSUITE_TR1_H
 
 #include <ext/type_traits.h>
+#include <testsuite_hooks.h>
 
 namespace __gnu_test
 {
@@ -146,25 +147,25 @@ namespace __gnu_test
 
   struct ThrowExplicitClass
   {
-    ThrowExplicitClass(double&) throw(int);
-    explicit ThrowExplicitClass(int&) throw(int);
-    ThrowExplicitClass(double&, int&, double&) throw(int);
+    ThrowExplicitClass(double&) THROW(int);
+    explicit ThrowExplicitClass(int&) THROW(int);
+    ThrowExplicitClass(double&, int&, double&) THROW(int);
   };
 
   struct ThrowDefaultClass
   {
-    ThrowDefaultClass() throw(int);
+    ThrowDefaultClass() THROW(int);
   };
 
   struct ThrowCopyConsClass
   {
-    ThrowCopyConsClass(const ThrowCopyConsClass&) throw(int);
+    ThrowCopyConsClass(const ThrowCopyConsClass&) THROW(int);
   };
 
 #if __cplusplus >= 201103L
   struct ThrowMoveConsClass
   {
-    ThrowMoveConsClass(ThrowMoveConsClass&&) throw(int);
+    ThrowMoveConsClass(ThrowMoveConsClass&&) THROW(int);
   };
 
   struct NoexceptExplicitClass
@@ -558,7 +559,7 @@ namespace __gnu_test
 
     struct TD2
     {
-      ~TD2() throw(int);
+      ~TD2() THROW(int);
     };
 
     struct Aggr
-- 
cgit v1.1


From d71f5aa799d0cce6863147512dd14db4b1763ad2 Mon Sep 17 00:00:00 2001
From: Jonathan Wakely <jwakely@redhat.com>
Date: Wed, 7 Dec 2016 15:22:57 +0000
Subject: Replace use of C++14 std::exchange in C++11 testcase

	* testsuite/28_regex/traits/char/user_defined.cc: Replace uses of
	C++14 std::exchange function.

From-SVN: r243354
---
 libstdc++-v3/ChangeLog                                      | 3 +++
 libstdc++-v3/testsuite/28_regex/traits/char/user_defined.cc | 6 ++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index c102e60..6012328 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,5 +1,8 @@
 2016-12-07  Jonathan Wakely  <jwakely@redhat.com>
 
+	* testsuite/28_regex/traits/char/user_defined.cc: Replace uses of
+	C++14 std::exchange function.
+
 	* testsuite/util/testsuite_hooks.h (THROW): Define.
 	* testsuite/util/replacement_memory_operators.h: Include
 	testsuite_hooks.h and use THROW macro.
diff --git a/libstdc++-v3/testsuite/28_regex/traits/char/user_defined.cc b/libstdc++-v3/testsuite/28_regex/traits/char/user_defined.cc
index 5888ce1..d4f4abc 100644
--- a/libstdc++-v3/testsuite/28_regex/traits/char/user_defined.cc
+++ b/libstdc++-v3/testsuite/28_regex/traits/char/user_defined.cc
@@ -72,13 +72,15 @@ test01()
     VERIFY(!called_transform);
     basic_regex<char, MyRegexTraits<char>> re("[a]", regex::collate);
     VERIFY(regex_match("a", re));
-    VERIFY(exchange(called_transform, false));
+    VERIFY(called_transform);
+    called_transform = false;
   }
   {
     VERIFY(!called_nocase);
     basic_regex<char, MyRegexTraits<char>> re("[a]", regex::icase);
     VERIFY(regex_match("A", re));
-    VERIFY(exchange(called_nocase, false));
+    VERIFY(called_nocase);
+    called_nocase = false;
   }
   {
     basic_regex<char, MyRegexTraits<char>> re("[T-f]", regex::icase);
-- 
cgit v1.1


From 435f4342880e048a63f7c139d5b2b3b7e7bc6b89 Mon Sep 17 00:00:00 2001
From: Jonathan Wakely <jwakely@redhat.com>
Date: Wed, 7 Dec 2016 15:23:01 +0000
Subject: Replace uses of C++14 remove_cv_t alias in C++11 header

	* include/experimental/bits/fs_path.h (path::_S_convert): Replace
	uses of C++14 std::remove_cv_t alias template.

From-SVN: r243355
---
 libstdc++-v3/ChangeLog                           | 3 +++
 libstdc++-v3/include/experimental/bits/fs_path.h | 5 +++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 6012328..28c5d9d 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,5 +1,8 @@
 2016-12-07  Jonathan Wakely  <jwakely@redhat.com>
 
+	* include/experimental/bits/fs_path.h (path::_S_convert): Replace
+	uses of C++14 std::remove_cv_t alias template.
+
 	* testsuite/28_regex/traits/char/user_defined.cc: Replace uses of
 	C++14 std::exchange function.
 
diff --git a/libstdc++-v3/include/experimental/bits/fs_path.h b/libstdc++-v3/include/experimental/bits/fs_path.h
index 70a5445..a69fb9d 100644
--- a/libstdc++-v3/include/experimental/bits/fs_path.h
+++ b/libstdc++-v3/include/experimental/bits/fs_path.h
@@ -418,7 +418,8 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       _S_convert(_Iter __first, _Iter __last)
       {
 	using __value_type = typename std::iterator_traits<_Iter>::value_type;
-	return _Cvt<remove_cv_t<__value_type>>::_S_convert(__first, __last);
+	return _Cvt<typename remove_cv<__value_type>::type>::
+	  _S_convert(__first, __last);
       }
 
     template<typename _InputIterator>
@@ -426,7 +427,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CXX11
       _S_convert(_InputIterator __src, __null_terminated)
       {
 	using _Tp = typename std::iterator_traits<_InputIterator>::value_type;
-	std::basic_string<remove_cv_t<_Tp>> __tmp;
+	std::basic_string<typename remove_cv<_Tp>::type> __tmp;
 	for (; *__src != _Tp{}; ++__src)
 	  __tmp.push_back(*__src);
 	return _S_convert(__tmp.c_str(), __tmp.c_str() + __tmp.size());
-- 
cgit v1.1


From 4dfad1fb0d38174a3c51a4762ab690a11f966212 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <uros@gcc.gnu.org>
Date: Wed, 7 Dec 2016 16:56:30 +0100
Subject: pr77761.c: Require int128 effective target.

	* gcc.target/i386/pr77761.c: Require int128 effective target.
	(avx512f_test): Delete.
	(do_main): Rename to avx512f_test.

From-SVN: r243367
---
 gcc/ChangeLog                           |  2 +-
 gcc/testsuite/ChangeLog                 |  8 +++++++-
 gcc/testsuite/gcc.target/i386/pr77761.c | 12 +++---------
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5556c78..966af5c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -111,7 +111,7 @@
 
 2016-12-06  Vladimir Makarov  <vmakarov@redhat.com>
 
-	target/77761
+	PR target/77761
 	* lra-lives.c (process_bb_lives): Update biggest mode for
 	implicitly used hard reg.
 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index eeeae2e..1d61958 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-07  Uros Bizjak  <ubizjak@gmail.com>
+
+	* gcc.target/i386/pr77761.c: Require int128 effective target.
+	(avx512f_test): Delete.
+	(do_main): Rename to avx512f_test.
+
 2016-12-07  Bin Cheng  <bin.cheng@arm.com>
 
 	PR tree-optimization/78691
@@ -27,7 +33,7 @@
 
 2016-12-06  Vladimir Makarov  <vmakarov@redhat.com>
 
-	target/77761
+	PR target/77761
 	* testsuite/gcc.target/i386/pr77761.c: New.
 
 2016-12-06  Jerry DeLisle  <jvdelisle@gcc.gnu.org>
diff --git a/gcc/testsuite/gcc.target/i386/pr77761.c b/gcc/testsuite/gcc.target/i386/pr77761.c
index a39b3af..213a93f 100644
--- a/gcc/testsuite/gcc.target/i386/pr77761.c
+++ b/gcc/testsuite/gcc.target/i386/pr77761.c
@@ -1,5 +1,6 @@
 /* { dg-do run } */
 /* { dg-options "-O2 -fno-guess-branch-probability -fschedule-insns -fno-tree-ter -mavx512f --param=max-pending-list-length=512" } */
+/* { dg-require-effective-target int128 } */
 /* { dg-require-effective-target avx512f } */
 
 #include "avx512f-check.h"
@@ -29,8 +30,8 @@ foo(u8 x1, u16 x2, u32 x3, u64 x4, v64u8 x5, v64u16 x6, v64u32 x7, v64u64 x8, v6
     (v64u128) x8 + x9;
 }
 
-int
-do_main ()
+static void
+avx512f_test (void)
 {
   v64u128 x = foo(1, 0, 0, 0, (v64u8){}, (v64u16){}, (v64u32){}, (v64u64){}, (v64u128){});
 
@@ -45,11 +46,4 @@ do_main ()
     __builtin_abort();
   if (x[3] != 1)
     __builtin_abort();
-  return 0;
-}
-
-static void
-avx512f_test (void)
-{
-  do_main ();
 }
-- 
cgit v1.1


From 27e7087f51cef38da59c87e210af64f240a3ff23 Mon Sep 17 00:00:00 2001
From: Carl Love <cel@us.ibm.com>
Date: Wed, 7 Dec 2016 16:21:16 +0000
Subject: builtins-3.c: Move built-in tests for P8 and P9 to their own test
 file.

2016-12-07  Carl Love  <cel@us.ibm.com>

	* gcc.target/powerpc/builtins-3.c: Move built-in tests for P8 and
	P9 to their own test file.  This allows precise constraints on the
	effective target and compile options.
	* gcc.target/powerpc/builtins-3-p8.c: New file for the vector
	compare P8 built-in tests.
	* gcc.target/powerpc/builtins-3-p9.c: New file for the vector
	compare P9 built-in tests.

From-SVN: r243370
---
 gcc/testsuite/ChangeLog                          | 10 +++++
 gcc/testsuite/gcc.target/powerpc/builtins-3-p8.c | 17 ++++++++
 gcc/testsuite/gcc.target/powerpc/builtins-3-p9.c | 42 +++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/builtins-3.c    | 51 ++++--------------------
 4 files changed, 77 insertions(+), 43 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-3-p8.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/builtins-3-p9.c

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 1d61958..6153fe7 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,13 @@
+2016-12-07  Carl Love  <cel@us.ibm.com>
+
+	* gcc.target/powerpc/builtins-3.c: Move built-in tests for P8 and
+	P9 to their own test file.  This allows precise constraints on the
+	effective target and compile options.
+	* gcc.target/powerpc/builtins-3-p8.c: New file for the vector
+	compare P8 built-in tests.
+	* gcc.target/powerpc/builtins-3-p9.c: New file for the vector
+	compare P9 built-in tests.
+
 2016-12-07  Uros Bizjak  <ubizjak@gmail.com>
 
 	* gcc.target/i386/pr77761.c: Require int128 effective target.
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-3-p8.c b/gcc/testsuite/gcc.target/powerpc/builtins-3-p8.c
new file mode 100644
index 0000000..e52795c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-3-p8.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mcpu=power8" } */
+
+#include <altivec.h>
+
+vector bool long long
+test_eq_long_long (vector bool long long x, vector bool long long y)
+{
+	return vec_cmpeq (x, y);
+}
+
+/* Expected test results:
+
+     test_eq_long_long          1 vcmpequd inst */
+
+/* { dg-final { scan-assembler-times "vcmpequd" 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-3-p9.c b/gcc/testsuite/gcc.target/powerpc/builtins-3-p9.c
new file mode 100644
index 0000000..d846e29
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-3-p9.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mcpu=power9" } */
+
+#include <altivec.h>
+
+vector bool char
+test_ne_char (vector bool char x, vector bool char y)
+{
+	return vec_cmpne (x, y);
+}
+
+vector bool short
+test_ne_short (vector bool short x, vector bool short y)
+{
+	return vec_cmpne (x, y);
+}
+
+vector bool int
+test_ne_int (vector bool int x, vector bool int y)
+{
+	return vec_cmpne (x, y);
+}
+
+vector bool long
+test_ne_long (vector bool long x, vector bool long y)
+{
+	return vec_cmpne (x, y);
+}
+
+/* Expected test results:
+
+     test_ne_char              1 vcmpneb
+     test_ne_short             1 vcmpneh
+     test_ne_int               1 vcmpnew
+     test_ne_long              1 vcmpequd, 1 xxlnor inst */
+
+/* { dg-final { scan-assembler-times "vcmpneb"  1 } } */
+/* { dg-final { scan-assembler-times "vcmpneh"  1 } } */
+/* { dg-final { scan-assembler-times "vcmpnew"  1 } } */
+/* { dg-final { scan-assembler-times "vcmpequd" 1 } } */
+/* { dg-final { scan-assembler-times "xxlnor"   1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-3.c b/gcc/testsuite/gcc.target/powerpc/builtins-3.c
index 1a09654..1d243ce 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtins-3.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
 /* { dg-options "-maltivec -mvsx" } */
 
 #include <altivec.h>
@@ -22,48 +22,13 @@ test_eq_int (vector bool int x, vector bool int y)
 	return vec_cmpeq (x, y);
 }
 
-vector bool long
-test_eq_long (vector bool long x, vector bool long y)
-{
-	return vec_cmpeq (x, y);
-}
-
-vector bool char
-test_ne_char (vector bool char x, vector bool char y)
-{
-	return vec_cmpne (x, y);
-}
-
-vector bool short
-test_ne_short (vector bool short x, vector bool short y)
-{
-	return vec_cmpne (x, y);
-}
-
-vector bool int
-test_ne_int (vector bool int x, vector bool int y)
-{
-	return vec_cmpne (x, y);
-}
-
-vector bool long
-test_ne_long (vector bool long x, vector bool long y)
-{
-	return vec_cmpne (x, y);
-}
 
-/* Note: vec_cmpne is implemented as vcmpeq and then NOT'ed
-   using the xxlnor instruction.
+/* Expected test results:
 
-   Expected test results:
-   test_eq_char              1 vcmpeq inst
-   test_eq_short             1 vcmpeq inst
-   test_eq_int               1 vcmpeq inst
-   test_eq_long              1 vcmpeq inst
-   test_ne_char              1 vcmpeq, 1 xxlnor inst
-   test_ne_short             1 vcmpeq, 1 xxlnor inst
-   test_ne_int               1 vcmpeq, 1 xxlnor inst
-   test_ne_long              1 vcmpeq, 1 xxlnor inst */
+     test_eq_char              1 vcmpequb inst
+     test_eq_short             1 vcmpequh inst
+     test_eq_int               1 vcmpequw inst */
 
-/* { dg-final { scan-assembler-times "vcmpeq" 8 } } */
-/* { dg-final { scan-assembler-times "xxlnor" 4 } } */
+/* { dg-final { scan-assembler-times "vcmpequb" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpequh" 1 } } */
+/* { dg-final { scan-assembler-times "vcmpequw" 1 } } */
-- 
cgit v1.1


From 8194c537463de4a5e6bd368c1c5fab2fafc40bdf Mon Sep 17 00:00:00 2001
From: David Malcolm <dmalcolm@redhat.com>
Date: Wed, 7 Dec 2016 16:41:05 +0000
Subject: Introduce rtl_data::init_stack_alignment

Move this part of "expand"'s initialization of crtl into its own
method so that it can be used by the RTL frontend when postprocessing
RTL dumps.

gcc/ChangeLog:
	* cfgexpand.c (pass_expand::execute): Move stack initializations
	to rtl_data::init_stack_alignment and call it.
	* emit-rtl.c (rtl_data::init_stack_alignment): New method.
	* emit-rtl.h (rtl_data::init_stack_alignment): New method.

From-SVN: r243371
---
 gcc/ChangeLog   |  7 +++++++
 gcc/cfgexpand.c |  5 +----
 gcc/emit-rtl.c  | 12 ++++++++++++
 gcc/emit-rtl.h  |  2 ++
 4 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 966af5c..1cf31bc 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2016-12-07  David Malcolm  <dmalcolm@redhat.com>
+
+	* cfgexpand.c (pass_expand::execute): Move stack initializations
+	to rtl_data::init_stack_alignment and call it.
+	* emit-rtl.c (rtl_data::init_stack_alignment): New method.
+	* emit-rtl.h (rtl_data::init_stack_alignment): New method.
+
 2016-12-07  Wilco Dijkstra  <wdijkstr@arm.com>
 
 	* gcc/ira.c (ira_setup_eliminable_regset): Initialize crtl->is_leaf.
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index c3aca59..97dc648 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -6213,10 +6213,7 @@ pass_expand::execute (function *fun)
   discover_nonconstant_array_refs ();
 
   targetm.expand_to_rtl_hook ();
-  crtl->stack_alignment_needed = STACK_BOUNDARY;
-  crtl->max_used_stack_slot_alignment = STACK_BOUNDARY;
-  crtl->stack_alignment_estimated = 0;
-  crtl->preferred_stack_boundary = STACK_BOUNDARY;
+  crtl->init_stack_alignment ();
   fun->cfg->max_jumptable_ents = 0;
 
   /* Resovle the function section.  Some targets, like ARM EABI rely on knowledge
diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
index 5201bd0..9eccd68 100644
--- a/gcc/emit-rtl.c
+++ b/gcc/emit-rtl.c
@@ -6291,5 +6291,17 @@ need_atomic_barrier_p (enum memmodel model, bool pre)
       gcc_unreachable ();
     }
 }
+
+/* Initialize fields of rtl_data related to stack alignment.  */
+
+void
+rtl_data::init_stack_alignment ()
+{
+  stack_alignment_needed = STACK_BOUNDARY;
+  max_used_stack_slot_alignment = STACK_BOUNDARY;
+  stack_alignment_estimated = 0;
+  preferred_stack_boundary = STACK_BOUNDARY;
+}
+
 
 #include "gt-emit-rtl.h"
diff --git a/gcc/emit-rtl.h b/gcc/emit-rtl.h
index a919bf0..88ba589 100644
--- a/gcc/emit-rtl.h
+++ b/gcc/emit-rtl.h
@@ -55,6 +55,8 @@ struct GTY(()) incoming_args {
 
 /* Datastructures maintained for currently processed function in RTL form.  */
 struct GTY(()) rtl_data {
+  void init_stack_alignment ();
+
   struct expr_status expr;
   struct emit_status emit;
   struct varasm_status varasm;
-- 
cgit v1.1


From 8e1d640fcdf694fe1d83818b123b550e0da96705 Mon Sep 17 00:00:00 2001
From: Thomas Preud'homme <thomas.preudhomme@arm.com>
Date: Wed, 7 Dec 2016 17:56:53 +0000
Subject: re PR rtl-optimization/78617 (LRA clobbers live register during
 rematerialization)

2016-12-07  Thomas Preud'homme  <thomas.preudhomme@arm.com>

    gcc/
    PR rtl-optimization/78617
    * lra-remat.c (do_remat): Initialize live_hard_regs from live in
    registers, also setting hard registers mapped to pseudo registers.

    gcc/testsuite/
    PR rtl-optimization/78617
    * gcc.c-torture/execute/pr78617.c: New test.

From-SVN: r243374
---
 gcc/ChangeLog                                 |  6 ++++++
 gcc/lra-remat.c                               | 12 +++++++++++-
 gcc/testsuite/ChangeLog                       |  5 +++++
 gcc/testsuite/gcc.c-torture/execute/pr78617.c | 25 +++++++++++++++++++++++++
 4 files changed, 47 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/pr78617.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 1cf31bc..489ec77 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-07  Thomas Preud'homme  <thomas.preudhomme@arm.com>
+
+	PR rtl-optimization/78617
+	* lra-remat.c (do_remat): Initialize live_hard_regs from live in
+	registers, also setting hard registers mapped to pseudo registers.
+
 2016-12-07  David Malcolm  <dmalcolm@redhat.com>
 
 	* cfgexpand.c (pass_expand::execute): Move stack initializations
diff --git a/gcc/lra-remat.c b/gcc/lra-remat.c
index f01c664..cdd7057 100644
--- a/gcc/lra-remat.c
+++ b/gcc/lra-remat.c
@@ -1047,6 +1047,7 @@ update_scratch_ops (rtx_insn *remat_insn)
 static bool
 do_remat (void)
 {
+  unsigned regno;
   rtx_insn *insn;
   basic_block bb;
   bitmap_head avail_cands;
@@ -1054,12 +1055,21 @@ do_remat (void)
   bool changed_p = false;
   /* Living hard regs and hard registers of living pseudos.  */
   HARD_REG_SET live_hard_regs;
+  bitmap_iterator bi;
 
   bitmap_initialize (&avail_cands, &reg_obstack);
   bitmap_initialize (&active_cands, &reg_obstack);
   FOR_EACH_BB_FN (bb, cfun)
     {
-      REG_SET_TO_HARD_REG_SET (live_hard_regs, df_get_live_out (bb));
+      CLEAR_HARD_REG_SET (live_hard_regs);
+      EXECUTE_IF_SET_IN_BITMAP (df_get_live_in (bb), 0, regno, bi)
+	{
+	  int hard_regno = regno < FIRST_PSEUDO_REGISTER
+			   ? regno
+			   : reg_renumber[regno];
+	  if (hard_regno >= 0)
+	    SET_HARD_REG_BIT (live_hard_regs, hard_regno);
+	}
       bitmap_and (&avail_cands, &get_remat_bb_data (bb)->avin_cands,
 		  &get_remat_bb_data (bb)->livein_cands);
       /* Activating insns are always in the same block as their corresponding
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 6153fe7..71bf506 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-07  Thomas Preud'homme  <thomas.preudhomme@arm.com>
+
+	PR rtl-optimization/78617
+	* gcc.c-torture/execute/pr78617.c: New test.
+
 2016-12-07  Carl Love  <cel@us.ibm.com>
 
 	* gcc.target/powerpc/builtins-3.c: Move built-in tests for P8 and
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr78617.c b/gcc/testsuite/gcc.c-torture/execute/pr78617.c
new file mode 100644
index 0000000..89c4f6d
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr78617.c
@@ -0,0 +1,25 @@
+int a = 0;
+int d = 1;
+int f = 1;
+
+int fn1() {
+  return a || 1 >> a;
+}
+
+int fn2(int p1, int p2) {
+  return p2 >= 2 ? p1 : p1 >> 1;
+}
+
+int fn3(int p1) {
+  return d ^ p1;
+}
+
+int fn4(int p1, int p2) {
+  return fn3(!d > fn2((f = fn1() - 1000) || p2, p1));
+}
+
+int main() {
+  if (fn4(0, 0) != 1)
+    __builtin_abort ();
+  return 0;
+}
-- 
cgit v1.1


From 9c4e96eb1e305a9f35e4dbafb0a1fe29ef226265 Mon Sep 17 00:00:00 2001
From: David Malcolm <dmalcolm@redhat.com>
Date: Wed, 7 Dec 2016 18:16:32 +0000
Subject: Add some functions for use by the RTL frontend.

gcc/ChangeLog:
	* read-md.c (rtx_reader::require_char): New method.
	(require_char_ws): Convert from function to...
	(rtx_reader::require_char_ws): ...method.
	(rtx_reader::require_word_ws): New method.
	* read-md.h (rtx_reader::require_char): New method decl.
	(require_char_ws): Remove global decl in favor of...
	(rtx_reader::require_char_ws): ...new method decl.
	(rtx_reader::require_word_ws): New method decl.
	(rtx_reader::peek_char): New method decl.

From-SVN: r243376
---
 gcc/ChangeLog | 12 ++++++++++++
 gcc/read-md.c | 35 ++++++++++++++++++++++++++++++++++-
 gcc/read-md.h |  5 ++++-
 3 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 489ec77..c4055dc 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2016-12-07  David Malcolm  <dmalcolm@redhat.com>
+
+	* read-md.c (rtx_reader::require_char): New method.
+	(require_char_ws): Convert from function to...
+	(rtx_reader::require_char_ws): ...method.
+	(rtx_reader::require_word_ws): New method.
+	* read-md.h (rtx_reader::require_char): New method decl.
+	(require_char_ws): Remove global decl in favor of...
+	(rtx_reader::require_char_ws): ...new method decl.
+	(rtx_reader::require_word_ws): New method decl.
+	(rtx_reader::peek_char): New method decl.
+
 2016-12-07  Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
 	PR rtl-optimization/78617
diff --git a/gcc/read-md.c b/gcc/read-md.c
index 6fe2600..095075f 100644
--- a/gcc/read-md.c
+++ b/gcc/read-md.c
@@ -340,17 +340,40 @@ read_skip_spaces (void)
     }
 }
 
+/* Consume the next character, issuing a fatal error if it is not
+   EXPECTED.  */
+
+void
+rtx_reader::require_char (char expected)
+{
+  int ch = read_char ();
+  if (ch != expected)
+    fatal_expected_char (expected, ch);
+}
+
 /* Consume any whitespace, then consume the next non-whitespace
    character, issuing a fatal error if it is not EXPECTED.  */
 
 void
-require_char_ws (char expected)
+rtx_reader::require_char_ws (char expected)
 {
   int ch = read_skip_spaces ();
   if (ch != expected)
     fatal_expected_char (expected, ch);
 }
 
+/* Consume any whitespace, then consume the next word (as per read_name),
+   issuing a fatal error if it is not EXPECTED.  */
+
+void
+rtx_reader::require_word_ws (const char *expected)
+{
+  struct md_name name;
+  read_name (&name);
+  if (strcmp (name.string, expected))
+    fatal_with_file_and_line ("missing '%s'", expected);
+}
+
 /* Read the next character from the file.  */
 
 int
@@ -386,6 +409,16 @@ rtx_reader::unread_char (int ch)
   ungetc (ch, m_read_md_file);
 }
 
+/* Peek at the next character from the file without consuming it.  */
+
+int
+rtx_reader::peek_char (void)
+{
+  int ch = read_char ();
+  unread_char (ch);
+  return ch;
+}
+
 /* Read an rtx code name into NAME.  It is terminated by any of the
    punctuation chars of rtx printed syntax.  */
 
diff --git a/gcc/read-md.h b/gcc/read-md.h
index 996b514..06b89b4 100644
--- a/gcc/read-md.h
+++ b/gcc/read-md.h
@@ -116,6 +116,10 @@ class rtx_reader
   char *read_braced_string ();
   char *read_string (int star_if_braced);
   void read_skip_construct (int depth, file_location loc);
+  void require_char (char expected);
+  void require_char_ws (char expected);
+  void require_word_ws (const char *expected);
+  int peek_char (void);
 
   void set_md_ptr_loc (const void *ptr, const char *filename, int lineno);
   const struct ptr_loc *get_md_ptr_loc (const void *ptr);
@@ -269,7 +273,6 @@ extern void fatal_with_file_and_line (const char *, ...)
   ATTRIBUTE_PRINTF_1 ATTRIBUTE_NORETURN;
 extern void fatal_expected_char (int, int) ATTRIBUTE_NORETURN;
 extern int read_skip_spaces (void);
-extern void require_char_ws (char expected);
 extern int n_comma_elts (const char *);
 extern const char *scan_comma_elt (const char **);
 extern void upcase_string (char *);
-- 
cgit v1.1


From 77f1efdbe8fe401040adb9b2b43aac85916682ac Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Wed, 7 Dec 2016 20:10:50 +0100
Subject: re PR tree-optimization/78692 (ICE (segfault))

	PR c++/78692
	* cgraph.c (cgraph_edge::redirect_call_stmt_to_callee): Set lhs
	var to lhs of new_stmt right before noreturn handling rather than to
	lhs of e->call_stmt early.

	* g++.dg/torture/pr78692.C: New test.

From-SVN: r243377
---
 gcc/ChangeLog                          |  7 +++++++
 gcc/cgraph.c                           |  2 +-
 gcc/testsuite/ChangeLog                |  5 +++++
 gcc/testsuite/g++.dg/torture/pr78692.C | 26 ++++++++++++++++++++++++++
 4 files changed, 39 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/torture/pr78692.C

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c4055dc..b14b790 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2016-12-07  Jakub Jelinek  <jakub@redhat.com>
+
+	PR c++/78692
+	* cgraph.c (cgraph_edge::redirect_call_stmt_to_callee): Set lhs
+	var to lhs of new_stmt right before noreturn handling rather than to
+	lhs of e->call_stmt early.
+
 2016-12-07  David Malcolm  <dmalcolm@redhat.com>
 
 	* read-md.c (rtx_reader::require_char): New method.
diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index 867e371..fd2465e 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -1271,7 +1271,6 @@ cgraph_edge::redirect_call_stmt_to_callee (void)
   cgraph_edge *e = this;
 
   tree decl = gimple_call_fndecl (e->call_stmt);
-  tree lhs = gimple_call_lhs (e->call_stmt);
   gcall *new_stmt;
   gimple_stmt_iterator gsi;
   bool skip_bounds = false;
@@ -1526,6 +1525,7 @@ cgraph_edge::redirect_call_stmt_to_callee (void)
     gimple_call_set_fntype (new_stmt, TREE_TYPE (e->callee->decl));
 
   /* If the call becomes noreturn, remove the LHS if possible.  */
+  tree lhs = gimple_call_lhs (new_stmt);
   if (lhs
       && gimple_call_noreturn_p (new_stmt)
       && (VOID_TYPE_P (TREE_TYPE (gimple_call_fntype (new_stmt)))
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 71bf506..2706d0e 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-07  Jakub Jelinek  <jakub@redhat.com>
+
+	PR c++/78692
+	* g++.dg/torture/pr78692.C: New test.
+
 2016-12-07  Thomas Preud'homme  <thomas.preudhomme@arm.com>
 
 	PR rtl-optimization/78617
diff --git a/gcc/testsuite/g++.dg/torture/pr78692.C b/gcc/testsuite/g++.dg/torture/pr78692.C
new file mode 100644
index 0000000..57a0d2f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr78692.C
@@ -0,0 +1,26 @@
+// PR c++/78692
+
+int a;
+void *b;
+extern "C" {
+struct C {
+  virtual int d ();
+};
+struct E {
+  virtual int operator () (int, const void *, int) = 0;
+};
+class F {
+  int g ();
+  int h;
+  E &i;
+};
+struct : C, E {
+  int operator () (int, const void *, int) { throw int(); }
+} j;
+
+int
+F::g ()
+{
+  a = i (h, b, 0);
+}
+}
-- 
cgit v1.1


From c89529306c91eafa81c762e9050d51c747c14af0 Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Wed, 7 Dec 2016 20:45:45 +0100
Subject: builtins.c (fold_builtin_strstr): Removed.

	* builtins.c (fold_builtin_strstr): Removed.
	(fold_builtin_2): Don't call fold_builtin_strstr.
	* gimple-fold.c (gimple_fold_builtin_strchr): Check is_strrchr
	earlier in the strrchr (x, 0) -> strchr (x, 0) optimization.
	(gimple_fold_builtin_strstr): New function.
	(gimple_fold_builtin): Call it.
	* fold-const-call.c (fold_const_call): Handle CFN_BUILT_IN_STRSTR.

	* gcc.dg/builtin-strstr-1.c: New test.
	* g++.dg/cpp0x/constexpr-strstr.C: New test.

From-SVN: r243378
---
 gcc/ChangeLog                                 |  8 +++
 gcc/builtins.c                                | 70 ---------------------------
 gcc/fold-const-call.c                         | 16 ++++++
 gcc/gimple-fold.c                             | 68 +++++++++++++++++++++++++-
 gcc/testsuite/ChangeLog                       |  3 ++
 gcc/testsuite/g++.dg/cpp0x/constexpr-strstr.C | 12 +++++
 gcc/testsuite/gcc.dg/builtin-strstr-1.c       | 31 ++++++++++++
 7 files changed, 136 insertions(+), 72 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/constexpr-strstr.C
 create mode 100644 gcc/testsuite/gcc.dg/builtin-strstr-1.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b14b790..cc6dc71 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,13 @@
 2016-12-07  Jakub Jelinek  <jakub@redhat.com>
 
+	* builtins.c (fold_builtin_strstr): Removed.
+	(fold_builtin_2): Don't call fold_builtin_strstr.
+	* gimple-fold.c (gimple_fold_builtin_strchr): Check is_strrchr
+	earlier in the strrchr (x, 0) -> strchr (x, 0) optimization.
+	(gimple_fold_builtin_strstr): New function.
+	(gimple_fold_builtin): Call it.
+	* fold-const-call.c (fold_const_call): Handle CFN_BUILT_IN_STRSTR.
+
 	PR c++/78692
 	* cgraph.c (cgraph_edge::redirect_call_stmt_to_callee): Set lhs
 	var to lhs of new_stmt right before noreturn handling rather than to
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 1316c27..58ed469 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -163,7 +163,6 @@ static tree fold_builtin_3 (location_t, tree, tree, tree, tree);
 static tree fold_builtin_varargs (location_t, tree, tree*, int);
 
 static tree fold_builtin_strpbrk (location_t, tree, tree, tree);
-static tree fold_builtin_strstr (location_t, tree, tree, tree);
 static tree fold_builtin_strspn (location_t, tree, tree);
 static tree fold_builtin_strcspn (location_t, tree, tree);
 
@@ -8303,9 +8302,6 @@ fold_builtin_2 (location_t loc, tree fndecl, tree arg0, tree arg1)
     CASE_FLT_FN (BUILT_IN_MODF):
       return fold_builtin_modf (loc, arg0, arg1, type);
 
-    case BUILT_IN_STRSTR:
-      return fold_builtin_strstr (loc, arg0, arg1, type);
-
     case BUILT_IN_STRSPN:
       return fold_builtin_strspn (loc, arg0, arg1);
 
@@ -8729,72 +8725,6 @@ readonly_data_expr (tree exp)
     return false;
 }
 
-/* Simplify a call to the strstr builtin.  S1 and S2 are the arguments
-   to the call, and TYPE is its return type.
-
-   Return NULL_TREE if no simplification was possible, otherwise return the
-   simplified form of the call as a tree.
-
-   The simplified form may be a constant or other expression which
-   computes the same value, but in a more efficient manner (including
-   calls to other builtin functions).
-
-   The call may contain arguments which need to be evaluated, but
-   which are not useful to determine the result of the call.  In
-   this case we return a chain of COMPOUND_EXPRs.  The LHS of each
-   COMPOUND_EXPR will be an argument which must be evaluated.
-   COMPOUND_EXPRs are chained through their RHS.  The RHS of the last
-   COMPOUND_EXPR in the chain will contain the tree for the simplified
-   form of the builtin function call.  */
-
-static tree
-fold_builtin_strstr (location_t loc, tree s1, tree s2, tree type)
-{
-  if (!validate_arg (s1, POINTER_TYPE)
-      || !validate_arg (s2, POINTER_TYPE))
-    return NULL_TREE;
-  else
-    {
-      tree fn;
-      const char *p1, *p2;
-
-      p2 = c_getstr (s2);
-      if (p2 == NULL)
-	return NULL_TREE;
-
-      p1 = c_getstr (s1);
-      if (p1 != NULL)
-	{
-	  const char *r = strstr (p1, p2);
-	  tree tem;
-
-	  if (r == NULL)
-	    return build_int_cst (TREE_TYPE (s1), 0);
-
-	  /* Return an offset into the constant string argument.  */
-	  tem = fold_build_pointer_plus_hwi_loc (loc, s1, r - p1);
-	  return fold_convert_loc (loc, type, tem);
-	}
-
-      /* The argument is const char *, and the result is char *, so we need
-	 a type conversion here to avoid a warning.  */
-      if (p2[0] == '\0')
-	return fold_convert_loc (loc, type, s1);
-
-      if (p2[1] != '\0')
-	return NULL_TREE;
-
-      fn = builtin_decl_implicit (BUILT_IN_STRCHR);
-      if (!fn)
-	return NULL_TREE;
-
-      /* New argument list transforming strstr(s1, s2) to
-	 strchr(s1, s2[0]).  */
-      return build_call_expr_loc (loc, fn, 2, s1,
-				  build_int_cst (integer_type_node, p2[0]));
-    }
-}
-
 /* Simplify a call to the strpbrk builtin.  S1 and S2 are the arguments
    to the call, and TYPE is its return type.
 
diff --git a/gcc/fold-const-call.c b/gcc/fold-const-call.c
index c85fb41..f978da3 100644
--- a/gcc/fold-const-call.c
+++ b/gcc/fold-const-call.c
@@ -1434,6 +1434,22 @@ fold_const_call (combined_fn fn, tree type, tree arg0, tree arg1)
 	}
       return NULL_TREE;
 
+    case CFN_BUILT_IN_STRSTR:
+      if ((p1 = c_getstr (arg1)))
+	{
+	  if ((p0 = c_getstr (arg0)))
+	    {
+	      const char *r = strstr (p0, p1);
+	      if (r == NULL)
+		return build_int_cst (type, 0);
+	      return fold_convert (type,
+				   fold_build_pointer_plus_hwi (arg0, r - p0));
+	    }
+	  if (*p1 == '\0')
+	    return fold_convert (type, arg0);
+	}
+      return NULL_TREE;
+
     default:
       return fold_const_call_1 (fn, type, arg0, arg1);
     }
diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
index f586c09..d00625b 100644
--- a/gcc/gimple-fold.c
+++ b/gcc/gimple-fold.c
@@ -1506,11 +1506,11 @@ gimple_fold_builtin_strchr (gimple_stmt_iterator *gsi, bool is_strrchr)
     return false;
 
   /* Transform strrchr (s, 0) to strchr (s, 0) when optimizing for size.  */
-  if (optimize_function_for_size_p (cfun))
+  if (is_strrchr && optimize_function_for_size_p (cfun))
     {
       tree strchr_fn = builtin_decl_implicit (BUILT_IN_STRCHR);
 
-      if (is_strrchr && strchr_fn)
+      if (strchr_fn)
 	{
 	  gimple *repl = gimple_build_call (strchr_fn, 2, str, c);
 	  replace_call_with_call_and_fold (gsi, repl);
@@ -1549,6 +1549,68 @@ gimple_fold_builtin_strchr (gimple_stmt_iterator *gsi, bool is_strrchr)
   return true;
 }
 
+/* Fold the strstr call at *GSI, unless its result is unused.  If both
+   arguments are constant strings, substitute the computed result;
+   otherwise fold strstr (x, "") into x and strstr (x, "c") into
+   strchr (x, 'c').  Return true iff the call was replaced.  */
+static bool
+gimple_fold_builtin_strstr (gimple_stmt_iterator *gsi)
+{
+  gimple *stmt = gsi_stmt (*gsi);
+  tree haystack = gimple_call_arg (stmt, 0);
+  tree needle = gimple_call_arg (stmt, 1);
+  const char *p, *q;
+
+  if (!gimple_call_lhs (stmt))
+    return false;
+
+  q = c_getstr (needle);
+  if (q == NULL)
+    return false;
+
+  if ((p = c_getstr (haystack)))
+    {
+      const char *r = strstr (p, q);
+
+      if (r == NULL)
+	{
+	  replace_call_with_value (gsi, integer_zero_node);
+	  return true;
+	}
+
+      tree len = build_int_cst (size_type_node, r - p);
+      gimple_seq stmts = NULL;
+      gimple *new_stmt
+	= gimple_build_assign (gimple_call_lhs (stmt), POINTER_PLUS_EXPR,
+			       haystack, len);
+      gimple_seq_add_stmt_without_update (&stmts, new_stmt);
+      gsi_replace_with_seq_vops (gsi, stmts);
+      return true;
+    }
+
+  /* For strstr (x, "") return x.  */
+  if (q[0] == '\0')
+    {
+      replace_call_with_value (gsi, haystack);
+      return true;
+    }
+
+  /* Transform strstr (x, "c") into strchr (x, 'c').  */
+  if (q[1] == '\0')
+    {
+      tree strchr_fn = builtin_decl_implicit (BUILT_IN_STRCHR);
+      if (strchr_fn)
+	{
+	  tree c = build_int_cst (integer_type_node, q[0]);
+	  gimple *repl = gimple_build_call (strchr_fn, 2, haystack, c);
+	  replace_call_with_call_and_fold (gsi, repl);
+	  return true;
+	}
+    }
+
+  return false;
+}
+
 /* Simplify a call to the strcat builtin.  DST and SRC are the arguments
    to the call.
 
@@ -3236,6 +3298,8 @@ gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case BUILT_IN_RINDEX:
     case BUILT_IN_STRRCHR:
       return gimple_fold_builtin_strchr (gsi, true);
+    case BUILT_IN_STRSTR:
+      return gimple_fold_builtin_strstr (gsi);
     case BUILT_IN_STRCMP:
     case BUILT_IN_STRCASECMP:
     case BUILT_IN_STRNCMP:
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 2706d0e..1c3f290 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,5 +1,8 @@
 2016-12-07  Jakub Jelinek  <jakub@redhat.com>
 
+	* gcc.dg/builtin-strstr-1.c: New test.
+	* g++.dg/cpp0x/constexpr-strstr.C: New test.
+
 	PR c++/78692
 	* g++.dg/torture/pr78692.C: New test.
 
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-strstr.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-strstr.C
new file mode 100644
index 0000000..4268909
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-strstr.C
@@ -0,0 +1,12 @@
+// { dg-do compile { target c++11 } }
+
+constexpr const char *f1 (const char *p, const char *q) { return __builtin_strstr (p, q); }
+constexpr const char a[] = "abcdefedcbaaaaab";
+constexpr const char b[] = "fed";
+constexpr const char c[] = "aaab";
+static_assert (f1 ("abcde", "ee") == nullptr, "");
+static_assert (f1 (a, b) == a + 5, "");
+static_assert (f1 (a, c) == a + 12, "");
+static_assert (f1 (a, "") == a, "");
+static_assert (f1 (a, "aaaaaab") == nullptr, "");
+static_assert (f1 (a, "aaa") == a + 10, "");
diff --git a/gcc/testsuite/gcc.dg/builtin-strstr-1.c b/gcc/testsuite/gcc.dg/builtin-strstr-1.c
new file mode 100644
index 0000000..9d584b6
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/builtin-strstr-1.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not "link_error" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "__builtin_strstr" "optimized" } } */
+/* { dg-final { scan-tree-dump-times "return p_\[0-9]*.D.;" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "__builtin_strchr" 1 "optimized" } } */
+
+extern void link_error (void);
+
+void
+foo (void)
+{
+  const char *p = "abcdef";
+  const char *q = "def";
+  p++;
+  q++;
+  if (__builtin_strstr (p, q) != p + 3)
+    link_error ();
+}
+
+char *
+bar (const char *p)
+{
+  return __builtin_strstr (p, "");
+}
+
+char *
+baz (const char *p)
+{
+  return __builtin_strstr (p, "d");
+}
-- 
cgit v1.1


From a4dec0d6de97348e71932f7080fe4a3bb8730096 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Dumont?= <fdumont@gcc.gnu.org>
Date: Wed, 7 Dec 2016 21:12:49 +0000
Subject: stl_map.h (map(const map&)): Make default.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

2016-12-07  François Dumont  <fdumont@gcc.gnu.org>

	* include/bits/stl_map.h (map(const map&)): Make default.
	(map(map&&)): Likewise.
	(~map()): Likewise.
	(operator=(const map&)): Likewise.
	* include/bits/stl_multimap.h (multimap(const multimap&)): Make default.
	(multimap(multimap&&)): Likewise.
	(~multimap()): Likewise.
	(operator=(const multimap&)): Likewise.
	* include/bits/stl_set.h (set(const set&)): Make default.
	(set(set&&)): Likewise.
	(~set()): Likewise.
	(operator=(const set&)): Likewise.
	* include/bits/stl_multiset.h (multiset(const multiset&)): Make default.
	(multiset(multiset&&)): Likewise.
	(~multiset()): Likewise.
	(operator=(const multiset&)): Likewise.
	* include/bits/stl_tree.h (_Rb_tree_key_compare<>): New.
	(_Rb_tree_header): New.
	(_Rb_tree_impl): Inherit from the two types above.
	(_Rb_tree_impl()): Make default.
	(_Rb_tree_impl(const _Rb_tree_impl&)): New.
	(_Rb_tree<>(const _Rb_tree&)): Use latter.
	(_Rb_tree_impl(_Rb_tree_impl&&)): New, default.
	(_Rb_tree_impl(const _Key_compare&, const _Node_allocator&)): Delete.
	(_Rb_tree_impl::_M_reset): Move...
	(_Rb_tree_header::_M_reset): ...here.
	(_Rb_tree_impl::_M_initialize): Delete.
	(_Rb_tree(_Rb_tree&&)): Make default.
	(_Rb_tree_header::_M_move_data(_Rb_tree_header&)): New.
	(_Rb_tree<>::_M_move_data(_Rb_tree&, true_type)): Use latter.
	(_Rb_tree<>(_Rb_tree&&)): Make default.

From-SVN: r243379
---
 libstdc++-v3/ChangeLog                   |  34 ++++++
 libstdc++-v3/include/bits/stl_map.h      |  31 +++---
 libstdc++-v3/include/bits/stl_multimap.h |  34 +++---
 libstdc++-v3/include/bits/stl_multiset.h |  33 +++---
 libstdc++-v3/include/bits/stl_set.h      |  32 +++---
 libstdc++-v3/include/bits/stl_tree.h     | 184 +++++++++++++++++--------------
 6 files changed, 200 insertions(+), 148 deletions(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 28c5d9d..3feef85 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,37 @@
+2016-12-07  François Dumont  <fdumont@gcc.gnu.org>
+
+	* include/bits/stl_map.h (map(const map&)): Make default.
+	(map(map&&)): Likewise.
+	(~map()): Likewise.
+	(operator=(const map&)): Likewise.
+	* include/bits/stl_multimap.h (multimap(const multimap&)): Make default.
+	(multimap(multimap&&)): Likewise.
+	(~multimap()): Likewise.
+	(operator=(const multimap&)): Likewise.
+	* include/bits/stl_set.h (set(const set&)): Make default.
+	(set(set&&)): Likewise.
+	(~set()): Likewise.
+	(operator=(const set&)): Likewise.
+	* include/bits/stl_multiset.h (multiset(const multiset&)): Make default.
+	(multiset(multiset&&)): Likewise.
+	(~multiset()): Likewise.
+	(operator=(const multiset&)): Likewise.
+	* include/bits/stl_tree.h (_Rb_tree_key_compare<>): New.
+	(_Rb_tree_header): New.
+	(_Rb_tree_impl): Inherit from the two types above.
+	(_Rb_tree_impl()): Make default.
+	(_Rb_tree_impl(const _Rb_tree_impl&)): New.
+	(_Rb_tree<>(const _Rb_tree&)): Use latter.
+	(_Rb_tree_impl(_Rb_tree_impl&&)): New, default.
+	(_Rb_tree_impl(const _Key_compare&, const _Node_allocator&)): Delete.
+	(_Rb_tree_impl::_M_reset): Move...
+	(_Rb_tree_header::_M_reset): ...here.
+	(_Rb_tree_impl::_M_initialize): Delete.
+	(_Rb_tree(_Rb_tree&&)): Make default.
+	(_Rb_tree_header::_M_move_data(_Rb_tree_header&)): New.
+	(_Rb_tree<>::_M_move_data(_Rb_tree&, true_type)): Use latter.
+	(_Rb_tree<>(_Rb_tree&&)): Make default.
+
 2016-12-07  Jonathan Wakely  <jwakely@redhat.com>
 
 	* include/experimental/bits/fs_path.h (path::_S_convert): Replace
diff --git a/libstdc++-v3/include/bits/stl_map.h b/libstdc++-v3/include/bits/stl_map.h
index dea7d5b..bbd0a97 100644
--- a/libstdc++-v3/include/bits/stl_map.h
+++ b/libstdc++-v3/include/bits/stl_map.h
@@ -185,25 +185,22 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
 
       /**
        *  @brief  %Map copy constructor.
-       *  @param  __x  A %map of identical element and allocator types.
        *
-       *  The newly-created %map uses a copy of the allocator object used
-       *  by @a __x (unless the allocator traits dictate a different object).
+       *  Whether the allocator is copied depends on the allocator traits.
        */
+#if __cplusplus < 201103L
       map(const map& __x)
       : _M_t(__x._M_t) { }
+#else
+      map(const map&) = default;
 
-#if __cplusplus >= 201103L
       /**
        *  @brief  %Map move constructor.
-       *  @param  __x  A %map of identical element and allocator types.
        *
-       *  The newly-created %map contains the exact contents of @a __x.
-       *  The contents of @a __x are a valid, but unspecified %map.
+       *  The newly-created %map contains the exact contents of the moved
+       *  instance. The moved instance is a valid, but unspecified, %map.
        */
-      map(map&& __x)
-      noexcept(is_nothrow_copy_constructible<_Compare>::value)
-      : _M_t(std::move(__x._M_t)) { }
+      map(map&&) = default;
 
       /**
        *  @brief  Builds a %map from an initializer_list.
@@ -284,31 +281,31 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
 	: _M_t(__comp, _Pair_alloc_type(__a))
         { _M_t._M_insert_unique(__first, __last); }
 
-      // FIXME There is no dtor declared, but we should have something
-      // generated by Doxygen.  I don't know what tags to add to this
-      // paragraph to make that happen:
+#if __cplusplus >= 201103L
       /**
        *  The dtor only erases the elements, and note that if the elements
        *  themselves are pointers, the pointed-to memory is not touched in any
        *  way.  Managing the pointer is the user's responsibility.
        */
+      ~map() = default;
+#endif
 
       /**
        *  @brief  %Map assignment operator.
-       *  @param  __x  A %map of identical element and allocator types.
-       *
-       *  All the elements of @a __x are copied.
        *
        *  Whether the allocator is copied depends on the allocator traits.
        */
+#if __cplusplus < 201103L
       map&
       operator=(const map& __x)
       {
 	_M_t = __x._M_t;
 	return *this;
       }
+#else
+      map&
+      operator=(const map&) = default;
 
-#if __cplusplus >= 201103L
       /// Move assignment operator.
       map&
       operator=(map&&) = default;
diff --git a/libstdc++-v3/include/bits/stl_multimap.h b/libstdc++-v3/include/bits/stl_multimap.h
index 7e86b76..a5f775b 100644
--- a/libstdc++-v3/include/bits/stl_multimap.h
+++ b/libstdc++-v3/include/bits/stl_multimap.h
@@ -182,25 +182,23 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
 
       /**
        *  @brief  %Multimap copy constructor.
-       *  @param  __x  A %multimap of identical element and allocator types.
        *
-       *  The newly-created %multimap uses a copy of the allocator object used
-       *  by @a __x (unless the allocator traits dictate a different object).
+       *  Whether the allocator is copied depends on the allocator traits.
        */
+#if __cplusplus < 201103L
       multimap(const multimap& __x)
       : _M_t(__x._M_t) { }
+#else
+      multimap(const multimap&) = default;
 
-#if __cplusplus >= 201103L
       /**
        *  @brief  %Multimap move constructor.
-       *  @param   __x  A %multimap of identical element and allocator types.
        *
-       *  The newly-created %multimap contains the exact contents of @a __x.
-       *  The contents of @a __x are a valid, but unspecified %multimap.
+       *  The newly-created %multimap contains the exact contents of the
+       *  moved instance. The moved instance is a valid, but unspecified
+       *  %multimap.
        */
-      multimap(multimap&& __x)
-      noexcept(is_nothrow_copy_constructible<_Compare>::value)
-      : _M_t(std::move(__x._M_t)) { }
+      multimap(multimap&&) = default;
 
       /**
        *  @brief  Builds a %multimap from an initializer_list.
@@ -278,31 +276,31 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
 	: _M_t(__comp, _Pair_alloc_type(__a))
         { _M_t._M_insert_equal(__first, __last); }
 
-      // FIXME There is no dtor declared, but we should have something generated
-      // by Doxygen.  I don't know what tags to add to this paragraph to make
-      // that happen:
+#if __cplusplus >= 201103L
       /**
        *  The dtor only erases the elements, and note that if the elements
        *  themselves are pointers, the pointed-to memory is not touched in any
-       *  way.  Managing the pointer is the user's responsibility.
+       *  way. Managing the pointer is the user's responsibility.
        */
+      ~multimap() = default;
+#endif
 
       /**
        *  @brief  %Multimap assignment operator.
-       *  @param  __x  A %multimap of identical element and allocator types.
-       *
-       *  All the elements of @a __x are copied.
        *
        *  Whether the allocator is copied depends on the allocator traits.
        */
+#if __cplusplus < 201103L
       multimap&
       operator=(const multimap& __x)
       {
 	_M_t = __x._M_t;
 	return *this;
       }
+#else
+      multimap&
+      operator=(const multimap&) = default;
 
-#if __cplusplus >= 201103L
       /// Move assignment operator.
       multimap&
       operator=(multimap&&) = default;
diff --git a/libstdc++-v3/include/bits/stl_multiset.h b/libstdc++-v3/include/bits/stl_multiset.h
index 7fe2fbd..8a83b17 100644
--- a/libstdc++-v3/include/bits/stl_multiset.h
+++ b/libstdc++-v3/include/bits/stl_multiset.h
@@ -194,25 +194,23 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
 
       /**
        *  @brief  %Multiset copy constructor.
-       *  @param  __x  A %multiset of identical element and allocator types.
        *
-       *  The newly-created %multiset uses a copy of the allocator object used
-       *  by @a __x (unless the allocator traits dictate a different object).
+       *  Whether the allocator is copied depends on the allocator traits.
        */
+#if __cplusplus < 201103L
       multiset(const multiset& __x)
       : _M_t(__x._M_t) { }
+#else
+      multiset(const multiset&) = default;
 
-#if __cplusplus >= 201103L
      /**
        *  @brief  %Multiset move constructor.
-       *  @param  __x  A %multiset of identical element and allocator types.
        *
-       *  The newly-created %multiset contains the exact contents of @a __x.
-       *  The contents of @a __x are a valid, but unspecified %multiset.
+       *  The newly-created %multiset contains the exact contents of the
+       *  moved instance. The moved instance is a valid, but unspecified
+       *  %multiset.
        */
-      multiset(multiset&& __x)
-      noexcept(is_nothrow_copy_constructible<_Compare>::value)
-      : _M_t(std::move(__x._M_t)) { }
+      multiset(multiset&&) = default;
 
       /**
        *  @brief  Builds a %multiset from an initializer_list.
@@ -256,24 +254,31 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
 		 const allocator_type& __a)
 	: _M_t(_Compare(), _Key_alloc_type(__a))
         { _M_t._M_insert_equal(__first, __last); }
+
+      /**
+       *  The dtor only erases the elements, and note that if the elements
+       *  themselves are pointers, the pointed-to memory is not touched in any
+       *  way. Managing the pointer is the user's responsibility.
+       */
+      ~multiset() = default;
 #endif
 
       /**
        *  @brief  %Multiset assignment operator.
-       *  @param  __x  A %multiset of identical element and allocator types.
-       *
-       *  All the elements of @a __x are copied.
        *
        *  Whether the allocator is copied depends on the allocator traits.
        */
+#if __cplusplus < 201103L
       multiset&
       operator=(const multiset& __x)
       {
 	_M_t = __x._M_t;
 	return *this;
       }
+#else
+      multiset&
+      operator=(const multiset&) = default;
 
-#if __cplusplus >= 201103L
       /// Move assignment operator.
       multiset&
       operator=(multiset&&) = default;
diff --git a/libstdc++-v3/include/bits/stl_set.h b/libstdc++-v3/include/bits/stl_set.h
index 5ed9672..db1e031 100644
--- a/libstdc++-v3/include/bits/stl_set.h
+++ b/libstdc++-v3/include/bits/stl_set.h
@@ -199,25 +199,22 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
 
       /**
        *  @brief  %Set copy constructor.
-       *  @param  __x  A %set of identical element and allocator types.
        *
-       *  The newly-created %set uses a copy of the allocator object used
-       *  by @a __x (unless the allocator traits dictate a different object).
+       *  Whether the allocator is copied depends on the allocator traits.
        */
+#if __cplusplus < 201103L
       set(const set& __x)
       : _M_t(__x._M_t) { }
+#else
+      set(const set&) = default;
 
-#if __cplusplus >= 201103L
      /**
        *  @brief %Set move constructor
-       *  @param __x  A %set of identical element and allocator types.
        *
-       *  The newly-created %set contains the exact contents of @a x.
-       *  The contents of @a x are a valid, but unspecified %set.
+       *  The newly-created %set contains the exact contents of the moved
+       *  instance. The moved instance is a valid, but unspecified, %set.
        */
-      set(set&& __x)
-      noexcept(is_nothrow_copy_constructible<_Compare>::value)
-      : _M_t(std::move(__x._M_t)) { }
+      set(set&&) = default;
 
       /**
        *  @brief  Builds a %set from an initializer_list.
@@ -261,24 +258,31 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
 	    const allocator_type& __a)
 	: _M_t(_Compare(), _Key_alloc_type(__a))
         { _M_t._M_insert_unique(__first, __last); }
+
+      /**
+       *  The dtor only erases the elements, and note that if the elements
+       *  themselves are pointers, the pointed-to memory is not touched in any
+       *  way. Managing the pointer is the user's responsibility.
+       */
+      ~set() = default;
 #endif
 
       /**
        *  @brief  %Set assignment operator.
-       *  @param  __x  A %set of identical element and allocator types.
-       *
-       *  All the elements of @a __x are copied.
        *
        *  Whether the allocator is copied depends on the allocator traits.
        */
+#if __cplusplus < 201103L
       set&
       operator=(const set& __x)
       {
 	_M_t = __x._M_t;
 	return *this;
       }
+#else
+      set&
+      operator=(const set&) = default;
 
-#if __cplusplus >= 201103L
       /// Move assignment operator.
       set&
       operator=(set&&) = default;
diff --git a/libstdc++-v3/include/bits/stl_tree.h b/libstdc++-v3/include/bits/stl_tree.h
index 2c67ad9..f5bb5f7 100644
--- a/libstdc++-v3/include/bits/stl_tree.h
+++ b/libstdc++-v3/include/bits/stl_tree.h
@@ -137,6 +137,80 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     }
   };
 
+  // Helper type offering value initialization guarantee on the compare functor.
+  template<typename _Key_compare>
+    struct _Rb_tree_key_compare
+    {
+      _Key_compare		_M_key_compare;
+
+      _Rb_tree_key_compare()
+      _GLIBCXX_NOEXCEPT_IF(
+	is_nothrow_default_constructible<_Key_compare>::value)
+      : _M_key_compare()
+      { }
+
+      _Rb_tree_key_compare(const _Key_compare& __comp)
+      : _M_key_compare(__comp)
+      { }
+
+#if __cplusplus >= 201103L
+      // Copy constructor added for consistency with C++98 mode.
+      _Rb_tree_key_compare(const _Rb_tree_key_compare&) = default;
+
+      _Rb_tree_key_compare(_Rb_tree_key_compare&& __x)
+	noexcept(is_nothrow_copy_constructible<_Key_compare>::value)
+      : _M_key_compare(__x._M_key_compare)
+      { }
+#endif
+    };
+
+  // Helper type to manage default initialization of node count and header.
+  struct _Rb_tree_header
+  {
+    _Rb_tree_node_base	_M_header;
+    size_t		_M_node_count; // Keeps track of size of tree.
+
+    _Rb_tree_header() _GLIBCXX_NOEXCEPT
+    {
+      _M_header._M_color = _S_red;
+      _M_reset();
+    }
+
+#if __cplusplus >= 201103L
+    _Rb_tree_header(_Rb_tree_header&& __x) noexcept
+    {
+      if (__x._M_header._M_parent != nullptr)
+	_M_move_data(__x);
+      else
+	{
+	  _M_header._M_color = _S_red;
+	  _M_reset();
+	}
+    }
+#endif
+
+    void
+    _M_move_data(_Rb_tree_header& __from)
+    {
+      _M_header._M_parent = __from._M_header._M_parent;
+      _M_header._M_left = __from._M_header._M_left;
+      _M_header._M_right = __from._M_header._M_right;
+      _M_header._M_parent->_M_parent = &_M_header;
+      _M_node_count = __from._M_node_count;
+
+      __from._M_reset();
+    }
+
+    void
+    _M_reset()
+    {
+      _M_header._M_parent = 0;
+      _M_header._M_left = &_M_header;
+      _M_header._M_right = &_M_header;
+      _M_node_count = 0;
+    }
+  };
+
   template<typename _Val>
     struct _Rb_tree_node : public _Rb_tree_node_base
     {
@@ -599,50 +673,31 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       // Unused _Is_pod_comparator is kept as it is part of mangled name.
       template<typename _Key_compare,
 	       bool /* _Is_pod_comparator */ = __is_pod(_Key_compare)>
-        struct _Rb_tree_impl : public _Node_allocator
+        struct _Rb_tree_impl
+	: public _Node_allocator
+	, public _Rb_tree_key_compare<_Key_compare>
+	, public _Rb_tree_header
         {
-	  _Key_compare		_M_key_compare;
-	  _Rb_tree_node_base	_M_header;
-	  size_type		_M_node_count; // Keeps track of size of tree.
+	  typedef _Rb_tree_key_compare<_Key_compare> _Base_key_compare;
 
+#if __cplusplus < 201103L
 	  _Rb_tree_impl()
-	  _GLIBCXX_NOEXCEPT_IF(
-	    is_nothrow_default_constructible<_Node_allocator>::value
-	    && is_nothrow_default_constructible<_Key_compare>::value)
-	  : _Node_allocator(), _M_key_compare(), _M_header(),
-	    _M_node_count(0)
-	  { _M_initialize(); }
-
-	  _Rb_tree_impl(const _Key_compare& __comp, const _Node_allocator& __a)
-	  : _Node_allocator(__a), _M_key_compare(__comp), _M_header(),
-	    _M_node_count(0)
-	  { _M_initialize(); }
+	  { }
+#else
+	  _Rb_tree_impl() = default;
+#endif
+
+	  _Rb_tree_impl(const _Rb_tree_impl& __x)
+	  : _Node_allocator(_Alloc_traits::_S_select_on_copy(__x))
+	  , _Base_key_compare(__x._M_key_compare)
+	  { }
 
 #if __cplusplus >= 201103L
+	  _Rb_tree_impl(_Rb_tree_impl&&) = default;
 	  _Rb_tree_impl(const _Key_compare& __comp, _Node_allocator&& __a)
-	  : _Node_allocator(std::move(__a)), _M_key_compare(__comp),
-	    _M_header(), _M_node_count(0)
-	  { _M_initialize(); }
+	  : _Node_allocator(std::move(__a)), _Base_key_compare(__comp)
+	  { }
 #endif
-
-	  void
-	  _M_reset()
-	  {
-	    this->_M_header._M_parent = 0;
-	    this->_M_header._M_left = &this->_M_header;
-	    this->_M_header._M_right = &this->_M_header;
-	    this->_M_node_count = 0;
-	  }
-
-	private:
-	  void
-	  _M_initialize()
-	  {
-	    this->_M_header._M_color = _S_red;
-	    this->_M_header._M_parent = 0;
-	    this->_M_header._M_left = &this->_M_header;
-	    this->_M_header._M_right = &this->_M_header;
-	  }
 	};
 
       _Rb_tree_impl<_Compare> _M_impl;
@@ -845,8 +900,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       : _M_impl(__comp, _Node_allocator(__a)) { }
 
       _Rb_tree(const _Rb_tree& __x)
-      : _M_impl(__x._M_impl._M_key_compare,
-	        _Alloc_traits::_S_select_on_copy(__x._M_get_Node_allocator()))
+      : _M_impl(__x._M_impl)
       {
 	if (__x._M_root() != 0)
 	  {
@@ -874,13 +928,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  }
       }
 
-      _Rb_tree(_Rb_tree&& __x)
-      : _M_impl(__x._M_impl._M_key_compare,
-		std::move(__x._M_get_Node_allocator()))
-      {
-	if (__x._M_root() != 0)
-	  _M_move_data(__x, std::true_type());
-      }
+      _Rb_tree(_Rb_tree&&) = default;
 
       _Rb_tree(_Rb_tree&& __x, const allocator_type& __a)
       : _Rb_tree(std::move(__x), _Node_allocator(__a))
@@ -1278,7 +1326,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     private:
       // Move elements from container with equal allocator.
       void
-      _M_move_data(_Rb_tree&, std::true_type);
+      _M_move_data(_Rb_tree& __x, std::true_type)
+      { _M_impl._M_move_data(__x._M_impl); }
 
       // Move elements from container with possibly non-equal allocator,
       // which might result in a copy not a move.
@@ -1533,29 +1582,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
            typename _Compare, typename _Alloc>
     void
     _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
-    _M_move_data(_Rb_tree& __x, std::true_type)
-    {
-      _M_root() = __x._M_root();
-      _M_leftmost() = __x._M_leftmost();
-      _M_rightmost() = __x._M_rightmost();
-      _M_root()->_M_parent = _M_end();
-
-      __x._M_root() = 0;
-      __x._M_leftmost() = __x._M_end();
-      __x._M_rightmost() = __x._M_end();
-
-      this->_M_impl._M_node_count = __x._M_impl._M_node_count;
-      __x._M_impl._M_node_count = 0;
-    }
-
-  template<typename _Key, typename _Val, typename _KeyOfValue,
-           typename _Compare, typename _Alloc>
-    void
-    _Rb_tree<_Key, _Val, _KeyOfValue, _Compare, _Alloc>::
     _M_move_data(_Rb_tree& __x, std::false_type)
     {
       if (_M_get_Node_allocator() == __x._M_get_Node_allocator())
-	  _M_move_data(__x, std::true_type());
+	_M_move_data(__x, std::true_type());
       else
 	{
 	  _Alloc_node __an(*this);
@@ -1966,26 +1996,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       if (_M_root() == 0)
 	{
 	  if (__t._M_root() != 0)
-	    {
-	      _M_root() = __t._M_root();
-	      _M_leftmost() = __t._M_leftmost();
-	      _M_rightmost() = __t._M_rightmost();
-	      _M_root()->_M_parent = _M_end();
-	      _M_impl._M_node_count = __t._M_impl._M_node_count;
-	      
-	      __t._M_impl._M_reset();
-	    }
+	    _M_impl._M_move_data(__t._M_impl);
 	}
       else if (__t._M_root() == 0)
-	{
-	  __t._M_root() = _M_root();
-	  __t._M_leftmost() = _M_leftmost();
-	  __t._M_rightmost() = _M_rightmost();
-	  __t._M_root()->_M_parent = __t._M_end();
-	  __t._M_impl._M_node_count = _M_impl._M_node_count;
-	  
-	  _M_impl._M_reset();
-	}
+	__t._M_impl._M_move_data(_M_impl);
       else
 	{
 	  std::swap(_M_root(),__t._M_root());
-- 
cgit v1.1


From 352f824f094c380befd7755a331defda5f1edf7f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Dumont?= <fdumont@gcc.gnu.org>
Date: Wed, 7 Dec 2016 21:16:24 +0000
Subject: =?UTF-8?q?2016-12-07=20=20Fran=C3=A7ois=20Dumont=20=20<fdumont@gc?=
 =?UTF-8?q?c.gnu.org>?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

	* include/bits/stl_tree.h
	(_Rb_tree<>::_M_copy(_Const_Link_type, _Base_ptr)): Delete.
	(_Rb_tree<>::_M_copy(const _Rb_tree&, _NodeGen&)): New.
	(_Rb_tree<>::_M_copy(const _Rb_tree&)): New, use latter.
	(_Rb_tree<>(const _Rb_tree&)): Use latter.
	(_Rb_tree<>(const _Rb_tree&, const allocator_type&)): Likewise.
	(_Rb_tree<>::_M_move_data(_Rb_tree&, false_type)): Likewise.
	(_Rb_tree<>::_M_move_assign(_Rb_tree&, false_type)): Likewise.
	(_Rb_tree<>::operator=(const _Rb_tree&)): Likewise.

From-SVN: r243380
---
 libstdc++-v3/ChangeLog               | 10 +++++++
 libstdc++-v3/include/bits/stl_tree.h | 51 ++++++++++++++----------------------
 2 files changed, 29 insertions(+), 32 deletions(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 3feef85..085b95a 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,5 +1,15 @@
 2016-12-07  François Dumont  <fdumont@gcc.gnu.org>
 
+	* include/bits/stl_tree.h
+	(_Rb_tree<>::_M_copy(_Const_Link_type, _Base_ptr)): Delete.
+	(_Rb_tree<>::_M_copy(const _Rb_tree&, _NodeGen&)): New.
+	(_Rb_tree<>::_M_copy(const _Rb_tree&)): New, use latter.
+	(_Rb_tree<>(const _Rb_tree&)): Use latter.
+	(_Rb_tree<>(const _Rb_tree&, const allocator_type&)): Likewise.
+	(_Rb_tree<>::_M_move_data(_Rb_tree&, false_type)): Likewise.
+	(_Rb_tree<>::_M_move_assign(_Rb_tree&, false_type)): Likewise.
+	(_Rb_tree<>::operator=(const _Rb_tree&)): Likewise.
+
 	* include/bits/stl_map.h (map(const map&)): Make default.
 	(map(map&&)): Likewise.
 	(~map()): Likewise.
diff --git a/libstdc++-v3/include/bits/stl_tree.h b/libstdc++-v3/include/bits/stl_tree.h
index f5bb5f7..86d26d5 100644
--- a/libstdc++-v3/include/bits/stl_tree.h
+++ b/libstdc++-v3/include/bits/stl_tree.h
@@ -861,11 +861,22 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	_Link_type
 	_M_copy(_Const_Link_type __x, _Base_ptr __p, _NodeGen&);
 
+      template<typename _NodeGen>
+	_Link_type
+	_M_copy(const _Rb_tree& __x, _NodeGen& __gen)
+	{
+	  _Link_type __root = _M_copy(__x._M_begin(), _M_end(), __gen);
+	  _M_leftmost() = _S_minimum(__root);
+	  _M_rightmost() = _S_maximum(__root);
+	  _M_impl._M_node_count = __x._M_impl._M_node_count;
+	  return __root;
+	}
+
       _Link_type
-      _M_copy(_Const_Link_type __x, _Base_ptr __p)
+      _M_copy(const _Rb_tree& __x)
       {
 	_Alloc_node __an(*this);
-	return _M_copy(__x, __p, __an);
+	return _M_copy(__x, __an);
       }
 
       void
@@ -903,12 +914,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       : _M_impl(__x._M_impl)
       {
 	if (__x._M_root() != 0)
-	  {
-	    _M_root() = _M_copy(__x._M_begin(), _M_end());
-	    _M_leftmost() = _S_minimum(_M_root());
-	    _M_rightmost() = _S_maximum(_M_root());
-	    _M_impl._M_node_count = __x._M_impl._M_node_count;
-	  }
+	  _M_root() = _M_copy(__x);
       }
 
 #if __cplusplus >= 201103L
@@ -920,12 +926,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
       : _M_impl(__x._M_impl._M_key_compare, _Node_allocator(__a))
       {
 	if (__x._M_root() != nullptr)
-	  {
-	    _M_root() = _M_copy(__x._M_begin(), _M_end());
-	    _M_leftmost() = _S_minimum(_M_root());
-	    _M_rightmost() = _S_maximum(_M_root());
-	    _M_impl._M_node_count = __x._M_impl._M_node_count;
-	  }
+	  _M_root() = _M_copy(__x);
       }
 
       _Rb_tree(_Rb_tree&&) = default;
@@ -1595,10 +1596,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	      auto& __val = const_cast<value_type&>(__cval);
 	      return __an(std::move_if_noexcept(__val));
 	    };
-	  _M_root() = _M_copy(__x._M_begin(), _M_end(), __lbd);
-	  _M_leftmost() = _S_minimum(_M_root());
-	  _M_rightmost() = _S_maximum(_M_root());
-	  _M_impl._M_node_count = __x._M_impl._M_node_count;
+	  _M_root() = _M_copy(__x, __lbd);
 	}
     }
 
@@ -1636,10 +1634,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	      auto& __val = const_cast<value_type&>(__cval);
 	      return __roan(std::move_if_noexcept(__val));
 	    };
-	  _M_root() = _M_copy(__x._M_begin(), _M_end(), __lbd);
-	  _M_leftmost() = _S_minimum(_M_root());
-	  _M_rightmost() = _S_maximum(_M_root());
-	  _M_impl._M_node_count = __x._M_impl._M_node_count;
+	  _M_root() = _M_copy(__x, __lbd);
 	  __x.clear();
 	}
     }
@@ -1653,10 +1648,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	     && is_nothrow_move_assignable<_Compare>::value)
     {
       _M_impl._M_key_compare = std::move(__x._M_impl._M_key_compare);
-      constexpr bool __move_storage =
-	  _Alloc_traits::_S_propagate_on_move_assign()
-	  || _Alloc_traits::_S_always_equal();
-      _M_move_assign(__x, __bool_constant<__move_storage>());
+      _M_move_assign(__x, __bool_constant<_Alloc_traits::_S_nothrow_move()>());
       return *this;
     }
 
@@ -1716,12 +1708,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 	  _M_impl._M_reset();
 	  _M_impl._M_key_compare = __x._M_impl._M_key_compare;
 	  if (__x._M_root() != 0)
-	    {
-	      _M_root() = _M_copy(__x._M_begin(), _M_end(), __roan);
-	      _M_leftmost() = _S_minimum(_M_root());
-	      _M_rightmost() = _S_maximum(_M_root());
-	      _M_impl._M_node_count = __x._M_impl._M_node_count;
-	    }
+	    _M_root() = _M_copy(__x, __roan);
 	}
 
       return *this;
-- 
cgit v1.1


From 18697a1de7710088982537008b244a80361cc4e0 Mon Sep 17 00:00:00 2001
From: Alan Modra <amodra@gmail.com>
Date: Thu, 8 Dec 2016 09:46:03 +1030
Subject: sync config/* from binutils

	* elf.m4: Revert 2016-06-21 change.
	* picflag.m4: Likewise.  Revert 2016-04-30 change too.
	* override.m4 (AC_PROG_LEX): Import 2016-01-18 binutils fix
	for PR binutils/19481.

From-SVN: r243417
---
 config/ChangeLog   |  7 +++++++
 config/elf.m4      |  2 +-
 config/override.m4 | 12 ++++++++++++
 config/picflag.m4  |  7 ++++++-
 4 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/config/ChangeLog b/config/ChangeLog
index a823d21..35ddacf 100644
--- a/config/ChangeLog
+++ b/config/ChangeLog
@@ -1,3 +1,10 @@
+2016-12-08  Alan Modra  <amodra@gmail.com>
+
+	* elf.m4: Revert 2016-06-21 change.
+	* picflag.m4: Likewise.  Revert 2016-04-30 change too.
+	* override.m4 (AC_PROG_LEX): Import 2016-01-18 binutils fix
+	for PR binutils/19481.
+
 2016-12-02  Maxim Ostapenko  <m.ostapenko@samsung.com>
 
 	* bootstrap-asan.mk: Replace LSAN_OPTIONS=detect_leaks=0 with
diff --git a/config/elf.m4 b/config/elf.m4
index 5f5cd88..1772a44 100644
--- a/config/elf.m4
+++ b/config/elf.m4
@@ -17,7 +17,7 @@ target_elf=no
 case $target in
   *-darwin* | *-aix* | *-cygwin* | *-mingw* | *-aout* | *-*coff* | \
   *-msdosdjgpp* | *-vms* | *-wince* | *-*-pe* | \
-  alpha*-dec-osf* | hppa[[12]]*-*-hpux* | \
+  alpha*-dec-osf* | *-interix* | hppa[[12]]*-*-hpux* | \
   nvptx-*-none)
     target_elf=no
     ;;
diff --git a/config/override.m4 b/config/override.m4
index 52bd1c3..b5ce654 100644
--- a/config/override.m4
+++ b/config/override.m4
@@ -101,4 +101,16 @@ m4_define([_AC_CHECK_DECLS],
 
 ])
 
+dnl If flex/lex are not found, the top level configure sets LEX to
+dnl "/path_to/missing flex".  When AC_PROG_LEX tries to find the flex
+dnl output file, it calls $LEX to do so, but the current lightweight
+dnl "missing" won't create a file.  This results in an error.
+dnl Avoid calling the bulk of AC_PROG_LEX when $LEX is "missing".
+AC_DEFUN_ONCE([AC_PROG_LEX],
+[AC_CHECK_PROGS(LEX, flex lex, :)
+case "$LEX" in
+  :|*"missing "*) ;;
+  *) _AC_PROG_LEX_YYTEXT_DECL ;;
+esac])
+
 ])
diff --git a/config/picflag.m4 b/config/picflag.m4
index 614421d..2f5b9721e 100644
--- a/config/picflag.m4
+++ b/config/picflag.m4
@@ -27,6 +27,10 @@ case "${$2}" in
 	;;
     i[[34567]]86-*-mingw* | x86_64-*-mingw*)
 	;;
+    i[[34567]]86-*-interix[[3-9]]*)
+	# Interix 3.x gcc -fpic/-fPIC options generate broken code.
+	# Instead, we relocate shared libraries at runtime.
+	;;
     i[[34567]]86-*-nto-qnx*)
 	# QNX uses GNU C++, but need to define -shared option too, otherwise
 	# it will coredump.
@@ -57,7 +61,8 @@ case "${$2}" in
 	$1=-fpic
 	;;
     # FIXME: Simplify to sh*-*-netbsd*?
-    sh-*-netbsdelf* | shl*-*-netbsdelf*)
+    sh-*-netbsdelf* | shl*-*-netbsdelf* | sh5-*-netbsd* | sh5l*-*-netbsd* | \
+      sh64-*-netbsd* | sh64l*-*-netbsd*)
 	$1=-fpic
 	;;
     # Default to -fPIC unless specified otherwise.
-- 
cgit v1.1


From fc3f36f9a1642338da8c0428ea5dc140402556c9 Mon Sep 17 00:00:00 2001
From: Michael Meissner <meissner@linux.vnet.ibm.com>
Date: Wed, 7 Dec 2016 23:52:05 +0000
Subject: re PR target/72717 (ICE: in emit_move_insn, at expr.c:3693 with
 vector shift @ powerpc64le)

[gcc]
2016-12-07  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/72717
	* config/rs6000/rs6000.c (rs6000_expand_vector_init): If the
	V2DImode elements are SUBREG's convert the result into DImode
	rather than failing in emit_move_insn.

[gcc/testsuite]
2016-12-07  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/72717
	* gcc.target/powerpc/pr72717.c: New test.

From-SVN: r243418
---
 gcc/ChangeLog                              |  7 ++++++
 gcc/config/rs6000/rs6000.c                 | 38 ++++++++++++++++++++++--------
 gcc/testsuite/ChangeLog                    |  5 ++++
 gcc/testsuite/gcc.target/powerpc/pr72717.c | 20 ++++++++++++++++
 4 files changed, 60 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr72717.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index cc6dc71..79d5d6d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2016-12-07  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	PR target/72717
+	* config/rs6000/rs6000.c (rs6000_expand_vector_init): If the
+	V2DImode elements are SUBREG's convert the result into DImode
+	rather than failing in emit_move_insn.
+
 2016-12-07  Jakub Jelinek  <jakub@redhat.com>
 
 	* builtins.c (fold_builtin_strstr): Removed.
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index b75a290..f0c1354 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6891,25 +6891,43 @@ rs6000_expand_vector_init (rtx target, rtx vals)
   /* Double word values on VSX can use xxpermdi or lxvdsx.  */
   if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
     {
-      rtx op0 = XVECEXP (vals, 0, 0);
-      rtx op1 = XVECEXP (vals, 0, 1);
+      rtx op[2];
+      size_t i;
+      size_t num_elements = all_same ? 1 : 2;
+      for (i = 0; i < num_elements; i++)
+	{
+	  op[i] = XVECEXP (vals, 0, i);
+	  /* Just in case there is a SUBREG with a smaller mode, do a
+	     conversion.  */
+	  if (GET_MODE (op[i]) != inner_mode)
+	    {
+	      rtx tmp = gen_reg_rtx (inner_mode);
+	      convert_move (tmp, op[i], 0);
+	      op[i] = tmp;
+	    }
+	  /* Allow load with splat double word.  */
+	  else if (MEM_P (op[i]))
+	    {
+	      if (!all_same)
+		op[i] = force_reg (inner_mode, op[i]);
+	    }
+	  else if (!REG_P (op[i]))
+	    op[i] = force_reg (inner_mode, op[i]);
+	}
+
       if (all_same)
 	{
-	  if (!MEM_P (op0) && !REG_P (op0))
-	    op0 = force_reg (inner_mode, op0);
 	  if (mode == V2DFmode)
-	    emit_insn (gen_vsx_splat_v2df (target, op0));
+	    emit_insn (gen_vsx_splat_v2df (target, op[0]));
 	  else
-	    emit_insn (gen_vsx_splat_v2di (target, op0));
+	    emit_insn (gen_vsx_splat_v2di (target, op[0]));
 	}
       else
 	{
-	  op0 = force_reg (inner_mode, op0);
-	  op1 = force_reg (inner_mode, op1);
 	  if (mode == V2DFmode)
-	    emit_insn (gen_vsx_concat_v2df (target, op0, op1));
+	    emit_insn (gen_vsx_concat_v2df (target, op[0], op[1]));
 	  else
-	    emit_insn (gen_vsx_concat_v2di (target, op0, op1));
+	    emit_insn (gen_vsx_concat_v2di (target, op[0], op[1]));
 	}
       return;
     }
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 1c3f290..33f1f86 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-07  Michael Meissner  <meissner@linux.vnet.ibm.com>
+
+	PR target/72717
+	* gcc.target/powerpc/pr72717.c: New test.
+
 2016-12-07  Jakub Jelinek  <jakub@redhat.com>
 
 	* gcc.dg/builtin-strstr-1.c: New test.
diff --git a/gcc/testsuite/gcc.target/powerpc/pr72717.c b/gcc/testsuite/gcc.target/powerpc/pr72717.c
new file mode 100644
index 0000000..1446098
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr72717.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-mcpu=power8 -O2" } */
+
+typedef long V __attribute__((__vector_size__(32)));
+
+extern void foo (V *, V*);
+
+/* This test generated a failure in emit_move_insn.  */
+
+void
+foo(V *p, V *q)
+{
+  V v = *q;
+  *p = v << v[0];
+}
+
+/* { dg-final { scan-assembler-times "vsld" 2 } } */
-- 
cgit v1.1


From ee92e7bafb52d267f52e4c48edada5f279bcf591 Mon Sep 17 00:00:00 2001
From: Martin Sebor <msebor@redhat.com>
Date: Thu, 8 Dec 2016 00:01:33 +0000
Subject: PR c/53562 - Add -Werror= support for -D_FORTIFY_SOURCE /
 __builtin___memcpy_chk

PR c/53562 - Add -Werror= support for -D_FORTIFY_SOURCE / __builtin___memcpy_chk
PR middle-end/77784 - duplicate warning for snprintf when n > object size
PR middle-end/78149 - missing warning on strncpy buffer overflow due to an excessive bound
PR middle-end/78138 - missing warnings on buffer overflow with non-constant source length

gcc/c-family/ChangeLog:

	PR c/53562
	PR middle-end/77784
	PR middle-end/78149
	PR middle-end/78138
	* c.opt (-Wstringop-overflow): New option.

gcc/ChangeLog:

	PR middle-end/77784
	PR middle-end/78149
	PR middle-end/78138

	* builtins.c (expand_builtin_strcat, expand_builtin_strncat): New
	functions.
	(compute_dest_size, get_size_range, check_sizes, check_strncat_sizes)
	(check_memop_sizes): Same.
	(expand_builtin_memcpy): Call check_memop_sizes.
	(expand_builtin_mempcpy): Same.
	(expand_builtin_memset): Same.
	(expand_builtin_bzero): Same.
	(expand_builtin_memory_chk): Call check_sizes.
	(expand_builtin_strcpy): Same.
	(expand_builtin_strncpy): Same.
	(maybe_emit_sprintf_chk_warning): Same.
	(expand_builtin): Handle strcat and strncat.
	(fini_object_sizes): Reset pointers.
	(compute_object_size): New function.
	* gimple-ssa-sprintf.c (pass_sprintf_length::handle_gimple_call):
	Avoid issuing warnings also issued during built-in expansion.
	* doc/invoke.texi (Warning Options): Document -Wstringop-overflow.

gcc/testsuite/ChangeLog:

	PR middle-end/77784
	PR middle-end/78149
	PR middle-end/78138

	* c-c++-common/Wsizeof-pointer-memaccess2.c: Adjust expected diagnostic.
	* g++.dg/ext/builtin-object-size3.C (bar): Same.
	* g++.dg/ext/strncpy-chk1.C: Same.
	* g++.dg/opt/memcpy1.C: Same.
	* g++.dg/torture/Wsizeof-pointer-memaccess1.C: Same.
	* gcc.c-torture/compile/pr55569.c: Disable -Wstringop-overflow.
	* gcc.dg/Wobjsize-1.c: Adjust expected diagnostic.
	* gcc.dg/attr-alloc_size.c: Same.
	* gcc.dg/builtin-stringop-chk-1.c: Adjust expected diagnostic.
	* gcc.dg/builtin-stringop-chk-2.c: Same.
	* gcc.dg/builtin-stringop-chk-4.c: New test.
	* gcc.dg/builtin-strncat-chk-1.c: Adjust expected diagnostic.
	* gcc.dg/memcpy-2.c: Same.
	* gcc.dg/pr40340-1.c: Same.
	* gcc.dg/pr40340-2.c (main): Same.
	* gcc.dg/pr40340-5.c (main): Same.
	* gcc.dg/torture/Wsizeof-pointer-memaccess1.c: Same.
	* gcc.dg/torture/pr71132.c: Disable -Wstringop-overflow.
	* gcc.dg/tree-ssa/builtin-sprintf-warn-1.c: Adjust text of expected
	warning.
	* gfortran.dg/char_length_3.f90: Prune expected warnings.
	* gfortran.dg/pr38868.f: Add expected warnings.

From-SVN: r243419
---
 gcc/ChangeLog                                      |  25 +
 gcc/builtins.c                                     | 673 ++++++++++++++++++---
 gcc/c-family/ChangeLog                             |   8 +
 gcc/c-family/c.opt                                 |  10 +
 gcc/doc/invoke.texi                                |  81 +++
 gcc/gimple-ssa-sprintf.c                           |  61 +-
 gcc/testsuite/ChangeLog                            |  29 +
 .../c-c++-common/Wsizeof-pointer-memaccess2.c      |   2 +-
 gcc/testsuite/g++.dg/ext/builtin-object-size3.C    |   4 +-
 gcc/testsuite/g++.dg/ext/strncpy-chk1.C            |   2 +-
 gcc/testsuite/g++.dg/opt/memcpy1.C                 |   5 +-
 .../g++.dg/torture/Wsizeof-pointer-memaccess1.C    |   2 +-
 .../g++.dg/torture/Wsizeof-pointer-memaccess2.C    |   5 +-
 gcc/testsuite/gcc.c-torture/compile/pr55569.c      |  10 +-
 gcc/testsuite/gcc.dg/Wobjsize-1.c                  |   2 +-
 gcc/testsuite/gcc.dg/attr-alloc_size.c             |   8 +-
 gcc/testsuite/gcc.dg/builtin-stringop-chk-1.c      |  54 +-
 gcc/testsuite/gcc.dg/builtin-stringop-chk-2.c      |   4 +-
 gcc/testsuite/gcc.dg/builtin-stringop-chk-4.c      | 525 ++++++++++++++++
 gcc/testsuite/gcc.dg/builtin-stringop-chk-5.c      | 260 ++++++++
 gcc/testsuite/gcc.dg/builtin-stringop-chk-6.c      | 112 ++++
 gcc/testsuite/gcc.dg/builtin-strncat-chk-1.c       |   8 +-
 gcc/testsuite/gcc.dg/fstack-protector-strong.c     |   2 +-
 gcc/testsuite/gcc.dg/memcpy-2.c                    |   2 +-
 gcc/testsuite/gcc.dg/pr40340-1.c                   |   2 +-
 gcc/testsuite/gcc.dg/pr40340-2.c                   |   2 +-
 gcc/testsuite/gcc.dg/pr40340-5.c                   |   2 +-
 .../gcc.dg/torture/Wsizeof-pointer-memaccess1.c    |   2 +-
 gcc/testsuite/gcc.dg/torture/pr71132.c             |   5 +
 .../gcc.dg/tree-ssa/builtin-sprintf-warn-1.c       |  16 +-
 .../gcc.dg/tree-ssa/builtin-sprintf-warn-3.c       |  42 +-
 gcc/testsuite/gfortran.dg/char_length_3.f90        |   3 +
 gcc/testsuite/gfortran.dg/pr38868.f                |   2 +-
 33 files changed, 1779 insertions(+), 191 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/builtin-stringop-chk-4.c
 create mode 100644 gcc/testsuite/gcc.dg/builtin-stringop-chk-5.c
 create mode 100644 gcc/testsuite/gcc.dg/builtin-stringop-chk-6.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 79d5d6d..6372ff4 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,28 @@
+2016-12-07  Martin Sebor  <msebor@redhat.com>
+
+	PR middle-end/77784
+	PR middle-end/78149
+	PR middle-end/78138
+	
+	* builtins.c (expand_builtin_strcat, expand_builtin_strncat): New
+	functions.
+	(compute_dest_size, get_size_range, check_sizes, check_strncat_sizes)
+	(check_memop_sizes): Same.
+	(expand_builtin_memcpy): Call check_memop_sizes.
+	(expand_builtin_mempcpy): Same.
+	(expand_builtin_memset): Same.
+	(expand_builtin_bzero): Same.
+	(expand_builtin_memory_chk): Call check_sizes.
+	(expand_builtin_strcpy): Same.
+	(expand_builtin_strncpy): Same.
+	(maybe_emit_sprintf_chk_warning): Same.
+	(expand_builtin): Handle strcat and strncat.
+	(fini_object_sizes): Reset pointers.
+	(compute_object_size): New function.
+	* gimple-ssa-sprintf.c (pass_sprintf_length::handle_gimple_call):
+	Avoid issuing warnings also issued during built-in expansion.
+	* doc/invoke.texi (Warning Options): Document -Wstringop-overflow.
+
 2016-12-07  Michael Meissner  <meissner@linux.vnet.ibm.com>
 
 	PR target/72717
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 58ed469..b58056c 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -67,7 +67,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "internal-fn.h"
 #include "case-cfn-macros.h"
 #include "gimple-fold.h"
-
+#include "intl.h"
 
 struct target_builtins default_target_builtins;
 #if SWITCHABLE_TARGET
@@ -125,9 +125,11 @@ static rtx expand_builtin_mempcpy (tree, rtx, machine_mode);
 static rtx expand_builtin_mempcpy_with_bounds (tree, rtx, machine_mode);
 static rtx expand_builtin_mempcpy_args (tree, tree, tree, rtx,
 					machine_mode, int, tree);
+static rtx expand_builtin_strcat (tree, rtx);
 static rtx expand_builtin_strcpy (tree, rtx);
 static rtx expand_builtin_strcpy_args (tree, tree, rtx);
 static rtx expand_builtin_stpcpy (tree, rtx, machine_mode);
+static rtx expand_builtin_strncat (tree, rtx);
 static rtx expand_builtin_strncpy (tree, rtx);
 static rtx builtin_memset_gen_str (void *, HOST_WIDE_INT, machine_mode);
 static rtx expand_builtin_memset (tree, rtx, machine_mode);
@@ -3010,6 +3012,292 @@ expand_builtin_memcpy_args (tree dest, tree src, tree len, rtx target, tree exp)
   return dest_addr;
 }
 
+/* Fill the 2-element RANGE array with the minimum and maximum values
+   EXP is known to have and return true.  Otherwise set both elements
+   to null and return false.  */
+
+static bool
+get_size_range (tree exp, tree range[2])
+{
+  if (tree_fits_uhwi_p (exp))
+    {
+      range[0] = range[1] = exp;
+      return true;
+    }
+
+  if (TREE_CODE (exp) == SSA_NAME)
+    {
+      wide_int min, max;
+      enum value_range_type range_type = get_range_info (exp, &min, &max);
+
+      if (range_type == VR_RANGE)
+	{
+	  /* Interpret the bound in the variable's type.  */
+	  range[0] = wide_int_to_tree (TREE_TYPE (exp), min);
+	  range[1] = wide_int_to_tree (TREE_TYPE (exp), max);
+	  return true;
+	}
+      else if (range_type == VR_ANTI_RANGE)
+	{
+	  /* FIXME: Handle anti-ranges.  */
+	}
+    }
+
+  range[0] = NULL_TREE;
+  range[1] = NULL_TREE;
+  return false;
+}
+
+/* Try to verify that the sizes and lengths of the arguments to a string
+   manipulation function given by EXP are within valid bounds and that
+   the operation does not lead to buffer overflow.  Arguments other than
+   EXP may be null.  When non-null, the arguments have the following
+   meaning:
+   SIZE is the user-supplied size argument to the function (such as in
+   memcpy(d, s, SIZE) or strncpy(d, s, SIZE).  It specifies the exact
+   number of bytes to write.
+   MAXLEN is the user-supplied bound on the length of the source sequence
+   (such as in strncat(d, s, N).  It specifies the upper limit on the number
+   of bytes to write.
+   STR is the source string (such as in strcpy(d, s)) when the expression
+   EXP is a string function call (as opposed to a memory call like memcpy).
+   As an exception, STR can also be an integer denoting the precomputed
+   length of the source string.
+   OBJSIZE is the size of the destination object specified by the last
+   argument to the _chk builtins, typically resulting from the expansion
+   of __builtin_object_size (such as in __builtin___strcpy_chk(d, s,
+   OBJSIZE).
+
+   When SIZE is null LEN is checked to verify that it doesn't exceed
+   SIZE_MAX.
+
+   If the call is successfully verified as safe from buffer overflow
+   the function returns true, otherwise false.  */
+
+static bool
+check_sizes (int opt, tree exp, tree size, tree maxlen, tree str, tree objsize)
+{
+  /* The size of the largest object is half the address space, or
+     SSIZE_MAX.  (This is way too permissive.)  */
+  tree maxobjsize = TYPE_MAX_VALUE (ssizetype);
+
+  tree slen = NULL_TREE;
+
+  /* Set to true when the exact number of bytes written by a string
+     function like strcpy is not known and the only thing that is
+     known is that it must be at least one (for the terminating nul).  */
+  bool at_least_one = false;
+  if (str)
+    {
+      /* STR is normally a pointer to string but as a special case
+	 it can be an integer denoting the length of a string.  */
+      if (TREE_CODE (TREE_TYPE (str)) == POINTER_TYPE)
+	{
+	  /* Try to determine the range of lengths the source string
+	     refers to.  If it can be determined add one to it for
+	     the terminating nul.  Otherwise, set it to one for
+	     the same reason.  */
+	  tree lenrange[2];
+	  get_range_strlen (str, lenrange);
+	  if (lenrange[0])
+	    slen = fold_build2 (PLUS_EXPR, size_type_node, lenrange[0],
+				size_one_node);
+	  else
+	    {
+	      at_least_one = true;
+	      slen = size_one_node;
+	    }
+	}
+      else
+	slen = str;
+    }
+
+  if (!size && !maxlen)
+    {
+      /* When the only available piece of data is the object size
+	 there is nothing to do.  */
+      if (!slen)
+	return true;
+
+      /* Otherwise, when the length of the source sequence is known
+	 (as with strlen), set SIZE to it.  */
+      size = slen;
+    }
+
+  if (!objsize)
+    objsize = maxobjsize;
+
+  /* The SIZE is exact if it's non-null, constant, and in range of
+     unsigned HOST_WIDE_INT.  */
+  bool exactsize = size && tree_fits_uhwi_p (size);
+
+  tree range[2] = { NULL_TREE, NULL_TREE };
+  if (size)
+    get_size_range (size, range);
+
+  /* First check the number of bytes to be written against the maximum
+     object size.  */
+  if (range[0] && tree_int_cst_lt (maxobjsize, range[0]))
+    {
+      location_t loc = tree_nonartificial_location (exp);
+
+      if (range[0] == range[1])
+	warning_at (loc, opt,
+		    "%K%qD: specified size %wu "
+		    "exceeds maximum object size %wu",
+		    exp, get_callee_fndecl (exp),
+		    tree_to_uhwi (range[0]),
+		    tree_to_uhwi (maxobjsize));
+	  else
+	    warning_at (loc, opt,
+			"%K%qD: specified size between %wu and %wu "
+			"exceeds maximum object size %wu",
+			exp, get_callee_fndecl (exp),
+			tree_to_uhwi (range[0]),
+			tree_to_uhwi (range[1]),
+			tree_to_uhwi (maxobjsize));
+      return false;
+    }
+
+  /* Next check the number of bytes to be written against the destination
+     object size.  */
+  if (range[0] || !exactsize || integer_all_onesp (size))
+    {
+      if (range[0]
+	  && ((tree_fits_uhwi_p (objsize)
+	       && tree_int_cst_lt (objsize, range[0]))
+	      || (tree_fits_uhwi_p (size)
+		  && tree_int_cst_lt (size, range[0]))))
+	{
+	  unsigned HOST_WIDE_INT uwir0 = tree_to_uhwi (range[0]);
+
+	  location_t loc = tree_nonartificial_location (exp);
+
+	  if (at_least_one)
+	    warning_at (loc, opt,
+			"%K%qD: writing at least %wu byte into a region "
+			"of size %wu overflows the destination",
+			exp, get_callee_fndecl (exp), uwir0,
+			tree_to_uhwi (objsize));
+	  else if (range[0] == range[1])
+	    warning_at (loc, opt,
+			(uwir0 == 1
+			 ? G_("%K%qD: writing %wu byte into a region "
+			      "of size %wu overflows the destination")
+			 : G_("%K%qD writing %wu bytes into a region "
+			      "of size %wu overflows the destination")),
+			exp, get_callee_fndecl (exp), uwir0,
+			tree_to_uhwi (objsize));
+	  else
+	    warning_at (loc, opt,
+			"%K%qD: writing between %wu and %wu bytes "
+			"into a region of size %wu overflows "
+			"the destination",
+			exp, get_callee_fndecl (exp), uwir0,
+			tree_to_uhwi (range[1]), tree_to_uhwi (objsize));
+
+	  /* Return error when an overflow has been detected.  */
+	  return false;
+	}
+    }
+
+  /* Check the maximum length of the source sequence against the size
+     of the destination object if known, or against the maximum size
+     of an object.  */
+  if (maxlen)
+    {
+      get_size_range (maxlen, range);
+
+      if (range[0] && objsize && tree_fits_uhwi_p (objsize))
+	{
+	  location_t loc = tree_nonartificial_location (exp);
+
+	  if (tree_int_cst_lt (maxobjsize, range[0]))
+	    {
+	      /* Warn about crazy big sizes first since that's more
+		 likely to be meaningful than saying that the bound
+		 is greater than the object size if both are big.  */
+	      if (range[0] == range[1])
+		warning_at (loc, opt,
+			    "%K%qD: specified bound %wu "
+			    "exceeds maximum object size %wu",
+			    exp, get_callee_fndecl (exp),
+			    tree_to_uhwi (range[0]),
+			    tree_to_uhwi (maxobjsize));
+	      else
+		warning_at (loc, opt,
+			    "%K%qD: specified bound between %wu and %wu "
+			    " exceeds maximum object size %wu",
+			    exp, get_callee_fndecl (exp),
+			    tree_to_uhwi (range[0]),
+			    tree_to_uhwi (range[1]),
+			    tree_to_uhwi (maxobjsize));
+
+	      return false;
+	    }
+
+	  if (objsize != maxobjsize && tree_int_cst_lt (objsize, range[0]))
+	    {
+	      if (range[0] == range[1])
+		warning_at (loc, opt,
+			    "%K%qD: specified bound %wu "
+			    "exceeds the size %wu of the destination",
+			    exp, get_callee_fndecl (exp),
+			    tree_to_uhwi (range[0]),
+			    tree_to_uhwi (objsize));
+	      else
+		warning_at (loc, opt,
+			    "%K%qD: specified bound between %wu and %wu "
+			    " exceeds the size %wu of the destination",
+			    exp, get_callee_fndecl (exp),
+			    tree_to_uhwi (range[0]),
+			    tree_to_uhwi (range[1]),
+			    tree_to_uhwi (objsize));
+	      return false;
+	    }
+	}
+    }
+
+  return true;
+}
+
+/* Helper to compute the size of the object referenced by the DEST
+   expression which must be of pointer type, using Object Size type
+   OSTYPE (only the least significant 2 bits are used).  Return
+   the size of the object if successful or NULL when the size cannot
+   be determined.  */
+
+static inline tree
+compute_dest_size (tree dest, int ostype)
+{
+  unsigned HOST_WIDE_INT size;
+  if (compute_builtin_object_size (dest, ostype & 3, &size))
+    return build_int_cst (sizetype, size);
+
+  return NULL_TREE;
+}
+
+/* Helper to determine and check the sizes of the source and the destination
+   of calls to __builtin_{bzero,memcpy,memset}.  Use Object Size type-0
+   regardless of the OPT_Wstringop_overflow_ setting.  Returns true on success
+   (no overflow or invalid sizes), false otherwise.  */
+
+static bool
+check_memop_sizes (tree exp, tree dest, tree size)
+{
+  if (!warn_stringop_overflow)
+    return true;
+
+  /* For functions like memset and memcpy that operate on raw memory
+     try to determine the size of the largest destination object using
+     type-0 Object Size regardless of the object size type specified
+     by the option.  */
+  tree objsize = compute_dest_size (dest, 0);
+
+  return check_sizes (OPT_Wstringop_overflow_, exp,
+		      size, /*maxlen=*/NULL_TREE, /*str=*/NULL_TREE, objsize);
+}
+
 /* Expand a call EXP to the memcpy builtin.
    Return NULL_RTX if we failed, the caller should emit a normal call,
    otherwise try to get the result in TARGET, if convenient (and in
@@ -3021,13 +3309,14 @@ expand_builtin_memcpy (tree exp, rtx target)
   if (!validate_arglist (exp,
  			 POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
     return NULL_RTX;
-  else
-    {
-      tree dest = CALL_EXPR_ARG (exp, 0);
-      tree src = CALL_EXPR_ARG (exp, 1);
-      tree len = CALL_EXPR_ARG (exp, 2);
-      return expand_builtin_memcpy_args (dest, src, len, target, exp);
-    }
+
+  tree dest = CALL_EXPR_ARG (exp, 0);
+  tree src = CALL_EXPR_ARG (exp, 1);
+  tree len = CALL_EXPR_ARG (exp, 2);
+
+  check_memop_sizes (exp, dest, len);
+
+  return expand_builtin_memcpy_args (dest, src, len, target, exp);
 }
 
 /* Expand an instrumented call EXP to the memcpy builtin.
@@ -3075,15 +3364,20 @@ expand_builtin_mempcpy (tree exp, rtx target, machine_mode mode)
   if (!validate_arglist (exp,
  			 POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
     return NULL_RTX;
-  else
-    {
-      tree dest = CALL_EXPR_ARG (exp, 0);
-      tree src = CALL_EXPR_ARG (exp, 1);
-      tree len = CALL_EXPR_ARG (exp, 2);
-      return expand_builtin_mempcpy_args (dest, src, len,
-					  target, mode, /*endp=*/ 1,
-					  exp);
-    }
+
+  tree dest = CALL_EXPR_ARG (exp, 0);
+  tree src = CALL_EXPR_ARG (exp, 1);
+  tree len = CALL_EXPR_ARG (exp, 2);
+
+  /* Avoid expanding mempcpy into memcpy when the call is determined
+     to overflow the buffer.  This also prevents the same overflow
+     from being diagnosed again when expanding memcpy.  */
+  if (!check_memop_sizes (exp, dest, len))
+    return NULL_RTX;
+
+  return expand_builtin_mempcpy_args (dest, src, len,
+				      target, mode, /*endp=*/ 1,
+				      exp);
 }
 
 /* Expand an instrumented call EXP to the mempcpy builtin.
@@ -3255,6 +3549,33 @@ expand_movstr (tree dest, tree src, rtx target, int endp)
   return target;
 }
 
+/* Do some very basic size validation of a call to the strcat builtin
+   given by EXP.  Return NULL_RTX to have the built-in expand to a call
+   to the library function.  */
+
+static rtx
+expand_builtin_strcat (tree exp, rtx)
+{
+  if (!validate_arglist (exp, POINTER_TYPE, POINTER_TYPE, VOID_TYPE)
+      || !warn_stringop_overflow)
+    return NULL_RTX;
+
+  tree dest = CALL_EXPR_ARG (exp, 0);
+  tree src = CALL_EXPR_ARG (exp, 1);
+
+  /* There is no way here to determine the length of the string in
+     the destination to which the SRC string is being appended so
+     just diagnose cases when the source string is longer than
+     the destination object.  */
+
+  tree destsize = compute_dest_size (dest, warn_stringop_overflow - 1);
+
+  check_sizes (OPT_Wstringop_overflow_,
+	       exp, /*size=*/NULL_TREE, /*maxlen=*/NULL_TREE, src, destsize);
+
+  return NULL_RTX;
+}
+
 /* Expand expression EXP, which is a call to the strcpy builtin.  Return
    NULL_RTX if we failed the caller should emit a normal call, otherwise
    try to get the result in TARGET, if convenient (and in mode MODE if that's
@@ -3263,13 +3584,20 @@ expand_movstr (tree dest, tree src, rtx target, int endp)
 static rtx
 expand_builtin_strcpy (tree exp, rtx target)
 {
-  if (validate_arglist (exp, POINTER_TYPE, POINTER_TYPE, VOID_TYPE))
-   {
-     tree dest = CALL_EXPR_ARG (exp, 0);
-     tree src = CALL_EXPR_ARG (exp, 1);
-     return expand_builtin_strcpy_args (dest, src, target);
-   }
-   return NULL_RTX;
+  if (!validate_arglist (exp, POINTER_TYPE, POINTER_TYPE, VOID_TYPE))
+    return NULL_RTX;
+
+  tree dest = CALL_EXPR_ARG (exp, 0);
+  tree src = CALL_EXPR_ARG (exp, 1);
+
+  if (warn_stringop_overflow)
+    {
+      tree destsize = compute_dest_size (dest, warn_stringop_overflow - 1);
+      check_sizes (OPT_Wstringop_overflow_,
+		   exp, /*size=*/NULL_TREE, /*maxlen=*/NULL_TREE, src, destsize);
+    }
+
+  return expand_builtin_strcpy_args (dest, src, target);
 }
 
 /* Helper function to do the actual work for expand_builtin_strcpy.  The
@@ -3377,6 +3705,131 @@ builtin_strncpy_read_str (void *data, HOST_WIDE_INT offset,
   return c_readstr (str + offset, mode);
 }
 
+/* Helper to check the sizes of sequences and the destination of calls
+   to __builtin_strncat and __builtin___strncat_chk.  Returns true on
+   success (no overflow or invalid sizes), false otherwise.  */
+
+static bool
+check_strncat_sizes (tree exp, tree objsize)
+{
+  tree dest = CALL_EXPR_ARG (exp, 0);
+  tree src = CALL_EXPR_ARG (exp, 1);
+  tree maxlen = CALL_EXPR_ARG (exp, 2);
+
+  /* Try to determine the range of lengths that the source expression
+     refers to.  */
+  tree lenrange[2];
+  get_range_strlen (src, lenrange);
+
+  /* Try to verify that the destination is big enough for the shortest
+     string.  */
+
+  if (!objsize && warn_stringop_overflow)
+    {
+      /* If it hasn't been provided by __strncat_chk, try to determine
+	 the size of the destination object into which the source is
+	 being copied.  */
+      objsize = compute_dest_size (dest, warn_stringop_overflow - 1);
+    }
+
+  /* Add one for the terminating nul.  */
+  tree srclen = (lenrange[0]
+		 ? fold_build2 (PLUS_EXPR, size_type_node, lenrange[0],
+				size_one_node)
+		 : NULL_TREE);
+
+  /* Strncat copies at most MAXLEN bytes and always appends the terminating
+     nul so the specified upper bound should never be equal to (or greater
+     than) the size of the destination.  */
+  if (tree_fits_uhwi_p (maxlen) && tree_fits_uhwi_p (objsize)
+      && tree_int_cst_equal (objsize, maxlen))
+    {
+      warning_at (EXPR_LOCATION (exp), OPT_Wstringop_overflow_,
+		  "specified bound %wu "
+		  "equals the size of the destination",
+		  tree_to_uhwi (maxlen));
+
+      return false;
+    }
+
+  if (!srclen
+      || (maxlen && tree_fits_uhwi_p (maxlen)
+	  && tree_fits_uhwi_p (srclen)
+	  && tree_int_cst_lt (maxlen, srclen)))
+    srclen = maxlen;
+
+  /* The number of bytes to write is LEN but check_sizes will also
+     check SRCLEN if LEN's value isn't known.  */
+  return check_sizes (OPT_Wstringop_overflow_,
+		      exp, /*size=*/NULL_TREE, maxlen, srclen, objsize);
+}
+
+/* Similar to expand_builtin_strcat, do some very basic size validation
+   of a call to the strncat builtin given by EXP.  Return NULL_RTX to have
+   the built-in expand to a call to the library function.  */
+
+static rtx
+expand_builtin_strncat (tree exp, rtx)
+{
+  if (!validate_arglist (exp,
+			 POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE)
+      || !warn_stringop_overflow)
+    return NULL_RTX;
+
+  tree dest = CALL_EXPR_ARG (exp, 0);
+  tree src = CALL_EXPR_ARG (exp, 1);
+  /* The upper bound on the number of bytes to write.  */
+  tree maxlen = CALL_EXPR_ARG (exp, 2);
+  /* The length of the source sequence.  */
+  tree slen = c_strlen (src, 1);
+
+  /* Try to determine the range of lengths that the source expression
+     refers to.  */
+  tree lenrange[2];
+  if (slen)
+    lenrange[0] = lenrange[1] = slen;
+  else
+    get_range_strlen (src, lenrange);
+
+  /* Try to verify that the destination is big enough for the shortest
+     string.  First try to determine the size of the destination object
+     into which the source is being copied.  */
+  tree destsize = compute_dest_size (dest, warn_stringop_overflow - 1);
+
+  /* Add one for the terminating nul.  */
+  tree srclen = (lenrange[0]
+		 ? fold_build2 (PLUS_EXPR, size_type_node, lenrange[0],
+				size_one_node)
+		 : NULL_TREE);
+
+  /* Strncat copies at most MAXLEN bytes and always appends the terminating
+     nul so the specified upper bound should never be equal to (or greater
+     than) the size of the destination.  */
+  if (tree_fits_uhwi_p (maxlen) && tree_fits_uhwi_p (destsize)
+      && tree_int_cst_equal (destsize, maxlen))
+    {
+      warning_at (EXPR_LOCATION (exp), OPT_Wstringop_overflow_,
+		  "specified bound %wu "
+		  "equals the size of the destination",
+		  tree_to_uhwi (maxlen));
+
+      return NULL_RTX;
+    }
+
+  if (!srclen
+      || (maxlen && tree_fits_uhwi_p (maxlen)
+	  && tree_fits_uhwi_p (srclen)
+	  && tree_int_cst_lt (maxlen, srclen)))
+    srclen = maxlen;
+
+  /* The number of bytes to write is LEN but check_sizes will also
+     check SRCLEN if LEN's value isn't known.  */
+  check_sizes (OPT_Wstringop_overflow_,
+	       exp, /*size=*/NULL_TREE, maxlen, srclen, destsize);
+
+  return NULL_RTX;
+}
+
 /* Expand expression EXP, which is a call to the strncpy builtin.  Return
    NULL_RTX if we failed the caller should emit a normal call.  */
 
@@ -3390,9 +3843,33 @@ expand_builtin_strncpy (tree exp, rtx target)
     {
       tree dest = CALL_EXPR_ARG (exp, 0);
       tree src = CALL_EXPR_ARG (exp, 1);
+      /* The number of bytes to write (not the maximum).  */
       tree len = CALL_EXPR_ARG (exp, 2);
+      /* The length of the source sequence.  */
       tree slen = c_strlen (src, 1);
 
+      if (warn_stringop_overflow)
+	{
+	  /* Try to determine the range of lengths that the source expression
+	     refers to.  */
+	  tree lenrange[2];
+	  if (slen)
+	    lenrange[0] = lenrange[1] = slen;
+	  else
+	    {
+	      get_range_strlen (src, lenrange);
+	      slen = lenrange[0];
+	    }
+
+	  tree destsize = compute_dest_size (dest,
+					     warn_stringop_overflow - 1);
+
+	  /* The number of bytes to write is LEN but check_sizes will also
+	     check SLEN if LEN's value isn't known.  */
+	  check_sizes (OPT_Wstringop_overflow_,
+		       exp, len, /*maxlen=*/NULL_TREE, slen, destsize);
+	}
+
       /* We must be passed a constant len and src parameter.  */
       if (!tree_fits_uhwi_p (len) || !slen || !tree_fits_uhwi_p (slen))
 	return NULL_RTX;
@@ -3480,13 +3957,14 @@ expand_builtin_memset (tree exp, rtx target, machine_mode mode)
   if (!validate_arglist (exp,
  			 POINTER_TYPE, INTEGER_TYPE, INTEGER_TYPE, VOID_TYPE))
     return NULL_RTX;
-  else
-    {
-      tree dest = CALL_EXPR_ARG (exp, 0);
-      tree val = CALL_EXPR_ARG (exp, 1);
-      tree len = CALL_EXPR_ARG (exp, 2);
-      return expand_builtin_memset_args (dest, val, len, target, mode, exp);
-    }
+
+  tree dest = CALL_EXPR_ARG (exp, 0);
+  tree val = CALL_EXPR_ARG (exp, 1);
+  tree len = CALL_EXPR_ARG (exp, 2);
+
+  check_memop_sizes (exp, dest, len);
+
+  return expand_builtin_memset_args (dest, val, len, target, mode, exp);
 }
 
 /* Expand expression EXP, which is an instrumented call to the memset builtin.
@@ -3667,20 +4145,21 @@ expand_builtin_memset_args (tree dest, tree val, tree len,
 static rtx
 expand_builtin_bzero (tree exp)
 {
-  tree dest, size;
-  location_t loc = EXPR_LOCATION (exp);
-
   if (!validate_arglist (exp, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
     return NULL_RTX;
 
-  dest = CALL_EXPR_ARG (exp, 0);
-  size = CALL_EXPR_ARG (exp, 1);
+  tree dest = CALL_EXPR_ARG (exp, 0);
+  tree size = CALL_EXPR_ARG (exp, 1);
+
+  check_memop_sizes (exp, dest, size);
 
   /* New argument list transforming bzero(ptr x, int y) to
      memset(ptr x, int 0, size_t y).   This is done this way
      so that if it isn't expanded inline, we fallback to
      calling bzero instead of memset.  */
 
+  location_t loc = EXPR_LOCATION (exp);
+
   return expand_builtin_memset_args (dest, integer_zero_node,
 				     fold_convert_loc (loc,
 						       size_type_node, size),
@@ -6205,12 +6684,24 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
 	return target;
       break;
 
+    case BUILT_IN_STRCAT:
+      target = expand_builtin_strcat (exp, target);
+      if (target)
+	return target;
+      break;
+
     case BUILT_IN_STRCPY:
       target = expand_builtin_strcpy (exp, target);
       if (target)
 	return target;
       break;
 
+    case BUILT_IN_STRNCAT:
+      target = expand_builtin_strncat (exp, target);
+      if (target)
+	return target;
+      break;
+
     case BUILT_IN_STRNCPY:
       target = expand_builtin_strncpy (exp, target);
       if (target)
@@ -9052,22 +9543,22 @@ expand_builtin_memory_chk (tree exp, rtx target, machine_mode mode,
   len = CALL_EXPR_ARG (exp, 2);
   size = CALL_EXPR_ARG (exp, 3);
 
-  if (! tree_fits_uhwi_p (size))
+  bool sizes_ok = check_sizes (OPT_Wstringop_overflow_,
+			       exp, len, /*maxlen=*/NULL_TREE,
+			       /*str=*/NULL_TREE, size);
+
+  if (!tree_fits_uhwi_p (size))
     return NULL_RTX;
 
   if (tree_fits_uhwi_p (len) || integer_all_onesp (size))
     {
-      tree fn;
-
-      if (! integer_all_onesp (size) && tree_int_cst_lt (size, len))
-	{
-	  warning_at (tree_nonartificial_location (exp),
-		      0, "%Kcall to %D will always overflow destination buffer",
-		      exp, get_callee_fndecl (exp));
-	  return NULL_RTX;
-	}
+      /* Avoid transforming the checking call to an ordinary one when
+	 an overflow has been detected or when the call couldn't be
+	 validated because the size is not constant.  */
+      if (!sizes_ok && !integer_all_onesp (size) && tree_int_cst_lt (size, len))
+	return NULL_RTX;
 
-      fn = NULL_TREE;
+      tree fn = NULL_TREE;
       /* If __builtin_mem{cpy,pcpy,move,set}_chk is used, assume
 	 mem{cpy,pcpy,move,set} is available.  */
       switch (fcode)
@@ -9153,68 +9644,68 @@ expand_builtin_memory_chk (tree exp, rtx target, machine_mode mode,
 static void
 maybe_emit_chk_warning (tree exp, enum built_in_function fcode)
 {
-  int is_strlen = 0;
-  tree len, size;
-  location_t loc = tree_nonartificial_location (exp);
+  /* The source string.  */
+  tree srcstr = NULL_TREE;
+  /* The size of the destination object.  */
+  tree objsize = NULL_TREE;
+  /* The string that is being concatenated with (as in __strcat_chk)
+     or null if it isn't.  */
+  tree catstr = NULL_TREE;
+  /* The maximum length of the source sequence in a bounded operation
+     (such as __strncat_chk) or null if the operation isn't bounded
+     (such as __strcat_chk).  */
+  tree maxlen = NULL_TREE;
 
   switch (fcode)
     {
     case BUILT_IN_STRCPY_CHK:
     case BUILT_IN_STPCPY_CHK:
-    /* For __strcat_chk the warning will be emitted only if overflowing
-       by at least strlen (dest) + 1 bytes.  */
+      srcstr = CALL_EXPR_ARG (exp, 1);
+      objsize = CALL_EXPR_ARG (exp, 2);
+      break;
+
     case BUILT_IN_STRCAT_CHK:
-      len = CALL_EXPR_ARG (exp, 1);
-      size = CALL_EXPR_ARG (exp, 2);
-      is_strlen = 1;
+      /* For __strcat_chk the warning will be emitted only if overflowing
+	 by at least strlen (dest) + 1 bytes.  */
+      catstr = CALL_EXPR_ARG (exp, 0);
+      srcstr = CALL_EXPR_ARG (exp, 1);
+      objsize = CALL_EXPR_ARG (exp, 2);
       break;
+
     case BUILT_IN_STRNCAT_CHK:
+      catstr = CALL_EXPR_ARG (exp, 0);
+      srcstr = CALL_EXPR_ARG (exp, 1);
+      maxlen = CALL_EXPR_ARG (exp, 2);
+      objsize = CALL_EXPR_ARG (exp, 3);
+      break;
+
     case BUILT_IN_STRNCPY_CHK:
     case BUILT_IN_STPNCPY_CHK:
-      len = CALL_EXPR_ARG (exp, 2);
-      size = CALL_EXPR_ARG (exp, 3);
+      srcstr = CALL_EXPR_ARG (exp, 1);
+      maxlen = CALL_EXPR_ARG (exp, 2);
+      objsize = CALL_EXPR_ARG (exp, 3);
       break;
+
     case BUILT_IN_SNPRINTF_CHK:
     case BUILT_IN_VSNPRINTF_CHK:
-      len = CALL_EXPR_ARG (exp, 1);
-      size = CALL_EXPR_ARG (exp, 3);
+      maxlen = CALL_EXPR_ARG (exp, 1);
+      objsize = CALL_EXPR_ARG (exp, 3);
       break;
     default:
       gcc_unreachable ();
     }
 
-  if (!len || !size)
-    return;
-
-  if (! tree_fits_uhwi_p (size) || integer_all_onesp (size))
-    return;
-
-  if (is_strlen)
+  if (catstr && maxlen)
     {
-      len = c_strlen (len, 1);
-      if (! len || ! tree_fits_uhwi_p (len) || tree_int_cst_lt (len, size))
+      /* Check __strncat_chk.  There is no way to determine the length
+	 of the string to which the source string is being appended so
+	 just warn when the length of the source string is not known.  */
+      if (!check_strncat_sizes (exp, objsize))
 	return;
     }
-  else if (fcode == BUILT_IN_STRNCAT_CHK)
-    {
-      tree src = CALL_EXPR_ARG (exp, 1);
-      if (! src || ! tree_fits_uhwi_p (len) || tree_int_cst_lt (len, size))
-	return;
-      src = c_strlen (src, 1);
-      if (! src || ! tree_fits_uhwi_p (src))
-	{
-	  warning_at (loc, 0, "%Kcall to %D might overflow destination buffer",
-		      exp, get_callee_fndecl (exp));
-	  return;
-	}
-      else if (tree_int_cst_lt (src, size))
-	return;
-    }
-  else if (! tree_fits_uhwi_p (len) || ! tree_int_cst_lt (size, len))
-    return;
 
-  warning_at (loc, 0, "%Kcall to %D will always overflow destination buffer",
-	      exp, get_callee_fndecl (exp));
+  check_sizes (OPT_Wstringop_overflow_, exp,
+	       /*size=*/NULL_TREE, maxlen, srcstr, objsize);
 }
 
 /* Emit warning if a buffer overflow is detected at compile time
@@ -9268,10 +9759,10 @@ maybe_emit_sprintf_chk_warning (tree exp, enum built_in_function fcode)
   else
     return;
 
-  if (! tree_int_cst_lt (len, size))
-    warning_at (tree_nonartificial_location (exp),
-		0, "%Kcall to %D will always overflow destination buffer",
-		exp, get_callee_fndecl (exp));
+  /* Add one for the terminating nul.  */
+  len = fold_build2 (PLUS_EXPR, TREE_TYPE (len), len, size_one_node);
+  check_sizes (OPT_Wstringop_overflow_,
+	       exp, /*size=*/NULL_TREE, /*maxlen=*/NULL_TREE, len, size);
 }
 
 /* Emit warning if a free is called with address of a variable.  */
diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog
index 5890798..aba0b14 100644
--- a/gcc/c-family/ChangeLog
+++ b/gcc/c-family/ChangeLog
@@ -1,3 +1,11 @@
+2016-12-07  Martin Sebor  <msebor@redhat.com>
+
+	PR c/53562
+	PR middle-end/77784
+	PR middle-end/78149
+	PR middle-end/78138
+	* c.opt (-Wstringop-overflow): New option.
+
 2016-12-02  Maxim Ostapenko  <m.ostapenko@samsung.com>
 
 	* c-attribs.c (asan odr indicator): New attribute.
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 2d47d54..288e4ce 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -684,6 +684,16 @@ Wsizeof-array-argument
 C ObjC C++ ObjC++ Var(warn_sizeof_array_argument) Warning Init(1)
 Warn when sizeof is applied on a parameter declared as an array.
 
+Wstringop-overflow
+C ObjC C++ ObjC++ Warning Alias(Wstringop-overflow=, 2, 0)
+Warn about buffer overflow in string manipulation functions like memcpy
+and strcpy.
+
+Wstringop-overflow=
+C ObjC C++ ObjC++ Joined RejectNegative UInteger Var(warn_stringop_overflow) Init(2) Warning
+Under the control of Object Size type, warn about buffer overflow in string
+manipulation functions like memcpy and strcpy.
+
 Wsuggest-attribute=format
 C ObjC C++ ObjC++ Var(warn_suggest_attribute_format) Warning
 Warn about functions which might be candidates for format attributes.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 034ae98..5622c0f 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -304,6 +304,7 @@ Objective-C and Objective-C++ Dialects}.
 -Wsizeof-pointer-memaccess  -Wsizeof-array-argument @gol
 -Wstack-protector -Wstack-usage=@var{len} -Wstrict-aliasing @gol
 -Wstrict-aliasing=n -Wstrict-overflow -Wstrict-overflow=@var{n} @gol
+-Wstringop-overflow=@var{n} @gol
 -Wsuggest-attribute=@r{[}pure@r{|}const@r{|}noreturn@r{|}format@r{]} @gol
 -Wsuggest-final-types @gol -Wsuggest-final-methods -Wsuggest-override @gol
 -Wmissing-format-attribute -Wsubobject-linkage @gol
@@ -4936,6 +4937,86 @@ comparisons, so this warning level gives a very large number of
 false positives.
 @end table
 
+@item -Wstringop-overflow
+@itemx -Wstringop-overflow=@var{type}
+@opindex Wstringop-overflow
+@opindex Wno-stringop-overflow
+Warn for calls to string manipulation functions such as @code{memcpy} and
+@code{strcpy} that are determined to overflow the destination buffer.  The
+optional argument is one greater than the type of Object Size Checking to
+perform to determine the size of the destination.  @xref{Object Size Checking}.
+The argument is meaningful only for functions that operate on character arrays
+but not for raw memory functions like @code{memcpy} which always make use
+of Object Size type-0.  The option also warns for calls that specify a size
+in excess of the largest possible object, which is at most @code{SIZE_MAX / 2} bytes.
+The option produces the best results with optimization enabled but can detect
+a small subset of simple buffer overflows even without optimization in
+calls to the GCC built-in functions like @code{__builtin_memcpy} that
+correspond to the standard functions.  In any case, the option warns about
+just a subset of buffer overflows detected by the corresponding overflow
+checking built-ins.  For example, the option will issue a warning for
+the @code{strcpy} call below because it copies at least 5 characters
+(the string @code{"blue"} including the terminating NUL) into the buffer
+of size 4.
+
+@smallexample
+enum Color @{ blue, purple, yellow @};
+const char* f (enum Color clr)
+@{
+  static char buf [4];
+  const char *str;
+  switch (clr)
+    @{
+      case blue: str = "blue"; break;
+      case purple: str = "purple"; break;
+      case yellow: str = "yellow"; break;
+    @}
+
+  return strcpy (buf, str);   // warning here
+@}
+@end smallexample
+
+Option @option{-Wstringop-overflow=2} is enabled by default.
+
+@table @gcctabopt
+@item -Wstringop-overflow
+@itemx -Wstringop-overflow=1
+@opindex Wstringop-overflow
+@opindex Wno-stringop-overflow
+The @option{-Wstringop-overflow=1} option uses type-zero Object Size Checking
+to determine the sizes of destination objects.  Note that the default
+setting is level 2.  At level 1 the option will not warn for writes past
+the end of subobjects of larger objects accessed by pointers unless the
+size of the largest surrounding object is known.  When the destination may
+be one of several objects it is assumed to be the largest one of them.  On
+Linux systems, when optimization is enabled at this setting the option warns
+for the same code as when the @code{_FORTIFY_SOURCE} macro is defined to
+a non-zero value.
+
+@item -Wstringop-overflow=2
+The @option{-Wstringop-overflow=2} option uses type-one Object Size Checking
+to determine the sizes of destination objects.  At this setting the option
+will warn about overflows when writing to members of the largest complete
+objects whose exact size is known.  It will, however, not warn for excessive
+writes to the same members of unknown objects referenced by pointers since
+they may point to arrays containing unknown numbers of elements.
+
+@item -Wstringop-overflow=3
+The @option{-Wstringop-overflow=3} option uses type-two Object Size Checking
+to determine the sizes of destination objects.  At this setting the option
+warns about overflowing the smallest object or data member.  This is the
+most restrictive setting of the option that may result in warnings for safe
+code.
+
+@item -Wstringop-overflow=4
+The @option{-Wstringop-overflow=4} option uses type-three Object Size Checking
+to determine the sizes of destination objects.  At this setting the option
+will warn about overflowing any data members, and when the destination is
+one of several objects it uses the size of the largest of them to decide
+whether to issue a warning.  Similarly to @option{-Wstringop-overflow=3} this
+setting of the option may result in warnings for benign code.
+@end table
+
 @item -Wsuggest-attribute=@r{[}pure@r{|}const@r{|}noreturn@r{|}format@r{]}
 @opindex Wsuggest-attribute=
 @opindex Wno-suggest-attribute=
diff --git a/gcc/gimple-ssa-sprintf.c b/gcc/gimple-ssa-sprintf.c
index e86c4dc..8de9a1e 100644
--- a/gcc/gimple-ssa-sprintf.c
+++ b/gcc/gimple-ssa-sprintf.c
@@ -774,7 +774,23 @@ get_width_and_precision (const conversion_spec &spec,
   if (spec.star_width)
     {
       if (TREE_CODE (spec.star_width) == INTEGER_CST)
-	width = abs (tree_to_shwi (spec.star_width));
+	{
+	  width = tree_to_shwi (spec.star_width);
+	  if (width < 0)
+	    {
+	      if (width == HOST_WIDE_INT_MIN)
+		{
+		  /* Avoid undefined behavior due to negating a minimum.
+		     This case will be diagnosed since it will result in
+		     more than INT_MAX bytes on output, either by the
+		     directive itself (when INT_MAX < HOST_WIDE_INT_MAX)
+		     or by the format function itself.  */
+		  width = HOST_WIDE_INT_MAX;
+		}
+	      else
+		width = -width;
+	    }
+	}
       else
 	width = HOST_WIDE_INT_MIN;
     }
@@ -1261,9 +1277,9 @@ format_floating (const conversion_spec &spec, int width, int prec)
 	res.range.min = 2 + (prec < 0 ? 6 : prec);
 
 	/* Compute the maximum just once.  */
-	static const int f_max[] = {
-	  format_floating_max (double_type_node, 'f'),
-	  format_floating_max (long_double_type_node, 'f')
+	const int f_max[] = {
+	  format_floating_max (double_type_node, 'f', prec),
+	  format_floating_max (long_double_type_node, 'f', prec)
 	};
 	res.range.max = width == INT_MIN ? HOST_WIDE_INT_MAX : f_max [ldbl];
 
@@ -1279,9 +1295,9 @@ format_floating (const conversion_spec &spec, int width, int prec)
 	res.range.min = 2 + (prec < 0 ? 6 : prec);
 
 	/* Compute the maximum just once.  */
-	static const int g_max[] = {
-	  format_floating_max (double_type_node, 'g'),
-	  format_floating_max (long_double_type_node, 'g')
+	const int g_max[] = {
+	  format_floating_max (double_type_node, 'g', prec),
+	  format_floating_max (long_double_type_node, 'g', prec)
 	};
 	res.range.max = width == INT_MIN ? HOST_WIDE_INT_MAX : g_max [ldbl];
 
@@ -2743,19 +2759,27 @@ pass_sprintf_length::handle_gimple_call (gimple_stmt_iterator *gsi)
 	{
 	  dstsize = tree_to_uhwi (size);
 	  /* No object can be larger than SIZE_MAX bytes (half the address
-	     space) on the target.  This imposes a limit that's one byte
-	     less than that.
+	     space) on the target.
 	     The functions are defined only for output of at most INT_MAX
 	     bytes.  Specifying a bound in excess of that limit effectively
 	     defeats the bounds checking (and on some implementations such
 	     as Solaris cause the function to fail with EINVAL).  */
-	  if (dstsize >= target_size_max () / 2)
-	    warning_at (gimple_location (info.callstmt), OPT_Wformat_length_,
-			"specified destination size %wu is too large",
-			dstsize);
+	  if (dstsize > target_size_max () / 2)
+	    {
+	      /* Avoid warning if -Wstringop-overflow is specified since
+		 it also warns for the same thing though only for the
+		 checking built-ins.  */
+	      if ((idx_objsize == HOST_WIDE_INT_M1U
+		   || !warn_stringop_overflow))
+		warning_at (gimple_location (info.callstmt),
+			    OPT_Wformat_length_,
+			    "specified bound %wu exceeds maximum object size "
+			    "%wu",
+			    dstsize, target_size_max () / 2);
+	    }
 	  else if (dstsize > target_int_max ())
 	    warning_at (gimple_location (info.callstmt), OPT_Wformat_length_,
-			"specified destination size %wu exceeds %<INT_MAX %>",
+			"specified bound %wu exceeds %<INT_MAX %>",
 			dstsize);
 	}
       else if (TREE_CODE (size) == SSA_NAME)
@@ -2800,10 +2824,15 @@ pass_sprintf_length::handle_gimple_call (gimple_stmt_iterator *gsi)
       info.objsize = dstsize < objsize ? dstsize : objsize;
 
       if (info.bounded
-	  && dstsize < target_size_max () / 2 && objsize < dstsize)
+	  && dstsize < target_size_max () / 2 && objsize < dstsize
+	  /* Avoid warning if -Wstringop-overflow is specified since
+	     it also warns for the same thing though only for the
+	     checking built-ins.  */
+	  && (idx_objsize == HOST_WIDE_INT_M1U
+	      || !warn_stringop_overflow))
 	{
 	  warning_at (gimple_location (info.callstmt), OPT_Wformat_length_,
-		      "specified size %wu exceeds the size %wu "
+		      "specified bound %wu exceeds the size %wu "
 		      "of the destination object", dstsize, objsize);
 	}
     }
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 33f1f86..27225c2 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,32 @@
+2016-12-07  Martin Sebor  <msebor@redhat.com>
+
+	PR middle-end/77784
+	PR middle-end/78149
+	PR middle-end/78138
+
+	* c-c++-common/Wsizeof-pointer-memaccess2.c: Adjust expected diagnostic.
+	* g++.dg/ext/builtin-object-size3.C (bar): Same.
+	* g++.dg/ext/strncpy-chk1.C: Same.
+	* g++.dg/opt/memcpy1.C: Same.
+	* g++.dg/torture/Wsizeof-pointer-memaccess1.C: Same.
+	* gcc.c-torture/compile/pr55569.c: Disable -Wstringop-overflow.
+	* gcc.dg/Wobjsize-1.c: Adjust expected diagnostic.
+	* gcc.dg/attr-alloc_size.c: Same.
+	* gcc.dg/builtin-stringop-chk-1.c: Adjust expected diagnostic.
+	* gcc.dg/builtin-stringop-chk-2.c: Same.
+	* gcc.dg/builtin-stringop-chk-4.c: New test.
+	* gcc.dg/builtin-strncat-chk-1.c: Adjust expected diagnostic.
+	* gcc.dg/memcpy-2.c: Same.
+	* gcc.dg/pr40340-1.c: Same.
+	* gcc.dg/pr40340-2.c (main): Same.
+	* gcc.dg/pr40340-5.c (main): Same.
+	* gcc.dg/torture/Wsizeof-pointer-memaccess1.c: Same.
+	* gcc.dg/torture/pr71132.c: Disable -Wstringop-overflow.
+	* gcc.dg/tree-ssa/builtin-sprintf-warn-1.c: Adjust text of expected
+	warning.
+	* gfortran.dg/char_length_3.f90: Prune expected warnings.
+	* gfortran.dg/pr38868.f: Add expected warnings.
+
 2016-12-07  Michael Meissner  <meissner@linux.vnet.ibm.com>
 
 	PR target/72717
diff --git a/gcc/testsuite/c-c++-common/Wsizeof-pointer-memaccess2.c b/gcc/testsuite/c-c++-common/Wsizeof-pointer-memaccess2.c
index d9ec7e2..9a02373 100644
--- a/gcc/testsuite/c-c++-common/Wsizeof-pointer-memaccess2.c
+++ b/gcc/testsuite/c-c++-common/Wsizeof-pointer-memaccess2.c
@@ -481,4 +481,4 @@ f4 (char *x, char **y, int z, char w[64])
   stpncpy (x, s3, sizeof (s3));
 }
 
-/* { dg-prune-output "\[\n\r\]*will always overflow\[\n\r\]*" } */
+/* { dg-prune-output "\[\n\r\]*writing\[\n\r\]*" } */
diff --git a/gcc/testsuite/g++.dg/ext/builtin-object-size3.C b/gcc/testsuite/g++.dg/ext/builtin-object-size3.C
index 09263e5..0207f9a 100644
--- a/gcc/testsuite/g++.dg/ext/builtin-object-size3.C
+++ b/gcc/testsuite/g++.dg/ext/builtin-object-size3.C
@@ -20,7 +20,7 @@ bar ()
 {
   int *p = new int;
   int *q = new int[4];
-  MEMCPY (p, "abcdefghijklmnopqrstuvwxyz", sizeof (int) + 1);		// { dg-warning "will always overflow destination buffer" }
-  MEMCPY (q, "abcdefghijklmnopqrstuvwxyz", 4 * sizeof (int) + 1);	// { dg-warning "will always overflow destination buffer" }
+  MEMCPY (p, "abcdefghijklmnopqrstuvwxyz", sizeof (int) + 1);		// { dg-warning "writing" }
+  MEMCPY (q, "abcdefghijklmnopqrstuvwxyz", 4 * sizeof (int) + 1);	// { dg-warning "writing" }
   baz (p, q);
 }
diff --git a/gcc/testsuite/g++.dg/ext/strncpy-chk1.C b/gcc/testsuite/g++.dg/ext/strncpy-chk1.C
index ebafc99..d67d6bf 100644
--- a/gcc/testsuite/g++.dg/ext/strncpy-chk1.C
+++ b/gcc/testsuite/g++.dg/ext/strncpy-chk1.C
@@ -9,7 +9,7 @@ struct B { char z[50]; };
 inline void
 foo (char *dest, const char *__restrict src, __SIZE_TYPE__ n)
 {
-  __builtin___strncpy_chk (dest, src, n, __builtin_object_size (dest, 0));	// { dg-warning "will always overflow" }
+  __builtin___strncpy_chk (dest, src, n, __builtin_object_size (dest, 0));	// { dg-warning "specified bound 36 exceeds the size 35 of the destination" }
 }
 
 void bar (const char *, int);
diff --git a/gcc/testsuite/g++.dg/opt/memcpy1.C b/gcc/testsuite/g++.dg/opt/memcpy1.C
index f291345..e2b1dd2 100644
--- a/gcc/testsuite/g++.dg/opt/memcpy1.C
+++ b/gcc/testsuite/g++.dg/opt/memcpy1.C
@@ -59,7 +59,10 @@ namespace CS
     }
     uint8 Clip ()
     {
-      __builtin_memcpy (this->OutP, InP, OutV * sizeof (csVector2));
+      // OutV is initialized to SIZE_MAX in the ctor above causing
+      // the multiplication below to produce a very large number
+      // in excess of the maximum possible object size (SIZE_MAX/2).
+      __builtin_memcpy (this->OutP, InP, OutV * sizeof (csVector2));   // { dg-warning "specified size \[0-9\]+ exceeds maximum object size" }
     }
   };
 }
diff --git a/gcc/testsuite/g++.dg/torture/Wsizeof-pointer-memaccess1.C b/gcc/testsuite/g++.dg/torture/Wsizeof-pointer-memaccess1.C
index 8b5c33e..2e6189b 100644
--- a/gcc/testsuite/g++.dg/torture/Wsizeof-pointer-memaccess1.C
+++ b/gcc/testsuite/g++.dg/torture/Wsizeof-pointer-memaccess1.C
@@ -713,4 +713,4 @@ f4 (char *x, char **y, int z, char w[64])
   return z;
 }
 
-// { dg-prune-output "\[\n\r\]*will always overflow\[\n\r\]*" }
+// { dg-prune-output "\[\n\r\]*overflows\[\n\r\]*" }
diff --git a/gcc/testsuite/g++.dg/torture/Wsizeof-pointer-memaccess2.C b/gcc/testsuite/g++.dg/torture/Wsizeof-pointer-memaccess2.C
index 0e99568..a216f47 100644
--- a/gcc/testsuite/g++.dg/torture/Wsizeof-pointer-memaccess2.C
+++ b/gcc/testsuite/g++.dg/torture/Wsizeof-pointer-memaccess2.C
@@ -1,7 +1,8 @@
 // Test -Wsizeof-pointer-memaccess warnings.
 // { dg-do compile }
-// { dg-options "-Wall -Wno-sizeof-array-argument" }
-// Test just twice, once with -O0 non-fortified, once with -O2 fortified.
+// { dg-options "-Wall -Wno-sizeof-array-argument -Wno-stringop-overflow" }
+// Test just twice, once with -O0 non-fortified, once with -O2 fortified,
+// suppressing buffer overflow warnings.
 // { dg-skip-if "" { *-*-* }  { "*" } { "-O0" "-O2" } }
 // { dg-skip-if "" { *-*-* }  { "-flto" } { "" } }
 
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr55569.c b/gcc/testsuite/gcc.c-torture/compile/pr55569.c
index cffbcfc..cf274cd 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr55569.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr55569.c
@@ -1,4 +1,4 @@
-/* { dg-options "-ftree-vectorize" } */
+/* { dg-options "-Wno-stringop-overflow -ftree-vectorize" } */
 int *bar (void);
 
 void
@@ -6,6 +6,10 @@ foo (void)
 {
   long x;
   int *y = bar ();
-    for (x = -1 / sizeof (int); x; --x, ++y)
-       *y = 0;
+
+  /* The loop below may be optimized to a call to memset with a size
+     that's in excess of the maximum object size.  This is diagnosed
+     by the -Wstringop-overflow option.  */
+  for (x = -1 / sizeof (int); x; --x, ++y)
+    *y = 0;
 }
diff --git a/gcc/testsuite/gcc.dg/Wobjsize-1.c b/gcc/testsuite/gcc.dg/Wobjsize-1.c
index 291cfb9..211e068 100644
--- a/gcc/testsuite/gcc.dg/Wobjsize-1.c
+++ b/gcc/testsuite/gcc.dg/Wobjsize-1.c
@@ -10,6 +10,6 @@ int main(int argc, char **argv)
   return 0;
 }
 
-/* { dg-warning "will always overflow destination buffer" "" { target *-*-* } 6 } */
+/* { dg-warning "writing" "" { target *-*-* } 6 } */
 /* { dg-message "file included" "included" { target *-*-* } 0 } */
 /* { dg-message "inlined from" "inlined" { target *-*-* } 0 } */
diff --git a/gcc/testsuite/gcc.dg/attr-alloc_size.c b/gcc/testsuite/gcc.dg/attr-alloc_size.c
index e8129ce..f50ba7c 100644
--- a/gcc/testsuite/gcc.dg/attr-alloc_size.c
+++ b/gcc/testsuite/gcc.dg/attr-alloc_size.c
@@ -22,15 +22,15 @@ test (void)
   strcpy (p, "Hello");
   p = malloc1 (6);
   strcpy (p, "Hello");
-  strcpy (p, "Hello World"); /* { dg-warning "will always overflow" "strcpy" } */
+  strcpy (p, "Hello World"); /* { dg-warning "writing" "strcpy" } */
   p = malloc2 (__INT_MAX__ >= 1700000 ? 424242 : __INT_MAX__ / 4, 6);
   strcpy (p, "World");
-  strcpy (p, "Hello World"); /* { dg-warning "will always overflow" "strcpy" } */
+  strcpy (p, "Hello World"); /* { dg-warning "writing" "strcpy" } */
   p = calloc1 (2, 5);
   strcpy (p, "World");
-  strcpy (p, "Hello World"); /* { dg-warning "will always overflow" "strcpy" } */
+  strcpy (p, "Hello World"); /* { dg-warning "writing" "strcpy" } */
   p = calloc2 (2, __INT_MAX__ >= 1700000 ? 424242 : __INT_MAX__ / 4, 5);
   strcpy (p, "World");
-  strcpy (p, "Hello World"); /* { dg-warning "will always overflow" "strcpy" } */
+  strcpy (p, "Hello World"); /* { dg-warning "writing" "strcpy" } */
 }
 
diff --git a/gcc/testsuite/gcc.dg/builtin-stringop-chk-1.c b/gcc/testsuite/gcc.dg/builtin-stringop-chk-1.c
index e491ff5..7689287 100644
--- a/gcc/testsuite/gcc.dg/builtin-stringop-chk-1.c
+++ b/gcc/testsuite/gcc.dg/builtin-stringop-chk-1.c
@@ -8,7 +8,10 @@
 extern void abort (void);
 
 #include "../gcc.c-torture/execute/builtins/chk.h"
-#include <stdarg.h>
+
+#define va_list    __builtin_va_list
+#define va_start   __builtin_va_start
+#define va_end     __builtin_va_end
 
 volatile void *vx;
 char buf1[20];
@@ -22,60 +25,61 @@ test (int arg, ...)
   char *p = &buf1[10], *q;
 
   memcpy (&buf2[19], "ab", 1);
-  memcpy (&buf2[19], "ab", 2); /* { dg-warning "will always overflow" "memcpy" } */
+  memcpy (&buf2[19], "ab", 2); /* { dg-warning "writing 2 bytes into a region of size 1" "memcpy" } */
   vx = mempcpy (&buf2[19], "ab", 1);
-  vx = mempcpy (&buf2[19], "ab", 2); /* { dg-warning "will always overflow" "mempcpy" } */
+  vx = mempcpy (&buf2[19], "ab", 2); /* { dg-warning "writing 2 " "mempcpy" } */
   memmove (&buf2[18], &buf1[10], 2);
-  memmove (&buf2[18], &buf1[10], 3); /* { dg-warning "will always overflow" "memmove" } */
+  memmove (&buf2[18], &buf1[10], 3); /* { dg-warning "writing 3 " "memmove" } */
   memset (&buf2[16], 'a', 4);
-  memset (&buf2[15], 'b', 6); /* { dg-warning "will always overflow" "memset" } */
+  memset (&buf2[15], 'b', 6); /* { dg-warning "writing 6 " "memset" } */
   strcpy (&buf2[18], "a");
-  strcpy (&buf2[18], "ab"); /* { dg-warning "will always overflow" "strcpy" } */
+  strcpy (&buf2[18], "ab"); /* { dg-warning "writing 3 " "strcpy" } */
   vx = stpcpy (&buf2[18], "a");
-  vx = stpcpy (&buf2[18], "ab"); /* { dg-warning "will always overflow" "stpcpy" } */
+  vx = stpcpy (&buf2[18], "ab"); /* { dg-warning "writing 3" "stpcpy" } */
   strncpy (&buf2[18], "a", 2);
-  strncpy (&buf2[18], "a", 3); /* { dg-warning "will always overflow" "strncpy" } */
+  strncpy (&buf2[18], "a", 3); /* { dg-warning "specified bound 3 exceeds the size 2 of the destination" "strncpy" } */
   strncpy (&buf2[18], "abc", 2);
-  strncpy (&buf2[18], "abc", 3); /* { dg-warning "will always overflow" "strncpy" } */
+  strncpy (&buf2[18], "abc", 3); /* { dg-warning "specified bound 3 exceeds the size 2 of the destination" "strncpy" } */
   memset (buf2, '\0', sizeof (buf2));
   strcat (&buf2[18], "a");
   memset (buf2, '\0', sizeof (buf2));
-  strcat (&buf2[18], "ab"); /* { dg-warning "will always overflow" "strcat" } */
+  strcat (&buf2[18], "ab"); /* { dg-warning "writing 3 " "strcat" } */
   sprintf (&buf2[18], "%s", buf1);
   sprintf (&buf2[18], "%s", "a");
-  sprintf (&buf2[18], "%s", "ab"); /* { dg-warning "will always overflow" "sprintf" } */
+  sprintf (&buf2[18], "%s", "ab"); /* { dg-warning "writing 3 " "sprintf" } */
   sprintf (&buf2[18], "a");
-  sprintf (&buf2[18], "ab"); /* { dg-warning "will always overflow" "sprintf" } */
+  sprintf (&buf2[18], "ab"); /* { dg-warning "writing 3 " "sprintf" } */
   snprintf (&buf2[18], 2, "%d", x);
   /* N argument to snprintf is the size of the buffer.
      Although this particular call wouldn't overflow buf2,
      incorrect buffer size was passed to it and therefore
      we want a warning and runtime failure.  */
-  snprintf (&buf2[18], 3, "%d", x); /* { dg-warning "will always overflow" "snprintf" } */
+  snprintf (&buf2[18], 3, "%d", x); /* { dg-warning "specified bound 3 exceeds the size 2 of the destination" "snprintf" } */
   va_start (ap, arg);
   vsprintf (&buf2[18], "a", ap);
   va_end (ap);
+
   va_start (ap, arg);
-  vsprintf (&buf2[18], "ab", ap); /* { dg-warning "will always overflow" "vsprintf" } */
+  vsprintf (&buf2[18], "ab", ap); /* { dg-warning "writing 3" "vsprintf" } */
   va_end (ap);
   va_start (ap, arg);
   vsnprintf (&buf2[18], 2, "%s", ap);
   va_end (ap);
   va_start (ap, arg);
   /* See snprintf above.  */
-  vsnprintf (&buf2[18], 3, "%s", ap); /* { dg-warning "will always overflow" "vsnprintf" } */
+  vsnprintf (&buf2[18], 3, "%s", ap); /* { dg-warning "specified bound 3 exceeds the size 2 of the destination" "vsnprintf" } */
   va_end (ap);
 
   p = p + 10;
   memset (p, 'd', 0);
-  q = strcpy (p, ""); /* { dg-warning "will always overflow" "strcpy" } */
+  q = strcpy (p, ""); /* { dg-warning "writing 1 " "strcpy" } */
 
   /* This invokes undefined behavior, since we are past the end of buf1.  */
   p = p + 10;
-  memset (p, 'd', 1); /* { dg-warning "will always overflow" "memset" } */
+  memset (p, 'd', 1); /* { dg-warning "writing 1 " "memset" } */
 
   memset (q, 'd', 0);
-  memset (q, 'd', 1); /* { dg-warning "will always overflow" "memset" } */
+  memset (q, 'd', 1); /* { dg-warning "writing 1 " "memset" } */
   q = q - 10;
   memset (q, 'd', 10);
 }
@@ -90,26 +94,26 @@ void
 test2 (const H h)
 {
   char c;
-  strncpy (&c, str, 3); /* { dg-warning "will always overflow" "strncpy" } */
+  strncpy (&c, str, 3); /* { dg-warning "specified bound 3 exceeds the size 1 of the destination" "strncpy" } */
 
   struct { char b[4]; } x;
-  sprintf (x.b, "%s", "ABCD"); /* { dg-warning "will always overflow" "sprintf" } */
+  sprintf (x.b, "%s", "ABCD"); /* { dg-warning "writing 5" "sprintf" } */
 
   unsigned int i;
-  memcpy (&i, &h, sizeof (h)); /* { dg-warning "will always overflow" "memcpy" } */
+  memcpy (&i, &h, sizeof (h)); /* { dg-warning "writing 16 " "memcpy" } */
 
   unsigned char buf[21];
-  memset (buf + 16, 0, 8); /* { dg-warning "will always overflow" "memset" } */
+  memset (buf + 16, 0, 8); /* { dg-warning "writing 8 " "memset" } */
 
   typedef struct { int i, j, k, l; } S;
   S *s[3];
-  memset (s, 0, sizeof (S) * 3); /* { dg-warning "will always overflow" "memset" } */
+  memset (s, 0, sizeof (S) * 3); /* { dg-warning "writing 48 " "memset" } */
 
   struct T { char a[8]; char b[4]; char c[10]; } t;
-  stpcpy (t.c,"Testing..."); /* { dg-warning "will always overflow" "stpcpy" } */
+  stpcpy (t.c,"Testing..."); /* { dg-warning "writing" "stpcpy" } */
 
   char b1[7];
   char b2[4];
   memset (b1, 0, sizeof (b1));
-  memset (b2, 0, sizeof (b1)); /* { dg-warning "will always overflow" "memset" } */
+  memset (b2, 0, sizeof (b1)); /* { dg-warning "writing 7" "memset" } */
 }
diff --git a/gcc/testsuite/gcc.dg/builtin-stringop-chk-2.c b/gcc/testsuite/gcc.dg/builtin-stringop-chk-2.c
index 7c2bb60..d537fb0 100644
--- a/gcc/testsuite/gcc.dg/builtin-stringop-chk-2.c
+++ b/gcc/testsuite/gcc.dg/builtin-stringop-chk-2.c
@@ -6,7 +6,7 @@
 /* { dg-options "-O2 -ftrack-macro-expansion=0" } */
 
 #include "../gcc.c-torture/execute/builtins/chk.h"
-   
+
 void *bar (int);
 extern void *malloc (__SIZE_TYPE__);
 
@@ -115,7 +115,7 @@ baz (const struct A *x, const unsigned char *z)
 	  else
 	    do
 	      {
-		memcpy (e, d, 513); /* { dg-warning "will always overflow" "memcpy" } */
+		memcpy (e, d, 513); /* { dg-warning "writing" "memcpy" } */
 		e += 4;
 	      }
 	    while (--h);
diff --git a/gcc/testsuite/gcc.dg/builtin-stringop-chk-4.c b/gcc/testsuite/gcc.dg/builtin-stringop-chk-4.c
new file mode 100644
index 0000000..4857bda
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/builtin-stringop-chk-4.c
@@ -0,0 +1,525 @@
+/* Test exercising buffer overflow warnings emitted for raw memory and
+   string manipulation builtins involving ranges of sizes and strings
+   of varying lengths.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftrack-macro-expansion=0" } */
+
+#define INT_MAX      __INT_MAX__
+#define PTRDIFF_MAX  __PTRDIFF_MAX__
+#define SIZE_MAX     __SIZE_MAX__
+
+typedef __PTRDIFF_TYPE__ ptrdiff_t;
+typedef __SIZE_TYPE__    size_t;
+
+static const size_t ssize_max = SIZE_MAX / 2;
+static const size_t size_max = SIZE_MAX;
+
+extern signed char    schar_val;
+extern signed short   sshrt_val;
+extern signed int     sint_val;
+extern signed long    slong_val;
+extern unsigned char  uchar_val;
+extern unsigned short ushrt_val;
+extern unsigned int   uint_val;
+extern unsigned long  ulong_val;
+
+#define memcpy(d, s, n) (memcpy ((d), (s), (n)), sink ((d)))
+extern void* (memcpy)(void*, const void*, size_t);
+
+#define mempcpy(d, s, n) (mempcpy ((d), (s), (n)), sink ((d)))
+extern void* (mempcpy)(void*, const void*, size_t);
+
+#define memset(d, c, n) (memset ((d), (c), (n)), sink ((d)))
+extern void* (memset)(void*, int, size_t);
+
+#define bzero(d, n) (bzero ((d), (n)), sink ((d)))
+extern void (bzero)(void*, size_t);
+
+#define strcat(d, s) (strcat ((d), (s)), sink ((d)))
+extern char* (strcat)(char*, const char*);
+
+#define strncat(d, s, n) (strncat ((d), (s), (n)), sink ((d)))
+extern char* (strncat)(char*, const char*, size_t);
+
+#define strcpy(d, s) (strcpy ((d), (s)), sink ((d)))
+extern char* (strcpy)(char*, const char*);
+
+#define strncpy(d, s, n) (strncpy ((d), (s), (n)), sink ((d)))
+extern char* (strncpy)(char*, const char*, size_t);
+
+void sink (void*);
+
+/* Function to "generate" a random number each time it's called.  Declared
+   (but not defined) and used to prevent GCC from making assumptions about
+   their values based on the variables used in the tested expressions.  */
+size_t random_unsigned_value (void);
+ptrdiff_t random_signed_value (void);
+
+/* Return a random unsigned value between MIN and MAX.  */
+
+static inline size_t
+unsigned_range (size_t min, size_t max)
+{
+  const size_t val = random_unsigned_value ();
+  return val < min || max < val ? min : val;
+}
+
+/* Return a random signed value between MIN and MAX.  */
+
+static inline ptrdiff_t
+signed_range (ptrdiff_t min, ptrdiff_t max)
+{
+  const ptrdiff_t val = random_signed_value ();
+  return val < min || max < val ? min : val;
+}
+
+/* For brevity.  */
+#define UR(min, max)   unsigned_range (min, max)
+#define SR(min, max)   signed_range (min, max)
+
+/* Return a pointer to a constant string whose length is at least MINLEN
+   and at most 10.  */
+static inline const char*
+string_range (size_t minlen)
+{
+  static const char str[] = "0123456789";
+
+  const size_t len = unsigned_range (minlen, sizeof str - 1);
+
+  switch (len)
+    {
+    case 10: return "0123456789";
+    case  9: return "012345678";
+    case  8: return "01234567";
+    case  7: return "0123456";
+    case  6: return "012345";
+    case  5: return "01234";
+    case  4: return "0123";
+    case  3: return "012";
+    case  2: return "01";
+    case  1: return "0";
+    case  0: return "";
+    }
+}
+
+#define S(minlen)   string_range (minlen)
+
+/* Test memcpy with a number of bytes bounded by a known range.  */
+
+void test_memcpy_range (void *d, const void *s)
+{
+  char buf[5];
+
+  memcpy (buf, s, UR (0, 5));
+  memcpy (buf, s, UR (1, 5));
+  memcpy (buf, s, UR (2, 5));
+  memcpy (buf, s, UR (3, 5));
+  memcpy (buf, s, UR (4, 5));
+
+  memcpy (buf, s, UR (6, 7));  /* { dg-warning "writing between 6 and 7 bytes into a region of size 5 overflows the destination" } */
+
+  memcpy (buf + 5, s, UR (1, 2));  /* { dg-warning "writing between 1 and 2 bytes into a region of size 0 overflows the destination" } */
+
+  memcpy (buf + size_max, s, UR (1, 2));  /* { dg-warning "writing between 1 and 2 bytes into a region of size 0 overflows the destination" "excessive pointer offset" { xfail *-*-* } } */
+
+  memcpy (buf, s, UR (ssize_max, size_max));   /* { dg-warning "writing between \[0-9\]+ and \[0-9\]+ bytes into a region of size 5 overflows the destination" } */
+  memcpy (buf, s, UR (ssize_max + 1, size_max));  /* { dg-warning "specified size between \[0-9\]+ and \[0-9\]+ exceeds maximum object size" } */
+  memcpy (buf, s, UR (size_max - 1, size_max));  /* { dg-warning "specified size between \[0-9\]+ and \[0-9\]+ exceeds maximum object size" } */
+
+  /* Exercise memcpy into a destination of unknown size with excessive
+     number of bytes.  */
+  memcpy (d, s, UR (ssize_max, size_max));
+  memcpy (d, s, UR (ssize_max + 1, size_max));   /* { dg-warning "specified size between \[0-9\]+ and \[0-9\]+ exceeds maximum object size" } */
+
+  memcpy (buf, s, SR (-1, 1));
+  memcpy (buf, s, SR (-3, 2));
+  memcpy (buf, s, SR (-5, 3));
+  memcpy (buf, s, SR (-7, 4));
+  memcpy (buf, s, SR (-9, 5));
+  memcpy (buf, s, SR (-11, 6));
+
+  memcpy (d, s, SR (-1, 1));
+  memcpy (d, s, SR (-3, 2));
+  memcpy (d, s, SR (-5, 3));
+  memcpy (d, s, SR (-7, 4));
+  memcpy (d, s, SR (-9, 5));
+  memcpy (d, s, SR (-11, 6));
+
+  memcpy (buf, s, SR (-2, -1));   /* { dg-warning "specified size between \[0-9\]+ and \[0-9\]+ exceeds maximum object size" } */
+  memcpy (d, s, SR (-2, -1));   /* { dg-warning "specified size between \[0-9\]+ and \[0-9\]+ exceeds maximum object size" } */
+
+  /* Even though the following calls are bounded by the range of N's
+     type they must not cause a warning for obvious reasons.  */
+  memcpy (buf, s, schar_val);
+  memcpy (buf, s, sshrt_val);
+  memcpy (buf, s, sint_val);
+  memcpy (buf, s, slong_val);
+
+  memcpy (buf, s, uchar_val);
+  memcpy (buf, s, ushrt_val);
+  memcpy (buf, s, uint_val);
+  memcpy (buf, s, ulong_val);
+
+  memcpy (buf, s, schar_val + 1);
+  memcpy (buf, s, sshrt_val + 2);
+  memcpy (buf, s, sint_val + 3);
+  memcpy (buf, s, slong_val + 4);
+
+  memcpy (d, s, uchar_val + 5);
+  memcpy (d, s, ushrt_val + 6);
+  memcpy (d, s, uint_val + 7);
+  memcpy (d, s, ulong_val + 8);
+
+  memcpy (d, s, schar_val);
+  memcpy (d, s, sshrt_val);
+  memcpy (d, s, sint_val);
+  memcpy (d, s, slong_val);
+
+  memcpy (d, s, uchar_val);
+  memcpy (d, s, ushrt_val);
+  memcpy (d, s, uint_val);
+  memcpy (d, s, ulong_val);
+
+  memcpy (d, s, schar_val + 1);
+  memcpy (d, s, sshrt_val + 2);
+  memcpy (d, s, sint_val + 3);
+  memcpy (d, s, slong_val + 4);
+
+  memcpy (d, s, uchar_val + 5);
+  memcpy (d, s, ushrt_val + 6);
+  memcpy (d, s, uint_val + 7);
+  memcpy (d, s, ulong_val + 8);
+}
+
+/* Test mempcpy with a number of bytes bounded by a known range.  */
+
+void test_mempcpy_range (void *d, const void *s)
+{
+  char buf[5];
+
+  mempcpy (buf, s, UR (0, 5));
+  mempcpy (buf, s, UR (1, 5));
+  mempcpy (buf, s, UR (2, 5));
+  mempcpy (buf, s, UR (3, 5));
+  mempcpy (buf, s, UR (4, 5));
+
+  mempcpy (buf, s, UR (6, 7));  /* { dg-warning "writing between 6 and 7 bytes into a region of size 5 overflows the destination" } */
+
+  mempcpy (buf, s, UR (6, 7));  /* { dg-warning "writing between 6 and 7 bytes into a region of size 5 overflows the destination" } */
+
+  mempcpy (buf, s, UR (ssize_max, size_max));   /* { dg-warning "writing between \[0-9\]+ and \[0-9\]+ bytes into a region of size 5 overflows the destination" } */
+  mempcpy (buf, s, UR (ssize_max + 1, size_max));  /* { dg-warning "specified size between \[0-9\]+ and \[0-9\]+ exceeds maximum object size" } */
+  mempcpy (buf, s, UR (size_max - 1, size_max));  /* { dg-warning "specified size between \[0-9\]+ and \[0-9\]+ exceeds maximum object size" } */
+
+  /* Exercise mempcpy into a destination of unknown size with excessive
+     number of bytes.  */
+  mempcpy (d, s, UR (ssize_max, size_max));
+  mempcpy (d, s, UR (ssize_max + 1, size_max));   /* { dg-warning "specified size between \[0-9\]+ and \[0-9\]+ exceeds maximum object size" } */
+}
+
+/* Test memset with a number of bytes bounded by a known range.  */
+
+void test_memset_range (void *d)
+{
+  char buf[5];
+
+  memset (buf, 0, UR (0, 5));
+  memset (buf, 0, UR (1, 5));
+  memset (buf, 0, UR (2, 5));
+  memset (buf, 0, UR (3, 5));
+  memset (buf, 0, UR (4, 5));
+
+  memset (buf, 0, UR (6, 7));  /* { dg-warning "writing between 6 and 7 bytes into a region of size 5 overflows the destination" } */
+
+  memset (buf, 0, UR (6, 7));  /* { dg-warning "writing between 6 and 7 bytes into a region of size 5 overflows the destination" } */
+
+  memset (buf, 0, UR (ssize_max, size_max));   /* { dg-warning "writing between \[0-9\]+ and \[0-9\]+ bytes into a region of size 5 overflows the destination" } */
+  memset (buf, 0, UR (ssize_max + 1, size_max));  /* { dg-warning "specified size between \[0-9\]+ and \[0-9\]+ exceeds maximum object size" } */
+  memset (buf, 0, UR (size_max - 1, size_max));  /* { dg-warning "specified size between \[0-9\]+ and \[0-9\]+ exceeds maximum object size" } */
+
+  /* Exercise memset into a destination of unknown size with excessive
+     number of bytes.  */
+  memset (d, 0, UR (ssize_max, size_max));
+  memset (d, 0, UR (ssize_max + 1, size_max));   /* { dg-warning "specified size between \[0-9\]+ and \[0-9\]+ exceeds maximum object size" } */
+}
+
+/* Test bzero with a number of bytes bounded by a known range.  */
+
+void test_bzero_range (void *d)
+{
+  char buf[5];
+
+  bzero (buf, UR (0, 5));
+  bzero (buf, UR (1, 5));
+  bzero (buf, UR (2, 5));
+  bzero (buf, UR (3, 5));
+  bzero (buf, UR (4, 5));
+
+  bzero (buf, UR (6, 7));  /* { dg-warning "writing between 6 and 7 bytes into a region of size 5 overflows the destination" } */
+
+  bzero (buf, UR (6, 7));  /* { dg-warning "writing between 6 and 7 bytes into a region of size 5 overflows the destination" } */
+
+  bzero (buf, UR (ssize_max, size_max));   /* { dg-warning "writing between \[0-9\]+ and \[0-9\]+ bytes into a region of size 5 overflows the destination" } */
+  bzero (buf, UR (ssize_max + 1, size_max));  /* { dg-warning "specified size between \[0-9\]+ and \[0-9\]+ exceeds maximum object size" } */
+  bzero (buf, UR (size_max - 1, size_max));  /* { dg-warning "specified size between \[0-9\]+ and \[0-9\]+ exceeds maximum object size" } */
+
+  /* Exercise bzero into a destination of unknown size with excessive
+     number of bytes.  */
+  bzero (d, UR (ssize_max, size_max));
+  bzero (d, UR (ssize_max + 1, size_max));   /* { dg-warning "specified size between \[0-9\]+ and \[0-9\]+ exceeds maximum object size" } */
+}
+
+/* Test strcat with an argument referencing a non-constant string of
+   lengths in a known range.  */
+
+void test_strcat_range (void)
+{
+  char buf[5] = "";
+
+  strcat (buf, S (0));
+  strcat (buf, S (1));
+  strcat (buf, S (2));
+  strcat (buf, S (3));
+  strcat (buf, S (4));
+  strcat (buf, S (5));   /* { dg-warning "writing 6 bytes into a region of size 5 " } */
+
+  {
+    /* The implementation of the warning isn't smart enough to determine
+       the length of the string in the buffer so it assumes it's empty
+       and issues the warning basically for the same cases as strcpy.  */
+    char buf2[5] = "12";
+    strcat (buf2, S (4));   /* { dg-warning "writing 5 bytes into a region of size 3" "strcat to a non-empty string" { xfail *-*-* } } */
+  }
+}
+
+/* Verify that strcpy with an unknown source string doesn't cause
+   warnings unless the destination has zero size.  */
+
+void test_strcpy (const char *src)
+{
+  struct A { char a[2]; char b[3]; } a;
+
+  strcpy (a.a, src);
+  strcpy (a.a + 1, src);
+
+  /* There must be enough room in the destination for the terminating
+     nul, otherwise verify that a warning is issued.
+     The following works as expected with __builtin___strcpy_chk and
+     __builtin_object_size because they see that the offset is from
+     the a.a array.  When optimization is enabled, it isn't detected
+     by __builtin_strcpy (when __builtin_object_size isn't called
+     explicitly) because by the time it's seen the offset has been
+     transformed to one from the beginning of the whole object, i.e.,
+     as if it had been written as (char*)&a + 2 .  Then the destination
+     size is taken to be the rest of the whole object.  It is detected
+     by __builtin_strcpy when optimization is not enabled because then
+     the &a.a + 2 expression is preserved.  But without optimization
+     an ordinary call to strcpy isn't transformed to __builtin_strcpy
+     and so it can't be detected here (since the rest of the test
+     relies on optimization).  */
+  strcpy (a.a + 2, src);    /* { dg-warning "writing at least 1 byte into a region of size 0 " "strcpy into empty substring" { xfail *-*-* } } */
+
+  /* This does work.  */
+  strcpy (a.a + 5, src);    /* { dg-warning "writing at least 1 byte into a region of size 0 " } */
+
+  /* As does this.  */
+  strcpy (a.a + 17, src);    /* { dg-warning "writing at least 1 byte into a region of size 0 " } */
+}
+
+/* Test strcpy with a non-constant source string of length in a known
+   range.  */
+
+void test_strcpy_range (void)
+{
+  char buf[5];
+
+  strcpy (buf, S (0));
+  strcpy (buf, S (1));
+  strcpy (buf, S (2));
+  strcpy (buf, S (4));
+  strcpy (buf, S (5));   /* { dg-warning "writing 6 bytes into a region of size 5 " } */
+  strcpy (buf, S (6));   /* { dg-warning "writing 7 bytes into a region of size 5 " } */
+  strcpy (buf, S (7));   /* { dg-warning "writing 8 bytes into a region of size 5 " } */
+  strcpy (buf, S (8));   /* { dg-warning "writing 9 bytes into a region of size 5 " } */
+  strcpy (buf, S (9));   /* { dg-warning "writing 10 bytes into a region of size 5 " } */
+  strcpy (buf, S (10));   /* { dg-warning "writing 11 bytes into a region of size 5 " } */
+
+  strcpy (buf + 5, S (0));   /* { dg-warning "writing 1 byte into a region of size 0 " } */
+
+  strcpy (buf + 17, S (0));   /* { dg-warning "writing 1 byte into a region of size 0 " } */
+}
+
+/* Test strncat with an argument referencing a non-constant string of
+   lengths in a known range.  */
+
+void test_strncat_range (void)
+{
+  char buf[5] = "";
+
+  strncat (buf, S (0), 0);
+  strncat (buf, S (0), 1);
+  strncat (buf, S (0), 2);
+  strncat (buf, S (0), 3);
+  strncat (buf, S (0), 4);
+
+  strncat (buf + 5, S (0), 0);
+
+  strncat (buf + 5, S (0), 1);   /* { dg-warning "specified bound 1 exceeds the size 0 of the destination " } */
+  strncat (buf + 5, S (1), 1);   /* { dg-warning "specified bound 1 exceeds the size 0 of the destination " } */
+
+  /* Strncat always appends a terminating null after copying the N
+     characters so the following triggers a warning pointing out
+     that specifying sizeof(buf) as the upper bound may cause
+     the nul to overflow the destination.  */
+  strncat (buf, S (0), 5);   /* { dg-warning "specified bound 5 equals the size of the destination" } */
+  strncat (buf, S (0), 6);   /* { dg-warning "specified bound 6 exceeds the size 5 of the destination" } */
+
+  strncat (buf, S (1), 0);
+  strncat (buf, S (1), 1);
+  strncat (buf, S (1), 2);
+  strncat (buf, S (1), 3);
+  strncat (buf, S (1), 4);
+  strncat (buf, S (1), 5);   /* { dg-warning "specified bound 5 equals the size of the destination" } */
+  strncat (buf, S (1), 6);   /* { dg-warning "specified bound 6 exceeds the size 5 of the destination" } */
+  strncat (buf, S (2), 6);   /* { dg-warning "specified bound 6 exceeds the size 5 of the destination" } */
+
+  /* The following could just as well say "writing 6 bytes into a region
+     of size 5".  Either would be correct and probably equally clear
+     in this case.  But when the length of the source string is not known
+     at all then the bound warning seems clearer.  */
+  strncat (buf, S (5), 6);   /* { dg-warning "specified bound 6 exceeds the size 5 of the destination " } */
+  strncat (buf, S (7), 6);   /* { dg-warning "specified bound 6 exceeds the size 5 of the destination" } */
+
+  {
+    /* The implementation of the warning isn't smart enough to determine
+       the length of the string in the buffer so it assumes it's empty
+       and issues the warning basically for the same cases as strncpy.  */
+    char buf2[5] = "12";
+    strncat (buf2, S (4), 4);   /* { dg-warning "writing 5 bytes into a region of size 3" "strncat to a non-empty string" { xfail *-*-* } } */
+  }
+}
+
+/* Test strncat_chk with an argument referencing a non-constant string
+   of lengths in a known range.  */
+
+void test_strncat_chk_range (char *d)
+{
+  char buf[5] = "";
+
+#define strncat_chk(d, s, n) \
+  __builtin___strncat_chk ((d), (s), (n), __builtin_object_size (d, 1));
+
+  strncat_chk (buf, S (0), 1);
+  strncat_chk (buf, S (0), 2);
+  strncat_chk (buf, S (0), 3);
+  strncat_chk (buf, S (0), 4);
+  strncat_chk (buf, S (0), 5);   /* { dg-warning "specified bound 5 equals the size of the destination " } */
+
+  strncat_chk (buf, S (5), 1);
+  strncat_chk (buf, S (5), 2);
+  strncat_chk (buf, S (5), 3);
+  strncat_chk (buf, S (5), 4);
+  strncat_chk (buf, S (5), 5);   /* { dg-warning "specified bound 5 equals the size of the destination " } */
+
+  strncat_chk (buf, S (5), 10);   /* { dg-warning "specified bound \[0-9\]+ exceeds the size 5 of the destination " } */
+
+  strncat_chk (d, S (5), size_max);   /* { dg-warning "specified bound \[0-9\]+ exceeds maximum object size " } */
+}
+
+/* Test strncpy with a non-constant source string of length in a known
+   range and a constant number of bytes.  */
+
+void test_strncpy_string_range (char *d)
+{
+  char buf[5];
+
+  strncpy (buf, S (0), 0);
+  strncpy (buf, S (0), 1);
+  strncpy (buf, S (0), 2);
+  strncpy (buf, S (0), 3);
+  strncpy (buf, S (0), 4);
+  strncpy (buf, S (0), 5);
+  strncpy (buf, S (0), 6);   /* { dg-warning "writing 6 bytes into a region of size 5 " } */
+
+  strncpy (buf, S (6), 4);
+  strncpy (buf, S (7), 5);
+  strncpy (buf, S (8), 6);   /* { dg-warning "writing 6 bytes into a region of size 5 " } */
+
+  strncpy (buf, S (1), ssize_max - 1);   /* { dg-warning "writing \[0-9\]+ bytes into a region of size 5" } */
+  strncpy (buf, S (2), ssize_max);   /* { dg-warning "writing \[0-9\]+ bytes into a region of size 5" } */
+  strncpy (buf, S (3), ssize_max + 1);   /* { dg-warning "specified size \[0-9\]+ exceeds maximum object size" } */
+  strncpy (buf, S (4), size_max);   /* { dg-warning "specified size \[0-9\]+ exceeds maximum object size" } */
+
+  /* Exercise strncpy into a destination of unknown size with a valid
+     and invalid constant number of bytes.  */
+  strncpy (d, S (1), ssize_max - 1);
+  strncpy (d, S (2), ssize_max);
+  strncpy (d, S (3), ssize_max + 1);   /* { dg-warning "specified size \[0-9\]+ exceeds maximum object size" } */
+  strncpy (d, S (4), size_max);   /* { dg-warning "specified size \[0-9\]+ exceeds maximum object size" } */
+}
+
+/* Test strncpy with a non-constant source string of length in a known
+   range and a non-constant number of bytes also in a known range.  */
+
+void test_strncpy_string_count_range (char *dst, const char *src)
+{
+  char buf[5];
+
+  strncpy (buf, S (0), UR (0, 1));
+  strncpy (buf, S (0), UR (0, 2));
+  strncpy (buf, S (0), UR (0, 3));
+  strncpy (buf, S (0), UR (0, 4));
+  strncpy (buf, S (0), UR (0, 5));
+  strncpy (buf, S (0), UR (0, 6));
+  strncpy (buf, S (0), UR (1, 6));
+  strncpy (buf, S (0), UR (2, 6));
+  strncpy (buf, S (0), UR (3, 6));
+  strncpy (buf, S (0), UR (4, 6));
+  strncpy (buf, S (0), UR (5, 6));
+
+  strncpy (buf, S (9), UR (0, 1));
+  strncpy (buf, S (8), UR (0, 2));
+  strncpy (buf, S (7), UR (0, 3));
+  strncpy (buf, S (6), UR (0, 4));
+  strncpy (buf, S (8), UR (0, 5));
+  strncpy (buf, S (7), UR (0, 6));
+  strncpy (buf, S (6), UR (1, 6));
+  strncpy (buf, S (5), UR (2, 6));
+  strncpy (buf, S (9), UR (3, 6));
+  strncpy (buf, S (8), UR (4, 6));
+  strncpy (buf, S (7), UR (5, 6));
+
+  strncpy (buf, S (0), UR (6, 7));   /* { dg-warning "writing between 6 and 7 bytes into a region of size 5 " } */
+  strncpy (buf, S (1), UR (7, 8));   /* { dg-warning "writing between 7 and 8 bytes into a region of size 5 " } */
+  strncpy (buf, S (2), UR (ssize_max, ssize_max + 1));   /* { dg-warning "writing between \[0-9\]+ and \[0-9\]+ bytes into a region of size 5 " } */
+
+  strncpy (buf, S (2), UR (ssize_max + 1, ssize_max + 2));   /* { dg-warning "specified size between \[0-9\]+ and \[0-9\]+ exceeds maximum object size" } */
+
+  strncpy (buf + 5, S (0), UR (0, 1));
+  strncpy (buf + 5, S (1), UR (0, 1));
+  strncpy (buf + 5, S (0), UR (1, 2));   /* { dg-warning "writing between 1 and 2 bytes into a region of size 0 " } */
+  strncpy (buf + 5, S (1), UR (1, 2));   /* { dg-warning "writing between 1 and 2 bytes into a region of size 0 " } */
+
+  strncpy (buf, src, UR (0, 1));
+  strncpy (buf, src, UR (0, 2));
+  strncpy (buf, src, UR (0, 3));
+  strncpy (buf, src, UR (0, 4));
+  strncpy (buf, src, UR (0, 5));
+  strncpy (buf, src, UR (0, 6));
+  strncpy (buf, src, UR (1, 6));
+  strncpy (buf, src, UR (2, 6));
+  strncpy (buf, src, UR (3, 6));
+  strncpy (buf, src, UR (4, 6));
+  strncpy (buf, src, UR (5, 6));
+  strncpy (buf, src, UR (6, 7));   /* { dg-warning "writing between 6 and 7 bytes into a region of size 5 " } */
+
+  /* Exercise strncpy into a destination of unknown size with a valid
+     and invalid non-constant number of bytes in a known range.  */
+  strncpy (dst, S (0), UR (5, 6));
+  strncpy (dst, S (1), UR (6, 7));
+  strncpy (dst, S (2), UR (7, 8));
+
+  strncpy (dst, S (3), UR (ssize_max, ssize_max + 1));
+
+  strncpy (dst, S (4), UR (ssize_max + 1, ssize_max + 2));   /* { dg-warning "specified size between \[0-9\]+ and \[0-9\]+ exceeds maximum object size" } */
+}
diff --git a/gcc/testsuite/gcc.dg/builtin-stringop-chk-5.c b/gcc/testsuite/gcc.dg/builtin-stringop-chk-5.c
new file mode 100644
index 0000000..489f880
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/builtin-stringop-chk-5.c
@@ -0,0 +1,260 @@
+/* Test exercising -Wstringop-overflow warnings.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -Wstringop-overflow=1" } */
+
+#define offsetof(type, mem)   __builtin_offsetof (type, mem)
+
+/* Return the number of bytes from member MEM of TYPE to the end
+   of object OBJ.  */
+#define offsetfrom(type, obj, mem) (sizeof (obj) - offsetof (type, mem))
+
+
+typedef __SIZE_TYPE__ size_t;
+extern void* memcpy (void*, const void*, size_t);
+extern void* memset (void*, int, __SIZE_TYPE__);
+
+
+struct A { char a, b; };
+struct B { struct A a; char c, d; };
+
+/* Function to call to "escape" pointers from tests below to prevent
+   GCC from assuming the values of the objects they point to stay
+   unchanged.  */
+void escape (void*, ...);
+
+/* Function to "generate" a random number each time it's called.  Declared
+   (but not defined) and used to prevent GCC from making assumptions about
+   their values based on the variables used in the tested expressions.  */
+size_t random_unsigned_value (void);
+
+/* Return a random unsigned value between MIN and MAX.  */
+
+static inline size_t
+range (size_t min, size_t max)
+{
+  const size_t val = random_unsigned_value ();
+  return val < min || max < val ? min : val;
+}
+
+/* Verify that writing past the end of a local array is diagnosed.  */
+
+void test_memop_warn_local (const void *src)
+{
+  size_t n;
+
+  n = range (8, 32);
+
+  struct A a[2];
+
+  memcpy (a, src, n);   /* { dg-warning "writing between 8 and 32 bytes into a region of size 4 overflows the destination" } */
+  escape (a, src);
+
+  /* At -Wstringop-overflow=1 the destination is considered to be
+     the whole array and its size is therefore sizeof a.  */
+  memcpy (&a[0], src, n);   /* { dg-warning "writing between 8 and 32 bytes into a region of size 4 overflows the destination" } */
+  escape (a, src);
+
+  /* Verify the same as above but by writing into the first member
+     of the first element of the array.  */
+  memcpy (&a[0].a, src, n);   /* { dg-warning "writing between 8 and 32 bytes into a region of size 4 overflows the destination" } */
+  escape (a, src);
+
+  n = range (12, 32);
+
+  struct B b[2];
+
+  memcpy (&b[0], src, n);   /* { dg-warning "writing between 12 and 32 bytes into a region of size 8 overflows the destination" } */
+  escape (b);
+
+  /* The following idiom of clearing multiple members of a struct is
+     used in a few places in the Linux kernel.  Verify that a warning
+     is issued for it when it writes past the end of the array object.  */
+  memset (&b[0].a.b, 0, offsetfrom (struct B, b, a.b) + 1);   /* { dg-warning "writing 8 bytes into a region of size 7" } */
+  escape (b);
+
+  memset (&b->a.b, 0, offsetfrom (struct B, b, a.b) + 1);   /* { dg-warning "writing 8 bytes into a region of size 7" } */
+  escape (b);
+
+  memset (&b[0].c, 0, offsetfrom (struct B, b, c) + 1);   /* { dg-warning "writing 7 bytes into a region of size 6" } */
+  escape (b);
+
+  memset (&b->c, 0, offsetfrom (struct B, b, c) + 1);   /* { dg-warning "writing 7 bytes into a region of size 6" } */
+  escape (b);
+
+  memset (&b[0].d, 0, offsetfrom (struct B, b, d) + 1);   /* { dg-warning "writing 6 bytes into a region of size 5" } */
+  escape (b);
+
+  memset (&b->d, 0, offsetfrom (struct B, b, d) + 1);   /* { dg-warning "writing 6 bytes into a region of size 5" } */
+  escape (b);
+
+  /* Same as above but clearing just members of the second element
+     of the array.  */
+  memset (&b[1].a.b, 0, offsetfrom (struct B, b[1], a.b) + 1);   /* { dg-warning "writing 4 bytes into a region of size 3" } */
+  escape (b);
+
+  memset (&b[1].c, 0, offsetfrom (struct B, b[1], c) + 1);   /* { dg-warning "writing 3 bytes into a region of size 2" } */
+  escape (b);
+
+  memset (&b[1].d, 0, offsetfrom (struct B, b[1], d) + 1);   /* { dg-warning "writing 2 bytes into a region of size 1" } */
+  escape (b);
+}
+
+/* Verify that writing past the end of a dynamically allocated array
+   of known size is diagnosed.  */
+
+void test_memop_warn_alloc (const void *src)
+{
+  size_t n;
+
+  n = range (8, 32);
+
+  struct A *a = __builtin_malloc (sizeof *a * 2);
+
+  memcpy (a, src, n);   /* { dg-warning "writing between 8 and 32 bytes into a region of size 4 overflows the destination" "memcpy into allocated" { xfail *-*-*} } */
+  escape (a, src);
+
+  /* At -Wstringop-overflow=1 the destination is considered to be
+     the whole array and its size is therefore sizeof a.  */
+  memcpy (&a[0], src, n);   /* { dg-warning "writing between 8 and 32 bytes into a region of size 4 overflows the destination" "memcpy into allocated" { xfail *-*-*} } */
+  escape (a, src);
+
+  /* Verify the same as above but by writing into the first member
+     of the first element of the array.  */
+  memcpy (&a[0].a, src, n);   /* { dg-warning "writing between 8 and 32 bytes into a region of size 4 overflows the destination" "memcpy into allocated" { xfail *-*-*} } */
+  escape (a, src);
+
+  n = range (12, 32);
+
+  struct B *b = __builtin_malloc (sizeof *b * 2);
+
+  memcpy (&b[0], src, n);   /* { dg-warning "writing between 12 and 32 bytes into a region of size 8 overflows the destination" "memcpy into allocated" { xfail *-*-*} } */
+  escape (b);
+
+  /* The following idiom of clearing multiple members of a struct is
+     used in a few places in the Linux kernel.  Verify that a warning
+     is issued for it when it writes past the end of the array object.  */
+  memset (&b[0].a.b, 0, offsetfrom (struct B, b, a.b) + 1);   /* { dg-warning "writing 8 bytes into a region of size 7" "memcpy into allocated" { xfail *-*-*} } */
+  escape (b);
+
+  memset (&b->a.b, 0, offsetfrom (struct B, b, a.b) + 1);   /* { dg-warning "writing 8 bytes into a region of size 7" "memcpy into allocated" { xfail *-*-*} } */
+  escape (b);
+
+  memset (&b[0].c, 0, offsetfrom (struct B, b, c) + 1);   /* { dg-warning "writing 7 bytes into a region of size 6" "memcpy into allocated" { xfail *-*-*} } */
+  escape (b);
+
+  memset (&b->c, 0, offsetfrom (struct B, b, c) + 1);   /* { dg-warning "writing 7 bytes into a region of size 6" "memcpy into allocated" { xfail *-*-*} } */
+  escape (b);
+
+  memset (&b[0].d, 0, offsetfrom (struct B, b, d) + 1);   /* { dg-warning "writing 6 bytes into a region of size 5" "memcpy into allocated" { xfail *-*-*} } */
+  escape (b);
+
+  memset (&b->d, 0, offsetfrom (struct B, b, d) + 1);   /* { dg-warning "writing 6 bytes into a region of size 5" "memcpy into allocated" { xfail *-*-*} } */
+  escape (b);
+
+  /* Same as above but clearing just members of the second element
+     of the array.  */
+  memset (&b[1].a.b, 0, offsetfrom (struct B, b[1], a.b) + 1);   /* { dg-warning "writing 4 bytes into a region of size 3" "memcpy into allocated" { xfail *-*-*} } */
+  escape (b);
+
+  memset (&b[1].c, 0, offsetfrom (struct B, b[1], c) + 1);   /* { dg-warning "writing 3 bytes into a region of size 2" "memcpy into allocated" { xfail *-*-*} } */
+  escape (b);
+
+  memset (&b[1].d, 0, offsetfrom (struct B, b[1], d) + 1);   /* { dg-warning "writing 2 bytes into a region of size 1" "memcpy into allocated" { xfail *-*-*} } */
+  escape (b);
+}
+
+
+void test_memop_nowarn (const void *src)
+{
+  struct B b[2];
+
+  size_t n = range (sizeof b, 32);
+
+  /* Verify that clearing the whole array is not diagnosed regardless
+     of whether the expression pointing to its beginning is obtained
+     from the array itself or its first member(s).  */
+  memcpy (b, src, n);
+  escape (b);
+
+  memcpy (&b[0], src, n);
+  escape (b);
+
+  memcpy (&b[0].a, src, n);
+  escape (b, src);
+
+  memcpy (&b[0].a.a, src, n);
+  escape (b, src);
+
+  /* Clearing multiple elements of an array of structs.  */
+  memset (&b[0].a.b, 0, sizeof b - offsetof (struct B, a.b));
+  escape (b);
+
+  memset (&b->a.b, 0, sizeof b - offsetof (struct B, a.b));
+  escape (b);
+
+  memset (&b[0].c, 0, sizeof b - offsetof (struct B, c));
+  escape (b);
+
+  memset (&b->c, 0, sizeof b - offsetof (struct B, c));
+  escape (b);
+
+  memset (&b[0].d, 0, sizeof b - offsetof (struct B, d));
+  escape (b);
+
+  memset (&b->d, 0, sizeof b - offsetof (struct B, d));
+  escape (b);
+
+  /* Same as above but clearing just members of the second element
+     of the array.  */
+  memset (&b[1].a.b, 0, sizeof b[1] - offsetof (struct B, a.b));
+  escape (b);
+
+  memset (&b[1].c, 0, sizeof b[1] - offsetof (struct B, c));
+  escape (b);
+
+  memset (&b[1].d, 0, sizeof b[1] - offsetof (struct B, d));
+  escape (b);
+}
+
+
+/* The following function could specify in its API that it takes
+   an array of exactly two elements, as shown below.  Verify that
+   writing into both elements is not diagnosed.  */
+void test_memop_nowarn_arg (struct A[2], const void*);
+
+void test_memop_nowarn_arg (struct A *a, const void *src)
+{
+  memcpy (a, src, 2 * sizeof *a);
+  escape (a, src);
+
+  memcpy (a, src, range (2 * sizeof *a, 123));
+  escape (a, src);
+}
+
+
+struct C { char a[3], b; };
+struct D { struct C c; char d, e; };
+
+extern char* strncpy (char*, const char*, __SIZE_TYPE__);
+
+void test_stringop_warn (void)
+{
+  size_t n = range (2 * sizeof (struct D) + 1, 33);
+
+  struct C c[2];
+
+  /* Similarly, at -Wstringop-overflow=1 the destination is considered
+     to be the whole array and its size is therefore sizeof c.  */
+  strncpy (c[0].a, "123", n);   /* { dg-warning "writing between 13 and 33 bytes into a region of size 8 overflows the destination" } */
+
+  escape (c);
+}
+
+
+void test_stringop_nowarn (void)
+{
+  struct D d[2];
+
+  strncpy (d[0].c.a, "123", range (sizeof d, 32));
+  escape (d);
+}
diff --git a/gcc/testsuite/gcc.dg/builtin-stringop-chk-6.c b/gcc/testsuite/gcc.dg/builtin-stringop-chk-6.c
new file mode 100644
index 0000000..9572ce1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/builtin-stringop-chk-6.c
@@ -0,0 +1,112 @@
+/* Test exercising -Wstringop-overflow warnings for raw memory and string functions.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -Wstringop-overflow=2" } */
+
+#define offsetof(type, mem)   __builtin_offsetof (type, mem)
+
+/* Return the number of bytes from member MEM of TYPE to the end
+   of object OBJ.  */
+#define offsetfrom(type, obj, mem) (sizeof (obj) - offsetof (type, mem))
+
+
+typedef __SIZE_TYPE__ size_t;
+extern void* memcpy (void*, const void*, size_t);
+extern void* memset (void*, int, __SIZE_TYPE__);
+
+
+struct A { char a, b; };
+struct B { struct A a; char c, d; };
+
+/* Function to call to "escape" pointers from tests below to prevent
+   GCC from assuming the values of the objects they point to stay
+   unchanged.  */
+void escape (void*, ...);
+
+/* Function to "generate" a random number each time it's called.  Declared
+   (but not defined) and used to prevent GCC from making assumptions about
+   the values of the variables used in the tested expressions.  */
+size_t random_unsigned_value (void);
+
+/* Return a random unsigned value between MIN and MAX.  */
+
+static inline size_t
+range (size_t min, size_t max)
+{
+  const size_t val = random_unsigned_value ();
+  return val < min || max < val ? min : val;
+}
+
+
+void test_memop_warn_object (const void *src)
+{
+  unsigned n = range (17, 29);
+
+  struct A a[2];
+
+  /* At -Wstringop-overflow=2, like at 1, the destination of functions
+     that operate on raw memory is considered to be the whole array and its
+     size is therefore sizeof a.  */
+  memcpy (&a[0], src, n);   /* { dg-warning "writing between 17 and 29 bytes into a region of size 4 overflows the destination" } */
+  escape (a);
+}
+
+void test_memop_warn_subobject (const void *src)
+{
+  unsigned n = range (17, 31);
+
+  struct B b[2];
+
+  /* At -Wstringop-overflow=2 the destination of functions that operate
+     on raw memory is still considered to be the whole array, even when
+     a member subobject is passed, and its size is therefore sizeof b.  */
+  memcpy (&b[0].a, src, n);   /* { dg-warning "writing between 17 and 31 bytes into a region of size 8 overflows the destination" } */
+
+  escape (b);
+}
+
+void test_memop_nowarn_subobject (void)
+{
+  struct B b[2];
+
+  /* The following idiom of clearing multiple members of a struct
+     has been seen in a few places in the Linux kernel.  Verify
+     that a warning is not issued for it.  */
+  memset (&b[0].c, 0, sizeof b[0] - offsetof (struct B, c));
+
+  escape (b);
+}
+
+struct C { char a[3], b; };
+struct D { struct C c; char d, e; };
+
+extern char* strncpy (char*, const char*, __SIZE_TYPE__);
+
+void test_stringop_warn_object (const char *str)
+{
+  unsigned n = range (2 * sizeof (struct D), 32);
+
+  struct C c[2];
+
+  /* Similarly, at -Wstringop-overflow=2 the destination is considered
+     to be the array member of the first element of the array c and its
+     size is therefore sizeof c[0].a.  */
+  strncpy (c[0].a, "123", n);   /* { dg-warning "writing between 12 and 32 bytes into a region of size 3 overflows the destination" } */
+  escape (c);
+
+  strncpy (c[0].a, str, n);   /* { dg-warning "writing between 12 and 32 bytes into a region of size 3 overflows the destination" } */
+  escape (c);
+}
+
+void test_stringop_warn_subobject (const char *src)
+{
+  unsigned n = range (2 * sizeof (struct D), 32);
+
+  struct D d[2];
+
+  /* Same as above.  */
+  strncpy (d[0].c.a, "123", n);   /* { dg-warning "writing between 12 and 32 bytes into a region of size 3 overflows the destination" } */
+  escape (d);
+
+  strncpy (d[0].c.a, src, n);   /* { dg-warning "writing between 12 and 32 bytes into a region of size 3 overflows the destination" } */
+  escape (d);
+}
diff --git a/gcc/testsuite/gcc.dg/builtin-strncat-chk-1.c b/gcc/testsuite/gcc.dg/builtin-strncat-chk-1.c
index 44677f1..daff680 100644
--- a/gcc/testsuite/gcc.dg/builtin-strncat-chk-1.c
+++ b/gcc/testsuite/gcc.dg/builtin-strncat-chk-1.c
@@ -24,15 +24,15 @@ test (int arg, ...)
   *p = 0;
   strncat (p, "abcdefghi", 10);
   *p = 0;
-  strncat (p, "abcdefghij", 10); /* { dg-warning "will always overflow" } */
+  strncat (p, "abcdefghij", 10); /* { dg-warning "writing 11 bytes into a region of size 10 overflows the destination" } */
   *p = 0;
   strncat (p, "abcdefgh", 11);
   *p = 0;
-  strncat (p, "abcdefghijkl", 11); /* { dg-warning "will always overflow" } */
+  strncat (p, "abcdefghijkl", 11); /* { dg-warning "specified bound 11 exceeds the size 10 of the destination" } */
   *p = 0;
   strncat (p, q, 9);
   *p = 0;
-  strncat (p, q, 10); /* { dg-warning "might overflow" } */
+  strncat (p, q, 10); /* { dg-warning "specified bound 10 equals the size of the destination" } */
   *p = 0;
-  strncat (p, q, 11); /* { dg-warning "might overflow" } */
+  strncat (p, q, 11); /* { dg-warning "specified bound 11 exceeds the size 10 of the destination" } */
 }
diff --git a/gcc/testsuite/gcc.dg/fstack-protector-strong.c b/gcc/testsuite/gcc.dg/fstack-protector-strong.c
index 8e9d891..94dc350 100644
--- a/gcc/testsuite/gcc.dg/fstack-protector-strong.c
+++ b/gcc/testsuite/gcc.dg/fstack-protector-strong.c
@@ -106,7 +106,7 @@ int
 foo8 ()
 {
   char base[100];
-  memcpy ((void *)base, (const void *)pg0, 105);
+  memcpy ((void *)base, (const void *)pg0, 105);   /* { dg-warning "writing 105 bytes into a region of size 100" } */
   return (int)(base[32]);
 }
 
diff --git a/gcc/testsuite/gcc.dg/memcpy-2.c b/gcc/testsuite/gcc.dg/memcpy-2.c
index 24464abd..7f839d2 100644
--- a/gcc/testsuite/gcc.dg/memcpy-2.c
+++ b/gcc/testsuite/gcc.dg/memcpy-2.c
@@ -7,7 +7,7 @@ typedef __SIZE_TYPE__ size_t;
 extern inline __attribute__((gnu_inline, always_inline, artificial)) void *
 memcpy (void *__restrict dest, const void *__restrict src, size_t len)
 {
-  return __builtin___memcpy_chk (dest, /* { dg-warning "will always overflow destination buffer" } */
+  return __builtin___memcpy_chk (dest, /* { dg-warning "writing" } */
 				 src, len, __builtin_object_size (dest, 0));
 }
 
diff --git a/gcc/testsuite/gcc.dg/pr40340-1.c b/gcc/testsuite/gcc.dg/pr40340-1.c
index aae84c6..78540a2 100644
--- a/gcc/testsuite/gcc.dg/pr40340-1.c
+++ b/gcc/testsuite/gcc.dg/pr40340-1.c
@@ -20,5 +20,5 @@ main (void)
   return 0;
 }
 
-/* { dg-warning "will always overflow destination buffer" "" { target *-*-* } 10 } */
+/* { dg-warning "writing" "" { target *-*-* } 10 } */
 /* { dg-message "file included" "In file included" { target *-*-* } 0 } */
diff --git a/gcc/testsuite/gcc.dg/pr40340-2.c b/gcc/testsuite/gcc.dg/pr40340-2.c
index a0d6e084..1dc21d1 100644
--- a/gcc/testsuite/gcc.dg/pr40340-2.c
+++ b/gcc/testsuite/gcc.dg/pr40340-2.c
@@ -12,5 +12,5 @@ main (void)
   return 0;
 }
 
-/* { dg-warning "will always overflow destination buffer" "" { target *-*-* } 10 } */
+/* { dg-warning "writing" "" { target *-*-* } 10 } */
 /* { dg-message "file included" "In file included" { target *-*-* } 0 } */
diff --git a/gcc/testsuite/gcc.dg/pr40340-5.c b/gcc/testsuite/gcc.dg/pr40340-5.c
index f50514c..e517147 100644
--- a/gcc/testsuite/gcc.dg/pr40340-5.c
+++ b/gcc/testsuite/gcc.dg/pr40340-5.c
@@ -13,5 +13,5 @@ main (void)
   return 0;
 }
 
-/* { dg-warning "will always overflow destination buffer" "" { target *-*-* } 10 } */
+/* { dg-warning "writing" "" { target *-*-* } 10 } */
 /* { dg-message "file included" "In file included" { target *-*-* } 0 } */
diff --git a/gcc/testsuite/gcc.dg/torture/Wsizeof-pointer-memaccess1.c b/gcc/testsuite/gcc.dg/torture/Wsizeof-pointer-memaccess1.c
index 7ce9eae..b5a59f4 100644
--- a/gcc/testsuite/gcc.dg/torture/Wsizeof-pointer-memaccess1.c
+++ b/gcc/testsuite/gcc.dg/torture/Wsizeof-pointer-memaccess1.c
@@ -710,4 +710,4 @@ f4 (char *x, char **y, int z, char w[64])
   return z;
 }
 
-/* { dg-prune-output "\[\n\r\]*will always overflow\[\n\r\]*" } */
+/* { dg-prune-output "\[\n\r\]*writing\[\n\r\]*" } */
diff --git a/gcc/testsuite/gcc.dg/torture/pr71132.c b/gcc/testsuite/gcc.dg/torture/pr71132.c
index 2991718..2544eb1 100644
--- a/gcc/testsuite/gcc.dg/torture/pr71132.c
+++ b/gcc/testsuite/gcc.dg/torture/pr71132.c
@@ -1,4 +1,9 @@
 /* { dg-do compile } */
+/* { dg-additional-options "-Wno-stringop-overflow" } */
+/* The loop below writes past the end of the global object a.
+   When the loop is transformed into a call to memcpy the buffer
+   overflow is detected and diagnosed by the -Wstringop-overflow
+   option enabled by default.  */
 
 typedef unsigned size_t;
 struct {
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-1.c b/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-1.c
index fae584e..a551e23 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-1.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-1.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-std=c99 -Wformat -Wformat-length=1 -ftrack-macro-expansion=0" } */
+/* { dg-options "-Wformat -Wformat-length=1 -ftrack-macro-expansion=0" } */
 /* { dg-require-effective-target int32plus } */
 
 /* When debugging, define LINE to the line number of the test case to exercise
@@ -1456,9 +1456,7 @@ void test_vsprintf_chk_int (__builtin_va_list va)
 
 void test_snprintf_c_const (char *d)
 {
-  T (-1, "%c",    0);            /* { dg-warning "specified destination size \[0-9\]+ is too large" } */
-
-  __builtin_snprintf (d, INT_MAX, "%c", 0);   /* { dg-warning "specified destination size 2147483647 is too large" "ilp32" { target { ilp32 } } } */
+  T (-1, "%c",    0);            /* { dg-warning "specified bound \[0-9\]+ exceeds maximum object size \[0-9\]+" } */
 
   /* Verify the full text of the diagnostic for just the distinct messages
      and use abbreviations in subsequent test cases.  */
@@ -1506,9 +1504,9 @@ void test_snprintf_chk_c_const (void)
   /* Verify that specifying a size of the destination buffer that's
      bigger than its actual size (normally determined and passed to
      the function by __builtin_object_size) is diagnosed.  */
-  __builtin___snprintf_chk (buffer, 3, 0, 2, " ");   /* { dg-warning "always overflow|specified size 3 exceeds the size 2 of the destination" } */
+  __builtin___snprintf_chk (buffer, 3, 0, 2, " ");   /* { dg-warning "specified bound 3 exceeds the size 2 of the destination" } */
 
-  T (-1, "%c",    0);           /* { dg-warning "specified destination size \[^ \]* is too large" } */
+  T (-1, "%c",    0);           /* { dg-warning "specified bound \[0-9\]+ exceeds maximum object size \[0-9\]+" } */
 
   T (0, "%c",     0);
   T (0, "%c%c",   0, 0);
@@ -1619,7 +1617,7 @@ void test_vsprintf_int (__builtin_va_list va)
 
 void test_vsnprintf_s (__builtin_va_list va)
 {
-  T (-1, "%s");             /* { dg-warning "specified destination size \[^ \]* is too large" } */
+  T (-1, "%s");             /* { dg-warning "specified bound \[0-9\]+ exceeds maximum object size \[0-9\]+" } */
 
   T (0, "%s");
   T (1, "%s");
@@ -1642,9 +1640,9 @@ void test_vsnprintf_chk_s (__builtin_va_list va)
   /* Verify that specifying a size of the destination buffer that's
      bigger than its actual size (normally determined and passed to
      the function by __builtin_object_size) is diagnosed.  */
-  __builtin___vsnprintf_chk (buffer, 123, 0, 122, "%-s", va);   /* { dg-warning "always overflow|specified size 123 exceeds the size 122 of the destination object" } */
+  __builtin___vsnprintf_chk (buffer, 123, 0, 122, "%-s", va);   /* { dg-warning "specified bound 123 exceeds the size 122 of the destination" } */
 
-  __builtin___vsnprintf_chk (buffer, __SIZE_MAX__, 0, 2, "%-s", va);   /* { dg-warning "always overflow|destination size .\[0-9\]+. is too large" } */
+  __builtin___vsnprintf_chk (buffer, __SIZE_MAX__, 0, 2, "%-s", va);   /* { dg-warning "specified bound \[0-9\]+ exceeds maximum object size \[0-9\]+" } */
 
   T (0, "%s");
   T (1, "%s");
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-3.c b/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-3.c
index 00176ed..57fea66 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-3.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-3.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-std=c99 -O2 -Wformat -Wformat-length=1 -ftrack-macro-expansion=0" } */
+/* { dg-options "-O2 -Wformat -Wformat-length=1 -ftrack-macro-expansion=0" } */
 
 typedef __SIZE_TYPE__ size_t;
 
@@ -248,34 +248,34 @@ void test_too_large (char *d, int x, __builtin_va_list va)
   const size_t imax = __INT_MAX__;
   const size_t imax_p1 = imax + 1;
 
-  __builtin_snprintf (d, imax,    "%c", x);   /* { dg-warning "specified destination size \[0-9\]+ is too large" "INT_MAX" { target ilp32 } } */
-  __builtin_snprintf (d, imax_p1, "%c", x);   /* { dg-warning "specified destination size \[0-9\]+ exceeds .INT_MAX." "INT_MAX + 1" { target lp64 } } */
-  /* { dg-warning "specified destination size \[0-9\]+ is too large" "" { target { ilp32 } } .-1 } */
+  __builtin_snprintf (d, imax,    "%c", x);
+  __builtin_snprintf (d, imax_p1, "%c", x);   /* { dg-warning "specified bound \[0-9\]+ exceeds .INT_MAX." "INT_MAX + 1" { target lp64 } } */
+  /* { dg-warning "specified bound \[0-9\]+ exceeds maximum object size" "INT_MAX + 1" { target { ilp32 } } .-1 } */
 
-  __builtin_vsnprintf (d, imax,    "%c", va);   /* { dg-warning "specified destination size \[0-9\]+ is too large" "INT_MAX" { target ilp32 } } */
-  __builtin_vsnprintf (d, imax_p1, "%c", va);   /* { dg-warning "specified destination size \[0-9\]+ exceeds .INT_MAX." "INT_MAX + 1" { target lp64 } } */
-  /* { dg-warning "specified destination size \[0-9\]+ is too large" "" { target { ilp32 } } .-1 } */
+  __builtin_vsnprintf (d, imax,    "%c", va);
+  __builtin_vsnprintf (d, imax_p1, "%c", va);   /* { dg-warning "specified bound \[0-9\]+ exceeds .INT_MAX." "INT_MAX + 1" { target lp64 } } */
+  /* { dg-warning "specified bound \[0-9\]+ exceeds maximum object size" "INT_MAX + 1" { target { ilp32 } } .-1 } */
 
-  __builtin___snprintf_chk (d, imax,    0, imax,    "%c", x);   /* { dg-warning "specified destination size \[0-9\]+ is too large" "INT_MAX" { target ilp32 } } */
-  __builtin___snprintf_chk (d, imax_p1, 0, imax_p1, "%c", x);   /* { dg-warning "specified destination size \[0-9\]+ exceeds .INT_MAX." "INT_MAX + 1" { target lp64 } } */
-  /* { dg-warning "specified destination size \[0-9\]+ is too large" "INT_MAX + 1" { target { ilp32 } } .-1 } */
+  __builtin___snprintf_chk (d, imax,    0, imax,    "%c", x);
+  __builtin___snprintf_chk (d, imax_p1, 0, imax_p1, "%c", x);   /* { dg-warning "specified bound \[0-9\]+ exceeds .INT_MAX." "INT_MAX + 1" { target lp64 } } */
+  /* { dg-warning "specified bound \[0-9\]+ exceeds maximum object size" "INT_MAX + 1" { target { ilp32 } } .-1 } */
 
-  __builtin___vsnprintf_chk (d, imax,    0, imax,    "%c", va);   /* { dg-warning "specified destination size \[0-9\]+ is too large" "INT_MAX" { target ilp32 } } */
-  __builtin___vsnprintf_chk (d, imax_p1, 0, imax_p1, "%c", va);   /* { dg-warning "specified destination size \[0-9\]+ exceeds .INT_MAX." "INT_MAX + 1" { target lp64 } } */
-  /* { dg-warning "specified destination size \[0-9\]+ is too large" "" { target { ilp32 } } .-1 } */
+  __builtin___vsnprintf_chk (d, imax,    0, imax,    "%c", va);
+  __builtin___vsnprintf_chk (d, imax_p1, 0, imax_p1, "%c", va);   /* { dg-warning "specified bound \[0-9\]+ exceeds .INT_MAX." "INT_MAX + 1" { target lp64 } } */
+  /* { dg-warning "specified bound \[0-9\]+ exceeds maximum object size" "INT_MAX + 1" { target { ilp32 } } .-1 } */
 
   const size_t ptrmax = __PTRDIFF_MAX__;
   const size_t ptrmax_m1 = ptrmax - 1;
 
-  __builtin_snprintf (d, ptrmax_m1, "%c", x);  /* { dg-warning "specified destination size \[0-9\]+ exceeds .INT_MAX." "PTRDIFF_MAX - 1" { target lp64 } } */
-  __builtin_snprintf (d, ptrmax, "  %c", x);   /* { dg-warning "specified destination size \[0-9\]+ is too large" } */
+  __builtin_snprintf (d, ptrmax_m1, "%c", x);  /* { dg-warning "specified bound \[0-9\]+ exceeds .INT_MAX." "PTRDIFF_MAX - 1" { target lp64 } } */
+  __builtin_snprintf (d, ptrmax, "  %c", x);   /* { dg-warning "specified bound \[0-9\]+ exceeds .INT_MAX." "PTRDIFF_MAX" { target lp64 } } */
 
-  __builtin_vsnprintf (d, ptrmax_m1, "%c", va);   /* { dg-warning "specified destination size \[0-9\]+ exceeds .INT_MAX." "PTRDIFF_MAX - 1" { target lp64 } } */
-  __builtin_vsnprintf (d, ptrmax,    "%c", va);   /* { dg-warning "specified destination size \[0-9\]+ is too large" } */
+  __builtin_vsnprintf (d, ptrmax_m1, "%c", va);   /* { dg-warning "specified bound \[0-9\]+ exceeds .INT_MAX." "PTRDIFF_MAX - 1" { target lp64 } } */
+  __builtin_vsnprintf (d, ptrmax,    "%c", va);   /* { dg-warning "specified bound \[0-9\]+ exceeds .INT_MAX." "PTRDIFF_MAX" { target lp64 } } */
 
-  __builtin___snprintf_chk (d, ptrmax_m1, 0, ptrmax_m1, "%c", x);   /* { dg-warning "specified destination size \[0-9\]+ exceeds .INT_MAX." "PTRDIFF_MAX - 1" { target lp64 } } */
-  __builtin___snprintf_chk (d, ptrmax,    0, ptrmax,    "%c", x);   /* { dg-warning "specified destination size \[0-9\]+ is too large" } */
+  __builtin___snprintf_chk (d, ptrmax_m1, 0, ptrmax_m1, "%c", x);   /* { dg-warning "specified bound \[0-9\]+ exceeds .INT_MAX." "PTRDIFF_MAX - 1" { target lp64 } } */
+  __builtin___snprintf_chk (d, ptrmax,    0, ptrmax,    "%c", x);   /* { dg-warning "specified bound \[0-9\]+ exceeds .INT_MAX." "PTRDIFF_MAX" { target lp64 } } */
 
-  __builtin___vsnprintf_chk (d, ptrmax_m1, 0, ptrmax_m1, "%c", va);   /* { dg-warning "specified destination size \[0-9\]+ exceeds .INT_MAX." "PTRDIFF_MAX - 1" { target lp64 } } */
-  __builtin___vsnprintf_chk (d, ptrmax,    0, ptrmax,    "%c", va);   /* { dg-warning "specified destination size \[0-9\]+ is too large" } */
+  __builtin___vsnprintf_chk (d, ptrmax_m1, 0, ptrmax_m1, "%c", va);   /* { dg-warning "specified bound \[0-9\]+ exceeds .INT_MAX." "PTRDIFF_MAX - 1" { target lp64 } } */
+  __builtin___vsnprintf_chk (d, ptrmax,    0, ptrmax,    "%c", va);   /* { dg-warning "specified bound \[0-9\]+ exceeds .INT_MAX." "PTRDIFF_MAX" { target lp64 } } */
 }
diff --git a/gcc/testsuite/gfortran.dg/char_length_3.f90 b/gcc/testsuite/gfortran.dg/char_length_3.f90
index 97f7fb4..6529a77 100644
--- a/gcc/testsuite/gfortran.dg/char_length_3.f90
+++ b/gcc/testsuite/gfortran.dg/char_length_3.f90
@@ -47,3 +47,6 @@
            y(1) = 'hello world'
         end subroutine
        end
+
+ ! Remove -Wstringop-overflow warnings.
+ ! { dg-prune-output "overflows the destination" }
diff --git a/gcc/testsuite/gfortran.dg/pr38868.f b/gcc/testsuite/gfortran.dg/pr38868.f
index a8c4469..583bf90 100644
--- a/gcc/testsuite/gfortran.dg/pr38868.f
+++ b/gcc/testsuite/gfortran.dg/pr38868.f
@@ -9,7 +9,7 @@
       ANER(1)='A   '
       ANER(2)='    '
       LINE=' '
-      LINE(78:80)='xyz'
+      LINE(78:80)='xyz'   ! { dg-warning "writing 3 bytes into a region of size 2" }
       WRITE(*,'(A82)') "'"//LINE//"'"
       END
 
-- 
cgit v1.1


From 12705913ef7409301b76596cf60bf4327abfee7a Mon Sep 17 00:00:00 2001
From: Segher Boessenkool <segher@gcc.gnu.org>
Date: Thu, 8 Dec 2016 01:09:01 +0100
Subject: simplify-rtx: Fix the last fix (PR78638)

I managed to get the last obvious fix wrong: mode is M1, GET_MODE (op)
is M2.

[ adding missing PR marker ]


	PR rtl-optimization/78638
	* simplify-rtx.c (simplify_truncation): M2 is not mode, it is
	GET_MODE (op).  Fix this.

From-SVN: r243420
---
 gcc/ChangeLog | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6372ff4..05ae50b5 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -446,6 +446,7 @@
 
 2016-12-02  Segher Boessenkool  <segher@kernel.crashing.org>
 
+	PR rtl-optimization/78638
 	* simplify-rtx.c (simplify_truncation): M2 is not mode, it is
 	GET_MODE (op).  Fix this.
 
-- 
cgit v1.1


From f9adfccab13fb7b1864c80b90b82d99ae1a30006 Mon Sep 17 00:00:00 2001
From: GCC Administrator <gccadmin@gcc.gnu.org>
Date: Thu, 8 Dec 2016 00:16:22 +0000
Subject: Daily bump.

From-SVN: r243423
---
 gcc/DATESTAMP | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP
index f7c6d5c..8a8244b 100644
--- a/gcc/DATESTAMP
+++ b/gcc/DATESTAMP
@@ -1 +1 @@
-20161207
+20161208
-- 
cgit v1.1


From 776a8a927eeeb7ea3c13b6a981f340591dcef530 Mon Sep 17 00:00:00 2001
From: Ian Lance Taylor <ian@gcc.gnu.org>
Date: Thu, 8 Dec 2016 00:38:03 +0000
Subject: runtime: remove some unused variables/declarations from runtime.h

    Small patch from Eric Botcazou.

    Reviewed-on: https://go-review.googlesource.com/34029

From-SVN: r243424
---
 gcc/go/gofrontend/MERGE | 2 +-
 libgo/runtime/runtime.h | 4 ----
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE
index 0cb0f9c..7586fd4 100644
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
@@ -1,4 +1,4 @@
-2102112e26a21589455f940ec6b409766d942c62
+08d221726e3f50cb197a931ba385fac67f66a028
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
diff --git a/libgo/runtime/runtime.h b/libgo/runtime/runtime.h
index 34b5b44..f793fea 100644
--- a/libgo/runtime/runtime.h
+++ b/libgo/runtime/runtime.h
@@ -212,10 +212,6 @@ extern bool runtime_copystack;
 #define USED(v)		((void) v)
 #define	ROUND(x, n)	(((x)+(n)-1)&~(uintptr)((n)-1)) /* all-caps to mark as macro: it evaluates n twice */
 
-byte*	runtime_startup_random_data;
-uint32	runtime_startup_random_data_len;
-void	runtime_get_random_data(byte**, int32*);
-
 enum {
 	// hashinit wants this many random bytes
 	HashRandomBytes = 32
-- 
cgit v1.1


From a96d1f1d028e6798413eea810d2c6beee7519a6d Mon Sep 17 00:00:00 2001
From: David Malcolm <dmalcolm@redhat.com>
Date: Thu, 8 Dec 2016 01:51:04 +0000
Subject: Split class rtx_reader into md_reader vs rtx_reader

This moves read_rtx and friends into rtx_reader, and splits
rtx_reader into two classes:

class md_reader: has responsibility for reading chars, managing
include files, top-level directives etc.  It is the read-md.o part.

class rtx_reader, a subclass, has the code for reading hierarchical
rtx expressions using the format codes.   It is the read-rtl.o part.

This split is needed by a followup patch, which converts
read_rtx_operand to a virtual function of rtx_reader.  To do this,
instances of rtx_reader (or its subclasses) need a vtable, which
needs to include a ptr to the code in read-rtl.o.  Splitting it up
allows the gen* tools that currently purely use read-md.o to continue
to do so.

gcc/ChangeLog:
	* genpreds.c (write_tm_constrs_h): Update for renaming of
	rtx_reader_ptr to md_reader_ptr.
	(write_tm_preds_h): Likewise.
	(write_insn_preds_c): Likewise.
	* read-md.c (rtx_reader_ptr): Rename to...
	(md_reader_ptr): ...this, and convert from an
	rtx_reader * to a md_reader *.
	(rtx_reader::set_md_ptr_loc): Rename to...
	(md_reader::set_md_ptr_loc): ...this.
	(rtx_reader::get_md_ptr_loc): Rename to...
	(md_reader::get_md_ptr_loc): ...this.
	(rtx_reader::copy_md_ptr_loc): Rename to...
	(md_reader::copy_md_ptr_loc): ...this.
	(rtx_reader::fprint_md_ptr_loc): Rename to...
	(md_reader::fprint_md_ptr_loc): ...this.
	(rtx_reader::print_md_ptr_loc): Rename to...
	(md_reader::print_md_ptr_loc): ...this.
	(rtx_reader::join_c_conditions): Rename to...
	(md_reader::join_c_conditions): ...this.
	(rtx_reader::fprint_c_condition): Rename to...
	(md_reader::fprint_c_condition): ...this.
	(rtx_reader::print_c_condition): Rename to...
	(md_reader::print_c_condition): ...this.
	(fatal_with_file_and_line):  Update for renaming of
	rtx_reader_ptr to md_reader_ptr.
	(rtx_reader::require_char): Rename to...
	(md_reader::require_char): ...this.
	(rtx_reader::require_char_ws): Rename to...
	(md_reader::require_char_ws): ...this.
	(rtx_reader::require_word_ws): Rename to...
	(md_reader::require_word_ws): ...this.
	(rtx_reader::read_char): Rename to...
	(md_reader::read_char): ...this.
	(rtx_reader::unread_char): Rename to...
	(md_reader::unread_char): ...this.
	(rtx_reader::peek_char): Rename to...
	(md_reader::peek_char): ...this.
	(rtx_reader::read_name): Rename to...
	(md_reader::read_name): ...this.
	(rtx_reader::read_escape): Rename to...
	(md_reader::read_escape): ...this.
	(rtx_reader::read_quoted_string): Rename to...
	(md_reader::read_quoted_string): ...this.
	(rtx_reader::read_braced_string): Rename to...
	(md_reader::read_braced_string): ...this.
	(rtx_reader::read_string): Rename to...
	(md_reader::read_string): ...this.
	(rtx_reader::read_skip_construct): Rename to...
	(md_reader::read_skip_construct): ...this.
	(rtx_reader::handle_constants): Rename to...
	(md_reader::handle_constants): ...this.
	(rtx_reader::traverse_md_constants): Rename to...
	(md_reader::traverse_md_constants): ...this.
	(rtx_reader::handle_enum): Rename to...
	(md_reader::handle_enum): ...this.
	(rtx_reader::lookup_enum_type): Rename to...
	(md_reader::lookup_enum_type): ...this.
	(rtx_reader::traverse_enum_types): Rename to...
	(md_reader::traverse_enum_types): ...this.
	(rtx_reader::rtx_reader): Rename to...
	(md_reader::md_reader): ...this, and update for renaming of
	rtx_reader_ptr to md_reader_ptr.
	(rtx_reader::~rtx_reader): Rename to...
	(md_reader::~md_reader): ...this, and update for renaming of
	rtx_reader_ptr to md_reader_ptr.
	(rtx_reader::handle_include): Rename to...
	(md_reader::handle_include): ...this.
	(rtx_reader::handle_file): Rename to...
	(md_reader::handle_file): ...this.
	(rtx_reader::handle_toplevel_file): Rename to...
	(md_reader::handle_toplevel_file): ...this.
	(rtx_reader::get_current_location): Rename to...
	(md_reader::get_current_location): ...this.
	(rtx_reader::add_include_path): Rename to...
	(md_reader::add_include_path): ...this.
	(rtx_reader::read_md_files): Rename to...
	(md_reader::read_md_files): ...this.
	* read-md.h (class rtx_reader): Split into...
	(class md_reader): ...new class.
	(rtx_reader_ptr): Rename to...
	(md_reader_ptr): ...this, and convert to a md_reader *.
	(class noop_reader): Update base class to be md_reader.
	(class rtx_reader): Reintroduce as a subclass of md_reader.
	(rtx_reader_ptr): Reintroduce as a rtx_reader *.
	(read_char): Update for renaming of rtx_reader_ptr to
	md_reader_ptr.
	(unread_char): Likewise.
	* read-rtl.c (rtx_reader_ptr): New global.
	(rtx_reader::apply_iterator_to_string): Rename to...
	(md_reader::apply_iterator_to_string): ...this.
	(rtx_reader::copy_rtx_for_iterators): Rename to...
	(md_reader::copy_rtx_for_iterators): ...this.
	(rtx_reader::read_conditions): Rename to...
	(md_reader::read_conditions): ...this.
	(rtx_reader::record_potential_iterator_use): Rename to...
	(md_reader::record_potential_iterator_use): ...this.
	(rtx_reader::read_mapping): Rename to...
	(md_reader::read_mapping): ...this.
	(rtx_reader::read_rtx): Use rtx_reader_ptr when calling
	read_rtx_code.
	(rtx_reader::read_rtx_operand): Use get_string_obstack rather
	than directly accessing m_string_obstack.
	(rtx_reader::rtx_reader): New ctor.
	(rtx_reader::~rtx_reader): New dtor.

From-SVN: r243426
---
 gcc/ChangeLog  | 106 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 gcc/genpreds.c |   6 ++--
 gcc/read-md.c  |  90 ++++++++++++++++++++++++------------------------
 gcc/read-md.h  |  59 ++++++++++++++++++++++++--------
 gcc/read-rtl.c |  43 +++++++++++++++++------
 5 files changed, 231 insertions(+), 73 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 05ae50b5..929bf4d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,109 @@
+2016-12-07  David Malcolm  <dmalcolm@redhat.com>
+
+	* genpreds.c (write_tm_constrs_h): Update for renaming of
+	rtx_reader_ptr to md_reader_ptr.
+	(write_tm_preds_h): Likewise.
+	(write_insn_preds_c): Likewise.
+	* read-md.c (rtx_reader_ptr): Rename to...
+	(md_reader_ptr): ...this, and convert from an
+	rtx_reader * to a md_reader *.
+	(rtx_reader::set_md_ptr_loc): Rename to...
+	(md_reader::set_md_ptr_loc): ...this.
+	(rtx_reader::get_md_ptr_loc): Rename to...
+	(md_reader::get_md_ptr_loc): ...this.
+	(rtx_reader::copy_md_ptr_loc): Rename to...
+	(md_reader::copy_md_ptr_loc): ...this.
+	(rtx_reader::fprint_md_ptr_loc): Rename to...
+	(md_reader::fprint_md_ptr_loc): ...this.
+	(rtx_reader::print_md_ptr_loc): Rename to...
+	(md_reader::print_md_ptr_loc): ...this.
+	(rtx_reader::join_c_conditions): Rename to...
+	(md_reader::join_c_conditions): ...this.
+	(rtx_reader::fprint_c_condition): Rename to md_reader::fprint_c_condition.
+	(rtx_reader::print_c_condition): Rename to...
+	(md_reader::print_c_condition): ...this.
+	(fatal_with_file_and_line):  Update for renaming of
+	rtx_reader_ptr to md_reader_ptr.
+	(rtx_reader::require_char): Rename to...
+	(md_reader::require_char): ...this.
+	(rtx_reader::require_char_ws): Rename to...
+	(md_reader::require_char_ws): ...this.
+	(rtx_reader::require_word_ws): Rename to...
+	(md_reader::require_word_ws): ...this.
+	(rtx_reader::read_char): Rename to...
+	(md_reader::read_char): ...this.
+	(rtx_reader::unread_char): Rename to...
+	(md_reader::unread_char): ...this.
+	(rtx_reader::peek_char): Rename to...
+	(md_reader::peek_char): ...this.
+	(rtx_reader::read_name): Rename to...
+	(md_reader::read_name): ...this.
+	(rtx_reader::read_escape): Rename to...
+	(md_reader::read_escape): ...this.
+	(rtx_reader::read_quoted_string): Rename to...
+	(md_reader::read_quoted_string): ...this.
+	(rtx_reader::read_braced_string): Rename to...
+	(md_reader::read_braced_string): ...this.
+	(rtx_reader::read_string): Rename to...
+	(md_reader::read_string): ...this.
+	(rtx_reader::read_skip_construct): Rename to...
+	(md_reader::read_skip_construct): ...this.
+	(rtx_reader::handle_constants): Rename to...
+	(md_reader::handle_constants): ...this.
+	(rtx_reader::traverse_md_constants): Rename to...
+	(md_reader::traverse_md_constants): ...this.
+	(rtx_reader::handle_enum): Rename to...
+	(md_reader::handle_enum): ...this.
+	(rtx_reader::lookup_enum_type): Rename to...
+	(md_reader::lookup_enum_type): ...this.
+	(rtx_reader::traverse_enum_types): Rename to...
+	(md_reader::traverse_enum_types): ...this.
+	(rtx_reader::rtx_reader): Rename to...
+	(md_reader::md_reader): ...this, and update for renaming of
+	rtx_reader_ptr to md_reader_ptr.
+	(rtx_reader::~rtx_reader): Rename to...
+	(md_reader::~md_reader): ...this, and update for renaming of
+	rtx_reader_ptr to md_reader_ptr.
+	(rtx_reader::handle_include): Rename to...
+	(md_reader::handle_include): ...this.
+	(rtx_reader::handle_file): Rename to...
+	(md_reader::handle_file): ...this.
+	(rtx_reader::handle_toplevel_file): Rename to...
+	(md_reader::handle_toplevel_file): ...this.
+	(rtx_reader::get_current_location): Rename to...
+	(md_reader::get_current_location): ...this.
+	(rtx_reader::add_include_path): Rename to...
+	(md_reader::add_include_path): ...this.
+	(rtx_reader::read_md_files): Rename to...
+	(md_reader::read_md_files): ...this.
+	* read-md.h (class rtx_reader): Split into...
+	(class md_reader): ...new class.
+	(rtx_reader_ptr): Rename to...
+	(md_reader_ptr): ...this, and convert to a md_reader *.
+	(class noop_reader): Update base class to be md_reader.
+	(class rtx_reader): Reintroduce as a subclass of md_reader.
+	(rtx_reader_ptr): Reintroduce as a rtx_reader *.
+	(read_char): Update for renaming of rtx_reader_ptr to
+	md_reader_ptr.
+	(unread_char): Likewise.
+	* read-rtl.c (rtx_reader_ptr): New global.
+	(rtx_reader::apply_iterator_to_string): Rename to...
+	(md_reader::apply_iterator_to_string): ...this.
+	(rtx_reader::copy_rtx_for_iterators): Rename to...
+	(md_reader::copy_rtx_for_iterators): ...this.
+	(rtx_reader::read_conditions): Rename to...
+	(md_reader::read_conditions): ...this.
+	(rtx_reader::record_potential_iterator_use): Rename to...
+	(md_reader::record_potential_iterator_use): ...this.
+	(rtx_reader::read_mapping): Rename to...
+	(md_reader::read_mapping): ...this.
+	(rtx_reader::read_rtx): Use rtx_reader_ptr when calling
+	read_rtx_code.
+	(rtx_reader::read_rtx_operand): Use get_string_obstack rather
+	than directly accessing m_string_obstack.
+	(rtx_reader::rtx_reader): New ctor.
+	(rtx_reader::~rtx_reader): New dtor.
+
 2016-12-07  Martin Sebor  <msebor@redhat.com>
 
 	PR middle-end/77784
diff --git a/gcc/genpreds.c b/gcc/genpreds.c
index c5d597c..58e9238 100644
--- a/gcc/genpreds.c
+++ b/gcc/genpreds.c
@@ -1205,7 +1205,7 @@ write_tm_constrs_h (void)
   printf ("\
 /* Generated automatically by the program '%s'\n\
    from the machine description file '%s'.  */\n\n", progname,
-	  rtx_reader_ptr->get_top_level_filename ());
+	  md_reader_ptr->get_top_level_filename ());
 
   puts ("\
 #ifndef GCC_TM_CONSTRS_H\n\
@@ -1405,7 +1405,7 @@ write_tm_preds_h (void)
   printf ("\
 /* Generated automatically by the program '%s'\n\
    from the machine description file '%s'.  */\n\n", progname,
-	  rtx_reader_ptr->get_top_level_filename ());
+	  md_reader_ptr->get_top_level_filename ());
 
   puts ("\
 #ifndef GCC_TM_PREDS_H\n\
@@ -1555,7 +1555,7 @@ write_insn_preds_c (void)
   printf ("\
 /* Generated automatically by the program '%s'\n\
    from the machine description file '%s'.  */\n\n", progname,
-	  rtx_reader_ptr->get_top_level_filename ());
+	  md_reader_ptr->get_top_level_filename ());
 
   puts ("\
 #include \"config.h\"\n\
diff --git a/gcc/read-md.c b/gcc/read-md.c
index 095075f..6d9a1bd 100644
--- a/gcc/read-md.c
+++ b/gcc/read-md.c
@@ -39,7 +39,7 @@ void (*include_callback) (const char *);
 
 /* Global singleton.  */
 
-rtx_reader *rtx_reader_ptr;
+md_reader *md_reader_ptr;
 
 /* Given an object that starts with a char * name field, return a hash
    code for its name.  */
@@ -79,7 +79,7 @@ leading_ptr_eq_p (const void *def1, const void *def2)
 /* Associate PTR with the file position given by FILENAME and LINENO.  */
 
 void
-rtx_reader::set_md_ptr_loc (const void *ptr, const char *filename, int lineno)
+md_reader::set_md_ptr_loc (const void *ptr, const char *filename, int lineno)
 {
   struct ptr_loc *loc;
 
@@ -95,7 +95,7 @@ rtx_reader::set_md_ptr_loc (const void *ptr, const char *filename, int lineno)
    position was set.  */
 
 const struct ptr_loc *
-rtx_reader::get_md_ptr_loc (const void *ptr)
+md_reader::get_md_ptr_loc (const void *ptr)
 {
   return (const struct ptr_loc *) htab_find (m_ptr_locs, &ptr);
 }
@@ -103,7 +103,7 @@ rtx_reader::get_md_ptr_loc (const void *ptr)
 /* Associate NEW_PTR with the same file position as OLD_PTR.  */
 
 void
-rtx_reader::copy_md_ptr_loc (const void *new_ptr, const void *old_ptr)
+md_reader::copy_md_ptr_loc (const void *new_ptr, const void *old_ptr)
 {
   const struct ptr_loc *loc = get_md_ptr_loc (old_ptr);
   if (loc != 0)
@@ -114,7 +114,7 @@ rtx_reader::copy_md_ptr_loc (const void *new_ptr, const void *old_ptr)
    directive for it to OUTF.  */
 
 void
-rtx_reader::fprint_md_ptr_loc (FILE *outf, const void *ptr)
+md_reader::fprint_md_ptr_loc (FILE *outf, const void *ptr)
 {
   const struct ptr_loc *loc = get_md_ptr_loc (ptr);
   if (loc != 0)
@@ -123,7 +123,7 @@ rtx_reader::fprint_md_ptr_loc (FILE *outf, const void *ptr)
 
 /* Special fprint_md_ptr_loc for writing to STDOUT.  */
 void
-rtx_reader::print_md_ptr_loc (const void *ptr)
+md_reader::print_md_ptr_loc (const void *ptr)
 {
   fprint_md_ptr_loc (stdout, ptr);
 }
@@ -132,7 +132,7 @@ rtx_reader::print_md_ptr_loc (const void *ptr)
    may be null or empty.  */
 
 const char *
-rtx_reader::join_c_conditions (const char *cond1, const char *cond2)
+md_reader::join_c_conditions (const char *cond1, const char *cond2)
 {
   char *result;
   const void **entry;
@@ -161,7 +161,7 @@ rtx_reader::join_c_conditions (const char *cond1, const char *cond2)
    directive for COND if its original file position is known.  */
 
 void
-rtx_reader::fprint_c_condition (FILE *outf, const char *cond)
+md_reader::fprint_c_condition (FILE *outf, const char *cond)
 {
   const char **halves = (const char **) htab_find (m_joined_conditions, &cond);
   if (halves != 0)
@@ -183,7 +183,7 @@ rtx_reader::fprint_c_condition (FILE *outf, const char *cond)
 /* Special fprint_c_condition for writing to STDOUT.  */
 
 void
-rtx_reader::print_c_condition (const char *cond)
+md_reader::print_c_condition (const char *cond)
 {
   fprint_c_condition (stdout, cond);
 }
@@ -250,8 +250,9 @@ fatal_with_file_and_line (const char *msg, ...)
 
   va_start (ap, msg);
 
-  fprintf (stderr, "%s:%d:%d: error: ", rtx_reader_ptr->get_filename (),
-	   rtx_reader_ptr->get_lineno (), rtx_reader_ptr->get_colno ());
+  fprintf (stderr, "%s:%d:%d: error: ", md_reader_ptr->get_filename (),
+	   md_reader_ptr->get_lineno (),
+	   md_reader_ptr->get_colno ());
   vfprintf (stderr, msg, ap);
   putc ('\n', stderr);
 
@@ -271,8 +272,9 @@ fatal_with_file_and_line (const char *msg, ...)
   context[i] = '\0';
 
   fprintf (stderr, "%s:%d:%d: note: following context is `%s'\n",
-	   rtx_reader_ptr->get_filename (), rtx_reader_ptr->get_lineno (),
-	   rtx_reader_ptr->get_colno (), context);
+	   md_reader_ptr->get_filename (),
+	   md_reader_ptr->get_lineno (),
+	   md_reader_ptr->get_colno (), context);
 
   va_end (ap);
   exit (1);
@@ -344,7 +346,7 @@ read_skip_spaces (void)
    EXPECTED.  */
 
 void
-rtx_reader::require_char (char expected)
+md_reader::require_char (char expected)
 {
   int ch = read_char ();
   if (ch != expected)
@@ -355,7 +357,7 @@ rtx_reader::require_char (char expected)
    character, issuing a fatal error if it is not EXPECTED.  */
 
 void
-rtx_reader::require_char_ws (char expected)
+md_reader::require_char_ws (char expected)
 {
   int ch = read_skip_spaces ();
   if (ch != expected)
@@ -366,7 +368,7 @@ rtx_reader::require_char_ws (char expected)
    issuing a fatal error if it is not EXPECTED.  */
 
 void
-rtx_reader::require_word_ws (const char *expected)
+md_reader::require_word_ws (const char *expected)
 {
   struct md_name name;
   read_name (&name);
@@ -377,7 +379,7 @@ rtx_reader::require_word_ws (const char *expected)
 /* Read the next character from the file.  */
 
 int
-rtx_reader::read_char (void)
+md_reader::read_char (void)
 {
   int ch;
 
@@ -397,7 +399,7 @@ rtx_reader::read_char (void)
 /* Put back CH, which was the last character read from the file.  */
 
 void
-rtx_reader::unread_char (int ch)
+md_reader::unread_char (int ch)
 {
   if (ch == '\n')
     {
@@ -412,7 +414,7 @@ rtx_reader::unread_char (int ch)
 /* Peek at the next character from the file without consuming it.  */
 
 int
-rtx_reader::peek_char (void)
+md_reader::peek_char (void)
 {
   int ch = read_char ();
   unread_char (ch);
@@ -423,7 +425,7 @@ rtx_reader::peek_char (void)
    punctuation chars of rtx printed syntax.  */
 
 void
-rtx_reader::read_name (struct md_name *name)
+md_reader::read_name (struct md_name *name)
 {
   int c;
   size_t i;
@@ -489,7 +491,7 @@ rtx_reader::read_name (struct md_name *name)
    Caller has read the backslash, but not placed it into the obstack.  */
 
 void
-rtx_reader::read_escape ()
+md_reader::read_escape ()
 {
   int c = read_char ();
 
@@ -542,7 +544,7 @@ rtx_reader::read_escape ()
    the leading quote.  */
 
 char *
-rtx_reader::read_quoted_string ()
+md_reader::read_quoted_string ()
 {
   int c;
 
@@ -569,7 +571,7 @@ rtx_reader::read_quoted_string ()
    the outermost braces _are_ included in the string constant.  */
 
 char *
-rtx_reader::read_braced_string ()
+md_reader::read_braced_string ()
 {
   int c;
   int brace_depth = 1;  /* caller-processed */
@@ -606,7 +608,7 @@ rtx_reader::read_braced_string ()
    and dispatch to the appropriate string constant reader.  */
 
 char *
-rtx_reader::read_string (int star_if_braced)
+md_reader::read_string (int star_if_braced)
 {
   char *stringbuf;
   int saw_paren = 0;
@@ -642,7 +644,7 @@ rtx_reader::read_string (int star_if_braced)
    is currently nested by DEPTH levels of parentheses.  */
 
 void
-rtx_reader::read_skip_construct (int depth, file_location loc)
+md_reader::read_skip_construct (int depth, file_location loc)
 {
   struct md_name name;
   int c;
@@ -784,7 +786,7 @@ add_constant (htab_t defs, char *name, char *value,
    after the "define_constants".  */
 
 void
-rtx_reader::handle_constants ()
+md_reader::handle_constants ()
 {
   int c;
   htab_t defs;
@@ -815,7 +817,7 @@ rtx_reader::handle_constants ()
    Stop when CALLBACK returns zero.  */
 
 void
-rtx_reader::traverse_md_constants (htab_trav callback, void *info)
+md_reader::traverse_md_constants (htab_trav callback, void *info)
 {
   htab_traverse (get_md_constants (), callback, info);
 }
@@ -838,7 +840,7 @@ md_decimal_string (int number)
    directive is a define_enum rather than a define_c_enum.  */
 
 void
-rtx_reader::handle_enum (file_location loc, bool md_p)
+md_reader::handle_enum (file_location loc, bool md_p)
 {
   char *enum_name, *value_name;
   struct md_name name;
@@ -904,7 +906,7 @@ rtx_reader::handle_enum (file_location loc, bool md_p)
 /* Try to find the definition of the given enum.  Return null on failure.  */
 
 struct enum_type *
-rtx_reader::lookup_enum_type (const char *name)
+md_reader::lookup_enum_type (const char *name)
 {
   return (struct enum_type *) htab_find (m_enum_types, &name);
 }
@@ -914,15 +916,15 @@ rtx_reader::lookup_enum_type (const char *name)
    returns zero.  */
 
 void
-rtx_reader::traverse_enum_types (htab_trav callback, void *info)
+md_reader::traverse_enum_types (htab_trav callback, void *info)
 {
   htab_traverse (m_enum_types, callback, info);
 }
 
 
-/* Constructor for rtx_reader.  */
+/* Constructor for md_reader.  */
 
-rtx_reader::rtx_reader ()
+md_reader::md_reader ()
 : m_toplevel_fname (NULL),
   m_base_dir (NULL),
   m_read_md_file (NULL),
@@ -933,7 +935,7 @@ rtx_reader::rtx_reader ()
   m_last_dir_md_include_ptr (&m_first_dir_md_include)
 {
   /* Set the global singleton pointer.  */
-  rtx_reader_ptr = this;
+  md_reader_ptr = this;
 
   obstack_init (&m_string_obstack);
 
@@ -953,9 +955,9 @@ rtx_reader::rtx_reader ()
   unlock_std_streams ();
 }
 
-/* rtx_reader's destructor.  */
+/* md_reader's destructor.  */
 
-rtx_reader::~rtx_reader ()
+md_reader::~md_reader ()
 {
   free (m_base_dir);
 
@@ -972,7 +974,7 @@ rtx_reader::~rtx_reader ()
   obstack_free (&m_string_obstack, NULL);
 
   /* Clear the global singleton pointer.  */
-  rtx_reader_ptr = NULL;
+  md_reader_ptr = NULL;
 }
 
 /* Process an "include" directive, starting with the optional space
@@ -981,7 +983,7 @@ rtx_reader::~rtx_reader ()
    which the "include" occurred.  */
 
 void
-rtx_reader::handle_include (file_location loc)
+md_reader::handle_include (file_location loc)
 {
   const char *filename;
   const char *old_filename;
@@ -1059,7 +1061,7 @@ rtx_reader::handle_include (file_location loc)
    unknown directives.  */
 
 void
-rtx_reader::handle_file ()
+md_reader::handle_file ()
 {
   struct md_name directive;
   int c;
@@ -1093,7 +1095,7 @@ rtx_reader::handle_file ()
    and m_base_dir accordingly.  */
 
 void
-rtx_reader::handle_toplevel_file ()
+md_reader::handle_toplevel_file ()
 {
   const char *base;
 
@@ -1108,7 +1110,7 @@ rtx_reader::handle_toplevel_file ()
 }
 
 file_location
-rtx_reader::get_current_location () const
+md_reader::get_current_location () const
 {
   return file_location (m_read_md_filename, m_read_md_lineno, m_read_md_colno);
 }
@@ -1116,7 +1118,7 @@ rtx_reader::get_current_location () const
 /* Parse a -I option with argument ARG.  */
 
 void
-rtx_reader::add_include_path (const char *arg)
+md_reader::add_include_path (const char *arg)
 {
   struct file_name_list *dirtmp;
 
@@ -1137,8 +1139,8 @@ rtx_reader::add_include_path (const char *arg)
    generic error should be reported.  */
 
 bool
-rtx_reader::read_md_files (int argc, const char **argv,
-			   bool (*parse_opt) (const char *))
+md_reader::read_md_files (int argc, const char **argv,
+			  bool (*parse_opt) (const char *))
 {
   int i;
   bool no_more_options;
@@ -1233,7 +1235,7 @@ rtx_reader::read_md_files (int argc, const char **argv,
   return !have_error;
 }
 
-/* class noop_reader : public rtx_reader */
+/* class noop_reader : public md_reader */
 
 /* A dummy implementation which skips unknown directives.  */
 void
diff --git a/gcc/read-md.h b/gcc/read-md.h
index 06b89b4..27fc9c2 100644
--- a/gcc/read-md.h
+++ b/gcc/read-md.h
@@ -91,11 +91,23 @@ struct enum_type {
   unsigned int num_values;
 };
 
-class rtx_reader
+/* A class for reading .md files and RTL dump files.
+
+   Implemented in read-md.c.
+
+   This class has responsibility for reading chars from input files, and
+   for certain common top-level directives including the "include"
+   directive.
+
+   It does not handle parsing the hierarchically-nested expressions of
+   rtl.def; for that see the rtx_reader subclass below (implemented in
+   read-rtl.c).  */
+
+class md_reader
 {
  public:
-  rtx_reader ();
-  virtual ~rtx_reader ();
+  md_reader ();
+  virtual ~md_reader ();
 
   bool read_md_files (int, const char **, bool (*) (const char *));
 
@@ -145,11 +157,6 @@ class rtx_reader
   void record_potential_iterator_use (struct iterator_group *group,
 				      void *ptr, const char *name);
   struct mapping *read_mapping (struct iterator_group *group, htab_t table);
-  bool read_rtx (const char *rtx_name, vec<rtx> *rtxen);
-  rtx read_rtx_code (const char *code_name);
-  void read_rtx_operand (rtx return_rtx, int idx);
-  rtx read_nested_rtx ();
-  rtx read_rtx_variadic (rtx form);
 
   const char *get_top_level_filename () const { return m_toplevel_fname; }
   const char *get_filename () const { return m_read_md_filename; }
@@ -231,20 +238,42 @@ class rtx_reader
   htab_t m_enum_types;
 };
 
-/* Global singleton.  */
-extern rtx_reader *rtx_reader_ptr;
+/* Global singleton; contrast with rtx_reader_ptr below.  */
+extern md_reader *md_reader_ptr;
 
-/* An rtx_reader subclass which skips unknown directives.  */
+/* An md_reader subclass which skips unknown directives, for
+   the gen* tools that purely use read-md.o.  */
 
-class noop_reader : public rtx_reader
+class noop_reader : public md_reader
 {
  public:
-  noop_reader () : rtx_reader () {}
+  noop_reader () : md_reader () {}
 
   /* A dummy implementation which skips unknown directives.  */
   void handle_unknown_directive (file_location, const char *);
 };
 
+/* An md_reader subclass that actually handles full hierarchical
+   rtx expressions.
+
+   Implemented in read-rtl.c.  */
+
+class rtx_reader : public md_reader
+{
+ public:
+  rtx_reader ();
+  ~rtx_reader ();
+
+  bool read_rtx (const char *rtx_name, vec<rtx> *rtxen);
+  rtx read_rtx_code (const char *code_name);
+  void read_rtx_operand (rtx return_rtx, int idx);
+  rtx read_nested_rtx ();
+  rtx read_rtx_variadic (rtx form);
+};
+
+/* Global singleton; contrast with md_reader_ptr above.  */
+extern rtx_reader *rtx_reader_ptr;
+
 extern void (*include_callback) (const char *);
 
 /* Read the next character from the MD file.  */
@@ -252,7 +281,7 @@ extern void (*include_callback) (const char *);
 static inline int
 read_char (void)
 {
-  return rtx_reader_ptr->read_char ();
+  return md_reader_ptr->read_char ();
 }
 
 /* Put back CH, which was the last character read from the MD file.  */
@@ -260,7 +289,7 @@ read_char (void)
 static inline void
 unread_char (int ch)
 {
-  rtx_reader_ptr->unread_char (ch);
+  md_reader_ptr->unread_char (ch);
 }
 
 extern hashval_t leading_string_hash (const void *);
diff --git a/gcc/read-rtl.c b/gcc/read-rtl.c
index 7a2021a..f74c875 100644
--- a/gcc/read-rtl.c
+++ b/gcc/read-rtl.c
@@ -107,6 +107,9 @@ const char *current_iterator_name;
 
 static void validate_const_int (const char *);
 
+/* Global singleton.  */
+rtx_reader *rtx_reader_ptr = NULL;
+
 /* The mode and code iterator structures.  */
 static struct iterator_group modes, codes, ints, substs;
 
@@ -326,7 +329,7 @@ map_attr_string (const char *p)
    if any changes were needed, otherwise return STRING itself.  */
 
 const char *
-rtx_reader::apply_iterator_to_string (const char *string)
+md_reader::apply_iterator_to_string (const char *string)
 {
   char *base, *copy, *p, *start, *end;
   struct map_value *v;
@@ -365,7 +368,7 @@ rtx_reader::apply_iterator_to_string (const char *string)
    values into any strings.  */
 
 rtx
-rtx_reader::copy_rtx_for_iterators (rtx original)
+md_reader::copy_rtx_for_iterators (rtx original)
 {
   const char *format_ptr, *p;
   int i, j;
@@ -735,7 +738,7 @@ atoll (const char *p)
    slipped in at the beginning of the sequence of MD files read by
    most of the other generators.  */
 void
-rtx_reader::read_conditions ()
+md_reader::read_conditions ()
 {
   int c;
 
@@ -834,8 +837,8 @@ record_attribute_use (struct iterator_group *group, void *ptr,
    callback.  */
 
 void
-rtx_reader::record_potential_iterator_use (struct iterator_group *group,
-					   void *ptr, const char *name)
+md_reader::record_potential_iterator_use (struct iterator_group *group,
+					  void *ptr, const char *name)
 {
   struct mapping *m;
   size_t len;
@@ -869,7 +872,7 @@ rtx_reader::record_potential_iterator_use (struct iterator_group *group,
    (which belongs to GROUP) and return it.  */
 
 struct mapping *
-rtx_reader::read_mapping (struct iterator_group *group, htab_t table)
+md_reader::read_mapping (struct iterator_group *group, htab_t table)
 {
   struct md_name name;
   struct mapping *m;
@@ -1072,7 +1075,7 @@ rtx_reader::read_rtx (const char *rtx_name, vec<rtx> *rtxen)
       return true;
     }
 
-  apply_iterators (read_rtx_code (rtx_name), rtxen);
+  apply_iterators (rtx_reader_ptr->read_rtx_code (rtx_name), rtxen);
   iterator_uses.truncate (0);
   attribute_uses.truncate (0);
 
@@ -1270,6 +1273,7 @@ rtx_reader::read_rtx_operand (rtx return_rtx, int idx)
       {
 	char *stringbuf;
 	int star_if_braced;
+	struct obstack *string_obstack = get_string_obstack ();
 
 	c = read_skip_spaces ();
 	unread_char (c);
@@ -1306,11 +1310,11 @@ rtx_reader::read_rtx_operand (rtx return_rtx, int idx)
 	    for (slash = fn; *slash; slash ++)
 	      if (*slash == '/' || *slash == '\\' || *slash == ':')
 		fn = slash + 1;
-	    obstack_1grow (&m_string_obstack, '*');
-	    obstack_grow (&m_string_obstack, fn, strlen (fn));
+	    obstack_1grow (string_obstack, '*');
+	    obstack_grow (string_obstack, fn, strlen (fn));
 	    sprintf (line_name, ":%d", get_lineno ());
-	    obstack_grow (&m_string_obstack, line_name, strlen (line_name)+1);
-	    stringbuf = XOBFINISH (&m_string_obstack, char *);
+	    obstack_grow (string_obstack, line_name, strlen (line_name)+1);
+	    stringbuf = XOBFINISH (string_obstack, char *);
 	  }
 
 	/* Find attr-names in the string.  */
@@ -1447,3 +1451,20 @@ rtx_reader::read_rtx_variadic (rtx form)
   unread_char (c);
   return form;
 }
+
+/* Constructor for class rtx_reader.  */
+
+rtx_reader::rtx_reader ()
+: md_reader ()
+{
+  /* Set the global singleton pointer.  */
+  rtx_reader_ptr = this;
+}
+
+/* Destructor for class rtx_reader.  */
+
+rtx_reader::~rtx_reader ()
+{
+  /* Clear the global singleton pointer.  */
+  rtx_reader_ptr = NULL;
+}
-- 
cgit v1.1


From 4ba8f0a3a4f8493a03b6229433728c49925e179f Mon Sep 17 00:00:00 2001
From: Andrew Pinski <apinski@cavium.com>
Date: Thu, 8 Dec 2016 04:59:00 +0000
Subject: aarch64.c (aarch64_load_symref_appropriately): Access the lower part
 of RTX appropriately.

2016-12-08  Andrew Pinski  <apinski@cavium.com>

gcc
	* config/aarch64/aarch64.c (aarch64_load_symref_appropriately):
	Access the lower part of RTX appropriately.

gcc/testsuite
	* gcc.c-torture/compile/pr71112.c : New Testcase.

From-SVN: r243427
---
 gcc/ChangeLog                                 |  5 +++++
 gcc/config/aarch64/aarch64.c                  |  3 ++-
 gcc/testsuite/ChangeLog                       |  4 ++++
 gcc/testsuite/gcc.c-torture/compile/pr71112.c | 10 ++++++++++
 4 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr71112.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 929bf4d..a44ba8b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-08  Andrew Pinski  <apinski@cavium.com>
+
+	* config/aarch64/aarch64.c (aarch64_load_symref_appropriately):
+	Access the lower part of RTX appropriately.
+
 2016-12-07  David Malcolm  <dmalcolm@redhat.com>
 
 	* genpreds.c (write_tm_constrs_h): Update for renaming of
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 128f32b..98f76c1 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1302,7 +1302,8 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
 	    emit_move_insn (gp_rtx, gen_rtx_HIGH (Pmode, s));
 
 	    if (mode != GET_MODE (gp_rtx))
-	      gp_rtx = simplify_gen_subreg (mode, gp_rtx, GET_MODE (gp_rtx), 0);
+             gp_rtx = gen_lowpart (mode, gp_rtx);
+
 	  }
 
 	if (mode == ptr_mode)
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 27225c2..d4fb081 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2016-12-08  Andrew Pinski  <apinski@cavium.com>
+
+	* gcc.target/aarch64/pr71112.c : New Testcase.
+
 2016-12-07  Martin Sebor  <msebor@redhat.com>
 
 	PR middle-end/77784
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr71112.c b/gcc/testsuite/gcc.c-torture/compile/pr71112.c
new file mode 100644
index 0000000..69e2df6
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr71112.c
@@ -0,0 +1,10 @@
+/* PR target/71112.  */
+/* { dg-additional-options "-fpie" { target pie } } */
+
+extern int dbs[100];
+void f (int *);
+int nscd_init (void)
+{
+  f (dbs);
+  return 0;
+}
-- 
cgit v1.1


From 23b88fda665d2f995c73336f74dcf8931f5fdf71 Mon Sep 17 00:00:00 2001
From: "Naveen H.S" <Naveen.Hurugalawadi@cavium.com>
Date: Thu, 8 Dec 2016 05:09:37 +0000
Subject: aarch64.c (aarch64_load_symref_appropriately): Handle
 SYMBOL_SMALL_TLSGD for ILP32.

2016-12-08  Naveen H.S  <Naveen.Hurugalawadi@cavium.com>

gcc
	* config/aarch64/aarch64.c (aarch64_load_symref_appropriately):
	Handle SYMBOL_SMALL_TLSGD for ILP32.
	* config/aarch64/aarch64.md : tlsgd_small modified into
	tlsgd_small_<mode> to support SImode and DImode.
	*tlsgd_small modified into *tlsgd_small_<mode> to support SImode and
	DImode.

gcc/testsuite
	* gcc.target/aarch64/pr78382.c : New Testcase.

From-SVN: r243428
---
 gcc/ChangeLog                              |  9 +++++++++
 gcc/config/aarch64/aarch64.c               |  8 ++++++--
 gcc/config/aarch64/aarch64.md              |  8 ++++----
 gcc/testsuite/ChangeLog                    |  4 ++++
 gcc/testsuite/gcc.target/aarch64/pr78382.c | 10 ++++++++++
 5 files changed, 33 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr78382.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a44ba8b..2026261 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2016-12-08  Naveen H.S  <Naveen.Hurugalawadi@cavium.com>
+
+	* config/aarch64/aarch64.c (aarch64_load_symref_appropriately):
+	Handle SYMBOL_SMALL_TLSGD for ILP32.
+	* config/aarch64/aarch64.md : tlsgd_small modified into
+	tlsgd_small_<mode> to support SImode and DImode.
+	*tlsgd_small modified into *tlsgd_small_<mode> to support SImode and
+	DImode.
+
 2016-12-08  Andrew Pinski  <apinski@cavium.com>
 
 	* config/aarch64/aarch64.c (aarch64_load_symref_appropriately):
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 98f76c1..1a06432 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1379,10 +1379,14 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
     case SYMBOL_SMALL_TLSGD:
       {
 	rtx_insn *insns;
-	rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
+	machine_mode mode = GET_MODE (dest);
+	rtx result = gen_rtx_REG (mode, R0_REGNUM);
 
 	start_sequence ();
-	aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
+	if (TARGET_ILP32)
+	  aarch64_emit_call_insn (gen_tlsgd_small_si (result, imm));
+	else
+	  aarch64_emit_call_insn (gen_tlsgd_small_di (result, imm));
 	insns = get_insns ();
 	end_sequence ();
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 811a078..65eb326 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5173,20 +5173,20 @@
 ;; The TLS ABI specifically requires that the compiler does not schedule
 ;; instructions in the TLS stubs, in order to enable linker relaxation.
 ;; Therefore we treat the stubs as an atomic sequence.
-(define_expand "tlsgd_small"
+(define_expand "tlsgd_small_<mode>"
  [(parallel [(set (match_operand 0 "register_operand" "")
                   (call (mem:DI (match_dup 2)) (const_int 1)))
-	     (unspec:DI [(match_operand:DI 1 "aarch64_valid_symref" "")] UNSPEC_GOTSMALLTLS)
+	     (unspec:DI [(match_operand:PTR 1 "aarch64_valid_symref" "")] UNSPEC_GOTSMALLTLS)
 	     (clobber (reg:DI LR_REGNUM))])]
  ""
 {
   operands[2] = aarch64_tls_get_addr ();
 })
 
-(define_insn "*tlsgd_small"
+(define_insn "*tlsgd_small_<mode>"
   [(set (match_operand 0 "register_operand" "")
 	(call (mem:DI (match_operand:DI 2 "" "")) (const_int 1)))
-   (unspec:DI [(match_operand:DI 1 "aarch64_valid_symref" "S")] UNSPEC_GOTSMALLTLS)
+   (unspec:DI [(match_operand:PTR 1 "aarch64_valid_symref" "S")] UNSPEC_GOTSMALLTLS)
    (clobber (reg:DI LR_REGNUM))
   ]
   ""
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index d4fb081..bf6db37 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2016-12-08  Naveen H.S  <Naveen.Hurugalawadi@cavium.com>
+
+	* gcc.target/aarch64/pr78382.c : New Testcase.
+
 2016-12-08  Andrew Pinski  <apinski@cavium.com>
 
 	* gcc.target/aarch64/pr71112.c : New Testcase.
diff --git a/gcc/testsuite/gcc.target/aarch64/pr78382.c b/gcc/testsuite/gcc.target/aarch64/pr78382.c
new file mode 100644
index 0000000..febe7bc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr78382.c
@@ -0,0 +1,10 @@
+/* { dg-require-effective-target fpic } */
+/* { dg-options "-mtls-dialect=trad -fpic" } */
+
+__thread int abc;
+void
+foo ()
+{
+  int *p;
+  p = &abc;
+}
-- 
cgit v1.1


From 1d09844a1cff1a78d24eba41a1443903da0bacfd Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@gcc.gnu.org>
Date: Thu, 8 Dec 2016 09:26:06 +0100
Subject: P0003R5 - removal of dynamic exception specification from C++17

	P0003R5 - removal of dynamic exception specification from C++17
	* parser.c (cp_parser_exception_specification_opt): For C++17
	error out on throw ( type-id-list ), for C++11 and C++14 issue
	-Wdeprecated warning on it.  Formatting fix.  Treat throw()
	in C++17 as noexcept(true).

	* g++.dg/compat/eh/ctor1.h: Adjust for deprecation of
	throw (type-id-list) in C++11 and C++14 and removal in C++17.
	* g++.dg/compat/eh/ctor1_y.C: Likewise.
	* g++.dg/compat/eh/new1_x.C: Likewise.
	* g++.dg/compat/eh/new1_y.C: Likewise.
	* g++.dg/compat/eh/spec3_x.C: Likewise.
	* g++.dg/compat/eh/spec3_y.C: Likewise.
	* g++.dg/compat/eh/template1.h: Likewise.
	* g++.dg/compat/eh/template1_y.C: Likewise.
	* g++.dg/compat/eh/unexpected1_x.C: Likewise.
	* g++.dg/compat/eh/unexpected1_y.C: Likewise.
	* g++.dg/cpp0x/auto9.C: Likewise.
	* g++.dg/cpp0x/defaulted23.C: Likewise.
	* g++.dg/cpp0x/error5.C: Likewise.
	* g++.dg/cpp0x/lambda/lambda-eh2.C: Likewise.
	* g++.dg/cpp0x/noexcept02.C: Likewise.
	* g++.dg/cpp0x/noexcept07.C: Likewise.
	* g++.dg/cpp0x/noexcept08.C: Likewise.
	* g++.dg/cpp0x/noexcept19.C: Likewise.
	* g++.dg/cpp0x/variadic73.C: Likewise.
	* g++.dg/cpp0x/variadic-throw.C: Likewise.
	* g++.dg/cpp1z/noexcept-type1.C: Likewise.
	* g++.dg/eh/async-unwind2.C: Likewise.
	* g++.dg/eh/cond4.C: Likewise.
	* g++.dg/eh/delete1.C: Likewise.
	* g++.dg/eh/ehopt1.C: Likewise.
	* g++.dg/eh/forced3.C: Likewise.
	* g++.dg/eh/forced4.C: Likewise.
	* g++.dg/eh/init-temp2.C: Likewise.
	* g++.dg/eh/pr38662.C: Likewise.
	* g++.dg/eh/pr41819.C: Likewise.
	* g++.dg/eh/shadow1.C: Likewise.
	* g++.dg/eh/spec2.C: Likewise.
	* g++.dg/eh/spec3.C: Likewise.
	* g++.dg/eh/spec5.C: Likewise.
	* g++.dg/eh/spec6.C: Likewise.
	* g++.dg/eh/spec7.C: Likewise.
	* g++.dg/eh/spec8.C: Likewise.
	* g++.dg/eh/spec9.C: Likewise.
	* g++.dg/eh/template1.C: Likewise.
	* g++.dg/eh/unexpected1.C: Likewise.
	* g++.dg/ext/has_nothrow_assign.C: Likewise.
	* g++.dg/ext/has_nothrow_constructor.C: Likewise.
	* g++.dg/ext/has_nothrow_copy-1.C: Likewise.
	* g++.dg/ext/has_nothrow_copy-2.C: Likewise.
	* g++.dg/ext/has_nothrow_copy-4.C: Likewise.
	* g++.dg/ext/has_nothrow_copy-5.C: Likewise.
	* g++.dg/ext/has_nothrow_copy-6.C: Likewise.
	* g++.dg/ext/has_nothrow_copy-7.C: Likewise.
	* g++.dg/gcov/gcov-7.C: Likewise.
	* g++.dg/init/new13.C: Likewise.
	* g++.dg/init/new25.C: Likewise.
	* g++.dg/lookup/exception1.C: Likewise.
	* g++.dg/opt/noreturn-1.C: Likewise.
	* g++.dg/other/error3.C: Likewise.
	* g++.dg/rtti/crash3.C: Likewise.
	* g++.dg/template/eh2.C: Likewise.
	* g++.dg/template/error36.C: Likewise.
	* g++.dg/tm/pr46567.C: Likewise.
	* g++.dg/tm/pr47340.C: Likewise.
	* g++.dg/torture/pr46364.C: Likewise.
	* g++.dg/torture/pr49394.C: Likewise.
	* g++.dg/torture/pr52918-1.C: Likewise.
	* g++.dg/torture/pr57190.C: Likewise.
	* g++.dg/torture/stackalign/eh-alloca-1.C: Likewise.
	* g++.dg/torture/stackalign/eh-fastcall-1.C: Likewise.
	* g++.dg/torture/stackalign/eh-global-1.C: Likewise.
	* g++.dg/torture/stackalign/eh-inline-1.C: Likewise.
	* g++.dg/torture/stackalign/eh-inline-2.C: Likewise.
	* g++.dg/torture/stackalign/eh-thiscall-1.C: Likewise.
	* g++.dg/torture/stackalign/eh-vararg-1.C: Likewise.
	* g++.dg/torture/stackalign/eh-vararg-2.C: Likewise.
	* g++.dg/tree-ssa/pr45605.C: Likewise.
	* g++.dg/warn/Wreturn-type-3.C: Likewise.
	* g++.old-deja/g++.eh/badalloc1.C: Likewise.
	* g++.old-deja/g++.eh/cleanup2.C: Likewise.
	* g++.old-deja/g++.eh/spec1.C: Likewise.
	* g++.old-deja/g++.eh/spec2.C: Likewise.
	* g++.old-deja/g++.eh/spec3.C: Likewise.
	* g++.old-deja/g++.eh/spec4.C: Likewise.
	* g++.old-deja/g++.eh/spec6.C: Likewise.
	* g++.old-deja/g++.eh/throw1.C: Likewise.
	* g++.old-deja/g++.eh/throw2.C: Likewise.
	* g++.old-deja/g++.eh/tmpl1.C: Likewise.
	* g++.old-deja/g++.eh/tmpl3.C: Likewise.
	* g++.old-deja/g++.mike/eh15.C: Likewise.
	* g++.old-deja/g++.mike/eh25.C: Likewise.
	* g++.old-deja/g++.mike/eh33.C: Likewise.
	* g++.old-deja/g++.mike/eh34.C: Likewise.
	* g++.old-deja/g++.mike/eh50.C: Likewise.
	* g++.old-deja/g++.mike/eh51.C: Likewise.
	* g++.old-deja/g++.mike/eh55.C: Likewise.
	* g++.old-deja/g++.mike/p10416.C: Likewise.
	* g++.old-deja/g++.other/crash28.C: Likewise.
	* g++.old-deja/g++.other/crash30.C: Likewise.
	* g++.old-deja/g++.other/new7.C: Likewise.
	* g++.old-deja/g++.pt/ehspec1.C: Likewise.
	* g++.old-deja/g++.robertl/eb123.C: Likewise.

	* testsuite/util/testsuite_new_operators.h: Include testsuite_hooks.h.
	(operator new): Use THROW macro.

From-SVN: r243429
---
 gcc/cp/ChangeLog                                   |   8 ++
 gcc/cp/parser.c                                    |  27 ++--
 gcc/testsuite/ChangeLog                            | 141 ++++++++++++++++++---
 gcc/testsuite/g++.dg/compat/eh/ctor1.h             |   8 +-
 gcc/testsuite/g++.dg/compat/eh/ctor1_y.C           |   7 +-
 gcc/testsuite/g++.dg/compat/eh/new1_x.C            |   6 +-
 gcc/testsuite/g++.dg/compat/eh/new1_y.C            |   5 +-
 gcc/testsuite/g++.dg/compat/eh/spec3_x.C           |   6 +-
 gcc/testsuite/g++.dg/compat/eh/spec3_y.C           |   5 +-
 gcc/testsuite/g++.dg/compat/eh/template1.h         |   6 +-
 gcc/testsuite/g++.dg/compat/eh/template1_y.C       |   5 +-
 gcc/testsuite/g++.dg/compat/eh/unexpected1_x.C     |   2 +
 gcc/testsuite/g++.dg/compat/eh/unexpected1_y.C     |   2 +
 gcc/testsuite/g++.dg/cpp0x/auto9.C                 |   8 +-
 gcc/testsuite/g++.dg/cpp0x/defaulted23.C           |  16 +--
 gcc/testsuite/g++.dg/cpp0x/error5.C                |   6 +-
 gcc/testsuite/g++.dg/cpp0x/lambda/lambda-eh2.C     |   4 +-
 gcc/testsuite/g++.dg/cpp0x/noexcept02.C            |   6 +-
 gcc/testsuite/g++.dg/cpp0x/noexcept07.C            |   4 +-
 gcc/testsuite/g++.dg/cpp0x/noexcept08.C            |  21 ++-
 gcc/testsuite/g++.dg/cpp0x/noexcept19.C            |   6 +-
 gcc/testsuite/g++.dg/cpp0x/variadic-throw.C        |   8 +-
 gcc/testsuite/g++.dg/cpp0x/variadic73.C            |   6 +-
 gcc/testsuite/g++.dg/cpp1z/noexcept-type1.C        |   2 +-
 gcc/testsuite/g++.dg/eh/async-unwind2.C            |  22 +++-
 gcc/testsuite/g++.dg/eh/cond4.C                    |   8 +-
 gcc/testsuite/g++.dg/eh/delete1.C                  |  16 ++-
 gcc/testsuite/g++.dg/eh/ehopt1.C                   |  16 ++-
 gcc/testsuite/g++.dg/eh/forced3.C                  |   1 +
 gcc/testsuite/g++.dg/eh/forced4.C                  |   5 +-
 gcc/testsuite/g++.dg/eh/init-temp2.C               |  24 +++-
 gcc/testsuite/g++.dg/eh/pr38662.C                  |   6 +-
 gcc/testsuite/g++.dg/eh/pr41819.C                  |   5 +-
 gcc/testsuite/g++.dg/eh/shadow1.C                  |  12 +-
 gcc/testsuite/g++.dg/eh/spec2.C                    |  13 +-
 gcc/testsuite/g++.dg/eh/spec3.C                    |   5 +-
 gcc/testsuite/g++.dg/eh/spec5.C                    |  12 +-
 gcc/testsuite/g++.dg/eh/spec6.C                    |   1 +
 gcc/testsuite/g++.dg/eh/spec7.C                    |   6 +-
 gcc/testsuite/g++.dg/eh/spec8.C                    |   2 +
 gcc/testsuite/g++.dg/eh/spec9.C                    |   5 +-
 gcc/testsuite/g++.dg/eh/template1.C                |   6 +-
 gcc/testsuite/g++.dg/eh/unexpected1.C              |   4 +-
 gcc/testsuite/g++.dg/ext/has_nothrow_assign.C      |  22 ++--
 gcc/testsuite/g++.dg/ext/has_nothrow_constructor.C |  12 +-
 gcc/testsuite/g++.dg/ext/has_nothrow_copy-1.C      |  20 ++-
 gcc/testsuite/g++.dg/ext/has_nothrow_copy-2.C      |  17 ++-
 gcc/testsuite/g++.dg/ext/has_nothrow_copy-4.C      |   6 +-
 gcc/testsuite/g++.dg/ext/has_nothrow_copy-5.C      |   6 +-
 gcc/testsuite/g++.dg/ext/has_nothrow_copy-6.C      |   6 +-
 gcc/testsuite/g++.dg/ext/has_nothrow_copy-7.C      |   6 +-
 gcc/testsuite/g++.dg/gcov/gcov-7.C                 |   5 +-
 gcc/testsuite/g++.dg/init/new13.C                  |   4 +-
 gcc/testsuite/g++.dg/init/new25.C                  |   4 +
 gcc/testsuite/g++.dg/lookup/exception1.C           |  24 +++-
 gcc/testsuite/g++.dg/opt/noreturn-1.C              |  26 +++-
 gcc/testsuite/g++.dg/other/error3.C                |   3 +-
 gcc/testsuite/g++.dg/rtti/crash3.C                 |   5 +-
 gcc/testsuite/g++.dg/template/eh2.C                |   4 +-
 gcc/testsuite/g++.dg/template/error36.C            |   4 +-
 gcc/testsuite/g++.dg/tm/pr46567.C                  |  12 +-
 gcc/testsuite/g++.dg/tm/pr47340.C                  |   6 +-
 gcc/testsuite/g++.dg/torture/pr46364.C             |   6 +-
 gcc/testsuite/g++.dg/torture/pr49394.C             |   5 +-
 gcc/testsuite/g++.dg/torture/pr52918-1.C           |  11 +-
 gcc/testsuite/g++.dg/torture/pr57190.C             |  23 +++-
 .../g++.dg/torture/stackalign/eh-alloca-1.C        |   5 +-
 .../g++.dg/torture/stackalign/eh-fastcall-1.C      |   5 +-
 .../g++.dg/torture/stackalign/eh-global-1.C        |   5 +-
 .../g++.dg/torture/stackalign/eh-inline-1.C        |   5 +-
 .../g++.dg/torture/stackalign/eh-inline-2.C        |   5 +-
 .../g++.dg/torture/stackalign/eh-thiscall-1.C      |   5 +-
 .../g++.dg/torture/stackalign/eh-vararg-1.C        |   5 +-
 .../g++.dg/torture/stackalign/eh-vararg-2.C        |   5 +-
 gcc/testsuite/g++.dg/tree-ssa/pr45605.C            |   6 +-
 gcc/testsuite/g++.dg/warn/Wreturn-type-3.C         |   5 +-
 gcc/testsuite/g++.old-deja/g++.eh/badalloc1.C      |  15 ++-
 gcc/testsuite/g++.old-deja/g++.eh/cleanup2.C       |  22 +++-
 gcc/testsuite/g++.old-deja/g++.eh/spec1.C          |   4 +-
 gcc/testsuite/g++.old-deja/g++.eh/spec2.C          |   4 +-
 gcc/testsuite/g++.old-deja/g++.eh/spec3.C          |   4 +-
 gcc/testsuite/g++.old-deja/g++.eh/spec4.C          |   4 +-
 gcc/testsuite/g++.old-deja/g++.eh/spec6.C          |   3 +-
 gcc/testsuite/g++.old-deja/g++.eh/throw1.C         |   5 +-
 gcc/testsuite/g++.old-deja/g++.eh/throw2.C         |   5 +-
 gcc/testsuite/g++.old-deja/g++.eh/tmpl1.C          |   5 +-
 gcc/testsuite/g++.old-deja/g++.eh/tmpl3.C          |  11 +-
 gcc/testsuite/g++.old-deja/g++.mike/eh15.C         |   4 +-
 gcc/testsuite/g++.old-deja/g++.mike/eh25.C         |   8 +-
 gcc/testsuite/g++.old-deja/g++.mike/eh33.C         |   3 +-
 gcc/testsuite/g++.old-deja/g++.mike/eh34.C         |   1 +
 gcc/testsuite/g++.old-deja/g++.mike/eh50.C         |   3 +-
 gcc/testsuite/g++.old-deja/g++.mike/eh51.C         |   3 +-
 gcc/testsuite/g++.old-deja/g++.mike/eh55.C         |   2 +-
 gcc/testsuite/g++.old-deja/g++.mike/p10416.C       |   6 +-
 gcc/testsuite/g++.old-deja/g++.other/crash28.C     |  11 +-
 gcc/testsuite/g++.old-deja/g++.other/crash30.C     |   2 +-
 gcc/testsuite/g++.old-deja/g++.other/new7.C        |   5 +-
 gcc/testsuite/g++.old-deja/g++.pt/ehspec1.C        |   4 +-
 gcc/testsuite/g++.old-deja/g++.robertl/eb123.C     |   5 +-
 libstdc++-v3/ChangeLog                             |   5 +
 .../testsuite/util/testsuite_new_operators.h       |   3 +-
 102 files changed, 720 insertions(+), 199 deletions(-)

diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog
index a59d25b..e325164 100644
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,3 +1,11 @@
+2016-12-08  Jakub Jelinek  <jakub@redhat.com>
+
+	P0003R5 - removal of dynamic exception specification from C++17
+	* parser.c (cp_parser_exception_specification_opt): For C++17
+	error out on throw ( type-id-list ), for C++11 and C++14 issue
+	-Wdeprecated warning on it.  Formatting fix.  Treat throw()
+	in C++17 as noexcept(true).
+
 2016-12-07  Martin Jambor  <mjambor@suse.cz>
 
 	PR c++/78589
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 08f5f9e..70e5fb3 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -23786,8 +23786,8 @@ cp_parser_exception_specification_opt (cp_parser* parser)
   token = cp_lexer_peek_token (parser->lexer);
 
   /* Is it a noexcept-specification?  */
-  type_id_list = cp_parser_noexcept_specification_opt(parser, true, NULL,
-						      false);
+  type_id_list = cp_parser_noexcept_specification_opt (parser, true, NULL,
+						       false);
   if (type_id_list != NULL_TREE)
     return type_id_list;
 
@@ -23795,12 +23795,7 @@ cp_parser_exception_specification_opt (cp_parser* parser)
   if (!cp_parser_is_keyword (token, RID_THROW))
     return NULL_TREE;
 
-#if 0
-  /* Enable this once a lot of code has transitioned to noexcept?  */
-  if (cxx_dialect >= cxx11 && !in_system_header_at (input_location))
-    warning (OPT_Wdeprecated, "dynamic exception specifications are "
-	     "deprecated in C++0x; use %<noexcept%> instead");
-#endif
+  location_t loc = token->location;
 
   /* Consume the `throw'.  */
   cp_lexer_consume_token (parser->lexer);
@@ -23821,7 +23816,23 @@ cp_parser_exception_specification_opt (cp_parser* parser)
       type_id_list = cp_parser_type_id_list (parser);
       /* Restore the saved message.  */
       parser->type_definition_forbidden_message = saved_message;
+
+      if (cxx_dialect >= cxx1z)
+	{
+	  error_at (loc, "ISO C++1z does not allow dynamic exception "
+			 "specifications");
+	  type_id_list = NULL_TREE;
+	}
+      else if (cxx_dialect >= cxx11 && !in_system_header_at (loc))
+	warning_at (loc, OPT_Wdeprecated,
+		    "dynamic exception specifications are deprecated in C++11;"
+		    " use %<noexcept%> instead");
     }
+  /* In C++17, throw() is equivalent to noexcept (true).  throw()
+     is deprecated in C++11 and above as well, but is still widely used,
+     so don't warn about it yet.  */
+  else if (cxx_dialect >= cxx1z)
+    type_id_list = noexcept_true_spec;
   else
     type_id_list = empty_except_spec;
 
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index bf6db37..b6b0096 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,17 +1,118 @@
+2016-12-08  Jakub Jelinek  <jakub@redhat.com>
+
+	P0003R5 - removal of dynamic exception specification from C++17
+	* g++.dg/compat/eh/ctor1.h: Adjust for deprecation of
+	throw (type-id-list) in C++11 and C++14 and removal in C++17.
+	* g++.dg/compat/eh/ctor1_y.C: Likewise.
+	* g++.dg/compat/eh/new1_x.C: Likewise.
+	* g++.dg/compat/eh/new1_y.C: Likewise.
+	* g++.dg/compat/eh/spec3_x.C: Likewise.
+	* g++.dg/compat/eh/spec3_y.C: Likewise.
+	* g++.dg/compat/eh/template1.h: Likewise.
+	* g++.dg/compat/eh/template1_y.C: Likewise.
+	* g++.dg/compat/eh/unexpected1_x.C: Likewise.
+	* g++.dg/compat/eh/unexpected1_y.C: Likewise.
+	* g++.dg/cpp0x/auto9.C: Likewise.
+	* g++.dg/cpp0x/defaulted23.C: Likewise.
+	* g++.dg/cpp0x/error5.C: Likewise.
+	* g++.dg/cpp0x/lambda/lambda-eh2.C: Likewise.
+	* g++.dg/cpp0x/noexcept02.C: Likewise.
+	* g++.dg/cpp0x/noexcept07.C: Likewise.
+	* g++.dg/cpp0x/noexcept08.C: Likewise.
+	* g++.dg/cpp0x/noexcept19.C: Likewise.
+	* g++.dg/cpp0x/variadic73.C: Likewise.
+	* g++.dg/cpp0x/variadic-throw.C: Likewise.
+	* g++.dg/cpp1z/noexcept-type1.C: Likewise.
+	* g++.dg/eh/async-unwind2.C: Likewise.
+	* g++.dg/eh/cond4.C: Likewise.
+	* g++.dg/eh/delete1.C: Likewise.
+	* g++.dg/eh/ehopt1.C: Likewise.
+	* g++.dg/eh/forced3.C: Likewise.
+	* g++.dg/eh/forced4.C: Likewise.
+	* g++.dg/eh/init-temp2.C: Likewise.
+	* g++.dg/eh/pr38662.C: Likewise.
+	* g++.dg/eh/pr41819.C: Likewise.
+	* g++.dg/eh/shadow1.C: Likewise.
+	* g++.dg/eh/spec2.C: Likewise.
+	* g++.dg/eh/spec3.C: Likewise.
+	* g++.dg/eh/spec5.C: Likewise.
+	* g++.dg/eh/spec6.C: Likewise.
+	* g++.dg/eh/spec7.C: Likewise.
+	* g++.dg/eh/spec8.C: Likewise.
+	* g++.dg/eh/spec9.C: Likewise.
+	* g++.dg/eh/template1.C: Likewise.
+	* g++.dg/eh/unexpected1.C: Likewise.
+	* g++.dg/ext/has_nothrow_assign.C: Likewise.
+	* g++.dg/ext/has_nothrow_constructor.C: Likewise.
+	* g++.dg/ext/has_nothrow_copy-1.C: Likewise.
+	* g++.dg/ext/has_nothrow_copy-2.C: Likewise.
+	* g++.dg/ext/has_nothrow_copy-4.C: Likewise.
+	* g++.dg/ext/has_nothrow_copy-5.C: Likewise.
+	* g++.dg/ext/has_nothrow_copy-6.C: Likewise.
+	* g++.dg/ext/has_nothrow_copy-7.C: Likewise.
+	* g++.dg/gcov/gcov-7.C: Likewise.
+	* g++.dg/init/new13.C: Likewise.
+	* g++.dg/init/new25.C: Likewise.
+	* g++.dg/lookup/exception1.C: Likewise.
+	* g++.dg/opt/noreturn-1.C: Likewise.
+	* g++.dg/other/error3.C: Likewise.
+	* g++.dg/rtti/crash3.C: Likewise.
+	* g++.dg/template/eh2.C: Likewise.
+	* g++.dg/template/error36.C: Likewise.
+	* g++.dg/tm/pr46567.C: Likewise.
+	* g++.dg/tm/pr47340.C: Likewise.
+	* g++.dg/torture/pr46364.C: Likewise.
+	* g++.dg/torture/pr49394.C: Likewise.
+	* g++.dg/torture/pr52918-1.C: Likewise.
+	* g++.dg/torture/pr57190.C: Likewise.
+	* g++.dg/torture/stackalign/eh-alloca-1.C: Likewise.
+	* g++.dg/torture/stackalign/eh-fastcall-1.C: Likewise.
+	* g++.dg/torture/stackalign/eh-global-1.C: Likewise.
+	* g++.dg/torture/stackalign/eh-inline-1.C: Likewise.
+	* g++.dg/torture/stackalign/eh-inline-2.C: Likewise.
+	* g++.dg/torture/stackalign/eh-thiscall-1.C: Likewise.
+	* g++.dg/torture/stackalign/eh-vararg-1.C: Likewise.
+	* g++.dg/torture/stackalign/eh-vararg-2.C: Likewise.
+	* g++.dg/tree-ssa/pr45605.C: Likewise.
+	* g++.dg/warn/Wreturn-type-3.C: Likewise.
+	* g++.old-deja/g++.eh/badalloc1.C: Likewise.
+	* g++.old-deja/g++.eh/cleanup2.C: Likewise.
+	* g++.old-deja/g++.eh/spec1.C: Likewise.
+	* g++.old-deja/g++.eh/spec2.C: Likewise.
+	* g++.old-deja/g++.eh/spec3.C: Likewise.
+	* g++.old-deja/g++.eh/spec4.C: Likewise.
+	* g++.old-deja/g++.eh/spec6.C: Likewise.
+	* g++.old-deja/g++.eh/throw1.C: Likewise.
+	* g++.old-deja/g++.eh/throw2.C: Likewise.
+	* g++.old-deja/g++.eh/tmpl1.C: Likewise.
+	* g++.old-deja/g++.eh/tmpl3.C: Likewise.
+	* g++.old-deja/g++.mike/eh15.C: Likewise.
+	* g++.old-deja/g++.mike/eh25.C: Likewise.
+	* g++.old-deja/g++.mike/eh33.C: Likewise.
+	* g++.old-deja/g++.mike/eh34.C: Likewise.
+	* g++.old-deja/g++.mike/eh50.C: Likewise.
+	* g++.old-deja/g++.mike/eh51.C: Likewise.
+	* g++.old-deja/g++.mike/eh55.C: Likewise.
+	* g++.old-deja/g++.mike/p10416.C: Likewise.
+	* g++.old-deja/g++.other/crash28.C: Likewise.
+	* g++.old-deja/g++.other/crash30.C: Likewise.
+	* g++.old-deja/g++.other/new7.C: Likewise.
+	* g++.old-deja/g++.pt/ehspec1.C: Likewise.
+	* g++.old-deja/g++.robertl/eb123.C: Likewise.
+
 2016-12-08  Naveen H.S  <Naveen.Hurugalawadi@cavium.com>
 
-	* gcc.target/aarch64/pr78382.c : New Testcase.
+	* gcc.target/aarch64/pr78382.c: New testcase.
 
 2016-12-08  Andrew Pinski  <apinski@cavium.com>
 
-	* gcc.target/aarch64/pr71112.c : New Testcase.
+	* gcc.target/aarch64/pr71112.c: New testcase.
 
 2016-12-07  Martin Sebor  <msebor@redhat.com>
 
 	PR middle-end/77784
 	PR middle-end/78149
 	PR middle-end/78138
-
 	* c-c++-common/Wsizeof-pointer-memaccess2.c: Adjust expected diagnostic.
 	* g++.dg/ext/builtin-object-size3.C (bar): Same.
 	* g++.dg/ext/strncpy-chk1.C: Same.
@@ -84,7 +185,7 @@
 
 2016-12-07  Naveen H.S  <Naveen.Hurugalawadi@cavium.com>
 
-	* gcc.target/aarch64/pr71727.c : New Testcase.
+	* gcc.target/aarch64/pr71727.c: New testcase.
 
 2016-12-06  Tom de Vries  <tom@codesourcery.com>
 
@@ -873,23 +974,23 @@
 
 2016-11-23  Naveen H.S  <Naveen.Hurugalawadi@caviumnetworks.com>
 
-	* gcc.target/aarch64/ldp_stp_1.c : Add -mcpu=generic.
-	* gcc.target/aarch64/store-pair-1.c : Likewise.
+	* gcc.target/aarch64/ldp_stp_1.c: Add -mcpu=generic.
+	* gcc.target/aarch64/store-pair-1.c: Likewise.
 
 2016-11-23  Naveen H.S  <Naveen.Hurugalawadi@caviumnetworks.com>
 
-	* gcc.target/aarch64/fmaxmin.c : Add -fno-vect-cost-model.
-	* gcc.target/aarch64/fmul_fcvt_2.c : Likewise.
-	* gcc.target/aarch64/vect-abs-compile.c : Likewise.
-	* gcc.target/aarch64/vect-clz.c : Likewise.
-	* gcc.target/aarch64/vect-fcm-eq-d.c : Likewise.
-	* gcc.target/aarch64/vect-fcm-ge-d.c : Likewise.
-	* gcc.target/aarch64/vect-fcm-gt-d.c : Likewise.
-	* gcc.target/aarch64/vect-fmovd-zero.c : Likewise.
-	* gcc.target/aarch64/vect-fmovd.c : Likewise.
-	* gcc.target/aarch64/vect-fmovf-zero.c : Likewise.
-	* gcc.target/aarch64/vect-fmovf.c : Likewise.
-	* gcc.target/aarch64/vect_ctz_1.c : Likewise.
+	* gcc.target/aarch64/fmaxmin.c: Add -fno-vect-cost-model.
+	* gcc.target/aarch64/fmul_fcvt_2.c: Likewise.
+	* gcc.target/aarch64/vect-abs-compile.c: Likewise.
+	* gcc.target/aarch64/vect-clz.c: Likewise.
+	* gcc.target/aarch64/vect-fcm-eq-d.c: Likewise.
+	* gcc.target/aarch64/vect-fcm-ge-d.c: Likewise.
+	* gcc.target/aarch64/vect-fcm-gt-d.c: Likewise.
+	* gcc.target/aarch64/vect-fmovd-zero.c: Likewise.
+	* gcc.target/aarch64/vect-fmovd.c: Likewise.
+	* gcc.target/aarch64/vect-fmovf-zero.c: Likewise.
+	* gcc.target/aarch64/vect-fmovf.c: Likewise.
+	* gcc.target/aarch64/vect_ctz_1.c: Likewise.
 
 2016-11-23  Peter Bergner  <bergner@vnet.ibm.com>
 
@@ -7061,7 +7162,7 @@
 2016-07-28  Paul Thomas  <pault@gcc.gnu.org>
 
 	PR fortran/71883
-	* gfortran.dg/pr71883.f90 : New test.
+	* gfortran.dg/pr71883.f90: New test.
 
 2016-07-28  Yuri Rumyantsev  <ysrumyan@gmail.com>
 
@@ -14685,7 +14786,7 @@
 2016-02-20  Paul Thomas  <pault@gcc.gnu.org>
 
 	PR fortran/69423
-	* gfortran.dg/deferred_character_15.f90 : New test.
+	* gfortran.dg/deferred_character_15.f90: New test.
 
 2016-02-20  Dominique d'Humieres  <dominiq@lps.ens.fr>
 
diff --git a/gcc/testsuite/g++.dg/compat/eh/ctor1.h b/gcc/testsuite/g++.dg/compat/eh/ctor1.h
index 9ad1860..e88a62d 100644
--- a/gcc/testsuite/g++.dg/compat/eh/ctor1.h
+++ b/gcc/testsuite/g++.dg/compat/eh/ctor1.h
@@ -5,6 +5,12 @@ struct Foo
 
 struct Bar
 {
-  ~Bar () throw(int);
+  ~Bar ()
+#if __cplusplus < 201103L
+  throw(int)
+#else
+  noexcept(false)
+#endif
+  ;
   Foo f;
 };
diff --git a/gcc/testsuite/g++.dg/compat/eh/ctor1_y.C b/gcc/testsuite/g++.dg/compat/eh/ctor1_y.C
index ca1cf38..5478bb7 100644
--- a/gcc/testsuite/g++.dg/compat/eh/ctor1_y.C
+++ b/gcc/testsuite/g++.dg/compat/eh/ctor1_y.C
@@ -7,7 +7,12 @@ Foo::~Foo()
   was_f_in_Bar_destroyed=true;
 }
 
-Bar::~Bar() throw(int)
+Bar::~Bar()
+#if __cplusplus < 201103L
+throw(int)
+#else
+noexcept(false)
+#endif
 {
   throw 1;
 }
diff --git a/gcc/testsuite/g++.dg/compat/eh/new1_x.C b/gcc/testsuite/g++.dg/compat/eh/new1_x.C
index 1212871..23ac80b 100644
--- a/gcc/testsuite/g++.dg/compat/eh/new1_x.C
+++ b/gcc/testsuite/g++.dg/compat/eh/new1_x.C
@@ -4,7 +4,11 @@
 extern "C" void exit (int);
 extern "C" void abort (void);
 
-extern void * operator new[] (std::size_t s) throw (std::bad_alloc);
+extern void * operator new[] (std::size_t s)
+#if __cplusplus < 201103L
+throw (std::bad_alloc)
+#endif
+;
 extern void operator delete[] (void *p) throw ();
 
 struct A
diff --git a/gcc/testsuite/g++.dg/compat/eh/new1_y.C b/gcc/testsuite/g++.dg/compat/eh/new1_y.C
index fbe0e21..1dd373d 100644
--- a/gcc/testsuite/g++.dg/compat/eh/new1_y.C
+++ b/gcc/testsuite/g++.dg/compat/eh/new1_y.C
@@ -4,7 +4,10 @@
 extern int ret;
 
 void *ptr;
-void * operator new[] (std::size_t s) throw (std::bad_alloc)
+void * operator new[] (std::size_t s)
+#if __cplusplus < 201103L
+throw (std::bad_alloc)
+#endif
 {
   ptr = operator new (s);
   return ptr;
diff --git a/gcc/testsuite/g++.dg/compat/eh/spec3_x.C b/gcc/testsuite/g++.dg/compat/eh/spec3_x.C
index b8e5fbe..4c2aefd 100644
--- a/gcc/testsuite/g++.dg/compat/eh/spec3_x.C
+++ b/gcc/testsuite/g++.dg/compat/eh/spec3_x.C
@@ -1,6 +1,10 @@
 #include "spec3.h"
 
-extern void func () throw (B,A);
+extern void func ()
+#if __cplusplus < 201103L
+throw (B,A)
+#endif
+;
 
 void spec3_x (void)
 {
diff --git a/gcc/testsuite/g++.dg/compat/eh/spec3_y.C b/gcc/testsuite/g++.dg/compat/eh/spec3_y.C
index fef6b36..78b0914 100644
--- a/gcc/testsuite/g++.dg/compat/eh/spec3_y.C
+++ b/gcc/testsuite/g++.dg/compat/eh/spec3_y.C
@@ -2,7 +2,10 @@
 
 A::A() {}
 
-void func() throw (B,A)
+void func()
+#if __cplusplus < 201103L
+throw (B,A)
+#endif
 {
   throw A();
 }
diff --git a/gcc/testsuite/g++.dg/compat/eh/template1.h b/gcc/testsuite/g++.dg/compat/eh/template1.h
index 93999a1..c807b47 100644
--- a/gcc/testsuite/g++.dg/compat/eh/template1.h
+++ b/gcc/testsuite/g++.dg/compat/eh/template1.h
@@ -11,5 +11,9 @@ struct C
 {
   typedef B<T> D;
   typedef typename D::E E;
-  void f() throw(E);
+  void f()
+#if __cplusplus < 201103L
+  throw(E)
+#endif
+  ;
 };
diff --git a/gcc/testsuite/g++.dg/compat/eh/template1_y.C b/gcc/testsuite/g++.dg/compat/eh/template1_y.C
index 1942537..39559d9 100644
--- a/gcc/testsuite/g++.dg/compat/eh/template1_y.C
+++ b/gcc/testsuite/g++.dg/compat/eh/template1_y.C
@@ -1,6 +1,9 @@
 #include "template1.h"
 
-template<class T> void C<T>::f (void) throw (E)
+template<class T> void C<T>::f (void)
+#if __cplusplus < 201103L
+throw (E)
+#endif
 {
   throw E();
 }
diff --git a/gcc/testsuite/g++.dg/compat/eh/unexpected1_x.C b/gcc/testsuite/g++.dg/compat/eh/unexpected1_x.C
index 61361a6..e8f87f1 100644
--- a/gcc/testsuite/g++.dg/compat/eh/unexpected1_x.C
+++ b/gcc/testsuite/g++.dg/compat/eh/unexpected1_x.C
@@ -1,3 +1,5 @@
+// { dg-options "-std=c++98" }
+
 #include <exception>
 
 struct One { };
diff --git a/gcc/testsuite/g++.dg/compat/eh/unexpected1_y.C b/gcc/testsuite/g++.dg/compat/eh/unexpected1_y.C
index 0c42c45..182b956 100644
--- a/gcc/testsuite/g++.dg/compat/eh/unexpected1_y.C
+++ b/gcc/testsuite/g++.dg/compat/eh/unexpected1_y.C
@@ -1,3 +1,5 @@
+// { dg-options "-std=c++98" }
+
 struct One { };
 struct Two { };
 
diff --git a/gcc/testsuite/g++.dg/cpp0x/auto9.C b/gcc/testsuite/g++.dg/cpp0x/auto9.C
index 771ce0e..d71e55f 100644
--- a/gcc/testsuite/g++.dg/cpp0x/auto9.C
+++ b/gcc/testsuite/g++.dg/cpp0x/auto9.C
@@ -103,13 +103,13 @@ auto fnlate2 () -> auto *;			// { dg-error "invalid use of|expected" "" { target
 
 void
 badthrow () throw (auto)			// { dg-error "invalid use of" }
-{
-}
+{						// { dg-error "dynamic exception specification" "" { target c++1z } .-1 }
+}						// { dg-warning "deprecated" "" { target { ! c++1z } } .-2 }
 
 void
 badthrow2 () throw (auto &)			// { dg-error "invalid use of|expected" }
-{
-}
+{						// { dg-error "dynamic exception specification" "" { target c++1z } .-1 }
+}						// { dg-warning "deprecated" "" { target { ! c++1z } } .-2 }
 
 template <auto V = 4> struct G {};		// { dg-error "auto" "" { target { ! c++1z } } }
 
diff --git a/gcc/testsuite/g++.dg/cpp0x/defaulted23.C b/gcc/testsuite/g++.dg/cpp0x/defaulted23.C
index 9e6cbc3..4d41887 100644
--- a/gcc/testsuite/g++.dg/cpp0x/defaulted23.C
+++ b/gcc/testsuite/g++.dg/cpp0x/defaulted23.C
@@ -10,22 +10,22 @@ A a;
 
 struct B
 {
-  B() throw (int) = default; // { dg-message "exception-specification" }
-};
-
-B b;				// { dg-error "deleted" }
+  B() throw (int) = default; // { dg-message "exception-specification" "" { target { ! c++1z } } }
+};				// { dg-error "dynamic exception specification" "" { target c++1z } .-1 }
+				// { dg-warning "deprecated" "" { target { ! c++1z } } .-2 }
+B b;				// { dg-error "deleted" "" { target { ! c++1z } } }
 
 struct C
 {
-  C() throw (int) { }
-};
+  C() throw (int) { }		// { dg-error "dynamic exception specification" "" { target c++1z } }
+};				// { dg-warning "deprecated" "" { target { ! c++1z } } .-1 }
 
 C c;
 
 struct D: C
 {
-  D() throw (int) = default;
-};
+  D() throw (int) = default;	// { dg-error "dynamic exception specification" "" { target c++1z } }
+};				// { dg-warning "deprecated" "" { target { ! c++1z } } .-1 }
 
 D d;
 
diff --git a/gcc/testsuite/g++.dg/cpp0x/error5.C b/gcc/testsuite/g++.dg/cpp0x/error5.C
index e46c46d..99fb8e0 100644
--- a/gcc/testsuite/g++.dg/cpp0x/error5.C
+++ b/gcc/testsuite/g++.dg/cpp0x/error5.C
@@ -39,7 +39,11 @@ namespace std
 struct bad_alloc { };
 }
 
-void* operator new(std::size_t) throw (std::bad_alloc);
+void* operator new(std::size_t)
+#if __cplusplus <= 201402L
+throw (std::bad_alloc)			// { dg-warning "deprecated" "" { target { ! c++1z } } }
+#endif
+;
 
 namespace std
 {
diff --git a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-eh2.C b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-eh2.C
index 1ef510a..330cf95 100644
--- a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-eh2.C
+++ b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-eh2.C
@@ -1,7 +1,7 @@
 // PR c++/47263
 // PR c++/49260
 // { dg-options "-fno-asynchronous-unwind-tables -fno-dwarf2-cfi-asm" }
-// { dg-do run { target c++11 } }
+// { dg-do run { target { c++11 && { ! c++1z } } } }
 
 #include <exception>
 
@@ -10,7 +10,7 @@ int main( void )
   std::set_unexpected( []{ throw 0; } );
   try
     {
-      []() throw( int ) { throw nullptr; }();
+      []() throw( int ) { throw nullptr; }();	// { dg-warning "deprecated" }
     }
   catch( int )
     { }
diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept02.C b/gcc/testsuite/g++.dg/cpp0x/noexcept02.C
index 14bb282..7719541 100644
--- a/gcc/testsuite/g++.dg/cpp0x/noexcept02.C
+++ b/gcc/testsuite/g++.dg/cpp0x/noexcept02.C
@@ -10,8 +10,10 @@ void f();
 
 SA(!noexcept(f()));
 
-void g() throw (int);		// { dg-message "previous declaration" }
-void g() noexcept(false);	// { dg-error "different exception" }
+void g() throw (int);		// { dg-message "previous declaration" "" { target { ! c++1z } } }
+				// { dg-error "dynamic exception specification" "" { target c++1z } .-1 }
+				// { dg-warning "deprecated" "" { target { ! c++1z } } .-2 }
+void g() noexcept(false);	// { dg-error "different exception" "" { target { ! c++1z } } }
 void g();
 
 void h() throw();
diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept07.C b/gcc/testsuite/g++.dg/cpp0x/noexcept07.C
index 90c50a9..de16e01 100644
--- a/gcc/testsuite/g++.dg/cpp0x/noexcept07.C
+++ b/gcc/testsuite/g++.dg/cpp0x/noexcept07.C
@@ -1,6 +1,6 @@
 // Test that checking of a nothrow specification uses the one on the
-// definition.
-// { dg-do run { target c++11 } }
+// definition.  In C++17 throw() is equivalent to noexcept(true).
+// { dg-do run { target { c++11 && c++14_down } } }
 
 #include <exception>
 #include <cstdlib>
diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept08.C b/gcc/testsuite/g++.dg/cpp0x/noexcept08.C
index 96af0fe..5a554b7 100644
--- a/gcc/testsuite/g++.dg/cpp0x/noexcept08.C
+++ b/gcc/testsuite/g++.dg/cpp0x/noexcept08.C
@@ -7,8 +7,8 @@ struct A
   virtual void g() throw();
   virtual void h() noexcept;
   virtual void i() noexcept(false);
-  virtual void j() throw(int);
-};
+  virtual void j() throw(int);	// { dg-error "dynamic exception specification" "" { target c++1z } }
+};				// { dg-warning "deprecated" "" { target { ! c++1z } } .-1 }
 
 struct B: A
 {
@@ -34,16 +34,23 @@ struct D: A
   void g() noexcept(false);	// { dg-error "looser" }
   void h() noexcept(false);	// { dg-error "looser" }
   void i() noexcept(false);
-  void j() noexcept(false);	// { dg-error "looser" }
+  void j() noexcept(false);	// { dg-error "looser" "" { target { ! c++1z } } }
 };
 
 struct E: A
 {
-  void f() throw(int);
+  void f() throw(int);		// { dg-error "dynamic exception specification" "" { target c++1z } }
+				// { dg-warning "deprecated" "" { target { ! c++1z } } .-1 }
   void g() throw(int);		// { dg-error "looser" }
+				// { dg-error "dynamic exception specification" "" { target c++1z } .-1 }
+				// { dg-warning "deprecated" "" { target { ! c++1z } } .-2 }
   void h() throw(int);		// { dg-error "looser" }
-  void i() throw(int);
-  void j() throw(int);
+				// { dg-error "dynamic exception specification" "" { target c++1z } .-1 }
+				// { dg-warning "deprecated" "" { target { ! c++1z } } .-2 }
+  void i() throw(int);		// { dg-error "dynamic exception specification" "" { target c++1z } }
+				// { dg-warning "deprecated" "" { target { ! c++1z } } .-1 }
+  void j() throw(int);		// { dg-error "dynamic exception specification" "" { target c++1z } }
+				// { dg-warning "deprecated" "" { target { ! c++1z } } .-1 }
 };
 
 struct F: A
@@ -52,5 +59,5 @@ struct F: A
   void g();			// { dg-error "looser" }
   void h();			// { dg-error "looser" }
   void i();
-  void j();			// { dg-error "looser" }
+  void j();			// { dg-error "looser" "" { target { ! c++1z } } }
 };
diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept19.C b/gcc/testsuite/g++.dg/cpp0x/noexcept19.C
index d303a8c..8ec4d7d 100644
--- a/gcc/testsuite/g++.dg/cpp0x/noexcept19.C
+++ b/gcc/testsuite/g++.dg/cpp0x/noexcept19.C
@@ -22,7 +22,11 @@ struct C
 
 struct D
 {
-  D () throw (int);
+  D ()
+#if __cplusplus <= 201402L
+  throw (int)			// { dg-warning "deprecated" "" { target { ! c++1z } } }
+#endif
+  ;
 };
 
 C <D, B <D>> c;
diff --git a/gcc/testsuite/g++.dg/cpp0x/variadic-throw.C b/gcc/testsuite/g++.dg/cpp0x/variadic-throw.C
index fc81e9d..368a67b 100644
--- a/gcc/testsuite/g++.dg/cpp0x/variadic-throw.C
+++ b/gcc/testsuite/g++.dg/cpp0x/variadic-throw.C
@@ -9,9 +9,9 @@ template<int M, int N> struct pair
 
 template<int... M> struct S
 {
-  template<int... N> static int foo() throw (pair <M, N>...) // { dg-error "mismatched" }
-  {
-    return 1;
+  template<int... N> static int foo() throw (pair <M, N>...) // { dg-error "mismatched" "" { target { ! c++1z } } }
+  {							     // { dg-error "dynamic exception specification" "" { target c++1z } .-1 }
+    return 1;						     // { dg-warning "deprecated" "" { target { ! c++1z } } .-2 }
   }
 };
 
@@ -22,5 +22,5 @@ int bar ()
 
 int wibble()
 {
-  return S<0, 1, 2>::foo<0, 1> (); // { dg-error "no matching" }
+  return S<0, 1, 2>::foo<0, 1> (); // { dg-error "no matching" "" { target { ! c++1z } } }
 }
diff --git a/gcc/testsuite/g++.dg/cpp0x/variadic73.C b/gcc/testsuite/g++.dg/cpp0x/variadic73.C
index 533ed46..be998d2 100644
--- a/gcc/testsuite/g++.dg/cpp0x/variadic73.C
+++ b/gcc/testsuite/g++.dg/cpp0x/variadic73.C
@@ -3,7 +3,11 @@ struct A {};
 struct B {};
 struct C {};
 
-template<typename... Exceptions> void f(int idx) throw(Exceptions...) {
+template<typename... Exceptions> void f(int idx)
+#if __cplusplus <= 201402L
+throw(Exceptions...)		// { dg-warning "deprecated" "" { target { ! c++1z } } }
+#endif
+{
   if (idx == 0) throw A();
   else if (idx == 1) throw B();
   else if (idx == 2) throw C();
diff --git a/gcc/testsuite/g++.dg/cpp1z/noexcept-type1.C b/gcc/testsuite/g++.dg/cpp1z/noexcept-type1.C
index 62e1322..dfe64e5 100644
--- a/gcc/testsuite/g++.dg/cpp1z/noexcept-type1.C
+++ b/gcc/testsuite/g++.dg/cpp1z/noexcept-type1.C
@@ -1,7 +1,7 @@
 // Testcase from P0012r1
 // { dg-options -std=c++1z }
 
-void (*p)() throw(int);
+void (*p)() throw(int);	       // { dg-error "dynamic exception specification" }
 void (**pp)() noexcept = &p;   // { dg-error "" } cannot convert to pointer to noexcept function
 
 struct S { typedef void (*p)(); operator p(); };
diff --git a/gcc/testsuite/g++.dg/eh/async-unwind2.C b/gcc/testsuite/g++.dg/eh/async-unwind2.C
index 0c31f80..0f84901 100644
--- a/gcc/testsuite/g++.dg/eh/async-unwind2.C
+++ b/gcc/testsuite/g++.dg/eh/async-unwind2.C
@@ -87,8 +87,16 @@ struct Y
 
 struct Z;
 
-X <V> baz1 (const S &) throw (E);
-X <Z> baz2 (const X <Z> &) throw (E);
+X <V> baz1 (const S &)
+#if __cplusplus <= 201402L
+throw (E)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+;
+X <Z> baz2 (const X <Z> &)
+#if __cplusplus <= 201402L
+throw (E)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+;
 
 template <typename T> X<T>::X ()
 {
@@ -112,7 +120,10 @@ template <typename T> T *X<T>::operator -> () const
   return &y;
 }
 
-X <V> baz1 (const S &) throw (E)
+X <V> baz1 (const S &)
+#if __cplusplus <= 201402L
+throw (E)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
   return X<V> ();
 }
@@ -125,7 +136,10 @@ E::~E ()
 {
 }
 
-X <Z> baz2 (const X <Z> &) throw (E)
+X <Z> baz2 (const X <Z> &)
+#if __cplusplus <= 201402L
+throw (E)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
   throw E ();
 }
diff --git a/gcc/testsuite/g++.dg/eh/cond4.C b/gcc/testsuite/g++.dg/eh/cond4.C
index 4d312e4..17e9149 100644
--- a/gcc/testsuite/g++.dg/eh/cond4.C
+++ b/gcc/testsuite/g++.dg/eh/cond4.C
@@ -12,7 +12,13 @@ void my_terminate ()
 
 struct A {
   A(int) { }
-  ~A() throw(int) { throw 1; };
+  ~A()
+#if __cplusplus <= 201402L
+  throw(int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#else
+  noexcept(false)
+#endif
+  { throw 1; };
 };
 struct B {
   B(A) { }
diff --git a/gcc/testsuite/g++.dg/eh/delete1.C b/gcc/testsuite/g++.dg/eh/delete1.C
index 64ccb50..46b5307 100644
--- a/gcc/testsuite/g++.dg/eh/delete1.C
+++ b/gcc/testsuite/g++.dg/eh/delete1.C
@@ -12,7 +12,13 @@ void operator delete (void *) throw ()
 }
 
 struct Foo {
-  ~Foo() throw(int) {throw 1;}
+  ~Foo()
+#if __cplusplus <= 201402L
+  throw(int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#else
+  noexcept(false)
+#endif
+  {throw 1;}
 };
 
 struct Baz {
@@ -20,7 +26,13 @@ struct Baz {
   {
     deleted = 2;
   }
-  virtual ~Baz() throw(int) {throw 1;}
+  virtual ~Baz()
+#if __cplusplus <= 201402L
+  throw(int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#else
+  noexcept(false)
+#endif
+  {throw 1;}
 };
 
 int non_virt ()
diff --git a/gcc/testsuite/g++.dg/eh/ehopt1.C b/gcc/testsuite/g++.dg/eh/ehopt1.C
index b2fb412..852d324 100644
--- a/gcc/testsuite/g++.dg/eh/ehopt1.C
+++ b/gcc/testsuite/g++.dg/eh/ehopt1.C
@@ -15,7 +15,13 @@ class A<int, int>
 public:
   A(int) { ++count; if (b) throw 1; }
   A(const A&) { ++count; if (b) throw 1; }
-  ~A() throw(int) { --count; if (b) throw 1; }
+  ~A()
+#if __cplusplus <= 201402L
+  throw(int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#else
+  noexcept(false)
+#endif
+  { --count; if (b) throw 1; }
 };
 
 typedef A<int, int> B;
@@ -26,7 +32,13 @@ class A<void *, void *>
 public:
   A() { if (b) throw 1; }
   A(const B&) { if (b) throw 1; }
-  ~A() throw(int) { if (b) throw 1; }
+  ~A()
+#if __cplusplus <= 201402L
+  throw(int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#else
+  noexcept(false)
+#endif
+  { if (b) throw 1; }
 };
 
 typedef A<void *, void *> C;
diff --git a/gcc/testsuite/g++.dg/eh/forced3.C b/gcc/testsuite/g++.dg/eh/forced3.C
index b8f47df..9e92daf 100644
--- a/gcc/testsuite/g++.dg/eh/forced3.C
+++ b/gcc/testsuite/g++.dg/eh/forced3.C
@@ -1,5 +1,6 @@
 // HP-UX libunwind.so doesn't provide _UA_END_OF_STACK.
 // { dg-do run { xfail "ia64-hp-hpux11.*" } }
+// { dg-require-effective-target c++14_down }
 
 // Test that forced unwinding calls std::unexpected going 
 // through a nothrow function.
diff --git a/gcc/testsuite/g++.dg/eh/forced4.C b/gcc/testsuite/g++.dg/eh/forced4.C
index 17fd94a..96408d2 100644
--- a/gcc/testsuite/g++.dg/eh/forced4.C
+++ b/gcc/testsuite/g++.dg/eh/forced4.C
@@ -38,7 +38,10 @@ force_unwind ()
 }
 
 static void
-doit () throw(int)
+doit ()
+#if __cplusplus <= 201402L
+throw(int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
   force_unwind ();
 }
diff --git a/gcc/testsuite/g++.dg/eh/init-temp2.C b/gcc/testsuite/g++.dg/eh/init-temp2.C
index 9cf8797..127883a 100644
--- a/gcc/testsuite/g++.dg/eh/init-temp2.C
+++ b/gcc/testsuite/g++.dg/eh/init-temp2.C
@@ -8,18 +8,36 @@ template <class _Tp> class AutoPtr
 public:
   explicit AutoPtr(_Tp* __p = 0)  : _M_ptr(__p) {}
 
-  ~AutoPtr() throw(int) { delete _M_ptr; }
+  ~AutoPtr()
+#if __cplusplus <= 201402L
+  throw(int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#else
+  noexcept(false)
+#endif
+  { delete _M_ptr; }
 };
 
 struct A
 {
   A() { }
-  ~A() throw(int) { throw 1; }
+  ~A()
+#if __cplusplus <= 201402L
+  throw(int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#else
+  noexcept(false)
+#endif
+  { throw 1; }
 };
 
 struct B
 {
-  virtual ~B() throw(int);
+  virtual ~B()
+#if __cplusplus <= 201402L
+  throw(int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#else
+  noexcept(false)
+#endif
+  ;
 };
 
 B* f (const A &s) { throw 1; }
diff --git a/gcc/testsuite/g++.dg/eh/pr38662.C b/gcc/testsuite/g++.dg/eh/pr38662.C
index 294a129..be3adbbd9 100644
--- a/gcc/testsuite/g++.dg/eh/pr38662.C
+++ b/gcc/testsuite/g++.dg/eh/pr38662.C
@@ -2,7 +2,11 @@
 class E { };
 
 class T {
-  int foo(bool a) throw (E) __attribute__((regparm(1)));
+  int foo(bool a)
+#if __cplusplus <= 201402L
+  throw (E)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+  __attribute__((regparm(1)));
   int bar(bool b) __attribute__((regparm(1)));
 };
 
diff --git a/gcc/testsuite/g++.dg/eh/pr41819.C b/gcc/testsuite/g++.dg/eh/pr41819.C
index 07894a2..61c6572 100644
--- a/gcc/testsuite/g++.dg/eh/pr41819.C
+++ b/gcc/testsuite/g++.dg/eh/pr41819.C
@@ -15,7 +15,10 @@ void f1()
   try {} catch (...) {}		// { dg-error "" }
 }
 
-void f2() throw(int)
+void f2()
+#if __cplusplus <= 201402L
+throw(int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
   bar();
 }
diff --git a/gcc/testsuite/g++.dg/eh/shadow1.C b/gcc/testsuite/g++.dg/eh/shadow1.C
index 15f666a..b96f65a 100644
--- a/gcc/testsuite/g++.dg/eh/shadow1.C
+++ b/gcc/testsuite/g++.dg/eh/shadow1.C
@@ -13,14 +13,14 @@ struct D : private B
   friend class E;
   
   static B *baz (D *);
-  virtual void V () throw (B);  // { dg-error "overriding" "" }
-};
-
+  virtual void V () throw (B);  // { dg-error "overriding" "" { target { ! c++1z } } }
+};				// { dg-error "dynamic exception specification" "" { target c++1z } .-1 }
+				// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } .-2 }
 struct E : public D
 {
-  virtual void V () throw (D); // { dg-error "looser throw" "" }
-};
-
+  virtual void V () throw (D); // { dg-error "looser throw" "" { target { ! c++1z } } }
+};			       // { dg-error "dynamic exception specification" "" { target c++1z } .-1 }
+			       // { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } .-2 }
 B* foo (D *);
 
 B *D::baz (D *p)
diff --git a/gcc/testsuite/g++.dg/eh/spec2.C b/gcc/testsuite/g++.dg/eh/spec2.C
index 8107f01..0f51264 100644
--- a/gcc/testsuite/g++.dg/eh/spec2.C
+++ b/gcc/testsuite/g++.dg/eh/spec2.C
@@ -3,8 +3,15 @@
 struct S { void f (void); };
 
 typedef void f1 (void) throw (int); // { dg-error "exception" "" { target c++14_down } }
+				    // { dg-error "dynamic exception specification" "" { target c++1z } .-1 }
+				    // { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } .-2 }
 typedef void (*f2) (void) throw (int); // { dg-error "exception" "" { target c++14_down } }
+				       // { dg-error "dynamic exception specification" "" { target c++1z } .-1 }
+				       // { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } .-2 }
 typedef void (S::*f3) (void) throw (int); // { dg-error "exception" "" { target c++14_down } }
-
-void (*f4) (void) throw (int);
-void (S::*f5) (void) throw (int);
+					  // { dg-error "dynamic exception specification" "" { target c++1z } .-1 }
+					  // { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } .-2 }
+void (*f4) (void) throw (int); // { dg-error "dynamic exception specification" "" { target c++1z } }
+			       // { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } .-1 }
+void (S::*f5) (void) throw (int); // { dg-error "dynamic exception specification" "" { target c++1z } }
+				  // { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } .-1 }
diff --git a/gcc/testsuite/g++.dg/eh/spec3.C b/gcc/testsuite/g++.dg/eh/spec3.C
index 20bcfc3..49bbdc3 100644
--- a/gcc/testsuite/g++.dg/eh/spec3.C
+++ b/gcc/testsuite/g++.dg/eh/spec3.C
@@ -12,7 +12,10 @@ struct A : virtual public Base
 
 struct B {};
 
-void func() throw (B,A)
+void func()
+#if __cplusplus <= 201402L
+throw (B,A)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
   throw A();
 }
diff --git a/gcc/testsuite/g++.dg/eh/spec5.C b/gcc/testsuite/g++.dg/eh/spec5.C
index be8f327..5735e8d 100644
--- a/gcc/testsuite/g++.dg/eh/spec5.C
+++ b/gcc/testsuite/g++.dg/eh/spec5.C
@@ -8,12 +8,20 @@ struct A;
 
 struct B
 {
-  void f () throw (A);
+  void f ()
+#if __cplusplus <= 201402L
+  throw (A)
+#endif
+  ;
 };
 
 struct A {};
 
-void B::f () throw (A) {}
+void B::f ()
+#if __cplusplus <= 201402L
+throw (A)
+#endif
+{}
 
 int main ()
 {
diff --git a/gcc/testsuite/g++.dg/eh/spec6.C b/gcc/testsuite/g++.dg/eh/spec6.C
index d6d8176..d08bd86 100644
--- a/gcc/testsuite/g++.dg/eh/spec6.C
+++ b/gcc/testsuite/g++.dg/eh/spec6.C
@@ -1,6 +1,7 @@
 // Test that we don't allow incomplete types in an exception-specification
 // for a definition, or at a call site.
 
+// { dg-do compile { target c++14_down } }
 // { dg-options "-fpermissive -w" }
 
 struct A;			// { dg-message "" }
diff --git a/gcc/testsuite/g++.dg/eh/spec7.C b/gcc/testsuite/g++.dg/eh/spec7.C
index 08586a2..37efedc 100644
--- a/gcc/testsuite/g++.dg/eh/spec7.C
+++ b/gcc/testsuite/g++.dg/eh/spec7.C
@@ -19,7 +19,11 @@ struct D : public B {
         } o; 
          
         struct Raiser { 
-            Raiser()  throw( int ) {throw 1;}; 
+            Raiser()
+#if __cplusplus <= 201402L
+	    throw( int )			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+	    {throw 1;}; 
         } raiser; 
       }; 
 }; 
diff --git a/gcc/testsuite/g++.dg/eh/spec8.C b/gcc/testsuite/g++.dg/eh/spec8.C
index c76032e..8967e8c 100644
--- a/gcc/testsuite/g++.dg/eh/spec8.C
+++ b/gcc/testsuite/g++.dg/eh/spec8.C
@@ -1,8 +1,10 @@
 // PR c++/24817
+// { dg-do compile { target c++14_down } }
 
 struct exception {};
 
 template <typename T> void foo() throw(exception); // { dg-message "declaration" }
+						   // { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } .-1 }
 template <typename T> void foo(); // { dg-error "exception" }
 
 struct bar
diff --git a/gcc/testsuite/g++.dg/eh/spec9.C b/gcc/testsuite/g++.dg/eh/spec9.C
index 9e00d1c..cceb98f 100644
--- a/gcc/testsuite/g++.dg/eh/spec9.C
+++ b/gcc/testsuite/g++.dg/eh/spec9.C
@@ -4,7 +4,10 @@
 typedef int IntArray[10];
 IntArray i;
 
-void test_array() throw (IntArray)
+void test_array()
+#if __cplusplus <= 201402L
+throw (IntArray)	// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
   throw i;
 }
diff --git a/gcc/testsuite/g++.dg/eh/template1.C b/gcc/testsuite/g++.dg/eh/template1.C
index 2cbf9c6..fb471b2 100644
--- a/gcc/testsuite/g++.dg/eh/template1.C
+++ b/gcc/testsuite/g++.dg/eh/template1.C
@@ -17,7 +17,11 @@ struct C
 {
   typedef B<T> D;
   typedef typename D::E E;
-  void f() throw(E) { throw E(); }
+  void f()
+#if __cplusplus <= 201402L
+  throw(E)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+  { throw E(); }
 };
 
 int main()
diff --git a/gcc/testsuite/g++.dg/eh/unexpected1.C b/gcc/testsuite/g++.dg/eh/unexpected1.C
index e5982ff..cd5585f 100644
--- a/gcc/testsuite/g++.dg/eh/unexpected1.C
+++ b/gcc/testsuite/g++.dg/eh/unexpected1.C
@@ -1,6 +1,6 @@
 // PR 3719
 // Test that an unexpected handler can rethrow to categorize.
-// { dg-do run }
+// { dg-do run { target c++14_down } }
 
 #include <exception>
 
@@ -23,7 +23,7 @@ handle_unexpected ()
 }
 
 static void
-doit () throw (Two)
+doit () throw (Two)			// { dg-warning "deprecated" "" { target { c++11 } } }
 {
   throw One ();
 }
diff --git a/gcc/testsuite/g++.dg/ext/has_nothrow_assign.C b/gcc/testsuite/g++.dg/ext/has_nothrow_assign.C
index e6e9fc6..d84a81c 100644
--- a/gcc/testsuite/g++.dg/ext/has_nothrow_assign.C
+++ b/gcc/testsuite/g++.dg/ext/has_nothrow_assign.C
@@ -15,6 +15,12 @@ struct B
 struct C
 : public A { };
 
+#if __cplusplus > 201402L
+#define THROW_INT
+#else
+#define THROW_INT throw(int)	// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+
 struct D
 {
   D& operator=(const D&) throw() { return *this; }
@@ -22,43 +28,43 @@ struct D
 
 struct E
 {
-  E& operator=(const E&) throw(int) { return *this; }
+  E& operator=(const E&) THROW_INT { return *this; }
 };
 
 struct E1
 {
-  E1& operator=(const E1&) throw(int) { throw int(); return *this; }
+  E1& operator=(const E1&) THROW_INT { throw int(); return *this; }
 };
 
 struct F
 {
-  F() throw(int) { }
+  F() THROW_INT { }
 };
 
 struct G
 {
-  G() throw(int) { throw int(); }
+  G() THROW_INT { throw int(); }
 };
 
 struct H
 {
-  H& operator=(H&) throw(int) { return *this; }
+  H& operator=(H&) THROW_INT { return *this; }
 };
 
 struct H1
 {
-  H1& operator=(H1&) throw(int) { throw int(); return *this; }
+  H1& operator=(H1&) THROW_INT { throw int(); return *this; }
 };
 
 struct I
 {
-  I& operator=(I&) throw(int) { return *this; }
+  I& operator=(I&) THROW_INT { return *this; }
   I& operator=(const I&) throw() { return *this; }
 };
 
 struct I1
 {
-  I1& operator=(I1&) throw(int) { throw int(); return *this; }
+  I1& operator=(I1&) THROW_INT { throw int(); return *this; }
   I1& operator=(const I1&) throw() { return *this; }
 };
 
diff --git a/gcc/testsuite/g++.dg/ext/has_nothrow_constructor.C b/gcc/testsuite/g++.dg/ext/has_nothrow_constructor.C
index 1f1227c..d69b312 100644
--- a/gcc/testsuite/g++.dg/ext/has_nothrow_constructor.C
+++ b/gcc/testsuite/g++.dg/ext/has_nothrow_constructor.C
@@ -12,6 +12,12 @@ struct B
   A a;
 };
 
+#if __cplusplus > 201402L
+#define THROW_INT
+#else
+#define THROW_INT throw(int)	// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+
 struct C 
 : public A { };
 
@@ -22,12 +28,12 @@ struct D
 
 struct E
 {
-  E() throw(int) { }
+  E() THROW_INT { }
 };
 
 struct E1
 {
-  E1() throw(int) { throw int(); }
+  E1() THROW_INT { throw int(); }
 };
 
 struct F
@@ -37,7 +43,7 @@ struct F
 
 struct G
 {
-  G(const G&) throw(int) { throw int(); }
+  G(const G&) THROW_INT { throw int(); }
 };
 
 template<typename T>
diff --git a/gcc/testsuite/g++.dg/ext/has_nothrow_copy-1.C b/gcc/testsuite/g++.dg/ext/has_nothrow_copy-1.C
index 87785ae..0e5a9de 100644
--- a/gcc/testsuite/g++.dg/ext/has_nothrow_copy-1.C
+++ b/gcc/testsuite/g++.dg/ext/has_nothrow_copy-1.C
@@ -15,6 +15,12 @@ struct B
 struct C
 : public A { };
 
+#if __cplusplus > 201402L
+#define THROW_INT
+#else
+#define THROW_INT throw(int)	// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+
 struct D
 {
   D(const D&) throw() { }
@@ -22,12 +28,12 @@ struct D
 
 struct E
 {
-  E(const E&) throw(int) { }
+  E(const E&) THROW_INT { }
 };
 
 struct E1
 {
-  E1(const E1&) throw(int) { throw int(); }
+  E1(const E1&) THROW_INT { throw int(); }
 };
 
 struct F
@@ -37,28 +43,28 @@ struct F
 
 struct G
 {
-  G() throw(int) { throw int(); }
+  G() THROW_INT { throw int(); }
 };
 
 struct H
 {
-  H(H&) throw(int) { }
+  H(H&) THROW_INT { }
 };
 
 struct H1
 {
-  H1(H1&) throw(int) { throw int(); }
+  H1(H1&) THROW_INT { throw int(); }
 };
 
 struct I
 {
-  I(I&) throw(int) { }
+  I(I&) THROW_INT { }
   I(const I&) throw() { }
 };
 
 struct I1
 {
-  I1(I1&) throw(int) { throw int(); }
+  I1(I1&) THROW_INT { throw int(); }
   I1(const I1&) throw() { }
 };
 
diff --git a/gcc/testsuite/g++.dg/ext/has_nothrow_copy-2.C b/gcc/testsuite/g++.dg/ext/has_nothrow_copy-2.C
index b2eb203..f15c417 100644
--- a/gcc/testsuite/g++.dg/ext/has_nothrow_copy-2.C
+++ b/gcc/testsuite/g++.dg/ext/has_nothrow_copy-2.C
@@ -2,8 +2,21 @@
 // { dg-do run }
 #include <cassert>
 
-struct A { template <class T> A (T) throw (int); };
-struct B { B (B&) throw (); template <class T> B (T) throw (int); };
+struct A {
+  template <class T> A (T)
+#if __cplusplus <= 201402L
+  throw(int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+  ;
+};
+struct B {
+  B (B&) throw ();
+  template <class T> B (T)
+#if __cplusplus <= 201402L
+  throw(int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+  ;
+};
 
 int main ()
 {
diff --git a/gcc/testsuite/g++.dg/ext/has_nothrow_copy-4.C b/gcc/testsuite/g++.dg/ext/has_nothrow_copy-4.C
index 69e9a6b..4c63b96 100644
--- a/gcc/testsuite/g++.dg/ext/has_nothrow_copy-4.C
+++ b/gcc/testsuite/g++.dg/ext/has_nothrow_copy-4.C
@@ -4,7 +4,11 @@
 
 struct S {
     S (const S&) throw ();
-    S (...) throw (int);
+    S (...)
+#if __cplusplus <= 201402L
+    throw (int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+    ;
 };
 
 int main ()
diff --git a/gcc/testsuite/g++.dg/ext/has_nothrow_copy-5.C b/gcc/testsuite/g++.dg/ext/has_nothrow_copy-5.C
index b94b338..768c9af 100644
--- a/gcc/testsuite/g++.dg/ext/has_nothrow_copy-5.C
+++ b/gcc/testsuite/g++.dg/ext/has_nothrow_copy-5.C
@@ -4,7 +4,11 @@
 
 struct S {
     S (const S&) throw ();
-    S (int) throw (int);
+    S (int)
+#if __cplusplus <= 201402L
+    throw (int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+    ;
 };
 
 int main ()
diff --git a/gcc/testsuite/g++.dg/ext/has_nothrow_copy-6.C b/gcc/testsuite/g++.dg/ext/has_nothrow_copy-6.C
index 6268ee2..27f4c0a 100644
--- a/gcc/testsuite/g++.dg/ext/has_nothrow_copy-6.C
+++ b/gcc/testsuite/g++.dg/ext/has_nothrow_copy-6.C
@@ -3,7 +3,11 @@
 
 struct S {
     S (S&) throw ();
-    S (const S&, int) throw (int);
+    S (const S&, int)
+#if __cplusplus <= 201402L
+    throw (int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+    ;
 };
 
 int main ()
diff --git a/gcc/testsuite/g++.dg/ext/has_nothrow_copy-7.C b/gcc/testsuite/g++.dg/ext/has_nothrow_copy-7.C
index 10891b6..ac2dd8c 100644
--- a/gcc/testsuite/g++.dg/ext/has_nothrow_copy-7.C
+++ b/gcc/testsuite/g++.dg/ext/has_nothrow_copy-7.C
@@ -3,7 +3,11 @@
 
 struct S {
     S (const S&) throw ();
-    S (S&&) throw (int);
+    S (S&&)
+#if __cplusplus <= 201402L
+    throw (int)			// { dg-warning "deprecated" "" { target { ! c++1z } } }
+#endif
+    ;
 };
 
 int main ()
diff --git a/gcc/testsuite/g++.dg/gcov/gcov-7.C b/gcc/testsuite/g++.dg/gcov/gcov-7.C
index db3e95a..bbc0f10 100644
--- a/gcc/testsuite/g++.dg/gcov/gcov-7.C
+++ b/gcc/testsuite/g++.dg/gcov/gcov-7.C
@@ -7,7 +7,10 @@
 
 struct foo
 {
-  foo () throw (int)
+  foo ()
+#if __cplusplus <= 201402L
+    throw (int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
     {			/* count (-) */
       throw (1);
     }
diff --git a/gcc/testsuite/g++.dg/init/new13.C b/gcc/testsuite/g++.dg/init/new13.C
index 2ced6e3..832942e 100644
--- a/gcc/testsuite/g++.dg/init/new13.C
+++ b/gcc/testsuite/g++.dg/init/new13.C
@@ -6,6 +6,6 @@
 struct A
 {
   void* operator new(__SIZE_TYPE__) throw(X);  // { dg-error "expected|type" }
-};
-
+};					       // { dg-error "dynamic exception specification" "" { target c++1z } .-1 }
+					       // { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } .-2 }
 A* p = new A;
diff --git a/gcc/testsuite/g++.dg/init/new25.C b/gcc/testsuite/g++.dg/init/new25.C
index 2d447f8..754ac92 100644
--- a/gcc/testsuite/g++.dg/init/new25.C
+++ b/gcc/testsuite/g++.dg/init/new25.C
@@ -5,7 +5,11 @@ class C
 {
 public:
   void* operator new(std::size_t = 32) throw (std::bad_alloc); // { dg-error "first parameter" }
+							       // { dg-error "dynamic exception specification" "" { target c++1z } .-1 }
+							       // { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } .-2 }
   void* operator new[](std::size_t = 32) throw (std::bad_alloc); // { dg-error "first parameter" }
+								 // { dg-error "dynamic exception specification" "" { target c++1z } .-1 }
+								 // { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } .-2 }
   void* operator new(std::size_t = 32, const std::nothrow_t&) throw(); // { dg-error "first parameter" }
   void* operator new[](std::size_t = 32, const std::nothrow_t&) throw(); // { dg-error "first parameter" }
 };
diff --git a/gcc/testsuite/g++.dg/lookup/exception1.C b/gcc/testsuite/g++.dg/lookup/exception1.C
index b5fcd0a..00ef817 100644
--- a/gcc/testsuite/g++.dg/lookup/exception1.C
+++ b/gcc/testsuite/g++.dg/lookup/exception1.C
@@ -11,8 +11,16 @@ namespace ns
 {
   class Test {
     public:
-      inline Test() throw( Exception );
-      inline Test(int n ) throw( Exception );
+      inline Test()
+#if __cplusplus <= 201402L
+      throw( Exception )			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+      ;
+      inline Test(int n )
+#if __cplusplus <= 201402L
+      throw( Exception )			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+      ;
     private:
       int i;
   };
@@ -20,11 +28,19 @@ namespace ns
 
 // This line used to fail because Exception wasn't looked up in the
 // right scope.
-ns::Test::Test() throw( Exception ) : i( 1 )
+ns::Test::Test()
+#if __cplusplus <= 201402L
+throw( Exception )				// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+: i( 1 )
 {
 }
 
-ns::Test::Test( int n ) throw( Exception ) : i( n )
+ns::Test::Test( int n )
+#if __cplusplus <= 201402L
+throw( Exception )				// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+: i( n )
 {
 }
 
diff --git a/gcc/testsuite/g++.dg/opt/noreturn-1.C b/gcc/testsuite/g++.dg/opt/noreturn-1.C
index 9b2fc0c..8fe2db0 100644
--- a/gcc/testsuite/g++.dg/opt/noreturn-1.C
+++ b/gcc/testsuite/g++.dg/opt/noreturn-1.C
@@ -58,16 +58,34 @@ struct Egeneric {
 };
 
 struct infinint {
-    void detruit() throw(Egeneric);
-    template<class T> void infinint_from(T a) throw(Egeneric);
-    infinint(long a = 0) throw(Egeneric) {
+    void detruit()
+#if __cplusplus <= 201402L
+    throw(Egeneric)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+    ;
+    template<class T> void infinint_from(T a)
+#if __cplusplus <= 201402L
+    throw(Egeneric)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+    ;
+    infinint(long a = 0)
+#if __cplusplus <= 201402L
+    throw(Egeneric)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+    {
 	try {
 	    infinint_from(a);
 	} catch(Egeneric& e) {
 	    e.stack("infinint::infinint", "long");
 	}
     }
-    ~infinint() throw(Egeneric) {
+    ~infinint()
+#if __cplusplus <= 201402L
+    throw(Egeneric)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#else
+    noexcept(false)
+#endif
+    {
 	try {
 	    detruit();
 	} catch(Egeneric& e) { }
diff --git a/gcc/testsuite/g++.dg/other/error3.C b/gcc/testsuite/g++.dg/other/error3.C
index 37ea51e..1e64563 100644
--- a/gcc/testsuite/g++.dg/other/error3.C
+++ b/gcc/testsuite/g++.dg/other/error3.C
@@ -1,5 +1,6 @@
 // Test for proper error message formatting; the throw() should go inside
 // the parens, as below.
 
-void (*g() throw())();		// { dg-message "g\\(\\) throw" "" }
+void (*g() throw())();		// { dg-message "g\\(\\) throw" "" { target { ! c++1z } } }
+				// { dg-message "g\\(\\) noexcept" "" { target c++1z } .-1 }
 void (*g())();			// { dg-error "" "" }
diff --git a/gcc/testsuite/g++.dg/rtti/crash3.C b/gcc/testsuite/g++.dg/rtti/crash3.C
index 076e360..111d3b3 100644
--- a/gcc/testsuite/g++.dg/rtti/crash3.C
+++ b/gcc/testsuite/g++.dg/rtti/crash3.C
@@ -5,6 +5,9 @@ class A {};
 class B {};
 class C : public A, public B {};
 class D : public C {};
-void f () throw (D)
+void f ()
+#if __cplusplus <= 201402L
+throw (D)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
 }
diff --git a/gcc/testsuite/g++.dg/template/eh2.C b/gcc/testsuite/g++.dg/template/eh2.C
index 9559b4e..3ed80a4 100644
--- a/gcc/testsuite/g++.dg/template/eh2.C
+++ b/gcc/testsuite/g++.dg/template/eh2.C
@@ -1,10 +1,10 @@
 // PR c++/23191
 // Origin: Volker Reichelt  <reichelt@igpm.rwth-aachen.de>
-// { dg-do compile }
+// { dg-do compile { target c++14_down } }
 
 template<typename T> struct A
 {
     void foo() throw(typename T::X);  // { dg-error "not a class" }
-};
+};				      // { dg-warning "deprecated" "" { target c++11 } .-1 }
 
 A<void> a;                            // { dg-message "required" }
diff --git a/gcc/testsuite/g++.dg/template/error36.C b/gcc/testsuite/g++.dg/template/error36.C
index 8d3dc63..d808c14 100644
--- a/gcc/testsuite/g++.dg/template/error36.C
+++ b/gcc/testsuite/g++.dg/template/error36.C
@@ -1,9 +1,11 @@
 // PR c++/37719.C
+// { dg-do compile { target c++14_down } }
 
 template <typename T>
 class foo {
     void bar() throw(int); // { dg-message "throw \\(int\\)" }
-};
+};			   // { dg-warning "deprecated" "" { target c++11 } .-1 }
 
 template <>
 void foo<int>::bar() throw(float) {} // { dg-error "throw \\(float\\)" }
+				     // { dg-warning "deprecated" "" { target c++11 } .-1 }
diff --git a/gcc/testsuite/g++.dg/tm/pr46567.C b/gcc/testsuite/g++.dg/tm/pr46567.C
index 2f0ef93..448c92c 100644
--- a/gcc/testsuite/g++.dg/tm/pr46567.C
+++ b/gcc/testsuite/g++.dg/tm/pr46567.C
@@ -1667,8 +1667,16 @@ namespace std
   typedef void (*new_handler)();
   new_handler set_new_handler(new_handler) throw();
 }
-void* operator new(std::size_t) throw (std::bad_alloc);
-void* operator new[](std::size_t) throw (std::bad_alloc);
+void* operator new(std::size_t)
+#if __cplusplus <= 201402L
+throw (std::bad_alloc)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+;
+void* operator new[](std::size_t)
+#if __cplusplus <= 201402L
+throw (std::bad_alloc)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+;
 void operator delete(void*) throw();
 void operator delete[](void*) throw();
 void* operator new(std::size_t, const std::nothrow_t&) throw();
diff --git a/gcc/testsuite/g++.dg/tm/pr47340.C b/gcc/testsuite/g++.dg/tm/pr47340.C
index ead3361..e5075e2 100644
--- a/gcc/testsuite/g++.dg/tm/pr47340.C
+++ b/gcc/testsuite/g++.dg/tm/pr47340.C
@@ -1,7 +1,11 @@
 // { dg-do compile }
 // { dg-options "-fgnu-tm" }
 
-void* operator new(__SIZE_TYPE__) throw (int);
+void* operator new(__SIZE_TYPE__)
+#if __cplusplus <= 201402L
+throw (int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+;
 
 void *point;
 
diff --git a/gcc/testsuite/g++.dg/torture/pr46364.C b/gcc/testsuite/g++.dg/torture/pr46364.C
index 8098991..0160e9a 100644
--- a/gcc/testsuite/g++.dg/torture/pr46364.C
+++ b/gcc/testsuite/g++.dg/torture/pr46364.C
@@ -1,7 +1,11 @@
 // { dg-do compile }
 #include <string>
 
-void a() throw (int);
+void a()
+#if __cplusplus <= 201402L
+throw (int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+;
 void b(std::string const &);
 
 void c(std::string *e)
diff --git a/gcc/testsuite/g++.dg/torture/pr49394.C b/gcc/testsuite/g++.dg/torture/pr49394.C
index e471885..cd8cac3 100644
--- a/gcc/testsuite/g++.dg/torture/pr49394.C
+++ b/gcc/testsuite/g++.dg/torture/pr49394.C
@@ -4,7 +4,10 @@
 struct Mutex
 {
   bool locked;
-  ~Mutex () throw(int)
+  ~Mutex ()
+#if __cplusplus <= 201402L
+  throw(int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
   {
     if (locked)
       throw 0;
diff --git a/gcc/testsuite/g++.dg/torture/pr52918-1.C b/gcc/testsuite/g++.dg/torture/pr52918-1.C
index 9e7b21b..cb95048 100644
--- a/gcc/testsuite/g++.dg/torture/pr52918-1.C
+++ b/gcc/testsuite/g++.dg/torture/pr52918-1.C
@@ -21,9 +21,16 @@ public:
 class free_list   {
     typedef __mutex __mutex_type;
     __mutex_type&     _M_get_mutex();
-    void _M_get(size_t __sz) throw(bad_alloc);
+    void _M_get(size_t __sz)
+#if __cplusplus <= 201402L
+    throw(bad_alloc)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+    ;
 };
-void  free_list::_M_get(size_t __sz) throw(bad_alloc)
+void  free_list::_M_get(size_t __sz)
+#if __cplusplus <= 201402L
+throw(bad_alloc)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
   __mutex_type& __bfl_mutex = _M_get_mutex();
   __bfl_mutex.unlock();
diff --git a/gcc/testsuite/g++.dg/torture/pr57190.C b/gcc/testsuite/g++.dg/torture/pr57190.C
index 9fa11cd..f3b7ecc 100644
--- a/gcc/testsuite/g++.dg/torture/pr57190.C
+++ b/gcc/testsuite/g++.dg/torture/pr57190.C
@@ -19,12 +19,24 @@ namespace std {
 class UIException {
 };
 class PasswordDialog {
-    void run() throw (UIException);
+    void run()
+#if __cplusplus <= 201402L
+    throw (UIException)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+    ;
 };
 class MessageBox  {
 public:
-    MessageBox (std::string t) throw (UIException);
-    virtual int run() throw (UIException) ;
+    MessageBox (std::string t)
+#if __cplusplus <= 201402L
+    throw (UIException)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+    ;
+    virtual int run()
+#if __cplusplus <= 201402L
+    throw (UIException)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+    ;
 };
 extern "C" {
     struct __jmp_buf_tag   {
@@ -33,7 +45,10 @@ extern "C" {
     typedef struct __jmp_buf_tag sigjmp_buf[1];
 }
 sigjmp_buf password_dialog_sig_jmp_buf;
-void PasswordDialog::run() throw (UIException)
+void PasswordDialog::run()
+#if __cplusplus <= 201402L
+throw (UIException)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
   __sigsetjmp (password_dialog_sig_jmp_buf, 1);
   MessageBox* errmsg = __null;
diff --git a/gcc/testsuite/g++.dg/torture/stackalign/eh-alloca-1.C b/gcc/testsuite/g++.dg/torture/stackalign/eh-alloca-1.C
index 89b0a6a..7c124fe 100644
--- a/gcc/testsuite/g++.dg/torture/stackalign/eh-alloca-1.C
+++ b/gcc/testsuite/g++.dg/torture/stackalign/eh-alloca-1.C
@@ -27,7 +27,10 @@ struct A : virtual public Base
 struct B {};
 
 void
-foo (int size) throw (B,A)
+foo (int size)
+#if __cplusplus <= 201402L
+throw (B,A)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
   char *p = (char*) __builtin_alloca (size + 1);
   aligned i;
diff --git a/gcc/testsuite/g++.dg/torture/stackalign/eh-fastcall-1.C b/gcc/testsuite/g++.dg/torture/stackalign/eh-fastcall-1.C
index 2c24ea3..165e7cc 100644
--- a/gcc/testsuite/g++.dg/torture/stackalign/eh-fastcall-1.C
+++ b/gcc/testsuite/g++.dg/torture/stackalign/eh-fastcall-1.C
@@ -21,7 +21,10 @@ struct B {};
 
 __attribute__ ((fastcall))
 void
-foo (int j, int k, int m, int n, int o) throw (B,A)
+foo (int j, int k, int m, int n, int o)
+#if __cplusplus <= 201402L
+throw (B,A)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
   aligned i;
 
diff --git a/gcc/testsuite/g++.dg/torture/stackalign/eh-global-1.C b/gcc/testsuite/g++.dg/torture/stackalign/eh-global-1.C
index cc05ed0..48a59eb 100644
--- a/gcc/testsuite/g++.dg/torture/stackalign/eh-global-1.C
+++ b/gcc/testsuite/g++.dg/torture/stackalign/eh-global-1.C
@@ -21,7 +21,10 @@ struct A : virtual public Base
 struct B {};
 
 void
-foo (void) throw (B,A)
+foo (void)
+#if __cplusplus <= 201402L
+throw (B,A)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
   aligned i;
 
diff --git a/gcc/testsuite/g++.dg/torture/stackalign/eh-inline-1.C b/gcc/testsuite/g++.dg/torture/stackalign/eh-inline-1.C
index d2555f2..b8c04d9 100644
--- a/gcc/testsuite/g++.dg/torture/stackalign/eh-inline-1.C
+++ b/gcc/testsuite/g++.dg/torture/stackalign/eh-inline-1.C
@@ -22,7 +22,10 @@ struct B {};
 
 static void
 inline __attribute__((always_inline))
-foo (void) throw (B,A)
+foo (void)
+#if __cplusplus <= 201402L
+throw (B,A)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
   aligned i;
 
diff --git a/gcc/testsuite/g++.dg/torture/stackalign/eh-inline-2.C b/gcc/testsuite/g++.dg/torture/stackalign/eh-inline-2.C
index 1cbc68c..53ff0cc 100644
--- a/gcc/testsuite/g++.dg/torture/stackalign/eh-inline-2.C
+++ b/gcc/testsuite/g++.dg/torture/stackalign/eh-inline-2.C
@@ -28,7 +28,10 @@ struct B {};
 
 static void
 inline __attribute__((always_inline))
-foo (int size) throw (B,A)
+foo (int size)
+#if __cplusplus <= 201402L
+throw (B,A)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
   char *p = (char *) __builtin_alloca (size + 1);
   aligned i;
diff --git a/gcc/testsuite/g++.dg/torture/stackalign/eh-thiscall-1.C b/gcc/testsuite/g++.dg/torture/stackalign/eh-thiscall-1.C
index 403497a..44f562a 100644
--- a/gcc/testsuite/g++.dg/torture/stackalign/eh-thiscall-1.C
+++ b/gcc/testsuite/g++.dg/torture/stackalign/eh-thiscall-1.C
@@ -21,7 +21,10 @@ struct B {};
 
 __attribute__ ((thiscall))
 void
-foo (int j, int k, int m, int n, int o) throw (B,A)
+foo (int j, int k, int m, int n, int o)
+#if __cplusplus <= 201402L
+throw (B,A)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
   aligned i;
 
diff --git a/gcc/testsuite/g++.dg/torture/stackalign/eh-vararg-1.C b/gcc/testsuite/g++.dg/torture/stackalign/eh-vararg-1.C
index b9ba81b..9c3c0f5 100644
--- a/gcc/testsuite/g++.dg/torture/stackalign/eh-vararg-1.C
+++ b/gcc/testsuite/g++.dg/torture/stackalign/eh-vararg-1.C
@@ -28,7 +28,10 @@ struct A : virtual public Base
 struct B {};
 
 void
-foo (const char *fmt, ...) throw (B,A)
+foo (const char *fmt, ...)
+#if __cplusplus <= 201402L
+throw (B,A)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
   va_list arg;
   char *p;
diff --git a/gcc/testsuite/g++.dg/torture/stackalign/eh-vararg-2.C b/gcc/testsuite/g++.dg/torture/stackalign/eh-vararg-2.C
index 5e28217..aa9b2bd 100644
--- a/gcc/testsuite/g++.dg/torture/stackalign/eh-vararg-2.C
+++ b/gcc/testsuite/g++.dg/torture/stackalign/eh-vararg-2.C
@@ -29,7 +29,10 @@ struct A : virtual public Base
 struct B {};
 
 void
-test (va_list arg) throw (B,A)
+test (va_list arg)
+#if __cplusplus <= 201402L
+throw (B,A)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
   char *p;
   aligned i;
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr45605.C b/gcc/testsuite/g++.dg/tree-ssa/pr45605.C
index 4d83245..5460021 100644
--- a/gcc/testsuite/g++.dg/tree-ssa/pr45605.C
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr45605.C
@@ -15,7 +15,11 @@ struct D : public B {
         } o; 
 
         struct Raiser { 
-            Raiser()  throw( int ) {throw 1;}; 
+            Raiser()
+#if __cplusplus <= 201402L
+	    throw( int )			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+	    {throw 1;}; 
         } raiser; 
       }; 
 }; 
diff --git a/gcc/testsuite/g++.dg/warn/Wreturn-type-3.C b/gcc/testsuite/g++.dg/warn/Wreturn-type-3.C
index f13d587..78a3851 100644
--- a/gcc/testsuite/g++.dg/warn/Wreturn-type-3.C
+++ b/gcc/testsuite/g++.dg/warn/Wreturn-type-3.C
@@ -3,7 +3,10 @@
 
 struct E{};
 
-inline int bar() throw(E)
+inline int bar()
+#if __cplusplus <= 201402L
+throw(E)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
   return 0;
 }
diff --git a/gcc/testsuite/g++.old-deja/g++.eh/badalloc1.C b/gcc/testsuite/g++.old-deja/g++.eh/badalloc1.C
index f4f443b..31c7300 100644
--- a/gcc/testsuite/g++.old-deja/g++.eh/badalloc1.C
+++ b/gcc/testsuite/g++.old-deja/g++.eh/badalloc1.C
@@ -93,19 +93,28 @@ extern "C" void *realloc (void *p, size_t size)
   return r;
 }
 
-void fn_throw() throw(int)
+void fn_throw()
+#if __cplusplus <= 201402L
+throw(int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
   throw 1;
 }
 
-void fn_rethrow() throw(int)
+void fn_rethrow()
+#if __cplusplus <= 201402L
+throw(int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
   try{fn_throw();}
   catch(int a){
     throw;}
 }
 
-void fn_catchthrow() throw(int)
+void fn_catchthrow()
+#if __cplusplus <= 201402L
+throw(int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
   try{fn_throw();}
   catch(int a){
diff --git a/gcc/testsuite/g++.old-deja/g++.eh/cleanup2.C b/gcc/testsuite/g++.old-deja/g++.eh/cleanup2.C
index 9538de9..d644512 100644
--- a/gcc/testsuite/g++.old-deja/g++.eh/cleanup2.C
+++ b/gcc/testsuite/g++.old-deja/g++.eh/cleanup2.C
@@ -18,11 +18,18 @@ static int thrower ()
 
 struct X
 {
-  X (int) throw (int);
+  X (int)
+#if __cplusplus <= 201402L
+  throw (int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+  ;
   ~X () throw ();
 };
 
-X::X (int) throw (int)
+X::X (int)
+#if __cplusplus <= 201402L
+  throw (int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
   {printf ("in ctor X %s\n", __PRETTY_FUNCTION__); bad = true;}
 X::~X () throw ()
   {printf ("in dtor X %s\n", __PRETTY_FUNCTION__); bad = true;}
@@ -30,10 +37,17 @@ X::~X () throw ()
 struct X1 {};
 struct Y : X
 {
-  Y() throw (int);
+  Y()
+#if __cplusplus <= 201402L
+  throw (int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+  ;
   ~Y() throw ();
 };
-Y::Y() throw (int)
+Y::Y()
+#if __cplusplus <= 201402L
+  throw (int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
   : X(thrower ())   // throws, so X::X is never called
   {printf ("in ctor Y%s\n", __PRETTY_FUNCTION__); bad = true;}
 Y::~Y() throw ()
diff --git a/gcc/testsuite/g++.old-deja/g++.eh/spec1.C b/gcc/testsuite/g++.old-deja/g++.eh/spec1.C
index 0ff8883..ea32045 100644
--- a/gcc/testsuite/g++.old-deja/g++.eh/spec1.C
+++ b/gcc/testsuite/g++.old-deja/g++.eh/spec1.C
@@ -1,4 +1,4 @@
-// { dg-do run  }
+// { dg-do run { target c++14_down } }
 // Testing exception specifications.
 // Test 1: the original exception succeeds.
 
@@ -9,7 +9,7 @@ void my_term ()  { exit (1); }
 void my_unexp () { throw 42; }
 
 void
-f () throw (char, int, std::bad_exception)
+f () throw (char, int, std::bad_exception)	// { dg-warning "deprecated" "" { target c++11 } }
 {
   throw 'a';
 }
diff --git a/gcc/testsuite/g++.old-deja/g++.eh/spec2.C b/gcc/testsuite/g++.old-deja/g++.eh/spec2.C
index 5c7a913..d1aa698 100644
--- a/gcc/testsuite/g++.old-deja/g++.eh/spec2.C
+++ b/gcc/testsuite/g++.old-deja/g++.eh/spec2.C
@@ -1,4 +1,4 @@
-// { dg-do run  }
+// { dg-do run { target c++14_down } }
 // Testing exception specifications.
 // Test 2: the second throw succeeds.
 
@@ -9,7 +9,7 @@ void my_term ()  { exit (1); }
 void my_unexp () { throw 42; }
 
 void
-f () throw (int, std::bad_exception)
+f () throw (int, std::bad_exception)	// { dg-warning "deprecated" "" { target c++11 } }
 {
   throw 'a';
 }
diff --git a/gcc/testsuite/g++.old-deja/g++.eh/spec3.C b/gcc/testsuite/g++.old-deja/g++.eh/spec3.C
index 6239270..1d57df3 100644
--- a/gcc/testsuite/g++.old-deja/g++.eh/spec3.C
+++ b/gcc/testsuite/g++.old-deja/g++.eh/spec3.C
@@ -1,4 +1,4 @@
-// { dg-do run  }
+// { dg-do run { target c++14_down } }
 // Testing exception specifications.
 // Test 3: the bad_exception throw succeeds.
 
@@ -9,7 +9,7 @@ void my_term ()  { exit (1); }
 void my_unexp () { throw 42; }
 
 void
-f () throw (std::bad_exception)
+f () throw (std::bad_exception)		// { dg-warning "deprecated" "" { target c++11 } }
 {
   throw 'a';
 }
diff --git a/gcc/testsuite/g++.old-deja/g++.eh/spec4.C b/gcc/testsuite/g++.old-deja/g++.eh/spec4.C
index e1f702e..e102239 100644
--- a/gcc/testsuite/g++.old-deja/g++.eh/spec4.C
+++ b/gcc/testsuite/g++.old-deja/g++.eh/spec4.C
@@ -1,4 +1,4 @@
-// { dg-do run  }
+// { dg-do run { target c++14_down } }
 // Testing exception specifications.
 // Test 4: all throws fail, call terminate.
 
@@ -9,7 +9,7 @@ void my_term ()  { exit (0); }
 void my_unexp () { throw 42; }
 
 void
-f () throw (short)
+f () throw (short)		// { dg-warning "deprecated" "" { target c++11 } }
 {
   throw 'a';
 }
diff --git a/gcc/testsuite/g++.old-deja/g++.eh/spec6.C b/gcc/testsuite/g++.old-deja/g++.eh/spec6.C
index 7aa474b..ed485e2 100644
--- a/gcc/testsuite/g++.old-deja/g++.eh/spec6.C
+++ b/gcc/testsuite/g++.old-deja/g++.eh/spec6.C
@@ -1,4 +1,5 @@
-// { dg-do assemble  }
+// { dg-do assemble { target c++14_down } }
+// { dg-additional-options "-Wno-deprecated" }
 
 // Copyright (C) 1999 Free Software Foundation, Inc.
 // Contributed by Nathan Sidwell 19 Jan 1999 <nathan@acm.org>
diff --git a/gcc/testsuite/g++.old-deja/g++.eh/throw1.C b/gcc/testsuite/g++.old-deja/g++.eh/throw1.C
index e5d234b..3c4813e 100644
--- a/gcc/testsuite/g++.old-deja/g++.eh/throw1.C
+++ b/gcc/testsuite/g++.old-deja/g++.eh/throw1.C
@@ -1,6 +1,9 @@
 // { dg-do assemble  }
 
-void athrow(const int & e) throw(int)
+void athrow(const int & e)
+#if __cplusplus <= 201402L
+throw(int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
    throw e;
 }
diff --git a/gcc/testsuite/g++.old-deja/g++.eh/throw2.C b/gcc/testsuite/g++.old-deja/g++.eh/throw2.C
index 63bb029..b4296ac 100644
--- a/gcc/testsuite/g++.old-deja/g++.eh/throw2.C
+++ b/gcc/testsuite/g++.old-deja/g++.eh/throw2.C
@@ -4,7 +4,10 @@
 
 #define ANY int // a class with a public constructor
 
-void athrow(const ANY & e) throw(ANY)
+void athrow(const ANY & e)
+#if __cplusplus <= 201402L
+throw(ANY)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
    throw e; // { dg-bogus "" } discarding const
 }
diff --git a/gcc/testsuite/g++.old-deja/g++.eh/tmpl1.C b/gcc/testsuite/g++.old-deja/g++.eh/tmpl1.C
index 985fcae..c64a522 100644
--- a/gcc/testsuite/g++.old-deja/g++.eh/tmpl1.C
+++ b/gcc/testsuite/g++.old-deja/g++.eh/tmpl1.C
@@ -1,6 +1,9 @@
 // { dg-do run  }
 template <class T>
-void f() throw (T)
+void f()
+#if __cplusplus <= 201402L
+throw (T)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
   throw 7;
 }
diff --git a/gcc/testsuite/g++.old-deja/g++.eh/tmpl3.C b/gcc/testsuite/g++.old-deja/g++.eh/tmpl3.C
index 0ddf63c..4fc869d 100644
--- a/gcc/testsuite/g++.old-deja/g++.eh/tmpl3.C
+++ b/gcc/testsuite/g++.old-deja/g++.eh/tmpl3.C
@@ -3,9 +3,16 @@
 // Posted by Trevor Taylor <ttaylor@powerup.com.au>
 
 template<class T> struct A {
-    void X() throw(T);
+    void X()
+#if __cplusplus <= 201402L
+    throw(T)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+    ;
 };
 
 template<class T>
 inline void A<T>::X() 
-throw(T) { } 
+#if __cplusplus <= 201402L
+throw(T)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+{ } 
diff --git a/gcc/testsuite/g++.old-deja/g++.mike/eh15.C b/gcc/testsuite/g++.old-deja/g++.mike/eh15.C
index 4418f16..a8690be 100644
--- a/gcc/testsuite/g++.old-deja/g++.mike/eh15.C
+++ b/gcc/testsuite/g++.old-deja/g++.mike/eh15.C
@@ -1,6 +1,6 @@
-// { dg-do assemble  }
+// { dg-do assemble { target c++14_down } }
 // { dg-options "-fexceptions" }
 
 struct A {
-  A() throw (int);
+  A() throw (int);	// { dg-warning "deprecated" "" { target c++11 } }
 };
diff --git a/gcc/testsuite/g++.old-deja/g++.mike/eh25.C b/gcc/testsuite/g++.old-deja/g++.mike/eh25.C
index f3728cf..a6ffd33 100644
--- a/gcc/testsuite/g++.old-deja/g++.mike/eh25.C
+++ b/gcc/testsuite/g++.old-deja/g++.mike/eh25.C
@@ -10,7 +10,13 @@ void my_terminate() {
 
 struct A {
   A() { }
-  ~A() throw(int) {
+  ~A()
+#if __cplusplus <= 201402L
+  throw(int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#else
+  noexcept(false)
+#endif
+  {
     std::set_terminate (my_terminate);
     throw 1;		// This throws from EH dtor, should call my_terminate
   }
diff --git a/gcc/testsuite/g++.old-deja/g++.mike/eh33.C b/gcc/testsuite/g++.old-deja/g++.mike/eh33.C
index b679991..cffb0c4 100644
--- a/gcc/testsuite/g++.old-deja/g++.mike/eh33.C
+++ b/gcc/testsuite/g++.old-deja/g++.mike/eh33.C
@@ -1,4 +1,5 @@
 // { dg-do run { xfail sparc64-*-elf z8k-*-* arm-*-pe } }
+// { dg-require-effective-target c++14_down }
 // { dg-options "-fexceptions" }
 
 #include <exception>
@@ -7,7 +8,7 @@ void my_unexpected() {
   throw 42;
 }
 
-void foo() throw (int) { throw "Hi"; }
+void foo() throw (int) { throw "Hi"; }	// { dg-warning "deprecated" "" { target c++11 } }
 
 int main() {
   std::set_unexpected (my_unexpected);
diff --git a/gcc/testsuite/g++.old-deja/g++.mike/eh34.C b/gcc/testsuite/g++.old-deja/g++.mike/eh34.C
index 056f6b8..3218dd9 100644
--- a/gcc/testsuite/g++.old-deja/g++.mike/eh34.C
+++ b/gcc/testsuite/g++.old-deja/g++.mike/eh34.C
@@ -1,4 +1,5 @@
 // { dg-do run { xfail sparc64-*-elf arm-*-pe } }
+// { dg-require-effective-target c++14_down }
 
 #include <exception>
 #include <stdlib.h>
diff --git a/gcc/testsuite/g++.old-deja/g++.mike/eh50.C b/gcc/testsuite/g++.old-deja/g++.mike/eh50.C
index de21dc2..0ebaab4 100644
--- a/gcc/testsuite/g++.old-deja/g++.mike/eh50.C
+++ b/gcc/testsuite/g++.old-deja/g++.mike/eh50.C
@@ -1,4 +1,5 @@
 // { dg-do run { xfail sparc64-*-elf z8k-*-* arm-*-pe } }
+// { dg-require-effective-target c++14_down }
 // { dg-options "-fexceptions" }
 
 #include <exception>
@@ -7,7 +8,7 @@ void my_unexpected() {
   throw 42;
 }
 
-template <class T> void foo(T) throw (int) { throw "Hi"; }
+template <class T> void foo(T) throw (int) { throw "Hi"; }	// { dg-warning "deprecated" "" { target c++11 } }
 
 main() {
   std::set_unexpected (my_unexpected);
diff --git a/gcc/testsuite/g++.old-deja/g++.mike/eh51.C b/gcc/testsuite/g++.old-deja/g++.mike/eh51.C
index 073980a..7d3cd41 100644
--- a/gcc/testsuite/g++.old-deja/g++.mike/eh51.C
+++ b/gcc/testsuite/g++.old-deja/g++.mike/eh51.C
@@ -1,4 +1,5 @@
 // { dg-do run { xfail sparc64-*-elf z8k-*-* arm-*-pe } }
+// { dg-require-effective-target c++14_down }
 // { dg-options "-fexceptions" }
 
 #include <exception>
@@ -7,7 +8,7 @@ void my_unexpected() {
   throw 42;
 }
 
-template <class T> void foo(T) throw (T) { throw "Hi"; }
+template <class T> void foo(T) throw (T) { throw "Hi"; }	// { dg-warning "deprecated" "" { target c++11 } }
 
 main() {
   std::set_unexpected (my_unexpected);
diff --git a/gcc/testsuite/g++.old-deja/g++.mike/eh55.C b/gcc/testsuite/g++.old-deja/g++.mike/eh55.C
index 14826807..8b574f5 100644
--- a/gcc/testsuite/g++.old-deja/g++.mike/eh55.C
+++ b/gcc/testsuite/g++.old-deja/g++.mike/eh55.C
@@ -9,7 +9,7 @@ void my_terminate_handler() {
 }
 
 void throw_an_unexpected_exception() throw() {
-  throw 1;
+  throw 1;	// { dg-warning "throw will always call terminate" "" { target c++1z } }
 }
 
 int main() {
diff --git a/gcc/testsuite/g++.old-deja/g++.mike/p10416.C b/gcc/testsuite/g++.old-deja/g++.mike/p10416.C
index 2d21d19..766a27b 100644
--- a/gcc/testsuite/g++.old-deja/g++.mike/p10416.C
+++ b/gcc/testsuite/g++.old-deja/g++.mike/p10416.C
@@ -5,5 +5,9 @@
 
 class not_ok {
 public:
-  void f() throw(int) { }
+  void f()
+#if __cplusplus <= 201402L
+  throw(int)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+  { }
 };
diff --git a/gcc/testsuite/g++.old-deja/g++.other/crash28.C b/gcc/testsuite/g++.old-deja/g++.other/crash28.C
index c0f4bfa..5ff84ed 100644
--- a/gcc/testsuite/g++.old-deja/g++.other/crash28.C
+++ b/gcc/testsuite/g++.old-deja/g++.other/crash28.C
@@ -27,9 +27,16 @@ class foo
   bool b;
 public:
   foo();
-  void x () throw(bar);
+  void x ()
+#if __cplusplus <= 201402L
+  throw(bar)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
+  ;
 };
-void foo::x() throw(bar)
+void foo::x()
+#if __cplusplus <= 201402L
+throw(bar)			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
   if (!b) throw bar (static_cast<::N::X*>(this));	// { dg-error "lambda expressions|expected|invalid" } parse error
 }
diff --git a/gcc/testsuite/g++.old-deja/g++.other/crash30.C b/gcc/testsuite/g++.old-deja/g++.other/crash30.C
index 043d54f..50ecfd9 100644
--- a/gcc/testsuite/g++.old-deja/g++.other/crash30.C
+++ b/gcc/testsuite/g++.old-deja/g++.other/crash30.C
@@ -8,7 +8,7 @@ struct foo
 };
 
 void foo::x() throw(bar)	// { dg-error "" } parse error
-{
+{				// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } .-1 }
 }
 
 void bar()
diff --git a/gcc/testsuite/g++.old-deja/g++.other/new7.C b/gcc/testsuite/g++.old-deja/g++.other/new7.C
index 0c0643e..82eabb6 100644
--- a/gcc/testsuite/g++.old-deja/g++.other/new7.C
+++ b/gcc/testsuite/g++.old-deja/g++.other/new7.C
@@ -13,7 +13,10 @@ struct X {
   {
     throw 1;
   }
-  void* operator new ( std::size_t n ) throw ( std::bad_alloc )
+  void* operator new ( std::size_t n )
+#if __cplusplus <= 201402L
+  throw ( std::bad_alloc )			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
   {
     new_flag = true;
     return ::operator new( n );
diff --git a/gcc/testsuite/g++.old-deja/g++.pt/ehspec1.C b/gcc/testsuite/g++.old-deja/g++.pt/ehspec1.C
index e75704e..3630e1f 100644
--- a/gcc/testsuite/g++.old-deja/g++.pt/ehspec1.C
+++ b/gcc/testsuite/g++.old-deja/g++.pt/ehspec1.C
@@ -1,6 +1,6 @@
-// { dg-do assemble  }
+// { dg-do assemble { target c++14_down } }
 // Bug: g++ forgets to instantiate A<int>
 // Contributed by Jason Merrill <jason@cygnus.com>
 
 template <class T> struct A { };
-void f () throw (A<int>);
+void f () throw (A<int>);	// { dg-warning "deprecated" "" { target c++11 } }
diff --git a/gcc/testsuite/g++.old-deja/g++.robertl/eb123.C b/gcc/testsuite/g++.old-deja/g++.robertl/eb123.C
index cb4505f..b22f4eb 100644
--- a/gcc/testsuite/g++.old-deja/g++.robertl/eb123.C
+++ b/gcc/testsuite/g++.old-deja/g++.robertl/eb123.C
@@ -2,7 +2,10 @@
 // { dg-options "-O2 -W   " }
 #include "stdio.h"
 
-void writeNote() throw( int )
+void writeNote()
+#if __cplusplus <= 201402L
+throw( int )			// { dg-warning "deprecated" "" { target { c++11 && { ! c++1z } } } }
+#endif
 {
     printf( "hello world\n" );
     try { }
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 085b95a..15e1aaf 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-08  Jakub Jelinek  <jakub@redhat.com>
+
+	* testsuite/util/testsuite_new_operators.h: Include testsuite_hooks.h.
+	(operator new): Use THROW macro.
+
 2016-12-07  François Dumont  <fdumont@gcc.gnu.org>
 
 	* include/bits/stl_tree.h
diff --git a/libstdc++-v3/testsuite/util/testsuite_new_operators.h b/libstdc++-v3/testsuite/util/testsuite_new_operators.h
index 6713fb8..dea6fbe 100644
--- a/libstdc++-v3/testsuite/util/testsuite_new_operators.h
+++ b/libstdc++-v3/testsuite/util/testsuite_new_operators.h
@@ -23,6 +23,7 @@
 #define _GLIBCXX_TESTSUITE_NEW_OPERATORS_H
 
 #include <new>
+#include <testsuite_hooks.h>
 
 namespace __gnu_test
 {
@@ -38,7 +39,7 @@ namespace __gnu_test
   { get_new_limit() = l; }
 }
 
-void* operator new(std::size_t size) throw(std::bad_alloc)
+void* operator new(std::size_t size) THROW(std::bad_alloc)
 {
   if (size > __gnu_test::get_new_limit())
     throw std::bad_alloc();
-- 
cgit v1.1


From 1adc60671c93ef3618983155d5a4304e3421614c Mon Sep 17 00:00:00 2001
From: Christophe Lyon <christophe.lyon@linaro.org>
Date: Thu, 8 Dec 2016 09:23:29 +0000
Subject: Fix failing poly64 tests on ARM

2016-12-08  Christophe Lyon  <christophe.lyon@linaro.org>

	* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
	(CHECK_CRYPTO): Remove.
	(expected_poly64x1_t, expected_poly64x2_t): Remove

From-SVN: r243430
---
 gcc/testsuite/ChangeLog                                   |  6 ++++++
 .../gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h  | 15 ---------------
 2 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index b6b0096..c7d2773 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-08  Christophe Lyon  <christophe.lyon@linaro.org>
+
+	* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
+	(CHECK_CRYPTO): Remove.
+	(expected_poly64x1_t, expected_poly64x2_t): Remove
+
 2016-12-08  Jakub Jelinek  <jakub@redhat.com>
 
 	P0003R5 - removal of dynamic exception specification from C++17
diff --git a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
index beaf6ac..4728639 100644
--- a/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
+++ b/gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
@@ -99,13 +99,6 @@ extern size_t strlen(const char *);
     fprintf(stderr, "CHECKED %s %s\n", STR(VECT_TYPE(T, W, N)), MSG);	\
   }
 
-#if defined (__ARM_FEATURE_CRYPTO)
-#define CHECK_CRYPTO(MSG,T,W,N,FMT,EXPECTED,COMMENT) \
-	       CHECK(MSG,T,W,N,FMT,EXPECTED,COMMENT)
-#else
-#define CHECK_CRYPTO(MSG,T,W,N,FMT,EXPECTED,COMMENT)
-#endif
-
 /* Floating-point variant.  */
 #define CHECK_FP(MSG,T,W,N,FMT,EXPECTED,COMMENT)			\
   {									\
@@ -198,9 +191,6 @@ extern ARRAY(expected, uint, 32, 2);
 extern ARRAY(expected, uint, 64, 1);
 extern ARRAY(expected, poly, 8, 8);
 extern ARRAY(expected, poly, 16, 4);
-#if defined (__ARM_FEATURE_CRYPTO)
-extern ARRAY(expected, poly, 64, 1);
-#endif
 extern ARRAY(expected, hfloat, 16, 4);
 extern ARRAY(expected, hfloat, 32, 2);
 extern ARRAY(expected, hfloat, 64, 1);
@@ -214,9 +204,6 @@ extern ARRAY(expected, uint, 32, 4);
 extern ARRAY(expected, uint, 64, 2);
 extern ARRAY(expected, poly, 8, 16);
 extern ARRAY(expected, poly, 16, 8);
-#if defined (__ARM_FEATURE_CRYPTO)
-extern ARRAY(expected, poly, 64, 2);
-#endif
 extern ARRAY(expected, hfloat, 16, 8);
 extern ARRAY(expected, hfloat, 32, 4);
 extern ARRAY(expected, hfloat, 64, 2);
@@ -233,7 +220,6 @@ extern ARRAY(expected, hfloat, 64, 2);
     CHECK(test_name, uint, 64, 1, PRIx64, EXPECTED, comment);		\
     CHECK(test_name, poly, 8, 8, PRIx8, EXPECTED, comment);		\
     CHECK(test_name, poly, 16, 4, PRIx16, EXPECTED, comment);		\
-    CHECK_CRYPTO(test_name, poly, 64, 1, PRIx64, EXPECTED, comment);	\
     CHECK_FP(test_name, float, 32, 2, PRIx32, EXPECTED, comment);	\
 									\
     CHECK(test_name, int, 8, 16, PRIx8, EXPECTED, comment);		\
@@ -246,7 +232,6 @@ extern ARRAY(expected, hfloat, 64, 2);
     CHECK(test_name, uint, 64, 2, PRIx64, EXPECTED, comment);		\
     CHECK(test_name, poly, 8, 16, PRIx8, EXPECTED, comment);		\
     CHECK(test_name, poly, 16, 8, PRIx16, EXPECTED, comment);		\
-    CHECK_CRYPTO(test_name, poly, 64, 2, PRIx64, EXPECTED, comment);	\
     CHECK_FP(test_name, float, 32, 4, PRIx32, EXPECTED, comment);	\
   }									\
 
-- 
cgit v1.1


From a5b03f2a17a7d41380de232bae2ab4800015407f Mon Sep 17 00:00:00 2001
From: Bin Cheng <bin.cheng@arm.com>
Date: Thu, 8 Dec 2016 10:56:41 +0000
Subject: re PR middle-end/78684 (ICE in create_intersect_range_checks_index,
 at tree-vect-loop-manip.c:2074)

	PR middle-end/78684
	* tree-vect-loop-manip.c (create_intersect_range_checks_index): Check
	sign bit for index step of data reference.
	gcc/testsuite
	PR middle-end/78684
	* g++.dg/torture/pr78684.C: New test.

From-SVN: r243431
---
 gcc/ChangeLog                          |  6 ++++++
 gcc/testsuite/ChangeLog                |  5 +++++
 gcc/testsuite/g++.dg/torture/pr78684.C | 20 ++++++++++++++++++++
 gcc/tree-vect-loop-manip.c             |  3 +--
 4 files changed, 32 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/torture/pr78684.C

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 2026261..91a1ed3 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-08  Bin Cheng  <bin.cheng@arm.com>
+
+	PR middle-end/78684
+	* tree-vect-loop-manip.c (create_intersect_range_checks_index): Check
+	sign bit for index step of data reference.
+
 2016-12-08  Naveen H.S  <Naveen.Hurugalawadi@cavium.com>
 
 	* config/aarch64/aarch64.c (aarch64_load_symref_appropriately):
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index c7d2773..904d74f 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2016-12-08  Bin Cheng  <bin.cheng@arm.com>
+
+	PR middle-end/78684
+	* g++.dg/torture/pr78684.C: New test.
+
 2016-12-08  Christophe Lyon  <christophe.lyon@linaro.org>
 
 	* gcc.target/aarch64/advsimd-intrinsics/arm-neon-ref.h
diff --git a/gcc/testsuite/g++.dg/torture/pr78684.C b/gcc/testsuite/g++.dg/torture/pr78684.C
new file mode 100644
index 0000000..5d71be5
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr78684.C
@@ -0,0 +1,20 @@
+// PR middle-end/78684
+// { dg-do compile }
+
+class a {
+public:
+  a(long);
+  void operator<<=(long) {
+    long b;
+    for (unsigned long c; c; c--)
+      d[c + b] = d[c];
+  }
+  a &g();
+  long d[28];
+};
+long e;
+int f;
+void j() {
+  a h(e), i = h;
+  i.g() <<= f;
+}
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index e13d6a2..beb2f06 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -2070,8 +2070,7 @@ create_intersect_range_checks_index (loop_vec_info loop_vinfo, tree *cond_expr,
       /* Index must have const step, otherwise DR_STEP won't be constant.  */
       gcc_assert (TREE_CODE (idx_step) == INTEGER_CST);
       /* Index must evaluate in the same direction as DR.  */
-      gcc_assert (!neg_step
-		  || tree_int_cst_compare (idx_step, size_zero_node) < 0);
+      gcc_assert (!neg_step || tree_int_cst_sign_bit (idx_step) == 1);
 
       tree min1 = CHREC_LEFT (access1);
       tree min2 = CHREC_LEFT (access2);
-- 
cgit v1.1


From 8a00c78748fd716f9936cb1a937fa1979581d2fe Mon Sep 17 00:00:00 2001
From: Pierre-Marie de Rodat <pmderodat@gcc.gnu.org>
Date: Thu, 8 Dec 2016 11:01:03 +0000
Subject: [PR78112] Remove platform-dependent checks in g++.dg/pr78112.C

... as these checks failed on many platforms. As a replacement, this
commit also adds a new testcase from source reduction. The hope is that
this new testcase will get a consistent output across all platforms.

gcc/testsuite/
	PR debug/78112
	* g++.dg/pr78112.C: Remove platform-dependent checks.
	* g++.dg/pr78112-2.C: New testcase.

From-SVN: r243432
---
 gcc/testsuite/g++.dg/pr78112-2.C | 13 +++++++++++++
 gcc/testsuite/g++.dg/pr78112.C   |  2 --
 2 files changed, 13 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/pr78112-2.C

diff --git a/gcc/testsuite/g++.dg/pr78112-2.C b/gcc/testsuite/g++.dg/pr78112-2.C
new file mode 100644
index 0000000..d9d18ff
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr78112-2.C
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-g -dA -gdwarf-4 -std=gnu++11" } */
+/* { dg-options "-g -dA -std=gnu++11 -gdwarf-4" } */
+/* { dg-final { scan-assembler-times DW_AT_object_pointer 18 } } */
+
+void run (int *int_p, void(*func)(int *)) { func (int_p); }
+namespace foo {
+   struct Foo {
+      int a;
+      Foo() { run (&a, [](int *int_p) { *int_p = 0; }); }
+   };
+}
+int main (void) { foo::Foo f; }
diff --git a/gcc/testsuite/g++.dg/pr78112.C b/gcc/testsuite/g++.dg/pr78112.C
index 986171d..8312292 100644
--- a/gcc/testsuite/g++.dg/pr78112.C
+++ b/gcc/testsuite/g++.dg/pr78112.C
@@ -1,7 +1,5 @@
 /* { dg-do compile } */
 /* { dg-options "-g -dA -std=gnu++11" } */
-/* { dg-final { scan-assembler-times DW_AT_inline 6 { xfail *-*-aix* } } } */
-/* { dg-final { scan-assembler-times DW_AT_object_pointer 37 { xfail *-*-aix* } } } */
 namespace std
 {
 template <typename _Tp> struct integral_constant
-- 
cgit v1.1


From 66110738f0003a5434ffbd7e18a54ea898981efc Mon Sep 17 00:00:00 2001
From: Pierre-Marie de Rodat <pmderodat@gcc.gnu.org>
Date: Thu, 8 Dec 2016 11:04:11 +0000
Subject: Add the missing ChangeLog entry for r243432

2016-12-08  Pierre-Marie de Rodat  <derodat@adacore.com>

	PR debug/78112
	* g++.dg/pr78112.C: Remove platform-dependent checks.
	* g++.dg/pr78112-2.C: New testcase.

From-SVN: r243433
---
 gcc/testsuite/ChangeLog | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 904d74f..7e6b6d0 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-08  Pierre-Marie de Rodat  <derodat@adacore.com>
+
+	PR debug/78112
+	* g++.dg/pr78112.C: Remove platform-dependent checks.
+	* g++.dg/pr78112-2.C: New testcase.
+
 2016-12-08  Bin Cheng  <bin.cheng@arm.com>
 
 	PR middle-end/78684
-- 
cgit v1.1


From eae0b895e0f4dce0601e27cc7c54c89be3c480b9 Mon Sep 17 00:00:00 2001
From: Jonathan Wakely <jwakely@redhat.com>
Date: Thu, 8 Dec 2016 12:08:14 +0000
Subject: PR71856 try to fix Parallel Mode assertions again

	PR libstdc++/71856
	* doc/xml/manual/using.xml: Document macro.
	* include/bits/c++config [_GLIBCXX_DEBUG || _GLIBCXX_PARALLEL]
	(__glibcxx_assert): Rename to __glibcxx_assert_impl.
	[_GLIBCXX_DEBUG] (__glibcxx_assert): Expand to __glibcxx_assert_impl.
	* include/parallel/base.h [_GLIBCXX_PARALLEL_ASSERTIONS]
	(_GLIBCXX_PARALLEL_ASSERT): Expand to __glibcxx_assert_impl.
	[!_GLIBCXX_PARALLEL_ASSERTIONS] (_GLIBCXX_PARALLEL_ASSERT): Define as
	empty.
	* testsuite/25_algorithms/headers/algorithm/
	parallel_algorithm_assert2.cc: New test.

From-SVN: r243434
---
 libstdc++-v3/ChangeLog                             | 14 ++++++
 libstdc++-v3/doc/xml/manual/using.xml              |  9 ++++
 libstdc++-v3/include/bits/c++config                | 13 ++++--
 libstdc++-v3/include/parallel/base.h               |  6 ++-
 .../algorithm/parallel_algorithm_assert2.cc        | 50 ++++++++++++++++++++++
 5 files changed, 87 insertions(+), 5 deletions(-)
 create mode 100644 libstdc++-v3/testsuite/25_algorithms/headers/algorithm/parallel_algorithm_assert2.cc

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 15e1aaf..42b02a0 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,17 @@
+2016-12-08  Jonathan Wakely  <jwakely@redhat.com>
+
+	PR libstdc++/71856
+	* doc/xml/manual/using.xml: Document macro.
+	* include/bits/c++config [_GLIBCXX_DEBUG || _GLIBCXX_PARALLEL]
+	(__glibcxx_assert): Rename to __glibcxx_assert_impl.
+	[_GLIBCXX_DEBUG] (__glibcxx_assert): Expand to __glibcxx_assert_impl.
+	* include/parallel/base.h [_GLIBCXX_PARALLEL_ASSERTIONS]
+	(_GLIBCXX_PARALLEL_ASSERT): Expand to __glibcxx_assert_impl.
+	[!_GLIBCXX_PARALLEL_ASSERTIONS] (_GLIBCXX_PARALLEL_ASSERT): Define as
+	empty.
+	* testsuite/25_algorithms/headers/algorithm/
+	parallel_algorithm_assert2.cc: New test.
+
 2016-12-08  Jakub Jelinek  <jakub@redhat.com>
 
 	* testsuite/util/testsuite_new_operators.h: Include testsuite_hooks.h.
diff --git a/libstdc++-v3/doc/xml/manual/using.xml b/libstdc++-v3/doc/xml/manual/using.xml
index ee76fef..c06ce16 100644
--- a/libstdc++-v3/doc/xml/manual/using.xml
+++ b/libstdc++-v3/doc/xml/manual/using.xml
@@ -948,6 +948,15 @@ g++ -Winvalid-pch -I. -include stdc++.h -H -g -O2 hello.cc -o test.exe
 	mode</link>.
       </para>
     </listitem></varlistentry>
+    <varlistentry><term><code>_GLIBCXX_PARALLEL_ASSERTIONS</code></term>
+    <listitem>
+      <para>Undefined by default, but when any parallel mode header is included
+      this macro will be defined to a non-zero value if
+      <code>_GLIBCXX_ASSERTIONS</code> has a non-zero value, otherwise to zero.
+      When defined to a non-zero value, it enables extra error checking and
+      assertions in the parallel mode.
+      </para>
+    </listitem></varlistentry>
 
     <varlistentry><term><code>_GLIBCXX_PROFILE</code></term>
     <listitem>
diff --git a/libstdc++-v3/include/bits/c++config b/libstdc++-v3/include/bits/c++config
index 8a27d14..39e55f4 100644
--- a/libstdc++-v3/include/bits/c++config
+++ b/libstdc++-v3/include/bits/c++config
@@ -434,9 +434,8 @@ namespace std
 #endif
 
 // Assert.
-#if !defined(_GLIBCXX_ASSERTIONS) && !defined(_GLIBCXX_PARALLEL)
-# define __glibcxx_assert(_Condition)
-#else
+#if defined(_GLIBCXX_ASSERTIONS) \
+  || defined(_GLIBCXX_PARALLEL) || defined(_GLIBCXX_PARALLEL_ASSERTIONS)
 namespace std
 {
   // Avoid the use of assert, because we're trying to keep the <cassert>
@@ -450,7 +449,7 @@ namespace std
     __builtin_abort();
   }
 }
-#define __glibcxx_assert(_Condition)				   	 \
+#define __glibcxx_assert_impl(_Condition)				 \
   do 									 \
   {							      		 \
     if (! (_Condition))                                                  \
@@ -459,6 +458,12 @@ namespace std
   } while (false)
 #endif
 
+#if defined(_GLIBCXX_ASSERTIONS)
+# define __glibcxx_assert(_Condition) __glibcxx_assert_impl(_Condition)
+#else
+# define __glibcxx_assert(_Condition)
+#endif
+
 // Macros for race detectors.
 // _GLIBCXX_SYNCHRONIZATION_HAPPENS_BEFORE(A) and
 // _GLIBCXX_SYNCHRONIZATION_HAPPENS_AFTER(A) should be used to explain
diff --git a/libstdc++-v3/include/parallel/base.h b/libstdc++-v3/include/parallel/base.h
index 7d4b69e..721c42d 100644
--- a/libstdc++-v3/include/parallel/base.h
+++ b/libstdc++-v3/include/parallel/base.h
@@ -419,7 +419,11 @@ namespace __gnu_parallel
 	}
     }
 
-#define _GLIBCXX_PARALLEL_ASSERT(_Condition) __glibcxx_assert(_Condition)
+#if _GLIBCXX_PARALLEL_ASSERTIONS && defined(__glibcxx_assert_impl)
+#define _GLIBCXX_PARALLEL_ASSERT(_Condition) __glibcxx_assert_impl(_Condition)
+#else
+#define _GLIBCXX_PARALLEL_ASSERT(_Condition)
+#endif
 
 } //namespace __gnu_parallel
 
diff --git a/libstdc++-v3/testsuite/25_algorithms/headers/algorithm/parallel_algorithm_assert2.cc b/libstdc++-v3/testsuite/25_algorithms/headers/algorithm/parallel_algorithm_assert2.cc
new file mode 100644
index 0000000..c40985e
--- /dev/null
+++ b/libstdc++-v3/testsuite/25_algorithms/headers/algorithm/parallel_algorithm_assert2.cc
@@ -0,0 +1,50 @@
+// { dg-require-parallel-mode "" }
+// { dg-options "-fopenmp -D_GLIBCXX_PARALLEL" { target *-*-* } }
+// { dg-do run }
+
+// Copyright (C) 2016 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// <http://www.gnu.org/licenses/>.
+
+#undef _GLIBCXX_DEBUG
+#undef _GLIBCXX_ASSERTIONS
+#undef _GLIBCXX_PARALLEL_ASSERTIONS
+#define _GLIBCXX_PARALLEL_ASSERTIONS 1
+#include <parallel/algorithm>
+#include <testsuite_hooks.h>
+
+void
+test01()
+{
+  // This should not be enabled without _GLIBCXX_ASSERTIONS:
+  __glibcxx_assert(false);
+}
+
+void
+test02()
+{
+  bool result = false;
+
+  // This should be enabled by _GLIBCXX_PARALLEL_ASSERTIONS:
+  _GLIBCXX_PARALLEL_ASSERT(result = true);
+  VERIFY(result);
+}
+
+int main()
+{
+  test01();
+  test02();
+}
-- 
cgit v1.1


From 1d752b4feec13afaae5ad9f6d24c0f4d83d674e1 Mon Sep 17 00:00:00 2001
From: Jonathan Wakely <jwakely@redhat.com>
Date: Thu, 8 Dec 2016 13:25:03 +0000
Subject: Delete std::swap for debug mode array

	* include/debug/array (swap): Add deleted overload.
	* include/bits/stl_pair.h (swap): Remove redundant inline keyword
	from deleted overload.
	* include/bits/unique_ptr.h (swap): Likewise.
	* include/std/array (swap): Likewise.
	* include/std/optional (swap): Likewise.
	* include/std/tuple (swap): Likewise.
	* include/std/variant (swap): Likewise.
	* testsuite/23_containers/array/tuple_interface/get_debug_neg.cc:
	Adjust dg-error line numbers.
	* testsuite/23_containers/array/tuple_interface/get_neg.cc: Likewise.
	* testsuite/23_containers/array/tuple_interface/
	tuple_element_debug_neg.cc: Likewise.
	* testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc:
	Likewise.

From-SVN: r243437
---
 libstdc++-v3/ChangeLog                                   | 16 ++++++++++++++++
 libstdc++-v3/include/bits/stl_pair.h                     |  1 -
 libstdc++-v3/include/bits/unique_ptr.h                   |  1 -
 libstdc++-v3/include/debug/array                         |  8 ++++++++
 libstdc++-v3/include/std/array                           |  1 -
 libstdc++-v3/include/std/optional                        |  2 +-
 libstdc++-v3/include/std/tuple                           |  1 -
 libstdc++-v3/include/std/variant                         |  4 ++--
 .../23_containers/array/tuple_interface/get_debug_neg.cc |  4 ++--
 .../23_containers/array/tuple_interface/get_neg.cc       |  6 +++---
 .../array/tuple_interface/tuple_element_debug_neg.cc     |  2 +-
 .../array/tuple_interface/tuple_element_neg.cc           |  2 +-
 12 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 42b02a0..084f0d7 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,5 +1,21 @@
 2016-12-08  Jonathan Wakely  <jwakely@redhat.com>
 
+	* include/debug/array (swap): Add deleted overload.
+	* include/bits/stl_pair.h (swap): Remove redundant inline keyword
+	from deleted overload.
+	* include/bits/unique_ptr.h (swap): Likewise.
+	* include/std/array (swap): Likewise.
+	* include/std/optional (swap): Likewise.
+	* include/std/tuple (swap): Likewise.
+	* include/std/variant (swap): Likewise.
+	* testsuite/23_containers/array/tuple_interface/get_debug_neg.cc:
+	Adjust dg-error line numbers.
+	* testsuite/23_containers/array/tuple_interface/get_neg.cc: Likewise.
+	* testsuite/23_containers/array/tuple_interface/
+	tuple_element_debug_neg.cc: Likewise.
+	* testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc:
+	Likewise.
+
 	PR libstdc++/71856
 	* doc/xml/manual/using.xml: Document macro.
 	* include/bits/c++config [_GLIBCXX_DEBUG || _GLIBCXX_PARALLEL]
diff --git a/libstdc++-v3/include/bits/stl_pair.h b/libstdc++-v3/include/bits/stl_pair.h
index 981dbeb..01c7134 100644
--- a/libstdc++-v3/include/bits/stl_pair.h
+++ b/libstdc++-v3/include/bits/stl_pair.h
@@ -481,7 +481,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
 #if __cplusplus > 201402L || !defined(__STRICT_ANSI__) // c++1z or gnu++11
   template<typename _T1, typename _T2>
-    inline
     typename enable_if<!__and_<__is_swappable<_T1>,
 			       __is_swappable<_T2>>::value>::type
     swap(pair<_T1, _T2>&, pair<_T1, _T2>&) = delete;
diff --git a/libstdc++-v3/include/bits/unique_ptr.h b/libstdc++-v3/include/bits/unique_ptr.h
index 03f9bfc..56e6ec0 100644
--- a/libstdc++-v3/include/bits/unique_ptr.h
+++ b/libstdc++-v3/include/bits/unique_ptr.h
@@ -652,7 +652,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
 #if __cplusplus > 201402L || !defined(__STRICT_ANSI__) // c++1z or gnu++11
   template<typename _Tp, typename _Dp>
-    inline
     typename enable_if<!__is_swappable<_Dp>::value>::type
     swap(unique_ptr<_Tp, _Dp>&,
 	 unique_ptr<_Tp, _Dp>&) = delete;
diff --git a/libstdc++-v3/include/debug/array b/libstdc++-v3/include/debug/array
index 48ab2fd..63e6808 100644
--- a/libstdc++-v3/include/debug/array
+++ b/libstdc++-v3/include/debug/array
@@ -260,6 +260,14 @@ namespace __debug
     { return !(__one < __two); }
 
   // Specialized algorithms.
+
+#if __cplusplus > 201402L || !defined(__STRICT_ANSI__) // c++1z or gnu++11
+  template<typename _Tp, size_t _Nm>
+    typename enable_if<
+      !_GLIBCXX_STD_C::__array_traits<_Tp, _Nm>::_Is_swappable::value>::type
+    swap(array<_Tp, _Nm>&, array<_Tp, _Nm>&) = delete;
+#endif
+
   template<typename _Tp, std::size_t _Nm>
     inline void
     swap(array<_Tp, _Nm>& __one, array<_Tp, _Nm>& __two)
diff --git a/libstdc++-v3/include/std/array b/libstdc++-v3/include/std/array
index fa7bac6..f5028c9 100644
--- a/libstdc++-v3/include/std/array
+++ b/libstdc++-v3/include/std/array
@@ -290,7 +290,6 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER
 
 #if __cplusplus > 201402L || !defined(__STRICT_ANSI__) // c++1z or gnu++11
   template<typename _Tp, std::size_t _Nm>
-    inline
     typename enable_if<
       !_GLIBCXX_STD_C::__array_traits<_Tp, _Nm>::_Is_swappable::value>::type
     swap(array<_Tp, _Nm>&, array<_Tp, _Nm>&) = delete;
diff --git a/libstdc++-v3/include/std/optional b/libstdc++-v3/include/std/optional
index 191d64b..3d69e10 100644
--- a/libstdc++-v3/include/std/optional
+++ b/libstdc++-v3/include/std/optional
@@ -930,7 +930,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     { __lhs.swap(__rhs); }
 
   template<typename _Tp>
-    inline enable_if_t<!(is_move_constructible_v<_Tp> && is_swappable_v<_Tp>)>
+    enable_if_t<!(is_move_constructible_v<_Tp> && is_swappable_v<_Tp>)>
     swap(optional<_Tp>&, optional<_Tp>&) = delete;
 
   template<typename _Tp>
diff --git a/libstdc++-v3/include/std/tuple b/libstdc++-v3/include/std/tuple
index fb2fd17..13e0bf8 100644
--- a/libstdc++-v3/include/std/tuple
+++ b/libstdc++-v3/include/std/tuple
@@ -1588,7 +1588,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
 #if __cplusplus > 201402L || !defined(__STRICT_ANSI__) // c++1z or gnu++11
   template<typename... _Elements>
-    inline
     typename enable_if<!__and_<__is_swappable<_Elements>...>::value>::type
     swap(tuple<_Elements...>&, tuple<_Elements...>&) = delete;
 #endif
diff --git a/libstdc++-v3/include/std/variant b/libstdc++-v3/include/std/variant
index dd6109d..822674f 100644
--- a/libstdc++-v3/include/std/variant
+++ b/libstdc++-v3/include/std/variant
@@ -865,8 +865,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
     { __lhs.swap(__rhs); }
 
   template<typename... _Types>
-    inline enable_if_t<!__and_<is_move_constructible<_Types>...,
-			       is_swappable<_Types>...>::value>
+    enable_if_t<!__and_<is_move_constructible<_Types>...,
+			is_swappable<_Types>...>::value>
     swap(variant<_Types...>&, variant<_Types...>&) = delete;
 
   class bad_variant_access : public exception
diff --git a/libstdc++-v3/testsuite/23_containers/array/tuple_interface/get_debug_neg.cc b/libstdc++-v3/testsuite/23_containers/array/tuple_interface/get_debug_neg.cc
index 6ad09d6..16761d3 100644
--- a/libstdc++-v3/testsuite/23_containers/array/tuple_interface/get_debug_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/array/tuple_interface/get_debug_neg.cc
@@ -27,6 +27,6 @@ int n1 = std::get<1>(a);
 int n2 = std::get<1>(std::move(a));
 int n3 = std::get<1>(ca);
 
-// { dg-error "static assertion failed" "" { target *-*-* } 273 }
-// { dg-error "static assertion failed" "" { target *-*-* } 282 }
+// { dg-error "static assertion failed" "" { target *-*-* } 281 }
 // { dg-error "static assertion failed" "" { target *-*-* } 290 }
+// { dg-error "static assertion failed" "" { target *-*-* } 298 }
diff --git a/libstdc++-v3/testsuite/23_containers/array/tuple_interface/get_neg.cc b/libstdc++-v3/testsuite/23_containers/array/tuple_interface/get_neg.cc
index 568ec85..69d638b 100644
--- a/libstdc++-v3/testsuite/23_containers/array/tuple_interface/get_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/array/tuple_interface/get_neg.cc
@@ -27,6 +27,6 @@ int n1 = std::get<1>(a);
 int n2 = std::get<1>(std::move(a));
 int n3 = std::get<1>(ca);
 
-// { dg-error "static assertion failed" "" { target *-*-* } 303 }
-// { dg-error "static assertion failed" "" { target *-*-* } 312 }
-// { dg-error "static assertion failed" "" { target *-*-* } 320 }
+// { dg-error "static assertion failed" "" { target *-*-* } 302 }
+// { dg-error "static assertion failed" "" { target *-*-* } 311 }
+// { dg-error "static assertion failed" "" { target *-*-* } 319 }
diff --git a/libstdc++-v3/testsuite/23_containers/array/tuple_interface/tuple_element_debug_neg.cc b/libstdc++-v3/testsuite/23_containers/array/tuple_interface/tuple_element_debug_neg.cc
index d5ab406..1f3a5ed 100644
--- a/libstdc++-v3/testsuite/23_containers/array/tuple_interface/tuple_element_debug_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/array/tuple_interface/tuple_element_debug_neg.cc
@@ -22,4 +22,4 @@
 
 typedef std::tuple_element<1, std::array<int, 1>>::type type;
 
-// { dg-error "static assertion failed" "" { target *-*-* } 308 }
+// { dg-error "static assertion failed" "" { target *-*-* } 316 }
diff --git a/libstdc++-v3/testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc b/libstdc++-v3/testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc
index 32cb10b..016c747 100644
--- a/libstdc++-v3/testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc
+++ b/libstdc++-v3/testsuite/23_containers/array/tuple_interface/tuple_element_neg.cc
@@ -22,4 +22,4 @@
 
 typedef std::tuple_element<1, std::array<int, 1>>::type type;
 
-// { dg-error "static assertion failed" "" { target *-*-* } 351 }
+// { dg-error "static assertion failed" "" { target *-*-* } 350 }
-- 
cgit v1.1


From fdb0b271e860a8bbace2ff037ced0a1a6d17a000 Mon Sep 17 00:00:00 2001
From: Jonathan Wakely <jwakely@redhat.com>
Date: Thu, 8 Dec 2016 13:25:09 +0000
Subject: Fix filesystem test that fails in debug mode

	* testsuite/experimental/filesystem/path/construct/range.cc: Don't
	use basic_string::front() when string might be empty.

From-SVN: r243438
---
 libstdc++-v3/ChangeLog                                           | 3 +++
 .../testsuite/experimental/filesystem/path/construct/range.cc    | 9 +++++----
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 084f0d7..5d8ee46 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,5 +1,8 @@
 2016-12-08  Jonathan Wakely  <jwakely@redhat.com>
 
+	* testsuite/experimental/filesystem/path/construct/range.cc: Don't
+	use basic_string::front() when string might be empty.
+
 	* include/debug/array (swap): Add deleted overload.
 	* include/bits/stl_pair.h (swap): Remove redundant inline keyword
 	from deleted overload.
diff --git a/libstdc++-v3/testsuite/experimental/filesystem/path/construct/range.cc b/libstdc++-v3/testsuite/experimental/filesystem/path/construct/range.cc
index 3dfec2f..9e51e0a 100644
--- a/libstdc++-v3/testsuite/experimental/filesystem/path/construct/range.cc
+++ b/libstdc++-v3/testsuite/experimental/filesystem/path/construct/range.cc
@@ -59,13 +59,14 @@ test01()
     using __gnu_test::test_container;
     using __gnu_test::input_iterator_wrapper;
     // Test with input iterators and const value_types
+
     test_container<char, input_iterator_wrapper>
-      r1(&s.front(), &s.front() + s.size());
+      r1((char*)s.c_str(), (char*)s.c_str() + s.size());
     path p9(r1.begin(), r1.end());
     compare_paths(p1, p9);
 
     test_container<char, input_iterator_wrapper>
-      r2(&s.front(), &s.front() + s.size() + 1); // includes null-terminator
+      r2((char*)s.c_str(), (char*)s.c_str() + s.size() + 1); // includes null-terminator
     path p10(r2.begin());
     compare_paths(p1, p10);
 
@@ -82,12 +83,12 @@ test01()
 #if _GLIBCXX_USE_WCHAR_T
     // Test with input iterators and const value_types
     test_container<wchar_t, input_iterator_wrapper>
-      r5(&ws.front(), &ws.front() + ws.size());
+      r5((wchar_t*)ws.c_str(), (wchar_t*)ws.c_str() + ws.size());
     path p13(r5.begin(), r5.end());
     compare_paths(p1, p13);
 
     test_container<wchar_t, input_iterator_wrapper>
-      r6(&ws.front(), &ws.front() + ws.size() + 1); // includes null-terminator
+      r6((wchar_t*)ws.c_str(), (wchar_t*)ws.c_str() + ws.size() + 1); // includes null-terminator
     path p14(r6.begin());
     compare_paths(p1, p14);
 
-- 
cgit v1.1


From 48d73a936497f71a489b8708b34e210bd02c06eb Mon Sep 17 00:00:00 2001
From: Joseph Myers <joseph@codesourcery.com>
Date: Thu, 8 Dec 2016 14:12:46 +0000
Subject: * es.po: Update.

From-SVN: r243439
---
 gcc/po/ChangeLog |   4 +
 gcc/po/es.po     | 611 ++++++++++++-------------------------------------------
 2 files changed, 131 insertions(+), 484 deletions(-)

diff --git a/gcc/po/ChangeLog b/gcc/po/ChangeLog
index fa76569..d083564 100644
--- a/gcc/po/ChangeLog
+++ b/gcc/po/ChangeLog
@@ -1,3 +1,7 @@
+2016-12-08  Joseph Myers  <joseph@codesourcery.com>
+
+	* es.po: Update.
+
 2016-12-05  Joseph Myers  <joseph@codesourcery.com>
 
 	* es.po, fr.po: Update.
diff --git a/gcc/po/es.po b/gcc/po/es.po
index 4172026..6aef088 100644
--- a/gcc/po/es.po
+++ b/gcc/po/es.po
@@ -26,6 +26,7 @@
 # scheduler       - planificador
 # SSA             - SSA
 # statement       - sentencia
+# Thumb           - TBD
 # ubsan           - ubsan
 #
 # advierte de -> avisa sobre
@@ -35,7 +36,7 @@ msgstr ""
 "Project-Id-Version: gcc 6.2.0\n"
 "Report-Msgid-Bugs-To: http://gcc.gnu.org/bugs.html\n"
 "POT-Creation-Date: 2016-08-19 21:03+0000\n"
-"PO-Revision-Date: 2016-12-04 14:54+0100\n"
+"PO-Revision-Date: 2016-12-08 00:01+0100\n"
 "Last-Translator: Antonio Ceballos <aceballos@gmail.com>\n"
 "Language-Team: Spanish <es@tp.org.es>\n"
 "Language: es\n"
@@ -302,294 +303,194 @@ msgid "Options:\n"
 msgstr "Opciones:\n"
 
 #: gcc.c:3384
-#, fuzzy
-#| msgid "  -pass-exit-codes         Exit with highest error code from a phase\n"
 msgid "  -pass-exit-codes         Exit with highest error code from a phase.\n"
 msgstr "  -pass-exit-codes         Sale con el código de error más alto de una fase.\n"
 
 #: gcc.c:3385
-#, fuzzy
-#| msgid "  --help                   Display this information\n"
 msgid "  --help                   Display this information.\n"
 msgstr "  --help                   Muestra esta información.\n"
 
 #: gcc.c:3386
-#, fuzzy
-#| msgid "  --target-help            Display target specific command line options\n"
 msgid "  --target-help            Display target specific command line options.\n"
 msgstr "  --target-help            Muestra opciones de línea de órdenes específicas del objetivo.\n"
 
 #: gcc.c:3387
-#, fuzzy
-#| msgid "  --help={common|optimizers|params|target|warnings|[^]{joined|separate|undocumented}}[,...]\n"
 msgid "  --help={common|optimizers|params|target|warnings|[^]{joined|separate|undocumented}}[,...].\n"
-msgstr "  --help={common|optimizers|params|target|warnings|[^]{joined|separate|undocumented}}[,...]\n"
+msgstr "  --help={common|optimizers|params|target|warnings|[^]{joined|separate|undocumented}}[,...].\n"
 
 #: gcc.c:3388
-#, fuzzy
-#| msgid "                           Display specific types of command line options\n"
 msgid "                           Display specific types of command line options.\n"
-msgstr "                           Muestra tipos específicos de opciones de línea de órdenes\n"
+msgstr "                           Muestra tipos específicos de opciones de línea de órdenes.\n"
 
 #: gcc.c:3390
-#, fuzzy
-#| msgid "  (Use '-v --help' to display command line options of sub-processes)\n"
 msgid "  (Use '-v --help' to display command line options of sub-processes).\n"
-msgstr "  (Use '-v --help' para mostrar las opciones de línea de órdenes de los subprocesos)\n"
+msgstr "  (Use '-v --help' para mostrar las opciones de línea de órdenes de los subprocesos).\n"
 
 #: gcc.c:3391
-#, fuzzy
-#| msgid "  --version                Display compiler version information\n"
 msgid "  --version                Display compiler version information.\n"
-msgstr "  --version                Muestra la información de versión del compilador\n"
+msgstr "  --version                Muestra la información de versión del compilador.\n"
 
 #: gcc.c:3392
-#, fuzzy
-#| msgid "  -dumpspecs               Display all of the built in spec strings\n"
 msgid "  -dumpspecs               Display all of the built in spec strings.\n"
-msgstr "  -dumpspecs               Muestra todas las cadenas internas de especificación\n"
+msgstr "  -dumpspecs               Muestra todas las cadenas internas de especificación.\n"
 
 #: gcc.c:3393
-#, fuzzy
-#| msgid "  -dumpversion             Display the version of the compiler\n"
 msgid "  -dumpversion             Display the version of the compiler.\n"
-msgstr "  -dumpversion             Muestra la versión del compilador\n"
+msgstr "  -dumpversion             Muestra la versión del compilador.\n"
 
 #: gcc.c:3394
-#, fuzzy
-#| msgid "  -dumpmachine             Display the compiler's target processor\n"
 msgid "  -dumpmachine             Display the compiler's target processor.\n"
-msgstr "  -dumpmachine             Muestra el procesador objetivo del compilador\n"
+msgstr "  -dumpmachine             Muestra el procesador objetivo del compilador.\n"
 
 #: gcc.c:3395
-#, fuzzy
-#| msgid "  -print-search-dirs       Display the directories in the compiler's search path\n"
 msgid "  -print-search-dirs       Display the directories in the compiler's search path.\n"
-msgstr "  -print-search-dirs       Muestra los directorios en la ruta de búsqueda del compilador\n"
+msgstr "  -print-search-dirs       Muestra los directorios en la ruta de búsqueda del compilador.\n"
 
 #: gcc.c:3396
-#, fuzzy
-#| msgid "  -print-libgcc-file-name  Display the name of the compiler's companion library\n"
 msgid "  -print-libgcc-file-name  Display the name of the compiler's companion library.\n"
-msgstr "  -print-libgcc-file-name  Muestra el nombre de la biblioteca que acompaña al compilador\n"
+msgstr "  -print-libgcc-file-name  Muestra el nombre de la biblioteca que acompaña al compilador.\n"
 
 #: gcc.c:3397
-#, fuzzy
-#| msgid "  -print-file-name=<lib>   Display the full path to library <lib>\n"
 msgid "  -print-file-name=<lib>   Display the full path to library <lib>.\n"
-msgstr "  -print-file-name=<bib>   Muestra la ruta completa a la biblioteca <bib>\n"
+msgstr "  -print-file-name=<bib>   Muestra la ruta completa a la biblioteca <bib>.\n"
 
 #: gcc.c:3398
-#, fuzzy
-#| msgid "  -print-prog-name=<prog>  Display the full path to compiler component <prog>\n"
 msgid "  -print-prog-name=<prog>  Display the full path to compiler component <prog>.\n"
-msgstr "  -print-prog-name=<prog>  Muestra la ruta completa del programa componente del compilador <prog>\n"
+msgstr "  -print-prog-name=<prog>  Muestra la ruta completa del programa componente del compilador <prog>.\n"
 
 #: gcc.c:3399
-#, fuzzy
-#| msgid ""
-#| "  -print-multi-lib         Display the mapping between command line options and\n"
-#| "                           multiple library search directories\n"
 msgid ""
 "  -print-multiarch         Display the target's normalized GNU triplet, used as\n"
 "                           a component in the library path.\n"
 msgstr ""
-"  -print-multi-lib         Muestra el mapeo entre las opciones de línea de órdenes\n"
-"                           y los múltiples directorios de búsqueda de bibliotecas\n"
+"  -print-multiarch         Muestra el triplete GNU normalizado del destino, usado como\n"
+"                           componente en la ruta de la biblioteca.\n"
 
 #: gcc.c:3402
-#, fuzzy
-#| msgid "  -print-multi-directory   Display the root directory for versions of libgcc\n"
 msgid "  -print-multi-directory   Display the root directory for versions of libgcc.\n"
-msgstr "  -print-multi-directory   Muestra el directorio raíz para las versiones de libgcc\n"
+msgstr "  -print-multi-directory   Muestra el directorio raíz para las versiones de libgcc.\n"
 
 #: gcc.c:3403
-#, fuzzy
-#| msgid ""
-#| "  -print-multi-lib         Display the mapping between command line options and\n"
-#| "                           multiple library search directories\n"
 msgid ""
 "  -print-multi-lib         Display the mapping between command line options and\n"
 "                           multiple library search directories.\n"
 msgstr ""
 "  -print-multi-lib         Muestra el mapeo entre las opciones de línea de órdenes\n"
-"                           y los múltiples directorios de búsqueda de bibliotecas\n"
+"                           y los múltiples directorios de búsqueda de bibliotecas.\n"
 
 #: gcc.c:3406
-#, fuzzy
-#| msgid "  -print-multi-os-directory Display the relative path to OS libraries\n"
 msgid "  -print-multi-os-directory Display the relative path to OS libraries.\n"
-msgstr "  -print-multi-os-directory Muestra la ruta relativa para las bibliotecas del SO\n"
+msgstr "  -print-multi-os-directory Muestra la ruta relativa para las bibliotecas del SO.\n"
 
 #: gcc.c:3407
-#, fuzzy
-#| msgid "  -print-sysroot           Display the target libraries directory\n"
 msgid "  -print-sysroot           Display the target libraries directory.\n"
-msgstr "  -print-sysroot           Muestra el directorio de las bibliotecas objetivo\n"
+msgstr "  -print-sysroot           Muestra el directorio de las bibliotecas objetivo.\n"
 
 #: gcc.c:3408
-#, fuzzy
-#| msgid "  -print-sysroot-headers-suffix Display the sysroot suffix used to find headers\n"
 msgid "  -print-sysroot-headers-suffix Display the sysroot suffix used to find headers.\n"
-msgstr "  -print-sysroot-headers-suffix Muestra el sufijo sysroot que se usa para buscar encabezados\n"
+msgstr "  -print-sysroot-headers-suffix Muestra el sufijo sysroot que se usa para buscar encabezados.\n"
 
 #: gcc.c:3409
-#, fuzzy
-#| msgid "  -Wa,<options>            Pass comma-separated <options> on to the assembler\n"
 msgid "  -Wa,<options>            Pass comma-separated <options> on to the assembler.\n"
-msgstr "  -Wa,<opciones>           Pasa <opciones> separadas por coma al ensamblador\n"
+msgstr "  -Wa,<opciones>           Pasa <opciones> separadas por coma al ensamblador.\n"
 
 #: gcc.c:3410
-#, fuzzy
-#| msgid "  -Wp,<options>            Pass comma-separated <options> on to the preprocessor\n"
 msgid "  -Wp,<options>            Pass comma-separated <options> on to the preprocessor.\n"
-msgstr "  -Wp,<opciones>           Pasa <opciones> separadas por coma al preprocesador\n"
+msgstr "  -Wp,<opciones>           Pasa <opciones> separadas por coma al preprocesador.\n"
 
 #: gcc.c:3411
-#, fuzzy
-#| msgid "  -Wl,<options>            Pass comma-separated <options> on to the linker\n"
 msgid "  -Wl,<options>            Pass comma-separated <options> on to the linker.\n"
-msgstr "  -Wl,<opciones>           Pasa <opciones> separadas por coma al enlazador\n"
+msgstr "  -Wl,<opciones>           Pasa <opciones> separadas por coma al enlazador.\n"
 
 #: gcc.c:3412
-#, fuzzy
-#| msgid "  -Xassembler <arg>        Pass <arg> on to the assembler\n"
 msgid "  -Xassembler <arg>        Pass <arg> on to the assembler.\n"
-msgstr "  -Xassembler <argumento>  Pasa el <argumento> al ensamblador\n"
+msgstr "  -Xassembler <argumento>  Pasa el <argumento> al ensamblador.\n"
 
 #: gcc.c:3413
-#, fuzzy
-#| msgid "  -Xpreprocessor <arg>     Pass <arg> on to the preprocessor\n"
 msgid "  -Xpreprocessor <arg>     Pass <arg> on to the preprocessor.\n"
-msgstr "  -Xpreprocessor <argumento> Pasa el <argumento> al preprocesador\n"
+msgstr "  -Xpreprocessor <argumento> Pasa el <argumento> al preprocesador.\n"
 
 #: gcc.c:3414
-#, fuzzy
-#| msgid "  -Xlinker <arg>           Pass <arg> on to the linker\n"
 msgid "  -Xlinker <arg>           Pass <arg> on to the linker.\n"
-msgstr "  -Xlinker <argumento>     Pasa el <argumento> al enlazador\n"
+msgstr "  -Xlinker <argumento>     Pasa el <argumento> al enlazador.\n"
 
 #: gcc.c:3415
-#, fuzzy
-#| msgid "  -save-temps              Do not delete intermediate files\n"
 msgid "  -save-temps              Do not delete intermediate files.\n"
-msgstr "  -save-temps              No borra los ficheros intermedios\n"
+msgstr "  -save-temps              No borra los ficheros intermedios.\n"
 
 #: gcc.c:3416
-#, fuzzy
-#| msgid "  -save-temps=<arg>        Do not delete intermediate files\n"
 msgid "  -save-temps=<arg>        Do not delete intermediate files.\n"
-msgstr "  -save-temps=<arg>        No borra los ficheros intermedios\n"
+msgstr "  -save-temps=<arg>        No borra los ficheros intermedios.\n"
 
 #: gcc.c:3417
-#, fuzzy
-#| msgid ""
-#| "  -no-canonical-prefixes   Do not canonicalize paths when building relative\n"
-#| "                           prefixes to other gcc components\n"
 msgid ""
 "  -no-canonical-prefixes   Do not canonicalize paths when building relative\n"
 "                           prefixes to other gcc components.\n"
 msgstr ""
 "  -no-canonical-prefixes   No crea rutas completas al construir prefijos\n"
-"                           relativos a otros componentes de gcc\n"
+"                           relativos a otros componentes de gcc.\n"
 
 #: gcc.c:3420
-#, fuzzy
-#| msgid "  -pipe                    Use pipes rather than intermediate files\n"
 msgid "  -pipe                    Use pipes rather than intermediate files.\n"
-msgstr "  -pipe                    Usa tuberías en lugar de ficheros intermedios\n"
+msgstr "  -pipe                    Usa tuberías en lugar de ficheros intermedios.\n"
 
 #: gcc.c:3421
-#, fuzzy
-#| msgid "  -time                    Time the execution of each subprocess\n"
 msgid "  -time                    Time the execution of each subprocess.\n"
-msgstr "  -time                    Obtiene el tiempo de ejecución de cada subproceso\n"
+msgstr "  -time                    Obtiene el tiempo de ejecución de cada subproceso.\n"
 
 #: gcc.c:3422
-#, fuzzy
-#| msgid "  -specs=<file>            Override built-in specs with the contents of <file>\n"
 msgid "  -specs=<file>            Override built-in specs with the contents of <file>.\n"
-msgstr "  -specs=<fichero>         Sobrepone las especificaciones internas con el contenido del <fichero>\n"
+msgstr "  -specs=<fichero>         Sobrepone las especificaciones internas con el contenido del <fichero>.\n"
 
 #: gcc.c:3423
-#, fuzzy
-#| msgid "  -std=<standard>          Assume that the input sources are for <standard>\n"
 msgid "  -std=<standard>          Assume that the input sources are for <standard>.\n"
-msgstr "  -std=<estándar>          Asume que los ficheros de entrada son para el <estándar>\n"
+msgstr "  -std=<estándar>          Asume que los ficheros de entrada son para el <estándar>.\n"
 
 #: gcc.c:3424
-#, fuzzy
-#| msgid ""
-#| "  --sysroot=<directory>    Use <directory> as the root directory for headers\n"
-#| "                           and libraries\n"
 msgid ""
 "  --sysroot=<directory>    Use <directory> as the root directory for headers\n"
 "                           and libraries.\n"
 msgstr ""
 "  --sysroot=<directorio>   Usa el <directorio> como directorio raíz para\n"
-"                           los encabezados y bibliotecas\n"
+"                           los encabezados y bibliotecas.\n"
 
 #: gcc.c:3427
-#, fuzzy
-#| msgid "  -B <directory>           Add <directory> to the compiler's search paths\n"
 msgid "  -B <directory>           Add <directory> to the compiler's search paths.\n"
-msgstr "  -B <directorio>          Agrega el <directorio> a las rutas de búsqueda del compilador\n"
+msgstr "  -B <directorio>          Agrega el <directorio> a las rutas de búsqueda del compilador.\n"
 
 #: gcc.c:3428
-#, fuzzy
-#| msgid "  -v                       Display the programs invoked by the compiler\n"
 msgid "  -v                       Display the programs invoked by the compiler.\n"
-msgstr "  -v                       Muestra los programas invocados por el compilador\n"
+msgstr "  -v                       Muestra los programas invocados por el compilador.\n"
 
 #: gcc.c:3429
-#, fuzzy
-#| msgid "  -###                     Like -v but options quoted and commands not executed\n"
 msgid "  -###                     Like -v but options quoted and commands not executed.\n"
-msgstr "  -###                     Como -v pero no se ejecutan las opciones entre comillas y las órdenes\n"
+msgstr "  -###                     Como -v pero con las opciones entre comillas y sin ejecutar las órdenes.\n"
 
 #: gcc.c:3430
-#, fuzzy
-#| msgid "  -E                       Preprocess only; do not compile, assemble or link\n"
 msgid "  -E                       Preprocess only; do not compile, assemble or link.\n"
-msgstr "  -E                       Solamente preprocesa; no compila, ensambla o enlaza\n"
+msgstr "  -E                       Solamente preprocesa; no compila, ensambla o enlaza.\n"
 
 #: gcc.c:3431
-#, fuzzy
-#| msgid "  -S                       Compile only; do not assemble or link\n"
 msgid "  -S                       Compile only; do not assemble or link.\n"
-msgstr "  -S                       Solamente compila; no ensambla o enlaza\n"
+msgstr "  -S                       Solamente compila; no ensambla o enlaza.\n"
 
 #: gcc.c:3432
-#, fuzzy
-#| msgid "  -c                       Compile and assemble, but do not link\n"
 msgid "  -c                       Compile and assemble, but do not link.\n"
-msgstr "  -c                       Compila y ensambla, pero no enlaza\n"
+msgstr "  -c                       Compila y ensambla, pero no enlaza.\n"
 
 #: gcc.c:3433
-#, fuzzy
-#| msgid "  -o <file>                Place the output into <file>\n"
 msgid "  -o <file>                Place the output into <file>.\n"
-msgstr "  -o <fichero>             Coloca la salida en el <fichero>\n"
+msgstr "  -o <fichero>             Coloca la salida en el <fichero>.\n"
 
 #: gcc.c:3434
-#, fuzzy
-#| msgid "  -pie                     Create a position independent executable\n"
 msgid "  -pie                     Create a position independent executable.\n"
-msgstr "  -pie                     Genera un ejecutable independiente de posición\n"
+msgstr "  -pie                     Genera un ejecutable independiente de posición.\n"
 
 #: gcc.c:3435
-#, fuzzy
-#| msgid "  -shared                  Create a shared library\n"
 msgid "  -shared                  Create a shared library.\n"
-msgstr "  -shared                  Crea una biblioteca compartida\n"
+msgstr "  -shared                  Crea una biblioteca compartida.\n"
 
 #: gcc.c:3436
-#, fuzzy
-#| msgid ""
-#| "  -x <language>            Specify the language of the following input files\n"
-#| "                           Permissible languages include: c c++ assembler none\n"
-#| "                           'none' means revert to the default behavior of\n"
-#| "                           guessing the language based on the file's extension\n"
 msgid ""
 "  -x <language>            Specify the language of the following input files.\n"
 "                           Permissible languages include: c c++ assembler none\n"
@@ -597,9 +498,9 @@ msgid ""
 "                           guessing the language based on the file's extension.\n"
 msgstr ""
 "  -x <lenguaje>            Especifica el lenguaje de los siguientes ficheros de entrada\n"
-"                           Los lenguajes permitidos incluyen: c c++ assembler none\n"
-"                           'none' significa revertir a la conducta habitual de\n"
-"                           adivinar el lenguaje basado en la extensión del fichero\n"
+"                           Los lenguajes permitidos son: c c++ assembler none\n"
+"                           'none' significa revertir al comportamiento habitual de\n"
+"                           adivinar el lenguaje por la extensión del fichero.\n"
 
 #: gcc.c:3443
 #, c-format
@@ -854,16 +755,14 @@ msgstr ""
 "\n"
 
 #: gcov-tool.c:510
-#, fuzzy, c-format
-#| msgid "  -h, --help                      Print this help, then exit\n"
+#, c-format
 msgid "  -h, --help                            Print this help, then exit\n"
-msgstr "  -h, --help                      Muestra esta información, y finaliza\n"
+msgstr "  -h, --help                            Muestra esta información, y finaliza\n"
 
 #: gcov-tool.c:511
-#, fuzzy, c-format
-#| msgid "  -v, --version                   Print version number, then exit\n"
+#, c-format
 msgid "  -v, --version                         Print version number, then exit\n"
-msgstr "  -v, --version                   Muestra el número de versión, y finaliza\n"
+msgstr "  -v, --version                         Muestra el número de versión, y finaliza\n"
 
 #: gcov-tool.c:515 gcov.c:492
 #, c-format
@@ -877,10 +776,9 @@ msgstr ""
 "%s.\n"
 
 #: gcov-tool.c:526
-#, fuzzy, c-format
-#| msgid "Copyright (C) 2001 Free Software Foundation, Inc.\n"
+#, c-format
 msgid "Copyright %s 2014-2016 Free Software Foundation, Inc.\n"
-msgstr "Copyright (C) 2001 Free Software Foundation, Inc.\n"
+msgstr "Copyright %s 2014-2016 Free Software Foundation, Inc.\n"
 
 #: gcov-tool.c:529 gcov.c:506
 #, c-format
@@ -929,10 +827,7 @@ msgid "  -b, --branch-probabilities      Include branch probabilities in output\
 msgstr "  -b, --branch-probabilities      Incluye las probabilidades de ramificación en la salida\n"
 
 #: gcov.c:477
-#, fuzzy, c-format
-#| msgid ""
-#| "  -c, --branch-counts             Given counts of branches taken\n"
-#| "                                    rather than percentages\n"
+#, c-format
 msgid ""
 "  -c, --branch-counts             Output counts of branches taken\n"
 "                                    rather than percentages\n"
@@ -1035,34 +930,29 @@ msgid "\n"
 msgstr "\n"
 
 #: gcov.c:856
-#, fuzzy, c-format
-#| msgid "%s: Cannot open output file: %s\n"
+#, c-format
 msgid "Cannot open intermediate output file %s\n"
-msgstr "%s: No se puede abrir el fichero de salida: %s\n"
+msgstr "No se puede abrir el fichero de salida intermedia %s\n"
 
 #: gcov.c:1139
-#, fuzzy, c-format
-#| msgid "%s:source file is newer than graph file '%s'\n"
+#, c-format
 msgid "%s:source file is newer than notes file '%s'\n"
-msgstr "%s:el fichero fuente es más nuevo que el fichero de grafo '%s'\n"
+msgstr "%s:el fichero fuente es más nuevo que el fichero de notas '%s'\n"
 
 #: gcov.c:1144
-#, fuzzy, c-format
-#| msgid "(the message is only displayed one per source file)\n"
+#, c-format
 msgid "(the message is displayed only once per source file)\n"
-msgstr "(el mensaje sólo se muestra una vez por cada fichero de código)\n"
+msgstr "(el mensaje sólo se muestra una vez por cada fichero de código fuente)\n"
 
 #: gcov.c:1169
-#, fuzzy, c-format
-#| msgid "%s:cannot open graph file\n"
+#, c-format
 msgid "%s:cannot open notes file\n"
-msgstr "%s:no se puede abrir el fichero de grafo\n"
+msgstr "%s:no se puede abrir el fichero de notas\n"
 
 #: gcov.c:1175
-#, fuzzy, c-format
-#| msgid "%s:not a gcov data file\n"
+#, c-format
 msgid "%s:not a gcov notes file\n"
-msgstr "%s:no es un fichero de datos gcov\n"
+msgstr "%s:no es un fichero de notas gcov\n"
 
 #: gcov.c:1188
 #, c-format
@@ -1100,10 +990,9 @@ msgid "%s:version '%.4s', prefer version '%.4s'\n"
 msgstr "%s:versión '%.4s', se prefiere la versión '%.4s'\n"
 
 #: gcov.c:1417
-#, fuzzy, c-format
-#| msgid "%s:stamp mismatch with graph file\n"
+#, c-format
 msgid "%s:stamp mismatch with notes file\n"
-msgstr "%s:no coincide la marca con el fichero de grafo\n"
+msgstr "%s:no coincide la marca con el fichero de notas\n"
 
 #: gcov.c:1452
 #, c-format
@@ -1234,10 +1123,9 @@ msgid "function returns address of local variable"
 msgstr "la función devuelve la dirección de una variable local"
 
 #: gimple-ssa-isolate-paths.c:442 gimple-ssa-isolate-paths.c:287
-#, fuzzy, gcc-internal-format
-#| msgid "function returns address of local variable"
+#, gcc-internal-format
 msgid "function may return address of local variable"
-msgstr "la función devuelve la dirección de una variable local"
+msgstr "la función puede devolver la dirección de una variable local"
 
 #: incpath.c:72
 #, c-format
@@ -1302,16 +1190,12 @@ msgid "In function %qs"
 msgstr "En la función %qs"
 
 #: langhooks.c:448 cp/error.c:3268
-#, fuzzy
-#| msgid "    inlined from %qs at %s:%d:%d"
 msgid "    inlined from %qs at %r%s:%d:%d%R"
-msgstr "    incluido en línea de %qs en %s:%d:%d"
+msgstr "    incluido en línea de %qs en %r%s:%d:%d%R"
 
 #: langhooks.c:453 cp/error.c:3273
-#, fuzzy
-#| msgid "    inlined from %qs at %s:%d"
 msgid "    inlined from %qs at %r%s:%d%R"
-msgstr "    incluido en línea de %qs en %s:%d"
+msgstr "    incluido en línea de %qs en %r%s:%d%R"
 
 #: langhooks.c:459 cp/error.c:3279
 #, c-format
@@ -1384,10 +1268,9 @@ msgid " No options with the desired characteristics were found\n"
 msgstr " No se encontraron opciones con las características deseadas\n"
 
 #: opts.c:1246
-#, fuzzy, c-format
-#| msgid " None found.  Use --help=%s to show *all* the options supported by the %s front-end\n"
+#, c-format
 msgid " None found.  Use --help=%s to show *all* the options supported by the %s front-end.\n"
-msgstr " No se encontró ninguna.  Use --help=%s para mostrar *todas* las opciones admitidas por el frente %s\n"
+msgstr " No se encontró ninguna.  Use --help=%s para mostrar *todas* las opciones admitidas por el frente %s.\n"
 
 #: opts.c:1252
 #, c-format
@@ -1526,10 +1409,9 @@ msgid "%s%s%s %sversion %s (%s) compiled by CC, "
 msgstr "%s%s%s %sversión %s (%s) compilado por CC, "
 
 #: toplev.c:644
-#, fuzzy, c-format
-#| msgid "GMP version %s, MPFR version %s, MPC version %s\n"
+#, c-format
 msgid "GMP version %s, MPFR version %s, MPC version %s, isl version %s\n"
-msgstr "GMP versión %s, MPFR versión %s, MPC versión %s\n"
+msgstr "GMP versión %s, MPFR versión %s, MPC versión %s, isl versión %s\n"
 
 #: toplev.c:646
 #, c-format
@@ -1560,10 +1442,8 @@ msgid "function not considered for inlining"
 msgstr "la función no se consideró para la inclusión en línea"
 
 #: cif-code.def:43
-#, fuzzy
-#| msgid "%qE is not initialized"
 msgid "caller is not optimized"
-msgstr "%qE no está inicializado"
+msgstr "el que llama no está optimizado"
 
 #: cif-code.def:47
 msgid "function body not available"
@@ -1796,8 +1676,7 @@ msgid "The maximum length of scheduling's pending operations list."
 msgstr "La longitud máxima de la lista de operaciones pendientes del planificador."
 
 #: params.def:180
-#, fuzzy, no-c-format
-#| msgid "The maximum number of backtrack attempts the scheduler should make when modulo scheduling a loop"
+#, no-c-format
 msgid "The maximum number of backtrack attempts the scheduler should make when modulo scheduling a loop."
 msgstr "El número máximo de intentos hacia atrás que debe hacer el planificador cuando planifique un bucle módulo."
 
@@ -3032,22 +2911,19 @@ msgid "invalid %%xn code"
 msgstr "código %%xn inválido"
 
 #: config/alpha/alpha.c:5499
-#, fuzzy, c-format
-#| msgid "invalid address"
+#, c-format
 msgid "invalid operand address"
-msgstr "dirección inválida"
+msgstr "dirección de operando no válida"
 
 #: config/arc/arc.c:2966
-#, fuzzy, c-format
-#| msgid "invalid operand to %%s code"
+#, c-format
 msgid "invalid operand to %%Z code"
-msgstr "operando inválido para el código %%s"
+msgstr "operando no válido para el código %%Z"
 
 #: config/arc/arc.c:2974
-#, fuzzy, c-format
-#| msgid "invalid operand to %%s code"
+#, c-format
 msgid "invalid operand to %%z code"
-msgstr "operando inválido para el código %%s"
+msgstr "operando no válido para el código %%z"
 
 #: config/arc/arc.c:2982
 #, fuzzy, c-format
@@ -9469,7 +9345,7 @@ msgstr "Trata de evitar que se generen secuencias de instrucciones que contengan
 
 #: config/i386/stringop.opt:8
 msgid "the Free Software Foundation; either version 3, or (at your option)"
-msgstr ""
+msgstr "la Free Software Foundation; bien versión 3, o (a su elección)"
 
 #: config/i386/stringop.opt:13
 msgid "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the"
@@ -9477,391 +9353,267 @@ msgstr ""
 
 #: config/i386/stringop.opt:18
 msgid "see <http://www.gnu.org/licenses/>.  */"
-msgstr ""
+msgstr "véase <http://www.gnu.org/licenses/>.  */"
 
 #: config/pa/pa64-hpux.opt:23
-#, fuzzy
-#| msgid "Assume code will be linked by GNU ld"
 msgid "Assume code will be linked by GNU ld."
-msgstr "Asume que el código se enlazará por ld de GNU."
+msgstr "Asume que el código se enlazará mediante ld de GNU."
 
 #: config/pa/pa64-hpux.opt:27
-#, fuzzy
-#| msgid "Assume code will be linked by HP ld"
 msgid "Assume code will be linked by HP ld."
-msgstr "Asume que el código se enlazará por ld de HP."
+msgstr "Asume que el código se enlazará mediante ld de HP."
 
 #: config/pa/pa-hpux1010.opt:23 config/pa/pa-hpux.opt:31
 #: config/pa/pa-hpux1131.opt:23 config/pa/pa-hpux1111.opt:23
-#, fuzzy
-#| msgid "Specify UNIX standard for predefines and linking"
 msgid "Specify UNIX standard for predefines and linking."
-msgstr "Especifica el estándar UNIX para las predefiniciones y enlace."
+msgstr "Especifica el estándar UNIX para las predefiniciones y enlazado."
 
 #: config/pa/pa-hpux.opt:27
-#, fuzzy
-#| msgid "Generate cpp defines for server IO"
 msgid "Generate cpp defines for server IO."
 msgstr "Genera definiciones cpp para ES de servidor."
 
 #: config/pa/pa-hpux.opt:35
-#, fuzzy
-#| msgid "Generate cpp defines for workstation IO"
 msgid "Generate cpp defines for workstation IO."
 msgstr "Genera definiciones cpp para ES de estación de trabajo."
 
 #: config/pa/pa.opt:30 config/pa/pa.opt:83 config/pa/pa.opt:91
-#, fuzzy
-#| msgid "Generate PA1.0 code"
 msgid "Generate PA1.0 code."
 msgstr "Genera código PA1.0."
 
 #: config/pa/pa.opt:34 config/pa/pa.opt:95 config/pa/pa.opt:136
-#, fuzzy
-#| msgid "Generate PA1.1 code"
 msgid "Generate PA1.1 code."
 msgstr "Genera código PA1.1."
 
 #: config/pa/pa.opt:38 config/pa/pa.opt:99
-#, fuzzy
-#| msgid "Generate PA2.0 code (requires binutils 2.10 or later)"
 msgid "Generate PA2.0 code (requires binutils 2.10 or later)."
 msgstr "Genera código PA2.0 (requiere binutils 2.10 o superior)."
 
 #: config/pa/pa.opt:46
-#, fuzzy
-#| msgid "Disable FP regs"
 msgid "Disable FP regs."
 msgstr "Desactiva los registros FP."
 
 #: config/pa/pa.opt:50
-#, fuzzy
-#| msgid "Disable indexed addressing"
 msgid "Disable indexed addressing."
-msgstr "Desactiva el direccionamiento indizado."
+msgstr "Desactiva el direccionamiento indexado."
 
 #: config/pa/pa.opt:54
-#, fuzzy
-#| msgid "Generate fast indirect calls"
 msgid "Generate fast indirect calls."
 msgstr "Genera llamadas indirectas rápidas."
 
 #: config/pa/pa.opt:62
-#, fuzzy
-#| msgid "Assume code will be assembled by GAS"
 msgid "Assume code will be assembled by GAS."
-msgstr "Asume que el código será ensamblado por GAS."
+msgstr "Asume que el código será ensamblado mediante GAS."
 
 #: config/pa/pa.opt:71
-#, fuzzy
-#| msgid "Enable linker optimizations"
 msgid "Enable linker optimizations."
 msgstr "Activa las optimizaciones del enlazador."
 
 #: config/pa/pa.opt:75
-#, fuzzy
-#| msgid "Always generate long calls"
 msgid "Always generate long calls."
 msgstr "Genera siempre llamadas long."
 
 #: config/pa/pa.opt:79
-#, fuzzy
-#| msgid "Emit long load/store sequences"
 msgid "Emit long load/store sequences."
 msgstr "Emite secuencias load/store long."
 
 #: config/pa/pa.opt:87
-#, fuzzy
-#| msgid "Disable space regs"
 msgid "Disable space regs."
 msgstr "Desactiva los registros de espacio."
 
 #: config/pa/pa.opt:103
-#, fuzzy
-#| msgid "Use portable calling conventions"
 msgid "Use portable calling conventions."
-msgstr "Usa convenciones de llamada transportable."
+msgstr "Usa convenios de llamada transportables."
 
 #: config/pa/pa.opt:107
-#, fuzzy
-#| msgid "Specify CPU for scheduling purposes.  Valid arguments are 700, 7100, 7100LC, 7200, 7300, and 8000"
 msgid "Specify CPU for scheduling purposes.  Valid arguments are 700, 7100, 7100LC, 7200, 7300, and 8000."
-msgstr "Especifica el CPU por razones de planificación.  Los argumentos válidos son 700, 7100, 7100LC, 7200, 7300, y 8000."
+msgstr "Especifica la CPU por razones de planificación.  Los argumentos válidos son 700, 7100, 7100LC, 7200, 7300, y 8000."
 
 #: config/pa/pa.opt:132 config/frv/frv.opt:215
-#, fuzzy
-#| msgid "Use software floating point"
 msgid "Use software floating point."
 msgstr "Usa coma flotante de software."
 
 #: config/pa/pa.opt:140
-#, fuzzy
-#| msgid "Do not disable space regs"
 msgid "Do not disable space regs."
 msgstr "No desactiva los registros de espacio."
 
 #: config/v850/v850.opt:29
-#, fuzzy
-#| msgid "Use registers r2 and r5"
 msgid "Use registers r2 and r5."
 msgstr "Usa los registros r2 y r5."
 
 #: config/v850/v850.opt:33
-#, fuzzy
-#| msgid "Use 4 byte entries in switch tables"
 msgid "Use 4 byte entries in switch tables."
 msgstr "Usa entradas de 4 byte en las tablas de switch."
 
 # No me gusta mucho esta traducción. cfuga
 #: config/v850/v850.opt:37
-#, fuzzy
-#| msgid "Enable backend debugging"
 msgid "Enable backend debugging."
 msgstr "Habilita la depuración del extremo trasero."
 
 #: config/v850/v850.opt:41
-#, fuzzy
-#| msgid "Do not use the callt instruction"
 msgid "Do not use the callt instruction (default)."
-msgstr "No usa la instrucción callt."
+msgstr "No usa la instrucción callt (predeterminado)."
 
 #: config/v850/v850.opt:45
-#, fuzzy
-#| msgid "Reuse r30 on a per function basis"
 msgid "Reuse r30 on a per function basis."
 msgstr "Reusa r30 basado por función."
 
 #: config/v850/v850.opt:52
-#, fuzzy
-#| msgid "Prohibit PC relative function calls"
 msgid "Prohibit PC relative function calls."
 msgstr "Prohíbe la llamada a funciones relativas al PC."
 
 #: config/v850/v850.opt:56
-#, fuzzy
-#| msgid "Use stubs for function prologues"
 msgid "Use stubs for function prologues."
-msgstr "Usa cabos para los prólogos de función."
+msgstr "Usa stubs para los prólogos de función."
 
 #: config/v850/v850.opt:60
-#, fuzzy
-#| msgid "Set the max size of data eligible for the SDA area"
 msgid "Set the max size of data eligible for the SDA area."
 msgstr "Establece el tamaño máximo de datos elegibles para el área SDA."
 
 #: config/v850/v850.opt:67
-#, fuzzy
-#| msgid "Enable the use of the short load instructions"
 msgid "Enable the use of the short load instructions."
 msgstr "Activa el uso de las instrucciones short load."
 
 #: config/v850/v850.opt:71
-#, fuzzy
-#| msgid "Same as: -mep -mprolog-function"
 msgid "Same as: -mep -mprolog-function."
 msgstr "Igual que: -mep -mprolog-function."
 
 #: config/v850/v850.opt:75
-#, fuzzy
-#| msgid "Set the max size of data eligible for the TDA area"
 msgid "Set the max size of data eligible for the TDA area."
 msgstr "Establece el tamaño máximo de datos elegibles para el área TDA."
 
 #: config/v850/v850.opt:82
-#, fuzzy
-#| msgid "Do not enforce strict alignment"
 msgid "Do not enforce strict alignment."
 msgstr "No refuerza la alineación estricta."
 
 #: config/v850/v850.opt:86
-#, fuzzy
-#| msgid "Put jump tables for switch statements into the .data section rather than the .code section"
 msgid "Put jump tables for switch statements into the .data section rather than the .code section."
 msgstr "Coloca las tablas de salto para las declaraciones switch en la sección .data en lugar de la sección .code."
 
 #: config/v850/v850.opt:93
-#, fuzzy
-#| msgid "Compile for the v850 processor"
 msgid "Compile for the v850 processor."
 msgstr "Compila para el procesador v850."
 
 #: config/v850/v850.opt:97
-#, fuzzy
-#| msgid "Compile for the v850e processor"
 msgid "Compile for the v850e processor."
 msgstr "Compila para el procesador v850e."
 
 #: config/v850/v850.opt:101
-#, fuzzy
-#| msgid "Compile for the v850e1 processor"
 msgid "Compile for the v850e1 processor."
 msgstr "Compila para el procesador v850e1."
 
 #: config/v850/v850.opt:105
-#, fuzzy
-#| msgid "Compile for the v850es variant of the v850e1"
 msgid "Compile for the v850es variant of the v850e1."
 msgstr "Compila para la variante v850es del procesador v850e1."
 
 #: config/v850/v850.opt:109
-#, fuzzy
-#| msgid "Compile for the v850e2 processor"
 msgid "Compile for the v850e2 processor."
 msgstr "Compila para el procesador v850e2."
 
 #: config/v850/v850.opt:113
-#, fuzzy
-#| msgid "Compile for the v850e2v3 processor"
 msgid "Compile for the v850e2v3 processor."
 msgstr "Compila para el procesador v850e2v3."
 
 #: config/v850/v850.opt:117
-#, fuzzy
-#| msgid "Compile for the v850e processor"
 msgid "Compile for the v850e3v5 processor."
-msgstr "Compila para el procesador v850e."
+msgstr "Compila para el procesador v850e3v5."
 
 #: config/v850/v850.opt:124
-#, fuzzy
-#| msgid "Enable clip instructions"
 msgid "Enable v850e3v5 loop instructions."
-msgstr "Activa las instrucciones clip."
+msgstr "Activa las instrucciones de bucle del v850e3v5."
 
 #: config/v850/v850.opt:128
-#, fuzzy
-#| msgid "Set the max size of data eligible for the ZDA area"
 msgid "Set the max size of data eligible for the ZDA area."
 msgstr "Establece el tamaño máximo de datos elegibles para el área ZDA."
 
 #: config/v850/v850.opt:135
-#, fuzzy
-#| msgid "Enable compatibility with ic960 assembler"
 msgid "Enable relaxing in the assembler."
-msgstr "Activar la compatibilidad con el ensamblador ic960."
+msgstr "Activa la relajación en el ensamblador."
 
 #: config/v850/v850.opt:139
-#, fuzzy
-#| msgid "Prohibit PC relative function calls"
 msgid "Prohibit PC relative jumps."
-msgstr "Prohíbe la llamada a funciones relativas al PC."
+msgstr "Prohíbe los saltos relativos al PC."
 
 #: config/v850/v850.opt:143
-#, fuzzy
-#| msgid "Prevent the use of all hardware floating-point instructions"
 msgid "Inhibit the use of hardware floating point instructions."
-msgstr "Previene el uso de todas las instrucciones de coma flotante de hardware."
+msgstr "Inhibe el uso de las instrucciones de coma flotante de hardware."
 
 #: config/v850/v850.opt:147
-#, fuzzy
-#| msgid "Allow the use of hardware floating-point ABI and instructions"
 msgid "Allow the use of hardware floating point instructions for V850E2V3 and up."
-msgstr "Permite el uso de la ABI y las instrucciones de coma flotante de hardware."
+msgstr "Permite el uso de las instrucciones de coma flotante de hardware para el V850E2V3 y superior."
 
 #: config/v850/v850.opt:151
-#, fuzzy
-#| msgid "Enable the use of RX FPU instructions.  This is the default."
 msgid "Enable support for the RH850 ABI.  This is the default."
-msgstr "Activa el uso de las instrucciones de FPU RX.  Este es el valor por defecto."
+msgstr "Activa el soporte del ABI del RH850.  Este es el valor por defecto."
 
 #: config/v850/v850.opt:155
-#, fuzzy
-#| msgid "Enable support for huge objects"
 msgid "Enable support for the old GCC ABI."
-msgstr "Activa el soporte para objetos enormes."
+msgstr "Activa el soporte para el antiguo ABI de GCC."
 
 #: config/v850/v850.opt:159
 msgid "Support alignments of up to 64-bits."
-msgstr ""
+msgstr "Activa alineamientos de hasta 64 bits."
 
 #: config/g.opt:27
-#, fuzzy
-#| msgid "-G<number>\tPut global and static data smaller than <number> bytes into a special section (on some targets)"
 msgid "-G<number>\tPut global and static data smaller than <number> bytes into a special section (on some targets)."
 msgstr "-G<número>\tColoca los datos globales y estáticos más pequeños que <número> bytes en una sección especial (en algunos objetivos)."
 
 #: config/lynx.opt:23
-#, fuzzy
-#| msgid "Support legacy multi-threading"
 msgid "Support legacy multi-threading."
 msgstr "Soporte para multihilos antiguo."
 
 #: config/lynx.opt:27
-#, fuzzy
-#| msgid "Use shared libraries"
 msgid "Use shared libraries."
 msgstr "Usa bibliotecas compartidas."
 
 #: config/lynx.opt:31
-#, fuzzy
-#| msgid "Support multi-threading"
 msgid "Support multi-threading."
 msgstr "Soporte para multihilos."
 
 #: config/nvptx/nvptx.opt:22
-#, fuzzy
-#| msgid "Generate code for a Fido A"
 msgid "Generate code for a 32-bit ABI."
-msgstr "Genera código para un Fido A."
+msgstr "Genera código para ABI de 32 bits."
 
 #: config/nvptx/nvptx.opt:26
-#, fuzzy
-#| msgid "Generate code for a Fido A"
 msgid "Generate code for a 64-bit ABI."
-msgstr "Genera código para un Fido A."
+msgstr "Genera código para ABI de 64 bits."
 
 #: config/nvptx/nvptx.opt:30
 msgid "Link in code for a __main kernel."
-msgstr ""
+msgstr "Enlaza código para un núcleo __main."
 
 #: config/nvptx/nvptx.opt:34
 msgid "Optimize partition neutering."
 msgstr ""
 
 #: config/vxworks.opt:36
-#, fuzzy
-#| msgid "Assume the VxWorks RTP environment"
 msgid "Assume the VxWorks RTP environment."
 msgstr "Asume el entorno RTP de VxWorks."
 
 #: config/vxworks.opt:43
-#, fuzzy
-#| msgid "Assume the VxWorks vThreads environment"
 msgid "Assume the VxWorks vThreads environment."
 msgstr "Asume el entorno vThreads de VxWorks."
 
 #: config/cr16/cr16.opt:23
-#, fuzzy
-#| msgid "-msim   Use simulator runtime"
 msgid "-msim   Use simulator runtime."
 msgstr "-msim   Usa el simulador de tiempo de ejecución."
 
 #: config/cr16/cr16.opt:27
-#, fuzzy
-#| msgid "Generate SBIT, CBIT instructions"
 msgid "Generate SBIT, CBIT instructions."
 msgstr "Genera instrucciones SBIT, CBIT."
 
 #: config/cr16/cr16.opt:31
-#, fuzzy
-#| msgid "Support multiply accumulate instructions"
 msgid "Support multiply accumulate instructions."
 msgstr "Da soporte a múltiples instrucciones de acumulador."
 
 #: config/cr16/cr16.opt:38
-#, fuzzy
-#| msgid "Treat data references as near, far or medium. medium is default"
 msgid "Treat data references as near, far or medium. medium is default."
 msgstr "Trata las referencias a datos como near, far o medium. Por defecto es medium."
 
 #: config/cr16/cr16.opt:42
-#, fuzzy
-#| msgid "Generate code for CR16C architecture"
 msgid "Generate code for CR16C architecture."
 msgstr "Genera código para la arquitectura CR16C."
 
 #: config/cr16/cr16.opt:46
-#, fuzzy
-#| msgid "Generate code for CR16C+ architecture (Default)"
 msgid "Generate code for CR16C+ architecture (Default)."
 msgstr "Genera código para la arquitectura CR16C+ (Por defecto)."
 
@@ -9870,44 +9622,35 @@ msgid "Treat integers as 32-bit."
 msgstr "Trata los enteros como 32-bit."
 
 #: config/avr/avr.opt:23
-#, fuzzy
-#| msgid "Use subroutines for function prologues and epilogues"
 msgid "Use subroutines for function prologues and epilogues."
 msgstr "Usa subrutinas para los prólogos y epílogos de función."
 
 #: config/avr/avr.opt:26
-#, fuzzy, c-format
-#| msgid "missing makefile target after %qs"
+#, c-format
 msgid "missing device or architecture after %qs"
-msgstr "falta un fichero make objetivo después de %qs"
+msgstr "falta dispositivo o arquitectura después de %qs"
 
 #: config/avr/avr.opt:27
-#, fuzzy
-#| msgid "-mmcu=MCU\tSelect the target MCU"
 msgid "-mmcu=MCU\tSelect the target MCU."
 msgstr "-mmcu=MCU\tSelecciona el MCU destino."
 
 #: config/avr/avr.opt:31
 msgid "Set the number of 64 KiB flash segments."
-msgstr ""
+msgstr "Establece el número de segmentos flash de 64 KiB."
 
 #: config/avr/avr.opt:35
 msgid "Indicate presence of a processor erratum."
-msgstr ""
+msgstr "Indica la presencia de una errata del procesador."
 
 #: config/avr/avr.opt:39
 msgid "Enable Read-Modify-Write (RMW) instructions support/use."
-msgstr ""
+msgstr "Activa el soporte/uso de instrucciones Leer-Modificar-Escribir (LME)."
 
 #: config/avr/avr.opt:49
-#, fuzzy
-#| msgid "Use an 8-bit 'int' type"
 msgid "Use an 8-bit 'int' type."
 msgstr "Usa un tipo 'int' de 8-bit."
 
 #: config/avr/avr.opt:53
-#, fuzzy
-#| msgid "Change the stack pointer without disabling interrupts"
 msgid "Change the stack pointer without disabling interrupts."
 msgstr "Cambia el puntero de la pila sin desactivar las interrupciones."
 
@@ -9916,14 +9659,10 @@ msgid "Set the branch costs for conditional branch instructions.  Reasonable val
 msgstr "Establece los costos de ramificación para las instrucciones de ramificación condicional.  Los valores razonables son enteros pequeños que no son negativos.  El costo de ramificación por defecto es 0."
 
 #: config/avr/avr.opt:67
-#, fuzzy
-#| msgid "Change only the low 8 bits of the stack pointer"
 msgid "Change only the low 8 bits of the stack pointer."
 msgstr "Cambia sólo los 8 bits bajos del puntero de pila."
 
 #: config/avr/avr.opt:71
-#, fuzzy
-#| msgid "Relax branches"
 msgid "Relax branches."
 msgstr "Relaja ramificaciones."
 
@@ -9944,242 +9683,170 @@ msgid "The device has no SPH special function register. This option will be over
 msgstr "El dispositivo no tiene el registro de función especial SPH. Esta opción la sobreescribirá el controlador del compilador con la opción correcta si se puede deducir la presencia/ausencia de SPH con -mmcu=MCU."
 
 #: config/avr/avr.opt:92
-#, fuzzy
-#| msgid "taking the address of a label is non-standard"
 msgid "Warn if the address space of an address is changed."
-msgstr "tomar la dirección de una etiqueta no es estándar."
+msgstr "Avisa si se cambia el espacio de direcciones de una dirección."
 
 #: config/avr/avr.opt:96
 msgid "Allow to use truncation instead of rounding towards 0 for fractional int types."
-msgstr ""
+msgstr "Permite truncar en lugar de redondear a 0 los tipos de fracciones de enteros."
 
 #: config/avr/avr.opt:100
 msgid "Do not link against the device-specific library lib<MCU>.a."
-msgstr ""
+msgstr "No enlaza contra la biblioteca específica de dispositivo lib<MCU>.a."
 
 #: config/m32r/m32r.opt:34
-#, fuzzy
-#| msgid "Compile for the m32rx"
 msgid "Compile for the m32rx."
 msgstr "Compila para el m32rx."
 
 #: config/m32r/m32r.opt:38
-#, fuzzy
-#| msgid "Compile for the m32r2"
 msgid "Compile for the m32r2."
 msgstr "Compila para el m32r2."
 
 #: config/m32r/m32r.opt:42
-#, fuzzy
-#| msgid "Compile for the m32r"
 msgid "Compile for the m32r."
 msgstr "Compila para el m32r."
 
 #: config/m32r/m32r.opt:46
-#, fuzzy
-#| msgid "Align all loops to 32 byte boundary"
 msgid "Align all loops to 32 byte boundary."
 msgstr "Alinea todos los bucles al límite de 32 byte."
 
 #: config/m32r/m32r.opt:50
-#, fuzzy
-#| msgid "Prefer branches over conditional execution"
 msgid "Prefer branches over conditional execution."
 msgstr "Prefiere las ramas sobre la ejecución condicional."
 
 #: config/m32r/m32r.opt:54
-#, fuzzy
-#| msgid "Give branches their default cost"
 msgid "Give branches their default cost."
 msgstr "Da a las ramificaciones su costo por defecto."
 
 #: config/m32r/m32r.opt:58
-#, fuzzy
-#| msgid "Display compile time statistics"
 msgid "Display compile time statistics."
 msgstr "Muestra estadísticas de tiempo de compilación."
 
 #: config/m32r/m32r.opt:62
-#, fuzzy
-#| msgid "Specify cache flush function"
 msgid "Specify cache flush function."
 msgstr "Especifica la función de limpieza de caché."
 
 #: config/m32r/m32r.opt:66
-#, fuzzy
-#| msgid "Specify cache flush trap number"
 msgid "Specify cache flush trap number."
 msgstr "Especifica el número de trampa de limpieza de caché."
 
 #: config/m32r/m32r.opt:70
-#, fuzzy
-#| msgid "Only issue one instruction per cycle"
 msgid "Only issue one instruction per cycle."
 msgstr "Sólo ejecuta una instrucción por ciclo."
 
 #: config/m32r/m32r.opt:74
-#, fuzzy
-#| msgid "Allow two instructions to be issued per cycle"
 msgid "Allow two instructions to be issued per cycle."
 msgstr "Permite que se ejecuten dos instrucciones por ciclo."
 
 #: config/m32r/m32r.opt:78
-#, fuzzy
-#| msgid "Code size: small, medium or large"
 msgid "Code size: small, medium or large."
 msgstr "Tamaño del código: small, medium o large."
 
 #: config/m32r/m32r.opt:94
-#, fuzzy
-#| msgid "Don't call any cache flush functions"
 msgid "Don't call any cache flush functions."
 msgstr "No llama a ninguna función de limpieza de caché."
 
 #: config/m32r/m32r.opt:98
-#, fuzzy
-#| msgid "Don't call any cache flush trap"
 msgid "Don't call any cache flush trap."
 msgstr "No llama a ninguna trampa de limpieza de caché."
 
 #: config/m32r/m32r.opt:105
-#, fuzzy
-#| msgid "Small data area: none, sdata, use"
 msgid "Small data area: none, sdata, use."
 msgstr "Área de datos small: none, sdata, use."
 
 #: config/s390/tpf.opt:23
-#, fuzzy
-#| msgid "Enable TPF-OS tracing code"
 msgid "Enable TPF-OS tracing code."
 msgstr "Activa el código de rastreo TPF-OS."
 
 #: config/s390/tpf.opt:27
-#, fuzzy
-#| msgid "Specify main object for TPF-OS"
 msgid "Specify main object for TPF-OS."
 msgstr "Especifica el objeto principal para TPF-OS."
 
 #: config/s390/s390.opt:48
-#, fuzzy
-#| msgid "31 bit ABI"
 msgid "31 bit ABI."
 msgstr "ABI de 31 bit."
 
 #: config/s390/s390.opt:52
-#, fuzzy
-#| msgid "64 bit ABI"
 msgid "64 bit ABI."
 msgstr "ABI de 64 bit."
 
 #: config/s390/s390.opt:96
-#, fuzzy
-#| msgid "Maintain backchain pointer"
 msgid "Maintain backchain pointer."
 msgstr "Mantiene el puntero a la cadena hacia atrás."
 
 #: config/s390/s390.opt:100
-#, fuzzy
-#| msgid "Additional debug prints"
 msgid "Additional debug prints."
 msgstr "Impresiones adicionales de depuración."
 
 #: config/s390/s390.opt:104
-#, fuzzy
-#| msgid "ESA/390 architecture"
 msgid "ESA/390 architecture."
 msgstr "Arquitectura ESA/390."
 
 #: config/s390/s390.opt:108
-#, fuzzy
-#| msgid "Enable decimal floating point hardware support"
 msgid "Enable decimal floating point hardware support."
 msgstr "Admite la coma flotante decimal de hardware."
 
 #: config/s390/s390.opt:112
-#, fuzzy
-#| msgid "Enable hardware floating point"
 msgid "Enable hardware floating point."
 msgstr "Activa coma flotante de hardware."
 
 #: config/s390/s390.opt:116
 msgid "Takes two non-negative integer numbers separated by a comma."
-msgstr ""
+msgstr "Toma dos números enteros no negativos separados por coma."
 
 #: config/s390/s390.opt:134
-#, fuzzy
-#| msgid "Use hardware floating point instructions"
 msgid "Use hardware transactional execution instructions."
-msgstr "Usa instrucciones de coma flotante por hardware."
+msgstr "Usa instrucciones de ejecución transaccional por hardware."
 
 #: config/s390/s390.opt:138
-#, fuzzy
-#| msgid "hardware decimal floating point instructions not available on %s"
 msgid "Use hardware vector facility instructions and enable the vector ABI."
-msgstr "las instrucciones de coma flotante decimal de hardware no están disponibles en %s."
+msgstr "Usa las instrucciones de operaciones con vectores por hardware y activa el vector ABI."
 
 #: config/s390/s390.opt:142
-#, fuzzy
-#| msgid "Use packed stack layout"
 msgid "Use packed stack layout."
 msgstr "Usa la disposición de pila empacada."
 
 #: config/s390/s390.opt:146
-#, fuzzy
-#| msgid "Use bras for executable < 64k"
 msgid "Use bras for executable < 64k."
 msgstr "Usa bras para el ejecutable < 64k."
 
 #: config/s390/s390.opt:150
-#, fuzzy
-#| msgid "Disable hardware floating point"
 msgid "Disable hardware floating point."
 msgstr "Desactiva la coma flotante de hardware."
 
 #: config/s390/s390.opt:154
-#, fuzzy
-#| msgid "Set the max. number of bytes which has to be left to stack size before a trap instruction is triggered"
 msgid "Set the max. number of bytes which has to be left to stack size before a trap instruction is triggered."
 msgstr "Establece el número máximo de bytes que se deben dejar en el tamaño de la pila antes de que se active una instrucción trap."
 
 #: config/s390/s390.opt:158
 msgid "Switches off the -mstack-guard= option."
-msgstr ""
+msgstr "Desactiva la opción -mstack-guard=."
 
 #: config/s390/s390.opt:162
-#, fuzzy
-#| msgid "Emit extra code in the function prologue in order to trap if the stack size exceeds the given limit"
 msgid "Emit extra code in the function prologue in order to trap if the stack size exceeds the given limit."
 msgstr "Emite código extra en el prólogo de la función para atrapar en caso de que el tamaño de la pila exceda el límite dado."
 
 #: config/s390/s390.opt:166
 msgid "Switches off the -mstack-size= option."
-msgstr ""
+msgstr "Desactiva la opción -mstack-size=."
 
 #: config/s390/s390.opt:174
-#, fuzzy
-#| msgid "Generate string instructions for block moves"
 msgid "Use the mvcle instruction for block moves."
-msgstr "Genera instrucciones de cadena para movimiento de bloques."
+msgstr "Utiliza la instrucción mvcle para movimiento de bloques."
 
 #: config/s390/s390.opt:178
 msgid "Enable the z vector language extension providing the context-sensitive"
-msgstr ""
+msgstr "Activa la extensión de lenguaje de vector z que ofrece la sensibilidad al contexto"
 
 #: config/s390/s390.opt:183
-#, fuzzy
-#| msgid "Warn if a function uses alloca or creates an array with dynamic size"
 msgid "Warn if a function uses alloca or creates an array with dynamic size."
 msgstr "Avisa si una función usa alloca o crea una matriz de tamaño dinámico."
 
 #: config/s390/s390.opt:187
-#, fuzzy
-#| msgid "Warn if a single function's framesize exceeds the given framesize"
 msgid "Warn if a single function's framesize exceeds the given framesize."
 msgstr "Avisa si el tamaño de marco de una sola función excede el tamaño de marco dado."
 
 #: config/s390/s390.opt:191
-#, fuzzy
-#| msgid "z/Architecture"
 msgid "z/Architecture."
 msgstr "z/Architecture."
 
@@ -10193,47 +9860,37 @@ msgstr "Usa el simulador de tiempo de ejecución."
 
 #: config/rl78/rl78.opt:31
 msgid "Selects the type of hardware multiplication and division to use (none/g13/g14)."
-msgstr ""
+msgstr "Selecciona el tipo de multiplicación y división por hardware que se usará (none/g13/g14)."
 
 #: config/rl78/rl78.opt:50
-#, fuzzy
-#| msgid "Specifies the number of registers to reserve for interrupt handlers."
 msgid "Use all registers, reserving none for interrupt handlers."
-msgstr "Especifica el número de registros a reservar para los manejadores de interrupciones."
+msgstr "Usa todos los registros, sin reservar ninguno para los manejadores de interrupciones."
 
 #: config/rl78/rl78.opt:54
 msgid "Enable assembler and linker relaxation.  Enabled by default at -Os."
-msgstr ""
+msgstr "Activa la relajación del ensamblador y el enlazador.  Activado por defecto a -Os."
 
 #: config/rl78/rl78.opt:58
 msgid "Selects the type of RL78 core being targeted (g10/g13/g14).  The default is the G14.  If set, also selects the hardware multiply support to be used."
-msgstr ""
+msgstr "Selecciona el tipo de núcleo RL78 de destino (g10/g13/g14).  El predeterminado es G14.  Si se pone, también selecciona el soporte de multiplicación por hardware que se usará."
 
 #: config/rl78/rl78.opt:77
-#, fuzzy
-#| msgid "Alias for -mcpu."
 msgid "Alias for -mcpu=g10."
-msgstr "Alias para -mcpu."
+msgstr "Alias para -mcpu=g10."
 
 #: config/rl78/rl78.opt:81
-#, fuzzy
-#| msgid "Alias for -mcpu."
 msgid "Alias for -mcpu=g13."
-msgstr "Alias para -mcpu."
+msgstr "Alias para -mcpu=g13."
 
 #: config/rl78/rl78.opt:85 config/rl78/rl78.opt:89
-#, fuzzy
-#| msgid "Alias for -mcpu."
 msgid "Alias for -mcpu=g14."
-msgstr "Alias para -mcpu."
+msgstr "Alias para -mcpu=g14."
 
 #: config/rl78/rl78.opt:93
 msgid "Assume ES is zero throughout program execution, use ES: for read-only data."
-msgstr ""
+msgstr "Asume que ES es cero a lo largo de la ejecución del programa, usa ES: para datos de solo lectura."
 
 #: config/stormy16/stormy16.opt:24
-#, fuzzy
-#| msgid "Provide libraries for the simulator"
 msgid "Provide libraries for the simulator."
 msgstr "Provee bibliotecas para el simulador."
 
@@ -10254,8 +9911,6 @@ msgid "TLS dialect to use:"
 msgstr "Dialecto TLS a utilizar:"
 
 #: config/arm/arm.opt:36
-#, fuzzy
-#| msgid "Specify an ABI"
 msgid "Specify an ABI."
 msgstr "Especifica una ABI."
 
@@ -10264,26 +9919,18 @@ msgid "Known ARM ABIs (for use with the -mabi= option):"
 msgstr "ABIs ARM conocidas (para uso con la opción -mabi=):"
 
 #: config/arm/arm.opt:59
-#, fuzzy
-#| msgid "Generate a call to abort if a noreturn function returns"
 msgid "Generate a call to abort if a noreturn function returns."
 msgstr "Genera una llamada a abortar si una funcion noreturn devuelve."
 
 #: config/arm/arm.opt:66
-#, fuzzy
-#| msgid "Pass FP arguments in FP registers"
 msgid "Pass FP arguments in FP registers."
 msgstr "Pasa los argumentos FP en los registros FP."
 
 #: config/arm/arm.opt:70
-#, fuzzy
-#| msgid "Generate APCS conformant stack frames"
 msgid "Generate APCS conformant stack frames."
 msgstr "Genera marcos de pila que cumplan con APCS."
 
 #: config/arm/arm.opt:74
-#, fuzzy
-#| msgid "Generate re-entrant, PIC code"
 msgid "Generate re-entrant, PIC code."
 msgstr "Genera código PIC reentrante."
 
@@ -10292,14 +9939,10 @@ msgid "Generate code in 32 bit ARM state."
 msgstr "Genera código en el estado ARM de 32 bit."
 
 #: config/arm/arm.opt:98
-#, fuzzy
-#| msgid "Thumb: Assume non-static functions may be called from ARM code"
 msgid "Thumb: Assume non-static functions may be called from ARM code."
 msgstr "Thumb: Asume que las funciones no static pueden ser llamadas desde código ARM."
 
 #: config/arm/arm.opt:102
-#, fuzzy
-#| msgid "Thumb: Assume function pointers may go to non-Thumb aware code"
 msgid "Thumb: Assume function pointers may go to non-Thumb aware code."
 msgstr "Thumb: Asume que los punteros de función pueden ir a código no informado sobre Thumb."
 
-- 
cgit v1.1


From 04bfa1e4691afb9eb0edcd9e80ec1e542628c7d4 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Thu, 8 Dec 2016 06:32:24 -0800
Subject: Enable -fsanitize-address-use-after-scope only if -fsanitize=address
 is enabled

2016-12-08  Dmitry Vyukov  <dvyukov@google.com>

	* opts.c (finish_options): Enable
	-fsanitize-address-use-after-scope only if -fsanitize=address is enabled
	(not -fsanitize=kernel-address).
	* doc/invoke.texi (-fsanitize=kernel-address):
	Don't say that it enables -fsanitize-address-use-after-scope.

From-SVN: r243441
---
 gcc/ChangeLog       | 8 ++++++++
 gcc/doc/invoke.texi | 1 -
 gcc/opts.c          | 2 +-
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 91a1ed3..3766666 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2016-12-08  Dmitry Vyukov  <dvyukov@google.com>
+
+	* opts.c (finish_options): Enable
+	-fsanitize-address-use-after-scope only if -fsanitize=address is enabled
+	(not -fsanitize=kernel-address).
+	* doc/invoke.texi (-fsanitize=kernel-address):
+	Don't say that it enables -fsanitize-address-use-after-scope.
+
 2016-12-08  Bin Cheng  <bin.cheng@arm.com>
 
 	PR middle-end/78684
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 5622c0f..02a34fb 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -10640,7 +10640,6 @@ The option can't be combined with @option{-fsanitize=thread}.
 @item -fsanitize=kernel-address
 @opindex fsanitize=kernel-address
 Enable AddressSanitizer for Linux kernel.
-The option enables @option{-fsanitize-address-use-after-scope}.
 See @uref{https://github.com/google/kasan/wiki} for more details.
 
 @item -fsanitize=thread
diff --git a/gcc/opts.c b/gcc/opts.c
index c61c367..890da03 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -990,7 +990,7 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
 
   /* Enable -fsanitize-address-use-after-scope if address sanitizer is
      enabled.  */
-  if (opts->x_flag_sanitize
+  if ((opts->x_flag_sanitize & SANITIZE_USER_ADDRESS)
       && !opts_set->x_flag_sanitize_address_use_after_scope)
     opts->x_flag_sanitize_address_use_after_scope = true;
 
-- 
cgit v1.1


From 55dccc1e0dbc8b975f5d14f151991f1c8412768e Mon Sep 17 00:00:00 2001
From: Ian Lance Taylor <ian@gcc.gnu.org>
Date: Thu, 8 Dec 2016 15:51:25 +0000
Subject: compiler: make Slice_construction_expression::do_flatten idempotent

    Because of the way we handle call expressions with multiple results,
    it's possible for expressions to be flattened more than once.  In the
    case of Slice_construction_expression, allocating the slice storage
    multiple times caused a compiler crash as one of the
    Temporary_statement's wound up not getting a backend expression.

    Test case is https://golang.org/cl/34020.

    Reviewed-on: https://go-review.googlesource.com/34026

From-SVN: r243442
---
 gcc/go/gofrontend/MERGE          | 2 +-
 gcc/go/gofrontend/expressions.cc | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE
index 7586fd4..dbba68f 100644
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
@@ -1,4 +1,4 @@
-08d221726e3f50cb197a931ba385fac67f66a028
+7a941ba323660ec7034cd92d4eab466024a3c72c
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
diff --git a/gcc/go/gofrontend/expressions.cc b/gcc/go/gofrontend/expressions.cc
index 24f6b12..899fc9b 100644
--- a/gcc/go/gofrontend/expressions.cc
+++ b/gcc/go/gofrontend/expressions.cc
@@ -12951,8 +12951,8 @@ Slice_construction_expression::do_flatten(Gogo* gogo, Named_object* no,
   // Base class flattening first
   this->Array_construction_expression::do_flatten(gogo, no, inserter);
 
-  // Create an stack-allocated storage temp if storage won't escape
-  if (!this->storage_escapes_)
+  // Create a stack-allocated storage temp if storage won't escape
+  if (!this->storage_escapes_ && this->slice_storage_ == NULL)
     {
       Location loc = this->location();
       this->array_val_ = create_array_val();
-- 
cgit v1.1


From 0a7577bbac00ea3ecfa8f25d38c98ea696f3a0ce Mon Sep 17 00:00:00 2001
From: Eric Botcazou <ebotcazou@adacore.com>
Date: Thu, 8 Dec 2016 15:52:11 +0000
Subject: re PR libstdc++/78264 (ICE in build_noexcept_spec, at
 cp/except.c:1196)

	PR libstdc++/78264
	* include/bits/c++config (_GLIBCXX_NOEXCEPT_PARM): Turn _N into _NE.
	(_GLIBCXX_NOEXCEPT_QUAL): Likewise.

From-SVN: r243443
---
 libstdc++-v3/ChangeLog              | 6 ++++++
 libstdc++-v3/include/bits/c++config | 4 ++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index 5d8ee46..49ddd5b 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,9 @@
+2016-12-08  Eric Botcazou  <ebotcazou@adacore.com>
+
+	PR libstdc++/78264
+	* include/bits/c++config (_GLIBCXX_NOEXCEPT_PARM): Turn _N into _NE.
+	(_GLIBCXX_NOEXCEPT_QUAL): Likewise.
+
 2016-12-08  Jonathan Wakely  <jwakely@redhat.com>
 
 	* testsuite/experimental/filesystem/path/construct/range.cc: Don't
diff --git a/libstdc++-v3/include/bits/c++config b/libstdc++-v3/include/bits/c++config
index 39e55f4..1304412 100644
--- a/libstdc++-v3/include/bits/c++config
+++ b/libstdc++-v3/include/bits/c++config
@@ -147,8 +147,8 @@
 #endif
 
 #if __cpp_noexcept_function_type
-#define _GLIBCXX_NOEXCEPT_PARM , bool _N
-#define _GLIBCXX_NOEXCEPT_QUAL noexcept (_N)
+#define _GLIBCXX_NOEXCEPT_PARM , bool _NE
+#define _GLIBCXX_NOEXCEPT_QUAL noexcept (_NE)
 #else
 #define _GLIBCXX_NOEXCEPT_PARM
 #define _GLIBCXX_NOEXCEPT_QUAL
-- 
cgit v1.1


From b2264b0964560e724010aac2faf4f6a3ec2729f7 Mon Sep 17 00:00:00 2001
From: Ian Lance Taylor <ian@gcc.gnu.org>
Date: Thu, 8 Dec 2016 15:54:30 +0000
Subject: runtime: allocate _panic struct on heap

    The gc library allocates a _panic struct on the stack. This does not
    work for gccgo, because when a deferred function recovers the panic we
    unwind the stack up to that point so that returning from the function
    will work correctly.

    Allocating on the stack is fine if the panic is not recovered, and it
    works fine if the panic is recovered by a function that
    returns. However, it fails if the panic is recovered by a function
    that itself panics, and if that second panic is then recovered by a
    function higher up on the stack. When we unwind the stack to that
    second panic, the g will wind up pointing at a panic farther down on
    the stack. Even then everything will often work fine, except when the
    deferred function catching the second panic makes a bunch of calls
    that use stack space before returning. In that case the code can
    overwrite the panic struct, which will then cause disaster when we
    remove the struct from the linked list, as the link field will be
    garbage. This case is rare enough that all the x86 tests were passing,
    but there was a failure on ppc64le.

    Before https://golang.org/cl/33414 we allocated the panic struct on
    the heap, so go back to doing that again.

    Fixes golang/go#18228.

    Reviewed-on: https://go-review.googlesource.com/34027

From-SVN: r243444
---
 gcc/go/gofrontend/MERGE   |  2 +-
 libgo/go/runtime/panic.go | 17 +++++++++++++----
 2 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE
index dbba68f..df38903 100644
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
@@ -1,4 +1,4 @@
-7a941ba323660ec7034cd92d4eab466024a3c72c
+2442fca7be8a4f51ddc91070fa69ef66e24593ac
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
diff --git a/libgo/go/runtime/panic.go b/libgo/go/runtime/panic.go
index 0ad178f..f7e5efe 100644
--- a/libgo/go/runtime/panic.go
+++ b/libgo/go/runtime/panic.go
@@ -415,10 +415,19 @@ func gopanic(e interface{}) {
 		throw("panic holding locks")
 	}
 
-	var p _panic
-	p.arg = e
-	p.link = gp._panic
-	gp._panic = (*_panic)(noescape(unsafe.Pointer(&p)))
+	// The gc compiler allocates this new _panic struct on the
+	// stack. We can't do that, because when a deferred function
+	// recovers the panic we unwind the stack. We unlink this
+	// entry before unwinding the stack, but that doesn't help in
+	// the case where we panic, a deferred function recovers and
+	// then panics itself, that panic is in turn recovered, and
+	// unwinds the stack past this stack frame.
+
+	p := &_panic{
+		arg:  e,
+		link: gp._panic,
+	}
+	gp._panic = p
 
 	for {
 		d := gp._defer
-- 
cgit v1.1


From 453060a9062959ceb1522b8b99adeb01b2a3f7b7 Mon Sep 17 00:00:00 2001
From: Ian Lance Taylor <ian@gcc.gnu.org>
Date: Thu, 8 Dec 2016 16:37:54 +0000
Subject: runtime: copy memory hash code from Go 1.7

    Rewrite the AES hashing code from gc assembler to C code using
    intrinsics.  The resulting code generates the same hash code for the
    same input as the gc code--that doesn't matter as such, but testing it
    ensures that the C code does something useful.

    Also change mips64pe32le to mips64p32le in configure script--noticed
    during CL review.

    Reviewed-on: https://go-review.googlesource.com/34022

From-SVN: r243445
---
 gcc/go/gofrontend/MERGE          |   2 +-
 gcc/go/gofrontend/types.cc       |   2 +-
 libgo/Makefile.am                |   1 +
 libgo/Makefile.in                |  11 +-
 libgo/configure                  |   2 +-
 libgo/configure.ac               |   2 +-
 libgo/go/runtime/alg.go          |  45 +++
 libgo/go/runtime/hash32.go       |  94 +++++++
 libgo/go/runtime/hash64.go       |  94 +++++++
 libgo/go/runtime/os_gccgo.go     |  23 ++
 libgo/go/runtime/runtime2.go     |  12 +-
 libgo/go/runtime/stubs.go        |   6 +
 libgo/go/runtime/unaligned1.go   |  17 ++
 libgo/go/runtime/unaligned2.go   |  20 ++
 libgo/runtime/aeshash.c          | 583 +++++++++++++++++++++++++++++++++++++++
 libgo/runtime/go-libmain.c       |   1 +
 libgo/runtime/go-main.c          |   1 +
 libgo/runtime/go-type-identity.c |  40 +--
 libgo/runtime/go-type.h          |   1 -
 libgo/runtime/proc.c             |   3 +-
 libgo/runtime/runtime.h          |   6 +
 libgo/runtime/runtime_c.c        |  19 ++
 22 files changed, 937 insertions(+), 48 deletions(-)
 create mode 100644 libgo/go/runtime/hash32.go
 create mode 100644 libgo/go/runtime/hash64.go
 create mode 100644 libgo/go/runtime/os_gccgo.go
 create mode 100644 libgo/go/runtime/unaligned1.go
 create mode 100644 libgo/go/runtime/unaligned2.go
 create mode 100644 libgo/runtime/aeshash.c

diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE
index df38903..6bc3797 100644
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
@@ -1,4 +1,4 @@
-2442fca7be8a4f51ddc91070fa69ef66e24593ac
+78e3527fcaf4ffd33b22e39a56e5d076844302be
 
 The first line of this file holds the git revision number of the last
 merge done from the gofrontend repository.
diff --git a/gcc/go/gofrontend/types.cc b/gcc/go/gofrontend/types.cc
index d540acb..f3cb32b 100644
--- a/gcc/go/gofrontend/types.cc
+++ b/gcc/go/gofrontend/types.cc
@@ -1648,7 +1648,7 @@ Type::type_functions(Gogo* gogo, Named_type* name, Function_type* hash_fntype,
   const char* equal_fnname;
   if (this->compare_is_identity(gogo))
     {
-      hash_fnname = "__go_type_hash_identity";
+      hash_fnname = "runtime.memhash";
       equal_fnname = "__go_type_equal_identity";
     }
   else
diff --git a/libgo/Makefile.am b/libgo/Makefile.am
index 7165dfd..b9aee9d 100644
--- a/libgo/Makefile.am
+++ b/libgo/Makefile.am
@@ -422,6 +422,7 @@ endif
 endif
 
 runtime_files = \
+	runtime/aeshash.c \
 	runtime/go-assert.c \
 	runtime/go-breakpoint.c \
 	runtime/go-caller.c \
diff --git a/libgo/Makefile.in b/libgo/Makefile.in
index 9b87db0..86d7aa8 100644
--- a/libgo/Makefile.in
+++ b/libgo/Makefile.in
@@ -189,7 +189,7 @@ libgo_llgo_la_DEPENDENCIES = $(am__DEPENDENCIES_4)
 @LIBGO_IS_DARWIN_TRUE@@LIBGO_IS_LINUX_FALSE@am__objects_4 =  \
 @LIBGO_IS_DARWIN_TRUE@@LIBGO_IS_LINUX_FALSE@	getncpu-bsd.lo
 @LIBGO_IS_LINUX_TRUE@am__objects_4 = getncpu-linux.lo
-am__objects_5 = go-assert.lo go-breakpoint.lo go-caller.lo \
+am__objects_5 = aeshash.lo go-assert.lo go-breakpoint.lo go-caller.lo \
 	go-callers.lo go-cdiv.lo go-cgo.lo go-construct-map.lo \
 	go-ffi.lo go-fieldtrack.lo go-matherr.lo go-memclr.lo \
 	go-memcmp.lo go-memequal.lo go-memmove.lo go-nanotime.lo \
@@ -767,6 +767,7 @@ toolexeclibgounicode_DATA = \
 @LIBGO_IS_DARWIN_TRUE@@LIBGO_IS_LINUX_FALSE@runtime_getncpu_file = runtime/getncpu-bsd.c
 @LIBGO_IS_LINUX_TRUE@runtime_getncpu_file = runtime/getncpu-linux.c
 runtime_files = \
+	runtime/aeshash.c \
 	runtime/go-assert.c \
 	runtime/go-breakpoint.c \
 	runtime/go-caller.c \
@@ -1446,6 +1447,7 @@ mostlyclean-compile:
 distclean-compile:
 	-rm -f *.tab.c
 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/aeshash.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/env_posix.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/getncpu-bsd.Plo@am__quote@
 @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/getncpu-irix.Plo@am__quote@
@@ -1573,6 +1575,13 @@ libgolibbegin_a-go-libmain.obj: runtime/go-libmain.c
 @AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
 @am__fastdepCC_FALSE@	$(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(libgolibbegin_a_CFLAGS) $(CFLAGS) -c -o libgolibbegin_a-go-libmain.obj `if test -f 'runtime/go-libmain.c'; then $(CYGPATH_W) 'runtime/go-libmain.c'; else $(CYGPATH_W) '$(srcdir)/runtime/go-libmain.c'; fi`
 
+aeshash.lo: runtime/aeshash.c
+@am__fastdepCC_TRUE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT aeshash.lo -MD -MP -MF $(DEPDIR)/aeshash.Tpo -c -o aeshash.lo `test -f 'runtime/aeshash.c' || echo '$(srcdir)/'`runtime/aeshash.c
+@am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/aeshash.Tpo $(DEPDIR)/aeshash.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	source='runtime/aeshash.c' object='aeshash.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@	DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o aeshash.lo `test -f 'runtime/aeshash.c' || echo '$(srcdir)/'`runtime/aeshash.c
+
 go-assert.lo: runtime/go-assert.c
 @am__fastdepCC_TRUE@	$(LIBTOOL)  --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT go-assert.lo -MD -MP -MF $(DEPDIR)/go-assert.Tpo -c -o go-assert.lo `test -f 'runtime/go-assert.c' || echo '$(srcdir)/'`runtime/go-assert.c
 @am__fastdepCC_TRUE@	$(am__mv) $(DEPDIR)/go-assert.Tpo $(DEPDIR)/go-assert.Plo
diff --git a/libgo/configure b/libgo/configure
index 9eac5c0..7789c120 100755
--- a/libgo/configure
+++ b/libgo/configure
@@ -13624,7 +13624,7 @@ esac
 # supported by the gofrontend and all architectures supported by the
 # gc toolchain.
 # N.B. Keep in sync with gcc/testsuite/go.test/go-test.exp (go-set-goarch).
-ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mipso32 mipsn32 mipso64 mipsn64 mips mipsle mips64 mips64le mips64p32 mips64pe32le ppc ppc64 ppc64le s390 s390x sparc sparc64"
+ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mipso32 mipsn32 mipso64 mipsn64 mips mipsle mips64 mips64le mips64p32 mips64p32le ppc ppc64 ppc64le s390 s390x sparc sparc64"
 
 # All known GOARCH_FAMILY values.
 ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 PPC PPC64 S390 S390X SPARC SPARC64"
diff --git a/libgo/configure.ac b/libgo/configure.ac
index 9e76540..77a744e 100644
--- a/libgo/configure.ac
+++ b/libgo/configure.ac
@@ -197,7 +197,7 @@ AC_SUBST(USE_DEJAGNU)
 # supported by the gofrontend and all architectures supported by the
 # gc toolchain.
 # N.B. Keep in sync with gcc/testsuite/go.test/go-test.exp (go-set-goarch).
-ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mipso32 mipsn32 mipso64 mipsn64 mips mipsle mips64 mips64le mips64p32 mips64pe32le ppc ppc64 ppc64le s390 s390x sparc sparc64"
+ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mipso32 mipsn32 mipso64 mipsn64 mips mipsle mips64 mips64le mips64p32 mips64p32le ppc ppc64 ppc64le s390 s390x sparc sparc64"
 
 # All known GOARCH_FAMILY values.
 ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 PPC PPC64 S390 S390X SPARC SPARC64"
diff --git a/libgo/go/runtime/alg.go b/libgo/go/runtime/alg.go
index 8f7c3c0..5ec19d0 100644
--- a/libgo/go/runtime/alg.go
+++ b/libgo/go/runtime/alg.go
@@ -23,12 +23,29 @@ import (
 //go:linkname efacevaleq runtime.efacevaleq
 //go:linkname eqstring runtime.eqstring
 //go:linkname cmpstring runtime.cmpstring
+//
+// Temporary to be called from C code.
+//go:linkname alginit runtime.alginit
 
 const (
 	c0 = uintptr((8-sys.PtrSize)/4*2860486313 + (sys.PtrSize-4)/4*33054211828000289)
 	c1 = uintptr((8-sys.PtrSize)/4*3267000013 + (sys.PtrSize-4)/4*23344194077549503)
 )
 
+var useAeshash bool
+
+// in C code
+func aeshashbody(p unsafe.Pointer, h, s uintptr, sched []byte) uintptr
+
+func aeshash(p unsafe.Pointer, h, s uintptr) uintptr {
+	return aeshashbody(p, h, s, aeskeysched[:])
+}
+
+func aeshashstr(p unsafe.Pointer, h uintptr) uintptr {
+	ps := (*stringStruct)(p)
+	return aeshashbody(unsafe.Pointer(ps.str), h, uintptr(ps.len), aeskeysched[:])
+}
+
 func interhash(p unsafe.Pointer, h uintptr, size uintptr) uintptr {
 	a := (*iface)(p)
 	tab := a.tab
@@ -198,7 +215,35 @@ func cmpstring(x, y string) int {
 
 // Force the creation of function descriptors for equality and hash
 // functions.  These will be referenced directly by the compiler.
+var _ = memhash
 var _ = interhash
 var _ = interequal
 var _ = nilinterhash
 var _ = nilinterequal
+
+const hashRandomBytes = sys.PtrSize / 4 * 64
+
+// used in asm_{386,amd64}.s to seed the hash function
+var aeskeysched [hashRandomBytes]byte
+
+// used in hash{32,64}.go to seed the hash function
+var hashkey [4]uintptr
+
+func alginit() {
+	// Install aes hash algorithm if we have the instructions we need
+	if (GOARCH == "386" || GOARCH == "amd64") &&
+		GOOS != "nacl" &&
+		cpuid_ecx&(1<<25) != 0 && // aes (aesenc)
+		cpuid_ecx&(1<<9) != 0 && // sse3 (pshufb)
+		cpuid_ecx&(1<<19) != 0 { // sse4.1 (pinsr{d,q})
+		useAeshash = true
+		// Initialize with random data so hash collisions will be hard to engineer.
+		getRandomData(aeskeysched[:])
+		return
+	}
+	getRandomData((*[len(hashkey) * sys.PtrSize]byte)(unsafe.Pointer(&hashkey))[:])
+	hashkey[0] |= 1 // make sure these numbers are odd
+	hashkey[1] |= 1
+	hashkey[2] |= 1
+	hashkey[3] |= 1
+}
diff --git a/libgo/go/runtime/hash32.go b/libgo/go/runtime/hash32.go
new file mode 100644
index 0000000..cfb3a58
--- /dev/null
+++ b/libgo/go/runtime/hash32.go
@@ -0,0 +1,94 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Hashing algorithm inspired by
+//   xxhash: https://code.google.com/p/xxhash/
+// cityhash: https://code.google.com/p/cityhash/
+
+// +build 386 arm armbe m68k mipso32 mipsn32 mips mipsle ppc s390 sparc
+
+package runtime
+
+import "unsafe"
+
+// For gccgo, use go:linkname to rename compiler-called functions to
+// themselves, so that the compiler will export them.
+//
+//go:linkname memhash runtime.memhash
+
+const (
+	// Constants for multiplication: four random odd 32-bit numbers.
+	m1 = 3168982561
+	m2 = 3339683297
+	m3 = 832293441
+	m4 = 2336365089
+)
+
+func memhash(p unsafe.Pointer, seed, s uintptr) uintptr {
+	if GOARCH == "386" && GOOS != "nacl" && useAeshash {
+		return aeshash(p, seed, s)
+	}
+	h := uint32(seed + s*hashkey[0])
+tail:
+	switch {
+	case s == 0:
+	case s < 4:
+		h ^= uint32(*(*byte)(p))
+		h ^= uint32(*(*byte)(add(p, s>>1))) << 8
+		h ^= uint32(*(*byte)(add(p, s-1))) << 16
+		h = rotl_15(h*m1) * m2
+	case s == 4:
+		h ^= readUnaligned32(p)
+		h = rotl_15(h*m1) * m2
+	case s <= 8:
+		h ^= readUnaligned32(p)
+		h = rotl_15(h*m1) * m2
+		h ^= readUnaligned32(add(p, s-4))
+		h = rotl_15(h*m1) * m2
+	case s <= 16:
+		h ^= readUnaligned32(p)
+		h = rotl_15(h*m1) * m2
+		h ^= readUnaligned32(add(p, 4))
+		h = rotl_15(h*m1) * m2
+		h ^= readUnaligned32(add(p, s-8))
+		h = rotl_15(h*m1) * m2
+		h ^= readUnaligned32(add(p, s-4))
+		h = rotl_15(h*m1) * m2
+	default:
+		v1 := h
+		v2 := uint32(seed * hashkey[1])
+		v3 := uint32(seed * hashkey[2])
+		v4 := uint32(seed * hashkey[3])
+		for s >= 16 {
+			v1 ^= readUnaligned32(p)
+			v1 = rotl_15(v1*m1) * m2
+			p = add(p, 4)
+			v2 ^= readUnaligned32(p)
+			v2 = rotl_15(v2*m2) * m3
+			p = add(p, 4)
+			v3 ^= readUnaligned32(p)
+			v3 = rotl_15(v3*m3) * m4
+			p = add(p, 4)
+			v4 ^= readUnaligned32(p)
+			v4 = rotl_15(v4*m4) * m1
+			p = add(p, 4)
+			s -= 16
+		}
+		h = v1 ^ v2 ^ v3 ^ v4
+		goto tail
+	}
+	h ^= h >> 17
+	h *= m3
+	h ^= h >> 13
+	h *= m4
+	h ^= h >> 16
+	return uintptr(h)
+}
+
+// Note: in order to get the compiler to issue rotl instructions, we
+// need to constant fold the shift amount by hand.
+// TODO: convince the compiler to issue rotl instructions after inlining.
+func rotl_15(x uint32) uint32 {
+	return (x << 15) | (x >> (32 - 15))
+}
diff --git a/libgo/go/runtime/hash64.go b/libgo/go/runtime/hash64.go
new file mode 100644
index 0000000..551d5b5
--- /dev/null
+++ b/libgo/go/runtime/hash64.go
@@ -0,0 +1,94 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Hashing algorithm inspired by
+//   xxhash: https://code.google.com/p/xxhash/
+// cityhash: https://code.google.com/p/cityhash/
+
+// +build amd64 amd64p32 arm64 mips64 mips64le ppc64 ppc64le s390x alpha arm64be ia64 mipso64 mipsn64 mips64p32 mips64p32le sparc64
+
+package runtime
+
+import "unsafe"
+
+// For gccgo, use go:linkname to rename compiler-called functions to
+// themselves, so that the compiler will export them.
+//
+//go:linkname memhash runtime.memhash
+
+const (
+	// Constants for multiplication: four random odd 64-bit numbers.
+	m1 = 16877499708836156737
+	m2 = 2820277070424839065
+	m3 = 9497967016996688599
+	m4 = 15839092249703872147
+)
+
+func memhash(p unsafe.Pointer, seed, s uintptr) uintptr {
+	if GOARCH == "amd64" && GOOS != "nacl" && useAeshash {
+		return aeshash(p, seed, s)
+	}
+	h := uint64(seed + s*hashkey[0])
+tail:
+	switch {
+	case s == 0:
+	case s < 4:
+		h ^= uint64(*(*byte)(p))
+		h ^= uint64(*(*byte)(add(p, s>>1))) << 8
+		h ^= uint64(*(*byte)(add(p, s-1))) << 16
+		h = rotl_31(h*m1) * m2
+	case s <= 8:
+		h ^= uint64(readUnaligned32(p))
+		h ^= uint64(readUnaligned32(add(p, s-4))) << 32
+		h = rotl_31(h*m1) * m2
+	case s <= 16:
+		h ^= readUnaligned64(p)
+		h = rotl_31(h*m1) * m2
+		h ^= readUnaligned64(add(p, s-8))
+		h = rotl_31(h*m1) * m2
+	case s <= 32:
+		h ^= readUnaligned64(p)
+		h = rotl_31(h*m1) * m2
+		h ^= readUnaligned64(add(p, 8))
+		h = rotl_31(h*m1) * m2
+		h ^= readUnaligned64(add(p, s-16))
+		h = rotl_31(h*m1) * m2
+		h ^= readUnaligned64(add(p, s-8))
+		h = rotl_31(h*m1) * m2
+	default:
+		v1 := h
+		v2 := uint64(seed * hashkey[1])
+		v3 := uint64(seed * hashkey[2])
+		v4 := uint64(seed * hashkey[3])
+		for s >= 32 {
+			v1 ^= readUnaligned64(p)
+			v1 = rotl_31(v1*m1) * m2
+			p = add(p, 8)
+			v2 ^= readUnaligned64(p)
+			v2 = rotl_31(v2*m2) * m3
+			p = add(p, 8)
+			v3 ^= readUnaligned64(p)
+			v3 = rotl_31(v3*m3) * m4
+			p = add(p, 8)
+			v4 ^= readUnaligned64(p)
+			v4 = rotl_31(v4*m4) * m1
+			p = add(p, 8)
+			s -= 32
+		}
+		h = v1 ^ v2 ^ v3 ^ v4
+		goto tail
+	}
+
+	h ^= h >> 29
+	h *= m3
+	h ^= h >> 32
+	return uintptr(h)
+}
+
+// Note: in order to get the compiler to issue rotl instructions, we
+// need to constant fold the shift amount by hand.
+// TODO: convince the compiler to issue rotl instructions after inlining.
+func rotl_31(x uint64) uint64 {
+	return (x << 31) | (x >> (64 - 31))
+}
diff --git a/libgo/go/runtime/os_gccgo.go b/libgo/go/runtime/os_gccgo.go
new file mode 100644
index 0000000..4609432
--- /dev/null
+++ b/libgo/go/runtime/os_gccgo.go
@@ -0,0 +1,23 @@
+// Copyright 2009 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package runtime
+
+import (
+	"unsafe"
+)
+
+var urandom_dev = []byte("/dev/urandom\x00")
+
+func getRandomData(r []byte) {
+	if startupRandomData != nil {
+		n := copy(r, startupRandomData)
+		extendRandom(r, n)
+		return
+	}
+	fd := open(&urandom_dev[0], 0 /* O_RDONLY */, 0)
+	n := read(fd, unsafe.Pointer(&r[0]), int32(len(r)))
+	closefd(fd)
+	extendRandom(r, int(n))
+}
diff --git a/libgo/go/runtime/runtime2.go b/libgo/go/runtime/runtime2.go
index c8db7ad..4712318 100644
--- a/libgo/go/runtime/runtime2.go
+++ b/libgo/go/runtime/runtime2.go
@@ -5,6 +5,7 @@
 package runtime
 
 import (
+	"runtime/internal/sys"
 	"unsafe"
 )
 
@@ -668,7 +669,6 @@ type forcegcstate struct {
 // the ELF AT_RANDOM auxiliary vector (vdso_linux_amd64.go or os_linux_386.go).
 var startupRandomData []byte
 
-/*
 // extendRandom extends the random numbers in r[:n] to the whole slice r.
 // Treats n<0 as n==0.
 func extendRandom(r []byte, n int) {
@@ -689,7 +689,6 @@ func extendRandom(r []byte, n int) {
 		}
 	}
 }
-*/
 
 // deferred subroutine calls
 // This is the gccgo version.
@@ -770,11 +769,12 @@ var (
 
 	sched schedt
 
-//	newprocs    int32
+	//	newprocs    int32
+
+	// Information about what cpu features are available.
+	// Set on startup.
+	cpuid_ecx uint32
 
-// Information about what cpu features are available.
-// Set on startup in asm_{x86,amd64}.s.
-//	cpuid_ecx         uint32
 //	cpuid_edx         uint32
 //	cpuid_ebx7        uint32
 //	lfenceBeforeRdtsc bool
diff --git a/libgo/go/runtime/stubs.go b/libgo/go/runtime/stubs.go
index b2f1829..b4fee6b 100644
--- a/libgo/go/runtime/stubs.go
+++ b/libgo/go/runtime/stubs.go
@@ -248,6 +248,12 @@ func funcPC(f interface{}) uintptr {
 	return **(**uintptr)(i.data)
 }
 
+// For gccgo, to communicate from the C code to the Go code.
+//go:linkname setCpuidECX runtime.setCpuidECX
+func setCpuidECX(v uint32) {
+	cpuid_ecx = v
+}
+
 // typedmemmove copies a typed value.
 // For gccgo for now.
 //go:nosplit
diff --git a/libgo/go/runtime/unaligned1.go b/libgo/go/runtime/unaligned1.go
new file mode 100644
index 0000000..c94f19e
--- /dev/null
+++ b/libgo/go/runtime/unaligned1.go
@@ -0,0 +1,17 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build 386 amd64 amd64p32 arm64 ppc64 ppc64le s390x ppc s390 arm64be
+
+package runtime
+
+import "unsafe"
+
+func readUnaligned32(p unsafe.Pointer) uint32 {
+	return *(*uint32)(p)
+}
+
+func readUnaligned64(p unsafe.Pointer) uint64 {
+	return *(*uint64)(p)
+}
diff --git a/libgo/go/runtime/unaligned2.go b/libgo/go/runtime/unaligned2.go
new file mode 100644
index 0000000..e52d6ce
--- /dev/null
+++ b/libgo/go/runtime/unaligned2.go
@@ -0,0 +1,20 @@
+// Copyright 2014 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// +build arm mips64 mips64le armbe m68k mipso32 mipsn32 mips mipsle sparc alpha ia64 mipso64 mipsn64 mips64p32 mips64p32le sparc64
+
+package runtime
+
+import "unsafe"
+
+// Note: These routines perform the read with an unspecified endianness.
+func readUnaligned32(p unsafe.Pointer) uint32 {
+	q := (*[4]byte)(p)
+	return uint32(q[0]) + uint32(q[1])<<8 + uint32(q[2])<<16 + uint32(q[3])<<24
+}
+
+func readUnaligned64(p unsafe.Pointer) uint64 {
+	q := (*[8]byte)(p)
+	return uint64(q[0]) + uint64(q[1])<<8 + uint64(q[2])<<16 + uint64(q[3])<<24 + uint64(q[4])<<32 + uint64(q[5])<<40 + uint64(q[6])<<48 + uint64(q[7])<<56
+}
diff --git a/libgo/runtime/aeshash.c b/libgo/runtime/aeshash.c
new file mode 100644
index 0000000..faa90e0
--- /dev/null
+++ b/libgo/runtime/aeshash.c
@@ -0,0 +1,583 @@
+// Copyright 2016 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Hash code using AES intrinsics.
+
+#include "runtime.h"
+
+uintptr aeshashbody(void*, uintptr, uintptr, Slice)
+	__asm__(GOSYM_PREFIX "runtime.aeshashbody");
+
+uintptr aeshashbody(void*, uintptr, uintptr, Slice)
+	__attribute__((no_split_stack));
+
+#if defined(__i386__) || defined(__x86_64__)
+
+#include <emmintrin.h>
+#include <tmmintrin.h>
+#include <wmmintrin.h>
+
+// Force appropriate CPU level.  We won't call here unless the CPU
+// supports it.
+
+#pragma GCC target("ssse3", "aes")
+
+#ifdef __x86_64__
+
+// aeshashbody implements a hash function using AES instructions
+// available in recent x86 processors. Note this is not encryption,
+// just hashing.  It hashes the SIZE bytes at P, mixing in SEED and up
+// to 128 bytes read from AESKEYSCHED.__values, and returns the low 64
+// bits of the final state.  This is written to produce exactly the same
+// results as the gc implementation, not because that matters, but just
+// to ensure that this does something reasonable.
+uintptr aeshashbody(void* p, uintptr seed, uintptr size, Slice aeskeysched) {
+	__m128i mseed, mseed2, mseed3, mseed4, mseed5, mseed6, mseed7, mseed8;
+	__m128i mval, mval2, mval3, mval4, mval5, mval6, mval7, mval8;
+
+	// Start with hash seed.
+	mseed = _mm_cvtsi64_si128(seed);
+	// Insert the low 16 bits of the length into 16-bit lane 4.
+	mseed = _mm_insert_epi16(mseed, size, 4);
+	// Copy lane 4 into all four upper lanes (length repeated 4 times total).
+	mseed = _mm_shufflehi_epi16(mseed, 0);
+	// Save unscrambled seed.
+	mseed2 = mseed;
+	// XOR in per-process seed.
+	mseed ^= _mm_loadu_si128(aeskeysched.__values);
+	// Scramble seed.
+	mseed = _mm_aesenc_si128(mseed, mseed);
+
+	if (size <= 16) {
+		if (size == 0) {
+			// Return scrambled input seed.
+			return _mm_cvtsi128_si64(_mm_aesenc_si128(mseed, mseed));
+		} else if (size < 16) {
+			if ((((uintptr)(p) + 16) & 0xff0) != 0) {
+				static const uint64 masks[32]
+				  __attribute__ ((aligned(16))) =
+				  {
+				    0x0000000000000000, 0x0000000000000000,
+				    0x00000000000000ff, 0x0000000000000000,
+				    0x000000000000ffff, 0x0000000000000000,
+				    0x0000000000ffffff, 0x0000000000000000,
+				    0x00000000ffffffff, 0x0000000000000000,
+				    0x000000ffffffffff, 0x0000000000000000,
+				    0x0000ffffffffffff, 0x0000000000000000,
+				    0x00ffffffffffffff, 0x0000000000000000,
+				    0xffffffffffffffff, 0x0000000000000000,
+				    0xffffffffffffffff, 0x00000000000000ff,
+				    0xffffffffffffffff, 0x000000000000ffff,
+				    0xffffffffffffffff, 0x0000000000ffffff,
+				    0xffffffffffffffff, 0x00000000ffffffff,
+				    0xffffffffffffffff, 0x000000ffffffffff,
+				    0xffffffffffffffff, 0x0000ffffffffffff,
+				    0xffffffffffffffff, 0x00ffffffffffffff
+				  };
+
+				// 16 bytes loaded at p won't cross a page
+				// boundary, so we can load directly.
+				mval = _mm_loadu_si128(p);
+				mval &= *(const __m128i*)(&masks[size*2]); // Zero all but the low SIZE bytes.
+			} else {
+				static const uint64 shifts[32]
+				  __attribute__ ((aligned(16))) =
+				  {
+				    0x0000000000000000, 0x0000000000000000,
+				    0xffffffffffffff0f, 0xffffffffffffffff,
+				    0xffffffffffff0f0e, 0xffffffffffffffff,
+				    0xffffffffff0f0e0d, 0xffffffffffffffff,
+				    0xffffffff0f0e0d0c, 0xffffffffffffffff,
+				    0xffffff0f0e0d0c0b, 0xffffffffffffffff,
+				    0xffff0f0e0d0c0b0a, 0xffffffffffffffff,
+				    0xff0f0e0d0c0b0a09, 0xffffffffffffffff,
+				    0x0f0e0d0c0b0a0908, 0xffffffffffffffff,
+				    0x0e0d0c0b0a090807, 0xffffffffffffff0f,
+				    0x0d0c0b0a09080706, 0xffffffffffff0f0e,
+				    0x0c0b0a0908070605, 0xffffffffff0f0e0d,
+				    0x0b0a090807060504, 0xffffffff0f0e0d0c,
+				    0x0a09080706050403, 0xffffff0f0e0d0c0b,
+				    0x0908070605040302, 0xffff0f0e0d0c0b0a,
+				    0x0807060504030201, 0xff0f0e0d0c0b0a09,
+				  };
+
+				// address ends in 11111111xxxx (low 12 bits
+				// are 0xff0-0xfff). Might be up against a page
+				// boundary, so load ending at last byte.
+				// Then shift bytes down using pshufb.
+				mval = _mm_loadu_si128((void*)((char*)p - 16 + size));
+				mval = _mm_shuffle_epi8(mval, *(const __m128i*)(&shifts[size*2]));
+			}
+		} else {
+			mval = _mm_loadu_si128(p);
+		}
+
+		// XOR data with seed.
+		mval ^= mseed;
+		// Scramble combo 3 times.
+		mval = _mm_aesenc_si128(mval, mval);
+		mval = _mm_aesenc_si128(mval, mval);
+		mval = _mm_aesenc_si128(mval, mval);
+		return _mm_cvtsi128_si64(mval);
+	} else if (size <= 32) {
+		// Make second starting seed.
+		mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
+		mseed2 = _mm_aesenc_si128(mseed2, mseed2);
+		// Load data to be hashed.
+		mval = _mm_loadu_si128(p);
+		mval2 = _mm_loadu_si128((void*)((char*)p + size - 16));
+		// XOR with seed.
+		mval ^= mseed;
+		mval2 ^= mseed2;
+		// Scramble 3 times.
+		mval = _mm_aesenc_si128(mval, mval);
+		mval2 = _mm_aesenc_si128(mval2, mval2);
+		mval = _mm_aesenc_si128(mval, mval);
+		mval2 = _mm_aesenc_si128(mval2, mval2);
+		mval = _mm_aesenc_si128(mval, mval);
+		mval2 = _mm_aesenc_si128(mval2, mval2);
+		// Combine results.
+		mval ^= mval2;
+		return _mm_cvtsi128_si64(mval);
+	} else if (size <= 64) {
+		// Make 3 more starting seeds.
+		mseed3 = mseed2;
+		mseed4 = mseed2;
+		mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
+		mseed3 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 32));
+		mseed4 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 48));
+		mseed2 = _mm_aesenc_si128(mseed2, mseed2);
+		mseed3 = _mm_aesenc_si128(mseed3, mseed3);
+		mseed4 = _mm_aesenc_si128(mseed4, mseed4);
+
+		mval = _mm_loadu_si128(p);
+		mval2 = _mm_loadu_si128((void*)((char*)p + 16));
+		mval3 = _mm_loadu_si128((void*)((char*)p + size - 32));
+		mval4 = _mm_loadu_si128((void*)((char*)p + size - 16));
+
+		mval ^= mseed;
+		mval2 ^= mseed2;
+		mval3 ^= mseed3;
+		mval4 ^= mseed4;
+
+		mval = _mm_aesenc_si128(mval, mval);
+		mval2 = _mm_aesenc_si128(mval2, mval2);
+		mval3 = _mm_aesenc_si128(mval3, mval3);
+		mval4 = _mm_aesenc_si128(mval4, mval4);
+
+		mval = _mm_aesenc_si128(mval, mval);
+		mval2 = _mm_aesenc_si128(mval2, mval2);
+		mval3 = _mm_aesenc_si128(mval3, mval3);
+		mval4 = _mm_aesenc_si128(mval4, mval4);
+
+		mval = _mm_aesenc_si128(mval, mval);
+		mval2 = _mm_aesenc_si128(mval2, mval2);
+		mval3 = _mm_aesenc_si128(mval3, mval3);
+		mval4 = _mm_aesenc_si128(mval4, mval4);
+
+		mval ^= mval3;
+		mval2 ^= mval4;
+		mval ^= mval2;
+		return _mm_cvtsi128_si64(mval);
+	} else if (size <= 128) {
+		// Make 7 more starting seeds.
+		mseed3 = mseed2;
+		mseed4 = mseed2;
+		mseed5 = mseed2;
+		mseed6 = mseed2;
+		mseed7 = mseed2;
+		mseed8 = mseed2;
+		mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
+		mseed3 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 32));
+		mseed4 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 48));
+		mseed5 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 64));
+		mseed6 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 80));
+		mseed7 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 96));
+		mseed8 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 112));
+		mseed2 = _mm_aesenc_si128(mseed2, mseed2);
+		mseed3 = _mm_aesenc_si128(mseed3, mseed3);
+		mseed4 = _mm_aesenc_si128(mseed4, mseed4);
+		mseed5 = _mm_aesenc_si128(mseed5, mseed5);
+		mseed6 = _mm_aesenc_si128(mseed6, mseed6);
+		mseed7 = _mm_aesenc_si128(mseed7, mseed7);
+		mseed8 = _mm_aesenc_si128(mseed8, mseed8);
+
+		// Load data.
+		mval = _mm_loadu_si128(p);
+		mval2 = _mm_loadu_si128((void*)((char*)p + 16));
+		mval3 = _mm_loadu_si128((void*)((char*)p + 32));
+		mval4 = _mm_loadu_si128((void*)((char*)p + 48));
+		mval5 = _mm_loadu_si128((void*)((char*)p + size - 64));
+		mval6 = _mm_loadu_si128((void*)((char*)p + size - 48));
+		mval7 = _mm_loadu_si128((void*)((char*)p + size - 32));
+		mval8 = _mm_loadu_si128((void*)((char*)p + size - 16));
+
+		// XOR with seed.
+		mval ^= mseed;
+		mval2 ^= mseed2;
+		mval3 ^= mseed3;
+		mval4 ^= mseed4;
+		mval5 ^= mseed5;
+		mval6 ^= mseed6;
+		mval7 ^= mseed7;
+		mval8 ^= mseed8;
+
+		// Scramble 3 times.
+		mval = _mm_aesenc_si128(mval, mval);
+		mval2 = _mm_aesenc_si128(mval2, mval2);
+		mval3 = _mm_aesenc_si128(mval3, mval3);
+		mval4 = _mm_aesenc_si128(mval4, mval4);
+		mval5 = _mm_aesenc_si128(mval5, mval5);
+		mval6 = _mm_aesenc_si128(mval6, mval6);
+		mval7 = _mm_aesenc_si128(mval7, mval7);
+		mval8 = _mm_aesenc_si128(mval8, mval8);
+
+		mval = _mm_aesenc_si128(mval, mval);
+		mval2 = _mm_aesenc_si128(mval2, mval2);
+		mval3 = _mm_aesenc_si128(mval3, mval3);
+		mval4 = _mm_aesenc_si128(mval4, mval4);
+		mval5 = _mm_aesenc_si128(mval5, mval5);
+		mval6 = _mm_aesenc_si128(mval6, mval6);
+		mval7 = _mm_aesenc_si128(mval7, mval7);
+		mval8 = _mm_aesenc_si128(mval8, mval8);
+
+		mval = _mm_aesenc_si128(mval, mval);
+		mval2 = _mm_aesenc_si128(mval2, mval2);
+		mval3 = _mm_aesenc_si128(mval3, mval3);
+		mval4 = _mm_aesenc_si128(mval4, mval4);
+		mval5 = _mm_aesenc_si128(mval5, mval5);
+		mval6 = _mm_aesenc_si128(mval6, mval6);
+		mval7 = _mm_aesenc_si128(mval7, mval7);
+		mval8 = _mm_aesenc_si128(mval8, mval8);
+
+		// Combine results.
+		mval ^= mval5;
+		mval2 ^= mval6;
+		mval3 ^= mval7;
+		mval4 ^= mval8;
+		mval ^= mval3;
+		mval2 ^= mval4;
+		mval ^= mval2;
+		return _mm_cvtsi128_si64(mval);
+	} else {
+		// Make 7 more starting seeds.
+		mseed3 = mseed2;
+		mseed4 = mseed2;
+		mseed5 = mseed2;
+		mseed6 = mseed2;
+		mseed7 = mseed2;
+		mseed8 = mseed2;
+		mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
+		mseed3 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 32));
+		mseed4 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 48));
+		mseed5 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 64));
+		mseed6 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 80));
+		mseed7 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 96));
+		mseed8 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 112));
+		mseed2 = _mm_aesenc_si128(mseed2, mseed2);
+		mseed3 = _mm_aesenc_si128(mseed3, mseed3);
+		mseed4 = _mm_aesenc_si128(mseed4, mseed4);
+		mseed5 = _mm_aesenc_si128(mseed5, mseed5);
+		mseed6 = _mm_aesenc_si128(mseed6, mseed6);
+		mseed7 = _mm_aesenc_si128(mseed7, mseed7);
+		mseed8 = _mm_aesenc_si128(mseed8, mseed8);
+
+		// Start with last (possibly overlapping) block.
+		mval = _mm_loadu_si128((void*)((char*)p + size - 128));
+		mval2 = _mm_loadu_si128((void*)((char*)p + size - 112));
+		mval3 = _mm_loadu_si128((void*)((char*)p + size - 96));
+		mval4 = _mm_loadu_si128((void*)((char*)p + size - 80));
+		mval5 = _mm_loadu_si128((void*)((char*)p + size - 64));
+		mval6 = _mm_loadu_si128((void*)((char*)p + size - 48));
+		mval7 = _mm_loadu_si128((void*)((char*)p + size - 32));
+		mval8 = _mm_loadu_si128((void*)((char*)p + size - 16));
+
+		// XOR in seed.
+		mval ^= mseed;
+		mval2 ^= mseed2;
+		mval3 ^= mseed3;
+		mval4 ^= mseed4;
+		mval5 ^= mseed5;
+		mval6 ^= mseed6;
+		mval7 ^= mseed7;
+		mval8 ^= mseed8;
+
+		// Compute number of 128-byte blocks for the loop: (size - 1) / 128, at least 1 because size > 128 here.
+		size--;
+		size >>= 7;
+		do {
+			// Scramble state.
+			mval = _mm_aesenc_si128(mval, mval);
+			mval2 = _mm_aesenc_si128(mval2, mval2);
+			mval3 = _mm_aesenc_si128(mval3, mval3);
+			mval4 = _mm_aesenc_si128(mval4, mval4);
+			mval5 = _mm_aesenc_si128(mval5, mval5);
+			mval6 = _mm_aesenc_si128(mval6, mval6);
+			mval7 = _mm_aesenc_si128(mval7, mval7);
+			mval8 = _mm_aesenc_si128(mval8, mval8);
+
+			// Scramble state, XOR in a block.
+			mval = _mm_aesenc_si128(mval, _mm_loadu_si128(p));
+			mval2 = _mm_aesenc_si128(mval2, _mm_loadu_si128((void*)((char*)p + 16)));
+			mval3 = _mm_aesenc_si128(mval3, _mm_loadu_si128((void*)((char*)p + 32)));
+			mval4 = _mm_aesenc_si128(mval4, _mm_loadu_si128((void*)((char*)p + 48)));
+			mval5 = _mm_aesenc_si128(mval5, _mm_loadu_si128((void*)((char*)p + 64)));
+			mval6 = _mm_aesenc_si128(mval6, _mm_loadu_si128((void*)((char*)p + 80)));
+			mval7 = _mm_aesenc_si128(mval7, _mm_loadu_si128((void*)((char*)p + 96)));
+			mval8 = _mm_aesenc_si128(mval8, _mm_loadu_si128((void*)((char*)p + 112)));
+
+			p = (void*)((char*)p + 128);
+		} while (--size > 0);
+
+		// 3 more scrambles to finish.
+		mval = _mm_aesenc_si128(mval, mval);
+		mval2 = _mm_aesenc_si128(mval2, mval2);
+		mval3 = _mm_aesenc_si128(mval3, mval3);
+		mval4 = _mm_aesenc_si128(mval4, mval4);
+		mval5 = _mm_aesenc_si128(mval5, mval5);
+		mval6 = _mm_aesenc_si128(mval6, mval6);
+		mval7 = _mm_aesenc_si128(mval7, mval7);
+		mval8 = _mm_aesenc_si128(mval8, mval8);
+		mval = _mm_aesenc_si128(mval, mval);
+		mval2 = _mm_aesenc_si128(mval2, mval2);
+		mval3 = _mm_aesenc_si128(mval3, mval3);
+		mval4 = _mm_aesenc_si128(mval4, mval4);
+		mval5 = _mm_aesenc_si128(mval5, mval5);
+		mval6 = _mm_aesenc_si128(mval6, mval6);
+		mval7 = _mm_aesenc_si128(mval7, mval7);
+		mval8 = _mm_aesenc_si128(mval8, mval8);
+		mval = _mm_aesenc_si128(mval, mval);
+		mval2 = _mm_aesenc_si128(mval2, mval2);
+		mval3 = _mm_aesenc_si128(mval3, mval3);
+		mval4 = _mm_aesenc_si128(mval4, mval4);
+		mval5 = _mm_aesenc_si128(mval5, mval5);
+		mval6 = _mm_aesenc_si128(mval6, mval6);
+		mval7 = _mm_aesenc_si128(mval7, mval7);
+		mval8 = _mm_aesenc_si128(mval8, mval8);
+
+		mval ^= mval5;
+		mval2 ^= mval6;
+		mval3 ^= mval7;
+		mval4 ^= mval8;
+		mval ^= mval3;
+		mval2 ^= mval4;
+		mval ^= mval2;
+		return _mm_cvtsi128_si64(mval);
+	}
+}
+
+#else // !defined(__x86_64__)
+
+// The 32-bit version of aeshashbody: same arguments, returns the low 32 bits of the state, and consumes long inputs in 64-byte blocks.
+
+uintptr aeshashbody(void* p, uintptr seed, uintptr size, Slice aeskeysched) {
+	__m128i mseed, mseed2, mseed3, mseed4;
+	__m128i mval, mval2, mval3, mval4;
+
+	// Start with hash seed.
+	mseed = _mm_cvtsi32_si128(seed);
+	// Insert the low 16 bits of the length into 16-bit lane 4.
+	mseed = _mm_insert_epi16(mseed, size, 4);
+	// Replace size with its low 2 bytes repeated 4 times.
+	mseed = _mm_shufflehi_epi16(mseed, 0);
+	// Save unscrambled seed.
+	mseed2 = mseed;
+	// XOR in per-process seed.
+	mseed ^= _mm_loadu_si128(aeskeysched.__values);
+	// Scramble seed.
+	mseed = _mm_aesenc_si128(mseed, mseed);
+
+	if (size <= 16) {
+		if (size == 0) {
+			// Return scrambled input seed.
+			return _mm_cvtsi128_si32(_mm_aesenc_si128(mseed, mseed));
+		} else if (size < 16) {
+			if ((((uintptr)(p) + 16) & 0xff0) != 0) {
+				static const uint64 masks[32]
+				  __attribute__ ((aligned(16))) =
+				  {
+				    0x0000000000000000, 0x0000000000000000,
+				    0x00000000000000ff, 0x0000000000000000,
+				    0x000000000000ffff, 0x0000000000000000,
+				    0x0000000000ffffff, 0x0000000000000000,
+				    0x00000000ffffffff, 0x0000000000000000,
+				    0x000000ffffffffff, 0x0000000000000000,
+				    0x0000ffffffffffff, 0x0000000000000000,
+				    0x00ffffffffffffff, 0x0000000000000000,
+				    0xffffffffffffffff, 0x0000000000000000,
+				    0xffffffffffffffff, 0x00000000000000ff,
+				    0xffffffffffffffff, 0x000000000000ffff,
+				    0xffffffffffffffff, 0x0000000000ffffff,
+				    0xffffffffffffffff, 0x00000000ffffffff,
+				    0xffffffffffffffff, 0x000000ffffffffff,
+				    0xffffffffffffffff, 0x0000ffffffffffff,
+				    0xffffffffffffffff, 0x00ffffffffffffff
+				  };
+
+				// 16 bytes loaded at p won't cross a page
+				// boundary, so we can load it directly.
+				mval = _mm_loadu_si128(p);
+				mval &= *(const __m128i*)(&masks[size*2]); // Zero all but the low SIZE bytes.
+			} else {
+				static const uint64 shifts[32]
+				  __attribute__ ((aligned(16))) =
+				  {
+				    0x0000000000000000, 0x0000000000000000,
+				    0xffffffffffffff0f, 0xffffffffffffffff,
+				    0xffffffffffff0f0e, 0xffffffffffffffff,
+				    0xffffffffff0f0e0d, 0xffffffffffffffff,
+				    0xffffffff0f0e0d0c, 0xffffffffffffffff,
+				    0xffffff0f0e0d0c0b, 0xffffffffffffffff,
+				    0xffff0f0e0d0c0b0a, 0xffffffffffffffff,
+				    0xff0f0e0d0c0b0a09, 0xffffffffffffffff,
+				    0x0f0e0d0c0b0a0908, 0xffffffffffffffff,
+				    0x0e0d0c0b0a090807, 0xffffffffffffff0f,
+				    0x0d0c0b0a09080706, 0xffffffffffff0f0e,
+				    0x0c0b0a0908070605, 0xffffffffff0f0e0d,
+				    0x0b0a090807060504, 0xffffffff0f0e0d0c,
+				    0x0a09080706050403, 0xffffff0f0e0d0c0b,
+				    0x0908070605040302, 0xffff0f0e0d0c0b0a,
+				    0x0807060504030201, 0xff0f0e0d0c0b0a09,
+				  };
+
+				// address ends in 11111111xxxx (low 12 bits
+				// are 0xff0-0xfff). Might be up against a page
+				// boundary, so load ending at last byte.
+				// Then shift bytes down using pshufb.
+				mval = _mm_loadu_si128((void*)((char*)p - 16 + size));
+				mval = _mm_shuffle_epi8(mval, *(const __m128i*)(&shifts[size*2]));
+			}
+		} else {
+			mval = _mm_loadu_si128(p);
+		}
+
+		// Scramble input, XOR in seed.
+		mval = _mm_aesenc_si128(mval, mseed);
+		mval = _mm_aesenc_si128(mval, mval);
+		mval = _mm_aesenc_si128(mval, mval);
+		return _mm_cvtsi128_si32(mval);
+	} else if (size <= 32) {
+		// Make second starting seed.
+		mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
+		mseed2 = _mm_aesenc_si128(mseed2, mseed2);
+		// Load data to be hashed.
+		mval = _mm_loadu_si128(p);
+		mval2 = _mm_loadu_si128((void*)((char*)p + size - 16));
+
+		// Scramble 3 times.
+		mval = _mm_aesenc_si128(mval, mseed);
+		mval2 = _mm_aesenc_si128(mval2, mseed2);
+		mval = _mm_aesenc_si128(mval, mval);
+		mval2 = _mm_aesenc_si128(mval2, mval2);
+		mval = _mm_aesenc_si128(mval, mval);
+		mval2 = _mm_aesenc_si128(mval2, mval2);
+
+		// Combine results.
+		mval ^= mval2;
+		return _mm_cvtsi128_si32(mval);
+	} else if (size <= 64) {
+		// Make 3 more starting seeds.
+		mseed3 = mseed2;
+		mseed4 = mseed2;
+		mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
+		mseed3 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 32));
+		mseed4 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 48));
+		mseed2 = _mm_aesenc_si128(mseed2, mseed2);
+		mseed3 = _mm_aesenc_si128(mseed3, mseed3);
+		mseed4 = _mm_aesenc_si128(mseed4, mseed4);
+
+		mval = _mm_loadu_si128(p);
+		mval2 = _mm_loadu_si128((void*)((char*)p + 16));
+		mval3 = _mm_loadu_si128((void*)((char*)p + size - 32));
+		mval4 = _mm_loadu_si128((void*)((char*)p + size - 16));
+
+		mval = _mm_aesenc_si128(mval, mseed);
+		mval2 = _mm_aesenc_si128(mval2, mseed2);
+		mval3 = _mm_aesenc_si128(mval3, mseed3);
+		mval4 = _mm_aesenc_si128(mval4, mseed4);
+
+		mval = _mm_aesenc_si128(mval, mval);
+		mval2 = _mm_aesenc_si128(mval2, mval2);
+		mval3 = _mm_aesenc_si128(mval3, mval3);
+		mval4 = _mm_aesenc_si128(mval4, mval4);
+
+		mval = _mm_aesenc_si128(mval, mval);
+		mval2 = _mm_aesenc_si128(mval2, mval2);
+		mval3 = _mm_aesenc_si128(mval3, mval3);
+		mval4 = _mm_aesenc_si128(mval4, mval4);
+
+		mval ^= mval3;
+		mval2 ^= mval4;
+		mval ^= mval2;
+		return _mm_cvtsi128_si32(mval);
+	} else {
+		// Make 3 more starting seeds.
+		mseed3 = mseed2;
+		mseed4 = mseed2;
+		mseed2 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 16));
+		mseed3 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 32));
+		mseed4 ^= _mm_loadu_si128((void*)((char*)aeskeysched.__values + 48));
+		mseed2 = _mm_aesenc_si128(mseed2, mseed2);
+		mseed3 = _mm_aesenc_si128(mseed3, mseed3);
+		mseed4 = _mm_aesenc_si128(mseed4, mseed4);
+
+		// Start with last (possibly overlapping) block.
+		mval = _mm_loadu_si128((void*)((char*)p + size - 64));
+		mval2 = _mm_loadu_si128((void*)((char*)p + size - 48));
+		mval3 = _mm_loadu_si128((void*)((char*)p + size - 32));
+		mval4 = _mm_loadu_si128((void*)((char*)p + size - 16));
+
+		// Scramble state once.
+		mval = _mm_aesenc_si128(mval, mseed);
+		mval2 = _mm_aesenc_si128(mval2, mseed2);
+		mval3 = _mm_aesenc_si128(mval3, mseed3);
+		mval4 = _mm_aesenc_si128(mval4, mseed4);
+
+		// Compute number of 64-byte blocks for the loop: (size - 1) / 64, at least 1 because size > 64 here.
+		size--;
+		size >>= 6;
+		do {
+			// Scramble state, XOR in a block.
+			mval = _mm_aesenc_si128(mval, _mm_loadu_si128(p));
+			mval2 = _mm_aesenc_si128(mval2, _mm_loadu_si128((void*)((char*)p + 16)));
+			mval3 = _mm_aesenc_si128(mval3, _mm_loadu_si128((void*)((char*)p + 32)));
+			mval4 = _mm_aesenc_si128(mval4, _mm_loadu_si128((void*)((char*)p + 48)));
+
+			// Scramble state.
+			mval = _mm_aesenc_si128(mval, mval);
+			mval2 = _mm_aesenc_si128(mval2, mval2);
+			mval3 = _mm_aesenc_si128(mval3, mval3);
+			mval4 = _mm_aesenc_si128(mval4, mval4);
+
+			p = (void*)((char*)p + 64);
+		} while (--size > 0);
+
+		// 2 more scrambles to finish.
+		mval = _mm_aesenc_si128(mval, mval);
+		mval2 = _mm_aesenc_si128(mval2, mval2);
+		mval3 = _mm_aesenc_si128(mval3, mval3);
+		mval4 = _mm_aesenc_si128(mval4, mval4);
+
+		mval = _mm_aesenc_si128(mval, mval);
+		mval2 = _mm_aesenc_si128(mval2, mval2);
+		mval3 = _mm_aesenc_si128(mval3, mval3);
+		mval4 = _mm_aesenc_si128(mval4, mval4);
+
+		mval ^= mval3;
+		mval2 ^= mval4;
+		mval ^= mval2;
+		return _mm_cvtsi128_si32(mval);
+	}
+}
+
+#endif // !defined(__x86_64__)
+
+#else // !defined(__i386__) && !defined(__x86_64__)
+
+uintptr aeshashbody(void* p __attribute__((unused)), uintptr seed __attribute__((unused)), uintptr size __attribute__((unused)), Slice aeskeysched __attribute__((unused))) {
+	// Stub for non-x86 targets; every parameter is deliberately unused.  We should never get here on a non-x86 system.
+	runtime_throw("impossible call to aeshashbody");
+}
+
+#endif // !defined(__i386__) && !defined(__x86_64__)
diff --git a/libgo/runtime/go-libmain.c b/libgo/runtime/go-libmain.c
index 6884f3a..c62ad93 100644
--- a/libgo/runtime/go-libmain.c
+++ b/libgo/runtime/go-libmain.c
@@ -61,6 +61,7 @@ initfn (int argc, char **argv, char** env __attribute__ ((unused)))
 
   runtime_isarchive = true;
 
+  runtime_cpuinit ();
   runtime_initsig(true);
 
   a = (struct args *) malloc (sizeof *a);
diff --git a/libgo/runtime/go-main.c b/libgo/runtime/go-main.c
index ff2958c..622a77d 100644
--- a/libgo/runtime/go-main.c
+++ b/libgo/runtime/go-main.c
@@ -47,6 +47,7 @@ main (int argc, char **argv)
   runtime_isstarted = true;
 
   __go_end = (uintptr)_end;
+  runtime_cpuinit ();
   runtime_check ();
   runtime_args (argc, (byte **) argv);
   runtime_osinit ();
diff --git a/libgo/runtime/go-type-identity.c b/libgo/runtime/go-type-identity.c
index d58aa75..842fa24 100644
--- a/libgo/runtime/go-type-identity.c
+++ b/libgo/runtime/go-type-identity.c
@@ -9,44 +9,14 @@
 #include "runtime.h"
 #include "go-type.h"
 
-/* An identity hash function for a type.  This is used for types where
-   we can simply use the type value itself as a hash code.  This is
-   true of, e.g., integers and pointers.  */
+/* The hash function for types that can compare as identity is
+   written in Go.  */
 
-uintptr_t
-__go_type_hash_identity (const void *key, uintptr_t seed, uintptr_t key_size)
-{
-  uintptr_t ret;
-  uintptr_t i;
-  const unsigned char *p;
-
-  if (key_size <= 8)
-    {
-      union
-      {
-	uint64 v;
-	unsigned char a[8];
-      } u;
-      u.v = 0;
-#ifdef WORDS_BIGENDIAN
-      __builtin_memcpy (&u.a[8 - key_size], key, key_size);
-#else
-      __builtin_memcpy (&u.a[0], key, key_size);
-#endif
-      if (sizeof (uintptr_t) >= 8)
-	return (uintptr_t) u.v ^ seed;
-      else
-	return (uintptr_t) ((u.v >> 32) ^ (u.v & 0xffffffff)) ^ seed;
-    }
-
-  ret = seed;
-  for (i = 0, p = (const unsigned char *) key; i < key_size; i++, p++)
-    ret = ret * 33 + *p;
-  return ret;
-}
+extern uintptr runtime_memhash(void *, uintptr, uintptr)
+  __asm__ (GOSYM_PREFIX "runtime.memhash");
 
 const FuncVal __go_type_hash_identity_descriptor =
-  { (void *) __go_type_hash_identity };
+  { (void *) runtime_memhash };
 
 /* An identity equality function for a type.  This is used for types
    where we can check for equality by checking that the values have
diff --git a/libgo/runtime/go-type.h b/libgo/runtime/go-type.h
index 7c3149b..2d5965c 100644
--- a/libgo/runtime/go-type.h
+++ b/libgo/runtime/go-type.h
@@ -362,7 +362,6 @@ extern _Bool
 __go_type_descriptors_equal(const struct __go_type_descriptor*,
 			    const struct __go_type_descriptor*);
 
-extern uintptr_t __go_type_hash_identity (const void *, uintptr_t, uintptr_t);
 extern const FuncVal __go_type_hash_identity_descriptor;
 extern _Bool __go_type_equal_identity (const void *, const void *, uintptr_t);
 extern const FuncVal __go_type_equal_identity_descriptor;
diff --git a/libgo/runtime/proc.c b/libgo/runtime/proc.c
index dd5562b..be7e083 100644
--- a/libgo/runtime/proc.c
+++ b/libgo/runtime/proc.c
@@ -455,7 +455,8 @@ runtime_schedinit(void)
 	// runtime_symtabinit();
 	runtime_mallocinit();
 	mcommoninit(m);
-	
+	runtime_alginit(); // maps must not be used before this call
+
 	// Initialize the itable value for newErrorCString,
 	// so that the next time it gets called, possibly
 	// in a fault during a garbage collection, it will not
diff --git a/libgo/runtime/runtime.h b/libgo/runtime/runtime.h
index f793fea..424b429 100644
--- a/libgo/runtime/runtime.h
+++ b/libgo/runtime/runtime.h
@@ -265,6 +265,8 @@ struct __go_func_type;
 void	runtime_args(int32, byte**)
   __asm__ (GOSYM_PREFIX "runtime.args");
 void	runtime_osinit();
+void	runtime_alginit(void)
+  __asm__ (GOSYM_PREFIX "runtime.alginit");
 void	runtime_goargs(void)
   __asm__ (GOSYM_PREFIX "runtime.goargs");
 void	runtime_goenvs(void);
@@ -592,3 +594,7 @@ extern void *getitab(const struct __go_type_descriptor *,
 		     const struct __go_type_descriptor *,
 		     _Bool)
   __asm__ (GOSYM_PREFIX "runtime.getitab");
+
+extern void runtime_cpuinit(void);
+extern void setCpuidECX(uint32)
+  __asm__ (GOSYM_PREFIX "runtime.setCpuidECX");
diff --git a/libgo/runtime/runtime_c.c b/libgo/runtime/runtime_c.c
index 16be089..3387401 100644
--- a/libgo/runtime/runtime_c.c
+++ b/libgo/runtime/runtime_c.c
@@ -6,6 +6,10 @@
 #include <signal.h>
 #include <unistd.h>
 
+#if defined(__i386__) || defined(__x86_64__)
+#include <cpuid.h>
+#endif
+
 #include "config.h"
 
 #include "runtime.h"
@@ -204,3 +208,18 @@ go_errno()
 {
   return (intgo)errno;
 }
+
+// runtime_cpuinit does the CPU-specific part of start-up.
+// On x86 it queries CPUID leaf 1 and passes the ECX word to setCpuidECX.
+
+void
+runtime_cpuinit()
+{
+#if defined(__i386__) || defined(__x86_64__)
+	unsigned int regs[4];	// filled in the order EAX, EBX, ECX, EDX
+
+	if (__get_cpuid(1, &regs[0], &regs[1], &regs[2], &regs[3]) != 0) {
+		setCpuidECX(regs[2]);
+	}
+#endif
+}
-- 
cgit v1.1