diff options
author | Aaron Sawdey <acsawdey@linux.vnet.ibm.com> | 2018-01-02 23:01:43 +0000 |
---|---|---|
committer | Aaron Sawdey <acsawdey@gcc.gnu.org> | 2018-01-02 17:01:43 -0600 |
commit | 3b0cb1a553831f9119995362d8b6d982a0ba1b6b (patch) | |
tree | 3f83fbdb295a628cd6b19b4102ccd6e25271c357 /gcc | |
parent | 6012c652c778abaf9fb07fb1628435ff055dd349 (diff) | |
download | gcc-3b0cb1a553831f9119995362d8b6d982a0ba1b6b.zip gcc-3b0cb1a553831f9119995362d8b6d982a0ba1b6b.tar.gz gcc-3b0cb1a553831f9119995362d8b6d982a0ba1b6b.tar.bz2 |
rs6000-string.c (expand_block_move): Allow the use of unaligned VSX load/store on P8/P9.
2018-01-02 Aaron Sawdey <acsawdey@linux.vnet.ibm.com>
* config/rs6000/rs6000-string.c (expand_block_move): Allow the use of
unaligned VSX load/store on P8/P9.
(expand_block_clear): Allow the use of unaligned VSX
load/store on P8/P9.
From-SVN: r256112
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 7 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000-string.c | 6 |
2 files changed, 10 insertions, 3 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index bdf32da..b977543 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2018-01-02  Aaron Sawdey  <acsawdey@linux.vnet.ibm.com>
+
+	* config/rs6000/rs6000-string.c (expand_block_move): Allow the use of
+	unaligned VSX load/store on P8/P9.
+	(expand_block_clear): Allow the use of unaligned VSX
+	load/store on P8/P9.
+
 2018-01-02  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
 
 	* config/rs6000/rs6000-p8swap.c (swap_feeds_both_load_and_store):
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index 7e1976b..23de925 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -73,7 +73,7 @@ expand_block_clear (rtx operands[])
      When optimize_size, avoid any significant code bloat; calling
      memset is about 4 instructions, so allow for one instruction to
      load zero and three to do clearing.  */
-  if (TARGET_ALTIVEC && align >= 128)
+  if (TARGET_ALTIVEC && (align >= 128 || TARGET_EFFICIENT_UNALIGNED_VSX))
     clear_step = 16;
   else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
     clear_step = 8;
@@ -90,7 +90,7 @@ expand_block_clear (rtx operands[])
       machine_mode mode = BLKmode;
       rtx dest;
 
-      if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
+      if (bytes >= 16 && TARGET_ALTIVEC && (align >= 128 || TARGET_EFFICIENT_UNALIGNED_VSX))
 	{
 	  clear_bytes = 16;
 	  mode = V4SImode;
@@ -1260,7 +1260,7 @@ expand_block_move (rtx operands[])
   /* Altivec first, since it will be faster than a string move
      when it applies, and usually not significantly larger.  */
 
-  if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
+  if (TARGET_ALTIVEC && bytes >= 16 && (TARGET_EFFICIENT_UNALIGNED_VSX || align >= 128))
     {
       move_bytes = 16;
       mode = V4SImode;