author     Richard Henderson <rth@redhat.com>    2004-12-23 02:20:04 -0800
committer  Richard Henderson <rth@gcc.gnu.org>   2004-12-23 02:20:04 -0800
commit     c38573a8d0ada5f22281897f0ee5ecd818156e56 (patch)
tree       0c3f072d4750018120765dd8de2ddbe8528098e5 /gcc
parent     f98625f6a5077fa1554d7ea94016f452b79a00e2 (diff)
i386.c (ix86_expand_vector_move): Tidy.
* config/i386/i386.c (ix86_expand_vector_move): Tidy.
(ix86_expand_vector_move_misalign): New.
(ix86_misaligned_mem_ok): Remove.
(TARGET_VECTORIZE_MISALIGNED_MEM_OK): Remove.
* config/i386/i386-protos.h: Update.
* config/i386/i386.md (SSEMODEI): Rename from SSEINT16.
(MMXMODEI): Rename from MMXINT8.
(SSEMODE, MMXMODE, movmisalign<mode>): New.
From-SVN: r92543
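
Context for the change: movmisalign<mode> is the optab the tree vectorizer uses when it must access vector data whose alignment it cannot prove, replacing the blunt misaligned_mem_ok hook removed here. A hypothetical example, not part of this commit: in a loop like the one below, compiled with -O2 -ftree-vectorize -msse, the unaligned accesses are what the new pattern expands (via ix86_expand_vector_move_misalign) into movups, movdqu, or the movlps/movhps pairs in the patch body.

/* Hypothetical test case (not from the patch): dst and src have no
   known 16-byte alignment, so vectorizing this loop requires the
   unaligned vector moves provided by movmisalign<mode>.  */
void
saxpy (float *dst, const float *src, float a, int n)
{
  int i;
  for (i = 0; i < n; i++)
    dst[i] += a * src[i];
}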
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/ChangeLog                  11
-rw-r--r--  gcc/config/i386/i386-protos.h   1
-rw-r--r--  gcc/config/i386/i386.c        152
-rw-r--r--  gcc/config/i386/i386.md        44

4 files changed, 175 insertions, 33 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 4f8f3a6..1fa3c95 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2004-12-23  Richard Henderson  <rth@redhat.com>
+
+	* config/i386/i386.c (ix86_expand_vector_move): Tidy.
+	(ix86_expand_vector_move_misalign): New.
+	(ix86_misaligned_mem_ok): Remove.
+	(TARGET_VECTORIZE_MISALIGNED_MEM_OK): Remove.
+	* config/i386/i386-protos.h: Update.
+	* config/i386/i386.md (SSEMODEI): Rename from SSEINT16.
+	(MMXMODEI): Rename from MMXINT8.
+	(SSEMODE, MMXMODE, movmisalign<mode>): New.
+
 2004-12-23  Mark Mitchell  <mark@codesourcery.com>
 
 	PR c++/16405
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 3ee9b22..58e4e23 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -125,6 +125,7 @@ extern void i386_output_dwarf_dtprel (FILE*, int, rtx);
 extern void ix86_expand_clear (rtx);
 extern void ix86_expand_move (enum machine_mode, rtx[]);
 extern void ix86_expand_vector_move (enum machine_mode, rtx[]);
+extern void ix86_expand_vector_move_misalign (enum machine_mode, rtx[]);
 extern void ix86_expand_binary_operator (enum rtx_code, enum machine_mode,
 					 rtx[]);
 extern int ix86_binary_operator_ok (enum rtx_code, enum machine_mode, rtx[]);
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 2a9dca2..fa6c3b4 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -867,7 +867,6 @@ static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
 static int ix86_issue_rate (void);
 static int ix86_adjust_cost (rtx, rtx, rtx, int);
 static int ia32_multipass_dfa_lookahead (void);
-static bool ix86_misaligned_mem_ok (enum machine_mode);
 static void ix86_init_mmx_sse_builtins (void);
 static rtx x86_this_parameter (tree);
 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
@@ -1010,9 +1009,6 @@ static void init_ext_80387_constants (void);
 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
   ia32_multipass_dfa_lookahead
 
-#undef TARGET_VECTORIZE_MISALIGNED_MEM_OK
-#define TARGET_VECTORIZE_MISALIGNED_MEM_OK ix86_misaligned_mem_ok
-
 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
 
@@ -7556,28 +7552,149 @@ ix86_expand_move (enum machine_mode mode, rtx operands[])
 void
 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
 {
+  rtx op0 = operands[0], op1 = operands[1];
+
   /* Force constants other than zero into memory.  We do not know how
      the instructions used to build constants modify the upper 64 bits
      of the register, once we have that information we may be able
      to handle some of them more efficiently.  */
   if ((reload_in_progress | reload_completed) == 0
-      && register_operand (operands[0], mode)
-      && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
-    operands[1] = validize_mem (force_const_mem (mode, operands[1]));
+      && register_operand (op0, mode)
+      && CONSTANT_P (op1) && op1 != CONST0_RTX (mode))
+    op1 = validize_mem (force_const_mem (mode, op1));
 
   /* Make operand1 a register if it isn't already.  */
   if (!no_new_pseudos
-      && !register_operand (operands[0], mode)
-      && !register_operand (operands[1], mode))
+      && !register_operand (op0, mode)
+      && !register_operand (op1, mode))
     {
-      rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
-      emit_move_insn (operands[0], temp);
+      emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
       return;
     }
 
-  emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
+  emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
+}
+
+/* Implement the movmisalign patterns for SSE.  Non-SSE modes go
+   straight to ix86_expand_vector_move.  */
+
+void
+ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
+{
+  rtx op0, op1, m;
+
+  op0 = operands[0];
+  op1 = operands[1];
+
+  if (MEM_P (op1))
+    {
+      /* If we're optimizing for size, movups is the smallest.  */
+      if (optimize_size)
+	{
+	  op0 = gen_lowpart (V4SFmode, op0);
+	  op1 = gen_lowpart (V4SFmode, op1);
+	  emit_insn (gen_sse_movups (op0, op1));
+	  return;
+	}
+
+      /* ??? If we have typed data, then it would appear that using
+	 movdqu is the only way to get unaligned data loaded with
+	 integer type.  */
+      if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+	{
+	  op0 = gen_lowpart (V16QImode, op0);
+	  op1 = gen_lowpart (V16QImode, op1);
+	  emit_insn (gen_sse2_movdqu (op0, op1));
+	  return;
+	}
+
+      if (TARGET_SSE2 && mode == V2DFmode)
+	{
+	  /* When SSE registers are split into halves, we can avoid
+	     writing to the top half twice.  */
+	  if (TARGET_SSE_SPLIT_REGS)
+	    {
+	      emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
+	      m = adjust_address (op1, DFmode, 0);
+	      emit_insn (gen_sse2_loadlpd (op0, op0, m));
+	      m = adjust_address (op1, DFmode, 8);
+	      emit_insn (gen_sse2_loadhpd (op0, op0, m));
+	    }
+	  else
+	    {
+	      /* ??? Not sure about the best option for the Intel chips.
+		 The following would seem to satisfy; the register is
+		 entirely cleared, breaking the dependency chain.  We
+		 then store to the upper half, with a dependency depth
+		 of one.  A rumor has it that Intel recommends two movsd
+		 followed by an unpacklpd, but this is unconfirmed.  And
+		 given that the dependency depth of the unpacklpd would
+		 still be one, I'm not sure why this would be better.  */
+	      m = adjust_address (op1, DFmode, 0);
+	      emit_insn (gen_sse2_loadsd (op0, m));
+	      m = adjust_address (op1, DFmode, 8);
+	      emit_insn (gen_sse2_loadhpd (op0, op0, m));
+	    }
+	}
+      else
+	{
+	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
+	    emit_move_insn (op0, CONST0_RTX (mode));
+	  else
+	    emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
+
+	  op0 = gen_lowpart (V4SFmode, op0);
+	  m = adjust_address (op1, V4SFmode, 0);
+	  emit_insn (gen_sse_movlps (op0, op0, m));
+	  m = adjust_address (op1, V4SFmode, 8);
+	  emit_insn (gen_sse_movhps (op0, op0, m));
+	}
+    }
+  else if (MEM_P (op0))
+    {
+      /* If we're optimizing for size, movups is the smallest.  */
+      if (optimize_size)
+	{
+	  op0 = gen_lowpart (V4SFmode, op0);
+	  op1 = gen_lowpart (V4SFmode, op1);
+	  emit_insn (gen_sse_movups (op0, op1));
+	  return;
+	}
+
+      /* ??? Similar to above, only less clear because of quote
+	 typeless stores unquote.  */
+      if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
+	  && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+	{
+	  op0 = gen_lowpart (V16QImode, op0);
+	  op1 = gen_lowpart (V16QImode, op1);
+	  emit_insn (gen_sse2_movdqu (op0, op1));
+	  return;
+	}
+
+      if (TARGET_SSE2 && mode == V2DFmode)
+	{
+	  m = adjust_address (op0, DFmode, 0);
+	  emit_insn (gen_sse2_storelpd (m, op1));
+	  m = adjust_address (op0, DFmode, 8);
+	  emit_insn (gen_sse2_storehpd (m, op1));
+	  return;
+	}
+      else
+	{
+	  op1 = gen_lowpart (V4SFmode, op1);
+	  m = adjust_address (op0, V4SFmode, 0);
+	  emit_insn (gen_sse_movlps (m, m, op1));
+	  m = adjust_address (op0, V4SFmode, 8);
+	  emit_insn (gen_sse_movhps (m, m, op1));
+	  return;
+	}
+    }
+  else
+    gcc_unreachable ();
 }
+
 /* Attempt to expand a binary operator.  Make the expansion closer to the
    actual machine, then just general_operand, which will allow 3 separate
    memory references (one output, two input) in a single insn.  */
@@ -11727,17 +11844,6 @@ ia32_multipass_dfa_lookahead (void)
 }
 
-/* Implement the target hook targetm.vectorize.misaligned_mem_ok.  */
-
-static bool
-ix86_misaligned_mem_ok (enum machine_mode mode)
-{
-  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
-    return true;
-  else
-    return false;
-}
-
 /* Compute the alignment given to a constant that is being placed in memory.
    EXP is the constant and ALIGN is the alignment that the object would
    ordinarily have.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ff0f9f9..17835c7 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -19789,11 +19789,11 @@
 ;; 16 byte integral modes handled by SSE, minus TImode, which gets
 ;; special-cased for TARGET_64BIT.
-(define_mode_macro SSEINT16 [V16QI V8HI V4SI V2DI])
+(define_mode_macro SSEMODEI [V16QI V8HI V4SI V2DI])
 
 (define_expand "mov<mode>"
-  [(set (match_operand:SSEINT16 0 "nonimmediate_operand" "")
-	(match_operand:SSEINT16 1 "nonimmediate_operand" ""))]
+  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "")
+	(match_operand:SSEMODEI 1 "nonimmediate_operand" ""))]
   "TARGET_SSE"
 {
   ix86_expand_vector_move (<MODE>mode, operands);
@@ -19801,8 +19801,8 @@
 })
 
 (define_insn "*mov<mode>_internal"
-  [(set (match_operand:SSEINT16 0 "nonimmediate_operand" "=x,x ,m")
-	(match_operand:SSEINT16 1 "vector_move_operand" "C ,xm,x"))]
+  [(set (match_operand:SSEMODEI 0 "nonimmediate_operand" "=x,x ,m")
+	(match_operand:SSEMODEI 1 "vector_move_operand" "C ,xm,x"))]
   "TARGET_SSE
    && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
 {
@@ -19842,11 +19842,11 @@
 	      (const_string "TI")))])
 
 ;; 8 byte integral modes handled by MMX (and by extension, SSE)
-(define_mode_macro MMXINT8 [V8QI V4HI V2SI])
+(define_mode_macro MMXMODEI [V8QI V4HI V2SI])
 
 (define_expand "mov<mode>"
-  [(set (match_operand:MMXINT8 0 "nonimmediate_operand" "")
-	(match_operand:MMXINT8 1 "nonimmediate_operand" ""))]
+  [(set (match_operand:MMXMODEI 0 "nonimmediate_operand" "")
+	(match_operand:MMXMODEI 1 "nonimmediate_operand" ""))]
   "TARGET_MMX"
 {
   ix86_expand_vector_move (<MODE>mode, operands);
@@ -19854,9 +19854,9 @@
 })
 
 (define_insn "*mov<mode>_internal"
-  [(set (match_operand:MMXINT8 0 "nonimmediate_operand"
+  [(set (match_operand:MMXMODEI 0 "nonimmediate_operand"
 			"=y,y ,m,!y,!*Y,*x,?*x,?m")
-	(match_operand:MMXINT8 1 "vector_move_operand"
+	(match_operand:MMXMODEI 1 "vector_move_operand"
 			"C ,ym,y,*Y,y ,C ,*xm,*x"))]
   "TARGET_MMX
    && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
@@ -20103,6 +20103,30 @@
   [(const_int 0)]
   "ix86_split_long_move (operands); DONE;")
 
+;; All 16-byte vector modes handled by SSE
+(define_mode_macro SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
+
+(define_expand "movmisalign<mode>"
+  [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "")
+	(match_operand:SSEMODE 1 "nonimmediate_operand" ""))]
+  "TARGET_SSE"
+{
+  ix86_expand_vector_move_misalign (<MODE>mode, operands);
+  DONE;
+})
+
+;; All 8-byte vector modes handled by MMX
+(define_mode_macro MMXMODE [V8QI V4HI V2SI V2SF])
+
+(define_expand "movmisalign<mode>"
+  [(set (match_operand:MMXMODE 0 "nonimmediate_operand" "")
+	(match_operand:MMXMODE 1 "nonimmediate_operand" ""))]
+  "TARGET_MMX"
+{
+  ix86_expand_vector_move (<MODE>mode, operands);
+  DONE;
+})
+
 ;; These two patterns are useful for specifying exactly whether to use
 ;; movaps or movups
 (define_expand "sse_movaps"
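
As an aside on the V2DF load strategies weighed in the ??? comment above, here is an illustrative rendering with SSE2 intrinsics; the function names are invented and this is only a sketch of the two instruction sequences (movsd+movhpd versus movlpd+movhpd), not code from the patch.

#include <emmintrin.h>

/* Default path: movsd clears the entire register, breaking the
   dependency on its previous contents, then movhpd fills the upper
   half -- a dependency depth of one, as the comment says.  */
static __m128d
load_v2df_unaligned (const double *p)
{
  __m128d x = _mm_load_sd (p);       /* corresponds to sse2_loadsd  */
  return _mm_loadh_pd (x, p + 1);    /* corresponds to sse2_loadhpd */
}

/* TARGET_SSE_SPLIT_REGS path: on chips that split a 128-bit register
   into two 64-bit halves, write each half exactly once.  The
   deliberately undefined starting value mirrors the CLOBBER emitted
   by the patch; _mm_undefined_pd is a much newer intrinsic, used
   here purely for illustration.  */
static __m128d
load_v2df_unaligned_split (const double *p)
{
  __m128d x = _mm_undefined_pd ();
  x = _mm_loadl_pd (x, p);           /* corresponds to sse2_loadlpd */
  return _mm_loadh_pd (x, p + 1);    /* corresponds to sse2_loadhpd */
}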