about summary refs log tree commit diff
path: root/gcc
diff options
context:
space:
mode:
author Uros Bizjak <ubizjak@gmail.com> 2012-08-10 22:46:15 +0200
committer Uros Bizjak <uros@gcc.gnu.org> 2012-08-10 22:46:15 +0200
commit b0d5396c7ea5e8ff6952598475244fcbd3d1276e (patch)
tree 582176ee20551cd15e50bf1ca96e53168a2f7b29 /gcc
parent a2a40ee85be27a37cb84341fb21b6abc497be175 (diff)
download gcc-b0d5396c7ea5e8ff6952598475244fcbd3d1276e.zip
gcc-b0d5396c7ea5e8ff6952598475244fcbd3d1276e.tar.gz
gcc-b0d5396c7ea5e8ff6952598475244fcbd3d1276e.tar.bz2
* config/i386/sse.md (*fma_fmadd_<mode>, *fma_fmsub_<mode>,
*fma_fnmadd_<mode>, *fma_fnmsub_<mode>, *fma_fmaddsub_<mode>,
*fma_fmsubadd_<mode>): Move FMA3 insn patterns before FMA4 patterns.

From-SVN: r190304
Diffstat (limited to 'gcc')
-rw-r--r-- gcc/ChangeLog 6
-rw-r--r-- gcc/config/i386/sse.md 340
2 files changed, 174 insertions, 172 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6fe9b36..1620bc8 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,11 @@
2012-08-10 Uros Bizjak <ubizjak@gmail.com>
+ * config/i386/sse.md (*fma_fmadd_<mode>, *fma_fmsub_<mode>,
+ *fma_fnmadd_<mode>, *fma_fnmsub_<mode>, *fma_fmaddsub_<mode>,
+ *fma_fmsubadd_<mode>): Move FMA3 insn patterns before FMA4 patterns.
+
+2012-08-10 Uros Bizjak <ubizjak@gmail.com>
+
* config/i386/i386.md (simple LEA peephole2s): Add zero-extend
variants of PLUS and MULT simple LEA patterns. Disable PLUS
patterns for TARGET_OPT_AGU.
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 532ebdd..641a3ba 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -403,8 +403,6 @@
;; Mix-n-match
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
-(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
-
;; Mapping of immediate bits for blend instructions
(define_mode_attr blendbits
[(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
@@ -1886,12 +1884,13 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
-;; FMA4 floating point multiply/accumulate instructions. This
-;; includes the scalar version of the instructions as well as the
-;; vector.
+;; FMA floating point multiply/accumulate instructions. These include
+;; scalar versions of the instructions as well as vector versions.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
+
;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
;; combine to generate a multiply/add with two memory references. We then
;; split this insn, into loading up the destination register with one of the
@@ -1907,8 +1906,6 @@
;; We could now properly represent that only one memory operand is
;; allowed and not be penalized during optimization.
-;; Intrinsic FMA operations.
-
;; The standard names for fma is only available with SSE math enabled.
(define_expand "fma<mode>4"
[(set (match_operand:FMAMODE 0 "register_operand")
@@ -1942,7 +1939,7 @@
(neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
"(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
-;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
+;; The builtin for intrinsics is not constrained by SSE math enabled.
(define_expand "fma4i_fmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand")
(fma:FMAMODE
@@ -1951,7 +1948,71 @@
(match_operand:FMAMODE 3 "nonimmediate_operand")))]
"TARGET_FMA || TARGET_FMA4")
-(define_insn "*fma4i_fmadd_<mode>"
+;; FMA3 version
+
+(define_insn "*fma_fmadd_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
+ (fma:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
+ "TARGET_FMA"
+ "@
+ vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma_fmsub_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
+ (fma:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
+ (neg:FMAMODE
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
+ "TARGET_FMA"
+ "@
+ vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma_fnmadd_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
+ (fma:FMAMODE
+ (neg:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
+ "TARGET_FMA"
+ "@
+ vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma_fnmsub_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
+ (fma:FMAMODE
+ (neg:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
+ (neg:FMAMODE
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
+ "TARGET_FMA"
+ "@
+ vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; FMA4 version
+
+(define_insn "*fma4_fmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
(fma:FMAMODE
(match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
@@ -1962,7 +2023,7 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fma4i_fmsub_<mode>"
+(define_insn "*fma4_fmsub_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
(fma:FMAMODE
(match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
@@ -1974,7 +2035,7 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fma4i_fnmadd_<mode>"
+(define_insn "*fma4_fnmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
(fma:FMAMODE
(neg:FMAMODE
@@ -1986,7 +2047,7 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fma4i_fnmsub_<mode>"
+(define_insn "*fma4_fnmsub_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
(fma:FMAMODE
(neg:FMAMODE
@@ -1999,22 +2060,88 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-;; Scalar versions of the above. Unlike ADDSS et al, these write the
-;; entire destination register, with the high-order elements zeroed.
+;; FMA parallel floating point multiply addsub and subadd operations.
-(define_expand "fma4i_vmfmadd_<mode>"
- [(set (match_operand:VF_128 0 "register_operand")
- (vec_merge:VF_128
- (fma:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand")
- (match_operand:VF_128 2 "nonimmediate_operand")
- (match_operand:VF_128 3 "nonimmediate_operand"))
- (match_dup 4)
- (const_int 1)))]
+;; It would be possible to represent these without the UNSPEC as
+;;
+;; (vec_merge
+;; (fma op1 op2 op3)
+;; (fma op1 op2 (neg op3))
+;; (merge-const))
+;;
+;; But this doesn't seem useful in practice.
+
+(define_expand "fmaddsub_<mode>"
+ [(set (match_operand:VF 0 "register_operand")
+ (unspec:VF
+ [(match_operand:VF 1 "nonimmediate_operand")
+ (match_operand:VF 2 "nonimmediate_operand")
+ (match_operand:VF 3 "nonimmediate_operand")]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA || TARGET_FMA4")
+
+;; FMA3 version
+
+(define_insn "*fma_fmaddsub_<mode>"
+ [(set (match_operand:VF 0 "register_operand" "=x,x,x")
+ (unspec:VF
+ [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
+ (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
+ (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA"
+ "@
+ vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma_fmsubadd_<mode>"
+ [(set (match_operand:VF 0 "register_operand" "=x,x,x")
+ (unspec:VF
+ [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
+ (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
+ (neg:VF
+ (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA"
+ "@
+ vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; FMA4 version
+
+(define_insn "*fma4_fmaddsub_<mode>"
+ [(set (match_operand:VF 0 "register_operand" "=x,x")
+ (unspec:VF
+ [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
+ (match_operand:VF 2 "nonimmediate_operand" " x,m")
+ (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
+ UNSPEC_FMADDSUB))]
"TARGET_FMA4"
-{
- operands[4] = CONST0_RTX (<MODE>mode);
-})
+ "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma4_fmsubadd_<mode>"
+ [(set (match_operand:VF 0 "register_operand" "=x,x")
+ (unspec:VF
+ [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
+ (match_operand:VF 2 "nonimmediate_operand" " x,m")
+ (neg:VF
+ (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA4"
+ "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; FMA3 floating point scalar intrinsics. These merge result with
+;; high-order elements from the destination register.
(define_expand "fmai_vmfmadd_<mode>"
[(set (match_operand:VF_128 0 "register_operand")
@@ -2099,6 +2226,21 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
+;; FMA4 floating point scalar intrinsics. These write the
+;; entire destination register, with the high-order elements zeroed.
+
+(define_expand "fma4i_vmfmadd_<mode>"
+ [(set (match_operand:VF_128 0 "register_operand")
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand")
+ (match_operand:VF_128 2 "nonimmediate_operand")
+ (match_operand:VF_128 3 "nonimmediate_operand"))
+ (match_dup 4)
+ (const_int 1)))]
+ "TARGET_FMA4"
+ "operands[4] = CONST0_RTX (<MODE>mode);")
+
(define_insn "*fma4i_vmfmadd_<mode>"
[(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
@@ -2161,152 +2303,6 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
-;; FMA4 Parallel floating point multiply addsub and subadd operations.
-;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; It would be possible to represent these without the UNSPEC as
-;;
-;; (vec_merge
-;; (fma op1 op2 op3)
-;; (fma op1 op2 (neg op3))
-;; (merge-const))
-;;
-;; But this doesn't seem useful in practice.
-
-(define_expand "fmaddsub_<mode>"
- [(set (match_operand:VF 0 "register_operand")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand")
- (match_operand:VF 2 "nonimmediate_operand")
- (match_operand:VF 3 "nonimmediate_operand")]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA || TARGET_FMA4")
-
-(define_insn "*fma4_fmaddsub_<mode>"
- [(set (match_operand:VF 0 "register_operand" "=x,x")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
- (match_operand:VF 2 "nonimmediate_operand" " x,m")
- (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA4"
- "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma4_fmsubadd_<mode>"
- [(set (match_operand:VF 0 "register_operand" "=x,x")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
- (match_operand:VF 2 "nonimmediate_operand" " x,m")
- (neg:VF
- (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA4"
- "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
-;; FMA3 floating point multiply/accumulate instructions.
-;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(define_insn "*fma_fmadd_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
- (fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
- "TARGET_FMA"
- "@
- vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fmsub_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
- (fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
- (neg:FMAMODE
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
- "TARGET_FMA"
- "@
- vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fnmadd_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
- (fma:FMAMODE
- (neg:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
- "TARGET_FMA"
- "@
- vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fnmsub_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
- (fma:FMAMODE
- (neg:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
- (neg:FMAMODE
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
- "TARGET_FMA"
- "@
- vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fmaddsub_<mode>"
- [(set (match_operand:VF 0 "register_operand" "=x,x,x")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
- (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA"
- "@
- vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fmsubadd_<mode>"
- [(set (match_operand:VF 0 "register_operand" "=x,x,x")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
- (neg:VF
- (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA"
- "@
- vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
;; Parallel single-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;