diff options
author | Uros Bizjak <ubizjak@gmail.com> | 2012-08-10 22:46:15 +0200 |
---|---|---|
committer | Uros Bizjak <uros@gcc.gnu.org> | 2012-08-10 22:46:15 +0200 |
commit | b0d5396c7ea5e8ff6952598475244fcbd3d1276e (patch) | |
tree | 582176ee20551cd15e50bf1ca96e53168a2f7b29 /gcc | |
parent | a2a40ee85be27a37cb84341fb21b6abc497be175 (diff) | |
download | gcc-b0d5396c7ea5e8ff6952598475244fcbd3d1276e.zip gcc-b0d5396c7ea5e8ff6952598475244fcbd3d1276e.tar.gz gcc-b0d5396c7ea5e8ff6952598475244fcbd3d1276e.tar.bz2 |
* config/i386/sse.md (*fma_fmadd_<mode>, *fma_fmsub_<mode>,
*fma_fnmadd_<mode>, *fma_fnmsub_<mode>, *fma_fmaddsub_<mode>,
*fma_fmsubadd_<mode>): Move FMA3 insn patterns before FMA4 patterns.
From-SVN: r190304
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/ChangeLog | 6 |
-rw-r--r-- | gcc/config/i386/sse.md | 340 |
2 files changed, 174 insertions, 172 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6fe9b36..1620bc8 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,5 +1,11 @@ 2012-08-10 Uros Bizjak <ubizjak@gmail.com> + * config/i386/sse.md (*fma_fmadd_<mode>, *fma_fmsub_<mode>, + *fma_fnmadd_<mode>, *fma_fnmsub_<mode>, *fma_fmaddsub_<mode>, + *fma_fmsubadd_<mode>): Move FMA3 insn patterns before FMA4 patterns. + +2012-08-10 Uros Bizjak <ubizjak@gmail.com> + * config/i386/i386.md (simple LEA peephole2s): Add zero-extend variants of PLUS and MULT simple LEA patterns. Disable PLUS patterns for TARGET_OPT_AGU. diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 532ebdd..641a3ba 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -403,8 +403,6 @@ ;; Mix-n-match (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF]) -(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF]) - ;; Mapping of immediate bits for blend instructions (define_mode_attr blendbits [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")]) @@ -1886,12 +1884,13 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; -;; FMA4 floating point multiply/accumulate instructions. This -;; includes the scalar version of the instructions as well as the -;; vector. +;; FMA floating point multiply/accumulate instructions. These include +;; scalar versions of the instructions as well as vector versions. ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF]) + ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow ;; combine to generate a multiply/add with two memory references. We then ;; split this insn, into loading up the destination register with one of the @@ -1907,8 +1906,6 @@ ;; We could now properly represent that only one memory operand is ;; allowed and not be penalized during optimization. -;; Intrinsic FMA operations. - ;; The standard names for fma is only available with SSE math enabled. 
(define_expand "fma<mode>4" [(set (match_operand:FMAMODE 0 "register_operand") @@ -1942,7 +1939,7 @@ (neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))] "(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH") -;; The builtin for fma4intrin.h is not constrained by SSE math enabled. +;; The builtin for intrinsics is not constrained by SSE math enabled. (define_expand "fma4i_fmadd_<mode>" [(set (match_operand:FMAMODE 0 "register_operand") (fma:FMAMODE @@ -1951,7 +1948,71 @@ (match_operand:FMAMODE 3 "nonimmediate_operand")))] "TARGET_FMA || TARGET_FMA4") -(define_insn "*fma4i_fmadd_<mode>" +;; FMA3 version + +(define_insn "*fma_fmadd_<mode>" + [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") + (fma:FMAMODE + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x") + (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") + (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))] + "TARGET_FMA" + "@ + vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} + vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} + vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "*fma_fmsub_<mode>" + [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") + (fma:FMAMODE + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x") + (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") + (neg:FMAMODE + (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))] + "TARGET_FMA" + "@ + vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} + vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} + vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "*fma_fnmadd_<mode>" + [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") + (fma:FMAMODE + (neg:FMAMODE + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")) + (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") + (match_operand:FMAMODE 3 
"nonimmediate_operand" " x,xm,0")))] + "TARGET_FMA" + "@ + vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} + vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} + vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "*fma_fnmsub_<mode>" + [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") + (fma:FMAMODE + (neg:FMAMODE + (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")) + (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") + (neg:FMAMODE + (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))] + "TARGET_FMA" + "@ + vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} + vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} + vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +;; FMA4 version + +(define_insn "*fma4_fmadd_<mode>" [(set (match_operand:FMAMODE 0 "register_operand" "=x,x") (fma:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x") @@ -1962,7 +2023,7 @@ [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "*fma4i_fmsub_<mode>" +(define_insn "*fma4_fmsub_<mode>" [(set (match_operand:FMAMODE 0 "register_operand" "=x,x") (fma:FMAMODE (match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x") @@ -1974,7 +2035,7 @@ [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "*fma4i_fnmadd_<mode>" +(define_insn "*fma4_fnmadd_<mode>" [(set (match_operand:FMAMODE 0 "register_operand" "=x,x") (fma:FMAMODE (neg:FMAMODE @@ -1986,7 +2047,7 @@ [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -(define_insn "*fma4i_fnmsub_<mode>" +(define_insn "*fma4_fnmsub_<mode>" [(set (match_operand:FMAMODE 0 "register_operand" "=x,x") (fma:FMAMODE (neg:FMAMODE @@ -1999,22 +2060,88 @@ [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) -;; Scalar versions of the above. 
Unlike ADDSS et al, these write the -;; entire destination register, with the high-order elements zeroed. +;; FMA parallel floating point multiply addsub and subadd operations. -(define_expand "fma4i_vmfmadd_<mode>" - [(set (match_operand:VF_128 0 "register_operand") - (vec_merge:VF_128 - (fma:VF_128 - (match_operand:VF_128 1 "nonimmediate_operand") - (match_operand:VF_128 2 "nonimmediate_operand") - (match_operand:VF_128 3 "nonimmediate_operand")) - (match_dup 4) - (const_int 1)))] +;; It would be possible to represent these without the UNSPEC as +;; +;; (vec_merge +;; (fma op1 op2 op3) +;; (fma op1 op2 (neg op3)) +;; (merge-const)) +;; +;; But this doesn't seem useful in practice. + +(define_expand "fmaddsub_<mode>" + [(set (match_operand:VF 0 "register_operand") + (unspec:VF + [(match_operand:VF 1 "nonimmediate_operand") + (match_operand:VF 2 "nonimmediate_operand") + (match_operand:VF 3 "nonimmediate_operand")] + UNSPEC_FMADDSUB))] + "TARGET_FMA || TARGET_FMA4") + +;; FMA3 version + +(define_insn "*fma_fmaddsub_<mode>" + [(set (match_operand:VF 0 "register_operand" "=x,x,x") + (unspec:VF + [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x") + (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm") + (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")] + UNSPEC_FMADDSUB))] + "TARGET_FMA" + "@ + vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} + vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} + vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "*fma_fmsubadd_<mode>" + [(set (match_operand:VF 0 "register_operand" "=x,x,x") + (unspec:VF + [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x") + (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm") + (neg:VF + (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))] + UNSPEC_FMADDSUB))] + "TARGET_FMA" + "@ + vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} + vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} + 
vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +;; FMA4 version + +(define_insn "*fma4_fmaddsub_<mode>" + [(set (match_operand:VF 0 "register_operand" "=x,x") + (unspec:VF + [(match_operand:VF 1 "nonimmediate_operand" "%x,x") + (match_operand:VF 2 "nonimmediate_operand" " x,m") + (match_operand:VF 3 "nonimmediate_operand" "xm,x")] + UNSPEC_FMADDSUB))] "TARGET_FMA4" -{ - operands[4] = CONST0_RTX (<MODE>mode); -}) + "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +(define_insn "*fma4_fmsubadd_<mode>" + [(set (match_operand:VF 0 "register_operand" "=x,x") + (unspec:VF + [(match_operand:VF 1 "nonimmediate_operand" "%x,x") + (match_operand:VF 2 "nonimmediate_operand" " x,m") + (neg:VF + (match_operand:VF 3 "nonimmediate_operand" "xm,x"))] + UNSPEC_FMADDSUB))] + "TARGET_FMA4" + "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "mode" "<MODE>")]) + +;; FMA3 floating point scalar intrinsics. These merge result with +;; high-order elements from the destination register. (define_expand "fmai_vmfmadd_<mode>" [(set (match_operand:VF_128 0 "register_operand") @@ -2099,6 +2226,21 @@ [(set_attr "type" "ssemuladd") (set_attr "mode" "<MODE>")]) +;; FMA4 floating point scalar intrinsics. These write the +;; entire destination register, with the high-order elements zeroed. 
+ +(define_expand "fma4i_vmfmadd_<mode>" + [(set (match_operand:VF_128 0 "register_operand") + (vec_merge:VF_128 + (fma:VF_128 + (match_operand:VF_128 1 "nonimmediate_operand") + (match_operand:VF_128 2 "nonimmediate_operand") + (match_operand:VF_128 3 "nonimmediate_operand")) + (match_dup 4) + (const_int 1)))] + "TARGET_FMA4" + "operands[4] = CONST0_RTX (<MODE>mode);") + (define_insn "*fma4i_vmfmadd_<mode>" [(set (match_operand:VF_128 0 "register_operand" "=x,x") (vec_merge:VF_128 @@ -2161,152 +2303,6 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; -;; FMA4 Parallel floating point multiply addsub and subadd operations. -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;; It would be possible to represent these without the UNSPEC as -;; -;; (vec_merge -;; (fma op1 op2 op3) -;; (fma op1 op2 (neg op3)) -;; (merge-const)) -;; -;; But this doesn't seem useful in practice. - -(define_expand "fmaddsub_<mode>" - [(set (match_operand:VF 0 "register_operand") - (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand") - (match_operand:VF 2 "nonimmediate_operand") - (match_operand:VF 3 "nonimmediate_operand")] - UNSPEC_FMADDSUB))] - "TARGET_FMA || TARGET_FMA4") - -(define_insn "*fma4_fmaddsub_<mode>" - [(set (match_operand:VF 0 "register_operand" "=x,x") - (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "%x,x") - (match_operand:VF 2 "nonimmediate_operand" " x,m") - (match_operand:VF 3 "nonimmediate_operand" "xm,x")] - UNSPEC_FMADDSUB))] - "TARGET_FMA4" - "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -(define_insn "*fma4_fmsubadd_<mode>" - [(set (match_operand:VF 0 "register_operand" "=x,x") - (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "%x,x") - (match_operand:VF 2 "nonimmediate_operand" " x,m") - (neg:VF - (match_operand:VF 3 "nonimmediate_operand" "xm,x"))] - UNSPEC_FMADDSUB))] - "TARGET_FMA4" - 
"vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; -;; FMA3 floating point multiply/accumulate instructions. -;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(define_insn "*fma_fmadd_<mode>" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") - (fma:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x") - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))] - "TARGET_FMA" - "@ - vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} - vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} - vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -(define_insn "*fma_fmsub_<mode>" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") - (fma:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x") - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") - (neg:FMAMODE - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))] - "TARGET_FMA" - "@ - vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} - vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} - vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -(define_insn "*fma_fnmadd_<mode>" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") - (fma:FMAMODE - (neg:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")) - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))] - "TARGET_FMA" - "@ - vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} - vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} - vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - 
-(define_insn "*fma_fnmsub_<mode>" - [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x") - (fma:FMAMODE - (neg:FMAMODE - (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")) - (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm") - (neg:FMAMODE - (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))] - "TARGET_FMA" - "@ - vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} - vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} - vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -(define_insn "*fma_fmaddsub_<mode>" - [(set (match_operand:VF 0 "register_operand" "=x,x,x") - (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x") - (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm") - (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")] - UNSPEC_FMADDSUB))] - "TARGET_FMA" - "@ - vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} - vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} - vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -(define_insn "*fma_fmsubadd_<mode>" - [(set (match_operand:VF 0 "register_operand" "=x,x,x") - (unspec:VF - [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x") - (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm") - (neg:VF - (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))] - UNSPEC_FMADDSUB))] - "TARGET_FMA" - "@ - vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2} - vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3} - vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "ssemuladd") - (set_attr "mode" "<MODE>")]) - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ;; Parallel single-precision floating point conversion operations ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |