diff options
Diffstat (limited to 'gcc/config/aarch64/aarch64-simd.md')
-rw-r--r-- | gcc/config/aarch64/aarch64-simd.md | 81 |
1 files changed, 81 insertions, 0 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 12da8be..49f615c 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -393,6 +393,87 @@
 }
 )
 
+;; These instructions map to the __builtins for the Dot Product operations.
+;; Operand 1 is the accumulator; its constraint ties it to the output register.
+(define_insn "aarch64_<sur>dot<vsi2qi>"
+  [(set (match_operand:VS 0 "register_operand" "=w")
+        (plus:VS (match_operand:VS 1 "register_operand" "0")
+                (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
+                            (match_operand:<VSI2QI> 3 "register_operand" "w")]
+                DOTPROD)))]
+  "TARGET_DOTPROD"
+  "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
+  [(set_attr "type" "neon_dot")])
+
+;; These expands map to the Dot Product optab the vectorizer checks for.
+;; The auto-vectorizer expects a dot product builtin that also adds the
+;; product to an accumulator it provides (operand 3).
+;; Given the following pattern
+;;
+;; for (i=0; i<len; i++) {
+;;     c = a[i] * b[i];
+;;     r += c;
+;; }
+;; return r;
+;;
+;; This can be auto-vectorized to
+;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
+;;
+;; given enough iterations.  However the vectorizer can keep unrolling the loop
+;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
+;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
+;; ...
+;;
+;; so the vectorizer passes r as operand 3 and reads the sum from operand 0.
+(define_expand "<sur>dot_prod<vsi2qi>"
+  [(set (match_operand:VS 0 "register_operand")
+        (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
+                            (match_operand:<VSI2QI> 2 "register_operand")]
+                 DOTPROD)
+                (match_operand:VS 3 "register_operand")))]
+  "TARGET_DOTPROD"
+{
+  /* Emit into operands[0] and leave the input accumulator operands[3]
+     untouched; the insn's matching constraint ties the two registers.  */
+  emit_insn (gen_aarch64_<sur>dot<vsi2qi> (operands[0], operands[3],
+                                            operands[1], operands[2]));
+  DONE;
+})
+
+;; These instructions map to the __builtins for the Dot Product
+;; indexed operations.
+(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
+  [(set (match_operand:VS 0 "register_operand" "=w")
+        (plus:VS (match_operand:VS 1 "register_operand" "0")
+                (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
+                            (match_operand:V8QI 3 "register_operand" "<h_con>")
+                            (match_operand:SI 4 "immediate_operand" "i")]
+                DOTPROD)))]
+  "TARGET_DOTPROD"
+  {
+    /* Remap lane operand 4 with ENDIAN_LANE_N (V8QImode) before printing it.
+       NOTE(review): .4b[%4] indexes 4-byte groups, not bytes; verify.  */
+    operands[4] = GEN_INT (ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
+    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
+  }
+  [(set_attr "type" "neon_dot")])
+
+(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
+  [(set (match_operand:VS 0 "register_operand" "=w")
+        (plus:VS (match_operand:VS 1 "register_operand" "0")
+                (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
+                            (match_operand:V16QI 3 "register_operand" "<h_con>")
+                            (match_operand:SI 4 "immediate_operand" "i")]
+                DOTPROD)))]
+  "TARGET_DOTPROD"
+  {
+    /* Same as the lane form, but operand 3 is V16QI and so is the remap mode.
+       NOTE(review): .4b[%4] indexes 4-byte groups, not bytes; verify.  */
+    operands[4] = GEN_INT (ENDIAN_LANE_N (V16QImode, INTVAL (operands[4])));
+    return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
+  }
+  [(set_attr "type" "neon_dot")])
+
 (define_expand "copysign<mode>3"
   [(match_operand:VHSDF 0 "register_operand")
    (match_operand:VHSDF 1 "register_operand") |