aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/aarch64/aarch64-simd.md
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/aarch64/aarch64-simd.md')
-rw-r--r--gcc/config/aarch64/aarch64-simd.md81
1 files changed, 81 insertions, 0 deletions
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 12da8be..49f615c 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -393,6 +393,87 @@
}
)
+;; These instructions map to the __builtins for the Dot Product operations.
+(define_insn "aarch64_<sur>dot<vsi2qi>"
+ [(set (match_operand:VS 0 "register_operand" "=w")
+ (plus:VS (match_operand:VS 1 "register_operand" "0")
+ (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
+ (match_operand:<VSI2QI> 3 "register_operand" "w")]
+ DOTPROD)))]
+ "TARGET_DOTPROD"
+ "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
+ [(set_attr "type" "neon_dot")]
+)
+
+;; These expands map to the Dot Product optab the vectorizer checks for.
+;; The auto-vectorizer expects a dot product builtin that also does an
+;; accumulation into the provided register.
+;; Given the following pattern
+;;
+;; for (i=0; i<len; i++) {
+;; c = a[i] * b[i];
+;; r += c;
+;; }
+;; return result;
+;;
+;; This can be auto-vectorized to
+;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
+;;
+;; given enough iterations. However the vectorizer can keep unrolling the loop
+;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
+;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
+;; ...
+;;
+;; and so the vectorizer provides r, in which the result has to be accumulated.
+(define_expand "<sur>dot_prod<vsi2qi>"
+ [(set (match_operand:VS 0 "register_operand")
+ (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
+ (match_operand:<VSI2QI> 2 "register_operand")]
+ DOTPROD)
+ (match_operand:VS 3 "register_operand")))]
+ "TARGET_DOTPROD"
+{
+ emit_insn (
+ gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
+ operands[2]));
+ emit_insn (gen_rtx_SET (operands[0], operands[3]));
+ DONE;
+})
+
+;; These instructions map to the __builtins for the Dot Product
+;; indexed operations.
+(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
+ [(set (match_operand:VS 0 "register_operand" "=w")
+ (plus:VS (match_operand:VS 1 "register_operand" "0")
+ (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
+ (match_operand:V8QI 3 "register_operand" "<h_con>")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ DOTPROD)))]
+ "TARGET_DOTPROD"
+ {
+ operands[4]
+ = GEN_INT (ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
+ return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
+ }
+ [(set_attr "type" "neon_dot")]
+)
+
+(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
+ [(set (match_operand:VS 0 "register_operand" "=w")
+ (plus:VS (match_operand:VS 1 "register_operand" "0")
+ (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
+ (match_operand:V16QI 3 "register_operand" "<h_con>")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ DOTPROD)))]
+ "TARGET_DOTPROD"
+ {
+ operands[4]
+ = GEN_INT (ENDIAN_LANE_N (V16QImode, INTVAL (operands[4])));
+ return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
+ }
+ [(set_attr "type" "neon_dot")]
+)
+
(define_expand "copysign<mode>3"
[(match_operand:VHSDF 0 "register_operand")
(match_operand:VHSDF 1 "register_operand")