; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M
; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON
; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE

; Cost-model expectations for the llvm.vector.reduce.umax intrinsic, printed
; for every cost kind, under three target configurations: thumbv8m.main,
; armv8a-linux-gnueabihf, and armv8.1m.main with +mve. Each function below
; covers one element type over a range of vector lengths. When all four cost
; kinds agree the printer emits a single number; otherwise it lists
; RThru/CodeSize/Lat/SizeLat separately.

target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"

; i64 elements, vector lengths 1 through 16.
define i32 @reduce_i64(i32 %arg) {
; V8M-LABEL: 'reduce_i64'
; V8M-NEXT:  Cost Model: Found costs of 2 for: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
; V8M-NEXT:  Cost Model: Found costs of RThru:13 CodeSize:14 Lat:13 SizeLat:13 for: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
; V8M-NEXT:  Cost Model: Found costs of RThru:35 CodeSize:38 Lat:35 SizeLat:35 for: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
; V8M-NEXT:  Cost Model: Found costs of RThru:79 CodeSize:86 Lat:79 SizeLat:79 for: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
; V8M-NEXT:  Cost Model: Found costs of RThru:167 CodeSize:182 Lat:167 SizeLat:167 for: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
;
; NEON-LABEL: 'reduce_i64'
; NEON-NEXT:  Cost Model: Found costs of 3 for: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
; NEON-NEXT:  Cost Model: Found costs of 17 for: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
; NEON-NEXT:  Cost Model: Found costs of 31 for: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
; NEON-NEXT:  Cost Model: Found costs of 76 for: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
; NEON-NEXT:  Cost Model: Found costs of 178 for: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; MVE-LABEL: 'reduce_i64'
; MVE-NEXT:  Cost Model: Found costs of 8 for: %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
; MVE-NEXT:  Cost Model: Found costs of RThru:110 CodeSize:59 Lat:110 SizeLat:110 for: %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
; MVE-NEXT:  Cost Model: Found costs of RThru:212 CodeSize:110 Lat:212 SizeLat:212 for: %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
; MVE-NEXT:  Cost Model: Found costs of RThru:416 CodeSize:212 Lat:416 SizeLat:416 for: %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
; MVE-NEXT:  Cost Model: Found costs of RThru:824 CodeSize:416 Lat:824 SizeLat:824 for: %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
  %V1 = call i64 @llvm.vector.reduce.umax.v1i64(<1 x i64> undef)
  %V2 = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> undef)
  %V4 = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> undef)
  %V8 = call i64 @llvm.vector.reduce.umax.v8i64(<8 x i64> undef)
  %V16 = call i64 @llvm.vector.reduce.umax.v16i64(<16 x i64> undef)
  ret i32 undef
}

; i32 elements, vector lengths 2 through 32.
define i32 @reduce_i32(i32 %arg) {
; V8M-LABEL: 'reduce_i32'
; V8M-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:8 Lat:7 SizeLat:7 for: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
; V8M-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:22 Lat:19 SizeLat:19 for: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
; V8M-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:50 Lat:43 SizeLat:43 for: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
; V8M-NEXT:  Cost Model: Found costs of RThru:91 CodeSize:106 Lat:91 SizeLat:91 for: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
; V8M-NEXT:  Cost Model: Found costs of RThru:187 CodeSize:218 Lat:187 SizeLat:187 for: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
;
; NEON-LABEL: 'reduce_i32'
; NEON-NEXT:  Cost Model: Found costs of 16 for: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
; NEON-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
; NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
; NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
; NEON-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; MVE-LABEL: 'reduce_i32'
; MVE-NEXT:  Cost Model: Found costs of RThru:58 CodeSize:31 Lat:58 SizeLat:58 for: %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
; MVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
; MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
  %V2 = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> undef)
  %V4 = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> undef)
  %V8 = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> undef)
  %V16 = call i32 @llvm.vector.reduce.umax.v16i32(<16 x i32> undef)
  %V32 = call i32 @llvm.vector.reduce.umax.v32i32(<32 x i32> undef)
  ret i32 undef
}

; i16 elements, vector lengths 2 through 64.
define i32 @reduce_i16(i32 %arg) {
; V8M-LABEL: 'reduce_i16'
; V8M-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:8 Lat:7 SizeLat:7 for: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
; V8M-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:22 Lat:19 SizeLat:19 for: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
; V8M-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:50 Lat:43 SizeLat:43 for: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
; V8M-NEXT:  Cost Model: Found costs of RThru:91 CodeSize:106 Lat:91 SizeLat:91 for: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
; V8M-NEXT:  Cost Model: Found costs of RThru:187 CodeSize:218 Lat:187 SizeLat:187 for: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
; V8M-NEXT:  Cost Model: Found costs of RThru:379 CodeSize:442 Lat:379 SizeLat:379 for: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
;
; NEON-LABEL: 'reduce_i16'
; NEON-NEXT:  Cost Model: Found costs of 16 for: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
; NEON-NEXT:  Cost Model: Found costs of 53 for: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
; NEON-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:6 SizeLat:6 for: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
; NEON-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
; NEON-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:12 Lat:24 SizeLat:24 for: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
; NEON-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:24 Lat:48 SizeLat:48 for: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; MVE-LABEL: 'reduce_i16'
; MVE-NEXT:  Cost Model: Found costs of RThru:58 CodeSize:31 Lat:58 SizeLat:58 for: %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
; MVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:6 SizeLat:6 for: %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
; MVE-NEXT:  Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
; MVE-NEXT:  Cost Model: Found costs of RThru:24 CodeSize:12 Lat:24 SizeLat:24 for: %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
; MVE-NEXT:  Cost Model: Found costs of RThru:48 CodeSize:24 Lat:48 SizeLat:48 for: %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
  %V2 = call i16 @llvm.vector.reduce.umax.v2i16(<2 x i16> undef)
  %V4 = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> undef)
  %V8 = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> undef)
  %V16 = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> undef)
  %V32 = call i16 @llvm.vector.reduce.umax.v32i16(<32 x i16> undef)
  %V64 = call i16 @llvm.vector.reduce.umax.v64i16(<64 x i16> undef)
  ret i32 undef
}

; i8 elements, vector lengths 2 through 128.
define i32 @reduce_i8(i32 %arg) {
; V8M-LABEL: 'reduce_i8'
; V8M-NEXT:  Cost Model: Found costs of RThru:7 CodeSize:8 Lat:7 SizeLat:7 for: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
; V8M-NEXT:  Cost Model: Found costs of RThru:19 CodeSize:22 Lat:19 SizeLat:19 for: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
; V8M-NEXT:  Cost Model: Found costs of RThru:43 CodeSize:50 Lat:43 SizeLat:43 for: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
; V8M-NEXT:  Cost Model: Found costs of RThru:91 CodeSize:106 Lat:91 SizeLat:91 for: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
; V8M-NEXT:  Cost Model: Found costs of RThru:187 CodeSize:218 Lat:187 SizeLat:187 for: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
; V8M-NEXT:  Cost Model: Found costs of RThru:379 CodeSize:442 Lat:379 SizeLat:379 for: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
; V8M-NEXT:  Cost Model: Found costs of RThru:763 CodeSize:890 Lat:763 SizeLat:763 for: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
; V8M-NEXT:  Cost Model: Found costs of 1 for: ret i32 undef
;
; NEON-LABEL: 'reduce_i8'
; NEON-NEXT:  Cost Model: Found costs of 16 for: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
; NEON-NEXT:  Cost Model: Found costs of 53 for: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
; NEON-NEXT:  Cost Model: Found costs of 150 for: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
; NEON-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
; NEON-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
; NEON-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
; NEON-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
; NEON-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
; MVE-LABEL: 'reduce_i8'
; MVE-NEXT:  Cost Model: Found costs of RThru:58 CodeSize:31 Lat:58 SizeLat:58 for: %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
; MVE-NEXT:  Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
; MVE-NEXT:  Cost Model: Found costs of RThru:6 CodeSize:3 Lat:6 SizeLat:6 for: %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
; MVE-NEXT:  Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
; MVE-NEXT:  Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
; MVE-NEXT:  Cost Model: Found costs of RThru:32 CodeSize:16 Lat:32 SizeLat:32 for: %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
; MVE-NEXT:  Cost Model: Found costs of RThru:64 CodeSize:32 Lat:64 SizeLat:64 for: %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
; MVE-NEXT:  Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
;
  %V2 = call i8 @llvm.vector.reduce.umax.v2i8(<2 x i8> undef)
  %V4 = call i8 @llvm.vector.reduce.umax.v4i8(<4 x i8> undef)
  %V8 = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> undef)
  %V16 = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> undef)
  %V32 = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> undef)
  %V64 = call i8 @llvm.vector.reduce.umax.v64i8(<64 x i8> undef)
  %V128 = call i8 @llvm.vector.reduce.umax.v128i8(<128 x i8> undef)
  ret i32 undef
}