diff options
Diffstat (limited to 'llvm/test')
3 files changed, 208 insertions, 351 deletions
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll index d4cc154..52ca22b 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll @@ -1,38 +1,24 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,GENERIC -; RUN: llc < %s -O0 -fast-isel -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,FAST -; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* \ -; RUN: -mtriple=arm64-eabi -aarch64-neon-syntax=apple \ -; RUN: | FileCheck %s --check-prefixes=GISEL,FALLBACK +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -O0 -fast-isel | FileCheck %s --check-prefixes=CHECK,CHECK-FI +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for test_vcvt_bf16_f64 -; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_f64_f32) -; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_f64_f32) define <2 x double> @test_vcvt_f64_f32(<2 x float> %x) nounwind readnone ssp { ; CHECK-LABEL: test_vcvt_f64_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtl v0.2d, v0.2s ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_f64_f32: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl v0.2d, v0.2s -; GISEL-NEXT: ret %vcvt1.i = fpext <2 x float> %x to <2 x double> ret <2 x double> %vcvt1.i } -; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_high_f64_f32) -; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_high_f64_f32) define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %x) nounwind readnone ssp { ; CHECK-LABEL: test_vcvt_high_f64_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtl2 v0.2d, v0.4s ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_high_f64_f32: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.2d, v0.4s -; GISEL-NEXT: ret %cvt_in = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 2, i32 3> %vcvt1.i = fpext <2 x float> %cvt_in to <2 x double> ret <2 x double> %vcvt1.i @@ -43,11 +29,6 @@ define <2 x double> @test_vcvt_high_v1f64_f32_bitcast(<4 x float> %x) nounwind r ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtl2 v0.2d, v0.4s ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_high_v1f64_f32_bitcast: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.2d, v0.4s -; GISEL-NEXT: ret %bc1 = bitcast <4 x float> %x to <2 x double> %ext = shufflevector <2 x double> %bc1, <2 x double> undef, <1 x i32> <i32 1> %bc2 = bitcast <1 x double> %ext to <2 x float> @@ -60,11 +41,6 @@ define <2 x double> @test_vcvt_high_v1i64_f32_bitcast(<2 x i64> %x) nounwind rea ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtl2 v0.2d, v0.4s ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_high_v1i64_f32_bitcast: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.2d, v0.4s -; GISEL-NEXT: ret %ext = shufflevector <2 x i64> %x, <2 x i64> undef, <1 x i32> <i32 1> %bc2 = bitcast <1 x i64> %ext to <2 x float> %r = fpext <2 x float> %bc2 to <2 x double> @@ -76,11 +52,6 @@ define <2 x double> @test_vcvt_high_v2i32_f32_bitcast(<4 x i32> %x) nounwind rea ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtl2 v0.2d, v0.4s ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_high_v2i32_f32_bitcast: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.2d, v0.4s -; GISEL-NEXT: ret %ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3> %bc2 = bitcast <2 x i32> %ext to <2 x float> %r = fpext <2 x float> %bc2 to <2 x double> @@ -92,11 +63,6 @@ define <2 x double> @test_vcvt_high_v4i16_f32_bitcast(<8 x i16> %x) nounwind rea ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtl2 v0.2d, v0.4s ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_high_v4i16_f32_bitcast: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.2d, v0.4s -; GISEL-NEXT: ret %ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %bc2 = bitcast <4 x i16> %ext to <2 x float> %r = fpext <2 x float> %bc2 to <2 x double> @@ -108,11 +74,6 @@ define <2 x double> @test_vcvt_high_v8i8_f32_bitcast(<16 x i8> %x) nounwind read ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtl2 v0.2d, v0.4s ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_high_v8i8_f32_bitcast: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.2d, v0.4s -; GISEL-NEXT: ret %ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> %bc2 = bitcast <8 x i8> %ext to <2 x float> %r = fpext <2 x float> %bc2 to <2 x double> @@ -124,11 +85,6 @@ define <4 x float> @test_vcvt_high_v1i64_f16_bitcast(<2 x i64> %x) nounwind read ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_high_v1i64_f16_bitcast: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.4s, v0.8h -; GISEL-NEXT: ret %ext = shufflevector <2 x i64> %x, <2 x i64> undef, <1 x i32> <i32 1> %bc2 = bitcast <1 x i64> %ext to <4 x half> %r = fpext <4 x half> %bc2 to <4 x float> @@ -140,11 +96,6 @@ define <4 x float> @test_vcvt_high_v2i32_f16_bitcast(<4 x i32> %x) nounwind read ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_high_v2i32_f16_bitcast: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.4s, v0.8h -; GISEL-NEXT: ret %ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3> %bc2 = bitcast <2 x i32> %ext to <4 x half> %r = fpext <4 x half> %bc2 to <4 x float> @@ -156,11 +107,6 @@ define <4 x float> @test_vcvt_high_v4i16_f16_bitcast(<8 x i16> %x) nounwind read ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_high_v4i16_f16_bitcast: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.4s, v0.8h -; GISEL-NEXT: ret %ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %bc2 = bitcast <4 x i16> %ext to <4 x half> %r = fpext <4 x half> %bc2 to <4 x float> @@ -172,134 +118,118 @@ define <4 x float> @test_vcvt_high_v8i8_f16_bitcast(<16 x i8> %x) nounwind readn ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_high_v8i8_f16_bitcast: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.4s, v0.8h -; GISEL-NEXT: ret %ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> %bc2 = bitcast <8 x i8> %ext to <4 x half> %r = fpext <4 x half> %bc2 to <4 x float> ret <4 x float> %r } -; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_f32_f64) -; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_f32_f64) define <2 x float> @test_vcvt_f32_f64(<2 x double> %v) nounwind readnone ssp { ; CHECK-LABEL: test_vcvt_f32_f64: ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtn v0.2s, v0.2d ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_f32_f64: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtn v0.2s, v0.2d -; GISEL-NEXT: ret %vcvt1.i = fptrunc <2 x double> %v to <2 x float> ret <2 x float> %vcvt1.i } -; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_bf16_f64) -; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_bf16_f64) define <2 x bfloat> @test_vcvt_bf16_f64(<2 x double> %v) nounwind readnone ssp { -; GENERIC-LABEL: test_vcvt_bf16_f64: -; GENERIC: // %bb.0: -; GENERIC-NEXT: fcvtxn v0.2s, v0.2d -; GENERIC-NEXT: movi.4s v1, #1 -; GENERIC-NEXT: movi.4s v2, #127, msl #8 -; GENERIC-NEXT: ushr.4s v3, v0, #16 -; GENERIC-NEXT: add.4s v2, v0, v2 -; GENERIC-NEXT: and.16b v1, v3, v1 -; GENERIC-NEXT: fcmeq.4s v3, v0, v0 -; GENERIC-NEXT: orr.4s v0, #64, lsl #16 -; GENERIC-NEXT: add.4s v1, v1, v2 -; GENERIC-NEXT: bit.16b v0, v1, v3 -; GENERIC-NEXT: shrn.4h v0, v0, #16 -; GENERIC-NEXT: ret +; CHECK-SD-LABEL: test_vcvt_bf16_f64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtxn v0.2s, v0.2d +; CHECK-SD-NEXT: movi.4s v1, #1 +; CHECK-SD-NEXT: movi.4s v2, #127, msl #8 +; CHECK-SD-NEXT: ushr.4s v3, v0, #16 +; CHECK-SD-NEXT: add.4s v2, v0, v2 +; CHECK-SD-NEXT: and.16b v1, v3, v1 +; CHECK-SD-NEXT: fcmeq.4s v3, v0, v0 +; CHECK-SD-NEXT: orr.4s v0, #64, lsl #16 +; CHECK-SD-NEXT: add.4s v1, v1, v2 +; CHECK-SD-NEXT: bit.16b v0, v1, v3 +; CHECK-SD-NEXT: shrn.4h v0, v0, #16 +; CHECK-SD-NEXT: ret ; -; FAST-LABEL: test_vcvt_bf16_f64: -; FAST: // %bb.0: -; FAST-NEXT: fcvtxn v1.2s, v0.2d -; FAST-NEXT: // implicit-def: $q0 -; FAST-NEXT: fmov d0, d1 -; FAST-NEXT: ushr.4s v1, v0, #16 -; FAST-NEXT: movi.4s v2, #1 -; FAST-NEXT: and.16b v1, v1, v2 -; FAST-NEXT: add.4s v1, v1, v0 -; FAST-NEXT: movi.4s v2, #127, msl #8 -; FAST-NEXT: add.4s v1, v1, v2 -; FAST-NEXT: mov.16b v2, v0 -; FAST-NEXT: orr.4s v2, #64, lsl #16 -; FAST-NEXT: fcmeq.4s v0, v0, v0 -; FAST-NEXT: bsl.16b v0, v1, v2 -; FAST-NEXT: shrn.4h v0, v0, #16 -; FAST-NEXT: ret +; CHECK-FI-LABEL: test_vcvt_bf16_f64: +; CHECK-FI: // %bb.0: +; CHECK-FI-NEXT: fcvtxn v1.2s, v0.2d +; CHECK-FI-NEXT: // implicit-def: $q0 +; CHECK-FI-NEXT: fmov d0, d1 +; CHECK-FI-NEXT: ushr.4s v1, v0, #16 +; CHECK-FI-NEXT: movi.4s v2, #1 +; CHECK-FI-NEXT: and.16b v1, v1, v2 +; CHECK-FI-NEXT: add.4s v1, v1, v0 +; CHECK-FI-NEXT: movi.4s v2, #127, msl #8 +; CHECK-FI-NEXT: add.4s v1, v1, v2 +; CHECK-FI-NEXT: mov.16b v2, v0 +; CHECK-FI-NEXT: orr.4s v2, #64, lsl #16 +; CHECK-FI-NEXT: fcmeq.4s v0, v0, v0 +; CHECK-FI-NEXT: bsl.16b v0, v1, v2 +; CHECK-FI-NEXT: shrn.4h v0, v0, #16 +; CHECK-FI-NEXT: ret ; -; GISEL-LABEL: test_vcvt_bf16_f64: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtxn v0.2s, v0.2d -; GISEL-NEXT: movi.4s v1, #1 -; GISEL-NEXT: movi.4s v2, #127, msl #8 -; GISEL-NEXT: ushr.4s v3, v0, #16 -; GISEL-NEXT: add.4s v2, v0, v2 -; GISEL-NEXT: and.16b v1, v3, v1 -; GISEL-NEXT: fcmeq.4s v3, v0, v0 -; GISEL-NEXT: orr.4s v0, #64, lsl #16 -; GISEL-NEXT: add.4s v1, v1, v2 -; GISEL-NEXT: bit.16b v0, v1, v3 -; GISEL-NEXT: shrn.4h v0, v0, #16 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: test_vcvt_bf16_f64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcvtxn v0.2s, v0.2d +; CHECK-GI-NEXT: movi.4s v1, #1 +; CHECK-GI-NEXT: movi.4s v2, #127, msl #8 +; CHECK-GI-NEXT: ushr.4s v3, v0, #16 +; CHECK-GI-NEXT: add.4s v2, v0, v2 +; CHECK-GI-NEXT: and.16b v1, v3, v1 +; CHECK-GI-NEXT: fcmeq.4s v3, v0, v0 +; CHECK-GI-NEXT: orr.4s v0, #64, lsl #16 +; CHECK-GI-NEXT: add.4s v1, v1, v2 +; CHECK-GI-NEXT: bit.16b v0, v1, v3 +; CHECK-GI-NEXT: shrn.4h v0, v0, #16 +; CHECK-GI-NEXT: ret %vcvt1.i = fptrunc <2 x double> %v to <2 x bfloat> ret <2 x bfloat> %vcvt1.i } define half @test_vcvt_f16_f32(<1 x float> %x) { -; GENERIC-LABEL: test_vcvt_f16_f32: -; GENERIC: // %bb.0: -; GENERIC-NEXT: // kill: def $d0 killed $d0 def $q0 -; GENERIC-NEXT: fcvt h0, s0 -; GENERIC-NEXT: ret +; CHECK-SD-LABEL: test_vcvt_f16_f32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: fcvt h0, s0 +; CHECK-SD-NEXT: ret ; -; FAST-LABEL: test_vcvt_f16_f32: -; FAST: // %bb.0: -; FAST-NEXT: fmov d1, d0 -; FAST-NEXT: // implicit-def: $q0 -; FAST-NEXT: fmov d0, d1 -; FAST-NEXT: // kill: def $s0 killed $s0 killed $q0 -; FAST-NEXT: fcvt h0, s0 -; FAST-NEXT: ret +; CHECK-FI-LABEL: test_vcvt_f16_f32: +; CHECK-FI: // %bb.0: +; CHECK-FI-NEXT: fmov d1, d0 +; CHECK-FI-NEXT: // implicit-def: $q0 +; CHECK-FI-NEXT: fmov d0, d1 +; CHECK-FI-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-FI-NEXT: fcvt h0, s0 +; CHECK-FI-NEXT: ret ; -; GISEL-LABEL: test_vcvt_f16_f32: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: test_vcvt_f16_f32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcvt h0, s0 +; CHECK-GI-NEXT: ret %tmp = fptrunc <1 x float> %x to <1 x half> %elt = extractelement <1 x half> %tmp, i32 0 ret half %elt } -; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_high_f32_f64) -; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_high_f32_f64) define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp { -; GENERIC-LABEL: test_vcvt_high_f32_f64: -; GENERIC: // %bb.0: -; GENERIC-NEXT: // kill: def $d0 killed $d0 def $q0 -; GENERIC-NEXT: fcvtn2 v0.4s, v1.2d -; GENERIC-NEXT: ret +; CHECK-SD-LABEL: test_vcvt_high_f32_f64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: fcvtn2 v0.4s, v1.2d +; CHECK-SD-NEXT: ret ; -; FAST-LABEL: test_vcvt_high_f32_f64: -; FAST: // %bb.0: -; FAST-NEXT: fmov d2, d0 -; FAST-NEXT: // implicit-def: $q0 -; FAST-NEXT: fmov d0, d2 -; FAST-NEXT: fcvtn2 v0.4s, v1.2d -; FAST-NEXT: ret +; CHECK-FI-LABEL: test_vcvt_high_f32_f64: +; CHECK-FI: // %bb.0: +; CHECK-FI-NEXT: fmov d2, d0 +; CHECK-FI-NEXT: // implicit-def: $q0 +; CHECK-FI-NEXT: fmov d0, d2 +; CHECK-FI-NEXT: fcvtn2 v0.4s, v1.2d +; CHECK-FI-NEXT: ret ; -; GISEL-LABEL: test_vcvt_high_f32_f64: -; GISEL: // %bb.0: -; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 -; GISEL-NEXT: fcvtn2 v0.4s, v1.2d -; GISEL-NEXT: ret +; CHECK-GI-LABEL: test_vcvt_high_f32_f64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: fcvtn2 v0.4s, v1.2d +; CHECK-GI-NEXT: ret %cvt = fptrunc <2 x double> %v to <2 x float> %vcvt2.i = shufflevector <2 x float> %x, <2 x float> %cvt, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ret <4 x float> %vcvt2.i @@ -310,99 +240,80 @@ define <2 x float> @test_vcvtx_f32_f64(<2 x double> %v) nounwind readnone ssp { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtxn v0.2s, v0.2d ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvtx_f32_f64: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtxn v0.2s, v0.2d -; GISEL-NEXT: ret %vcvtx1.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind ret <2 x float> %vcvtx1.i } define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp { -; GENERIC-LABEL: test_vcvtx_high_f32_f64: -; GENERIC: // %bb.0: -; GENERIC-NEXT: // kill: def $d0 killed $d0 def $q0 -; GENERIC-NEXT: fcvtxn2 v0.4s, v1.2d -; GENERIC-NEXT: ret +; CHECK-SD-LABEL: test_vcvtx_high_f32_f64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: fcvtxn2 v0.4s, v1.2d +; CHECK-SD-NEXT: ret ; -; FAST-LABEL: test_vcvtx_high_f32_f64: -; FAST: // %bb.0: -; FAST-NEXT: fmov d2, d0 -; FAST-NEXT: // implicit-def: $q0 -; FAST-NEXT: fmov d0, d2 -; FAST-NEXT: fcvtxn2 v0.4s, v1.2d -; FAST-NEXT: ret +; CHECK-FI-LABEL: test_vcvtx_high_f32_f64: +; CHECK-FI: // %bb.0: +; CHECK-FI-NEXT: fmov d2, d0 +; CHECK-FI-NEXT: // implicit-def: $q0 +; CHECK-FI-NEXT: fmov d0, d2 +; CHECK-FI-NEXT: fcvtxn2 v0.4s, v1.2d +; CHECK-FI-NEXT: ret ; -; GISEL-LABEL: test_vcvtx_high_f32_f64: -; GISEL: // %bb.0: -; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 -; GISEL-NEXT: fcvtxn2 v0.4s, v1.2d -; GISEL-NEXT: ret +; CHECK-GI-LABEL: test_vcvtx_high_f32_f64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: fcvtxn2 v0.4s, v1.2d +; CHECK-GI-NEXT: ret %vcvtx2.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind %res = shufflevector <2 x float> %x, <2 x float> %vcvtx2.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ret <4 x float> %res } - -declare <2 x double> @llvm.aarch64.neon.vcvthighfp2df(<4 x float>) nounwind readnone -declare <2 x double> @llvm.aarch64.neon.vcvtfp2df(<2 x float>) nounwind readnone - -declare <2 x float> @llvm.aarch64.neon.vcvtdf2fp(<2 x double>) nounwind readnone -declare <4 x float> @llvm.aarch64.neon.vcvthighdf2fp(<2 x float>, <2 x double>) nounwind readnone - -declare <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double>) nounwind readnone - define i16 @to_half(float %in) { -; GENERIC-LABEL: to_half: -; GENERIC: // %bb.0: -; GENERIC-NEXT: fcvt h0, s0 -; GENERIC-NEXT: fmov w0, s0 -; GENERIC-NEXT: ret +; CHECK-SD-LABEL: to_half: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvt h0, s0 +; CHECK-SD-NEXT: fmov w0, s0 +; CHECK-SD-NEXT: ret ; -; FAST-LABEL: to_half: -; FAST: // %bb.0: -; FAST-NEXT: fcvt h1, s0 -; FAST-NEXT: // implicit-def: $w0 -; FAST-NEXT: fmov s0, w0 -; FAST-NEXT: fmov s0, s1 -; FAST-NEXT: fmov w0, s0 -; FAST-NEXT: // kill: def $w1 killed $w0 -; FAST-NEXT: ret +; CHECK-FI-LABEL: to_half: +; CHECK-FI: // %bb.0: +; CHECK-FI-NEXT: fcvt h1, s0 +; CHECK-FI-NEXT: // implicit-def: $w0 +; CHECK-FI-NEXT: fmov s0, w0 +; CHECK-FI-NEXT: fmov s0, s1 +; CHECK-FI-NEXT: fmov w0, s0 +; CHECK-FI-NEXT: // kill: def $w1 killed $w0 +; CHECK-FI-NEXT: ret ; -; GISEL-LABEL: to_half: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: fmov w0, s0 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: to_half: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcvt h0, s0 +; CHECK-GI-NEXT: fmov w0, s0 +; CHECK-GI-NEXT: ret %res = call i16 @llvm.convert.to.fp16.f32(float %in) ret i16 %res } define float @from_half(i16 %in) { -; GENERIC-LABEL: from_half: -; GENERIC: // %bb.0: -; GENERIC-NEXT: fmov s0, w0 -; GENERIC-NEXT: fcvt s0, h0 -; GENERIC-NEXT: ret +; CHECK-SD-LABEL: from_half: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: fcvt s0, h0 +; CHECK-SD-NEXT: ret ; -; FAST-LABEL: from_half: -; FAST: // %bb.0: -; FAST-NEXT: fmov s0, w0 -; FAST-NEXT: // kill: def $h0 killed $h0 killed $s0 -; FAST-NEXT: fcvt s0, h0 -; FAST-NEXT: ret +; CHECK-FI-LABEL: from_half: +; CHECK-FI: // %bb.0: +; CHECK-FI-NEXT: fmov s0, w0 +; CHECK-FI-NEXT: // kill: def $h0 killed $h0 killed $s0 +; CHECK-FI-NEXT: fcvt s0, h0 +; CHECK-FI-NEXT: ret ; -; GISEL-LABEL: from_half: -; GISEL: // %bb.0: -; GISEL-NEXT: fmov s0, w0 -; GISEL-NEXT: fcvt s0, h0 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: from_half: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov s0, w0 +; CHECK-GI-NEXT: fcvt s0, h0 +; CHECK-GI-NEXT: ret %res = call float @llvm.convert.from.fp16.f32(i16 %in) ret float %res } - -declare float @llvm.convert.from.fp16.f32(i16) #1 -declare i16 @llvm.convert.to.fp16.f32(float) #1 -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; FALLBACK: {{.*}} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll index 5ae0839..3dfa6df 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll @@ -1361,132 +1361,6 @@ for.body: ; preds = %for.body.preheader, br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !loop !1 } -define i32 @red_extended_add_incomplete_chain(ptr %start, ptr %end, i32 %offset) { -; CHECK-NEON-LABEL: define i32 @red_extended_add_incomplete_chain( -; CHECK-NEON-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEON-NEXT: entry: -; CHECK-NEON-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64 -; CHECK-NEON-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64 -; CHECK-NEON-NEXT: [[TMP0:%.*]] = add i64 [[END1]], 1 -; CHECK-NEON-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]] -; CHECK-NEON-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16 -; CHECK-NEON-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK-NEON: vector.ph: -; CHECK-NEON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 16 -; CHECK-NEON-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] -; CHECK-NEON-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]] -; CHECK-NEON-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[OFFSET]], i64 0 -; CHECK-NEON-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEON-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK-NEON: vector.body: -; CHECK-NEON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEON-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEON-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] -; CHECK-NEON-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 -; CHECK-NEON-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> -; CHECK-NEON-NEXT: [[PARTIAL_REDUCE:%.*]] = add <16 x i32> [[VEC_PHI]], [[TMP3]] -; CHECK-NEON-NEXT: [[TMP4]] = add <16 x i32> [[PARTIAL_REDUCE]], [[BROADCAST_SPLAT]] -; CHECK-NEON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEON-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEON-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] -; CHECK-NEON: middle.block: -; CHECK-NEON-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP4]]) -; CHECK-NEON-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] -; CHECK-NEON-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK-NEON: scalar.ph: -; -; CHECK-SVE-LABEL: define i32 @red_extended_add_incomplete_chain( -; CHECK-SVE-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-SVE-NEXT: entry: -; CHECK-SVE-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64 -; CHECK-SVE-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64 -; CHECK-SVE-NEXT: [[TMP0:%.*]] = add i64 [[END1]], 1 -; CHECK-SVE-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]] -; CHECK-SVE-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 2 -; CHECK-SVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], [[TMP3]] -; CHECK-SVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK-SVE: vector.ph: -; CHECK-SVE-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 4 -; CHECK-SVE-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]] -; CHECK-SVE-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] -; CHECK-SVE-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]] -; CHECK-SVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OFFSET]], i64 0 -; CHECK-SVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer -; CHECK-SVE-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK-SVE: vector.body: -; CHECK-SVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-SVE-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] -; CHECK-SVE-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] -; CHECK-SVE-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[NEXT_GEP]], align 1 -; CHECK-SVE-NEXT: [[TMP7:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32> -; CHECK-SVE-NEXT: [[TMP8:%.*]] = add <vscale x 4 x i32> [[VEC_PHI]], [[TMP7]] -; CHECK-SVE-NEXT: [[TMP9]] = add <vscale x 4 x i32> [[TMP8]], [[BROADCAST_SPLAT]] -; CHECK-SVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-SVE-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-SVE-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] -; CHECK-SVE: middle.block: -; CHECK-SVE-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP9]]) -; CHECK-SVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] -; CHECK-SVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK-SVE: scalar.ph: -; -; CHECK-SVE-MAXBW-LABEL: define i32 @red_extended_add_incomplete_chain( -; CHECK-SVE-MAXBW-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-SVE-MAXBW-NEXT: entry: -; CHECK-SVE-MAXBW-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64 -; CHECK-SVE-MAXBW-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64 -; CHECK-SVE-MAXBW-NEXT: [[TMP0:%.*]] = add i64 [[END1]], 1 -; CHECK-SVE-MAXBW-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]] -; CHECK-SVE-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 3 -; CHECK-SVE-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], [[TMP3]] -; CHECK-SVE-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK-SVE-MAXBW: vector.ph: -; CHECK-SVE-MAXBW-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-SVE-MAXBW-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 -; CHECK-SVE-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5]] -; CHECK-SVE-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] -; CHECK-SVE-MAXBW-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]] -; CHECK-SVE-MAXBW-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i32> poison, i32 [[OFFSET]], i64 0 -; CHECK-SVE-MAXBW-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer -; CHECK-SVE-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK-SVE-MAXBW: vector.body: -; CHECK-SVE-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-SVE-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] -; CHECK-SVE-MAXBW-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] -; CHECK-SVE-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 8 x i8>, ptr [[NEXT_GEP]], align 1 -; CHECK-SVE-MAXBW-NEXT: [[TMP7:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i32> -; CHECK-SVE-MAXBW-NEXT: [[PARTIAL_REDUCE:%.*]] = add <vscale x 8 x i32> [[VEC_PHI]], [[TMP7]] -; CHECK-SVE-MAXBW-NEXT: [[TMP8]] = add <vscale x 8 x i32> [[PARTIAL_REDUCE]], [[BROADCAST_SPLAT]] -; CHECK-SVE-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] -; CHECK-SVE-MAXBW-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-SVE-MAXBW-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] -; CHECK-SVE-MAXBW: middle.block: -; CHECK-SVE-MAXBW-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.add.nxv8i32(<vscale x 8 x i32> [[TMP8]]) -; CHECK-SVE-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] -; CHECK-SVE-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] -; CHECK-SVE-MAXBW: scalar.ph: -; -entry: - br label %loop - -loop: - %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] - %red = phi i32 [ 0, %entry ], [ %red.next, %loop ] - %l = load i8, ptr %ptr.iv, align 1 - %l.ext = zext i8 %l to i32 - %add = add i32 %red, %l.ext - %red.next = add i32 %add, %offset - %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 - %ec = icmp eq ptr %ptr.iv, %end - br i1 %ec, label %exit, label %loop - -exit: - ret i32 %red.next -} - attributes #0 = { vscale_range(1,16) } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-incomplete-chains.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-incomplete-chains.ll new file mode 100644 index 0000000..d80178fd --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-incomplete-chains.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 +; RUN: opt --mattr=+neon,+dotprod -passes=loop-vectorize -force-vector-interleave=1 -enable-epilogue-vectorization=false -S %s | FileCheck %s --check-prefixes=CHECK-NEON + +target triple = "arm64-apple-macosx" + +define i32 @red_extended_add_incomplete_chain(ptr %start, ptr %end, i32 %offset) { +; CHECK-NEON-LABEL: define i32 @red_extended_add_incomplete_chain( +; CHECK-NEON-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i32 [[OFFSET:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEON-NEXT: [[ENTRY:.*]]: +; CHECK-NEON-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64 +; CHECK-NEON-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64 +; CHECK-NEON-NEXT: [[TMP0:%.*]] = add i64 [[END1]], 1 +; CHECK-NEON-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]] +; CHECK-NEON-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 16 +; CHECK-NEON-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NEON: [[VECTOR_PH]]: +; CHECK-NEON-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 16 +; CHECK-NEON-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]] +; CHECK-NEON-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]] +; CHECK-NEON-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[OFFSET]], i64 0 +; CHECK-NEON-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEON-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-NEON: [[VECTOR_BODY]]: +; CHECK-NEON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEON-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEON-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX]] +; CHECK-NEON-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1 +; CHECK-NEON-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> +; CHECK-NEON-NEXT: [[TMP4:%.*]] = add <16 x i32> [[VEC_PHI]], [[TMP3]] +; CHECK-NEON-NEXT: [[TMP5]] = add <16 x i32> [[TMP4]], [[BROADCAST_SPLAT]] +; CHECK-NEON-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-NEON-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEON-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NEON: [[MIDDLE_BLOCK]]: +; CHECK-NEON-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP5]]) +; CHECK-NEON-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] +; CHECK-NEON-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-NEON: [[SCALAR_PH]]: +; CHECK-NEON-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ] +; CHECK-NEON-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEON-NEXT: br label %[[LOOP:.*]] +; CHECK-NEON: [[LOOP]]: +; CHECK-NEON-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[GEP_IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEON-NEXT: [[RED:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEON-NEXT: [[L:%.*]] = load i8, ptr [[PTR_IV]], align 1 +; CHECK-NEON-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i32 +; CHECK-NEON-NEXT: [[ADD:%.*]] = add i32 [[RED]], [[L_EXT]] +; CHECK-NEON-NEXT: [[RED_NEXT]] = add i32 [[ADD]], [[OFFSET]] +; CHECK-NEON-NEXT: [[GEP_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 1 +; CHECK-NEON-NEXT: [[EC:%.*]] = icmp eq ptr [[PTR_IV]], [[END]] +; CHECK-NEON-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NEON: [[EXIT]]: +; CHECK-NEON-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i32 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEON-NEXT: ret i32 [[RED_NEXT_LCSSA]] +; +entry: + br label %loop + +loop: + %ptr.iv = phi ptr [ %start, %entry ], [ %gep.iv.next, %loop ] + %red = phi i32 [ 0, %entry ], [ %red.next, %loop ] + %l = load i8, ptr %ptr.iv, align 1 + %l.ext = zext i8 %l to i32 + %add = add i32 %red, %l.ext + %red.next = add i32 %add, %offset + %gep.iv.next = getelementptr i8, ptr %ptr.iv, i64 1 + %ec = icmp eq ptr %ptr.iv, %end + br i1 %ec, label %exit, label %loop + +exit: + ret i32 %red.next +} |