diff options
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- | llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll | 361 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir | 4 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/fcmove.ll | 15 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/isel-select-fcmov.ll | 175 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll | 46 |
6 files changed, 361 insertions, 244 deletions
diff --git a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll index d4cc154..52ca22b 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll @@ -1,38 +1,24 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,GENERIC -; RUN: llc < %s -O0 -fast-isel -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,FAST -; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* \ -; RUN: -mtriple=arm64-eabi -aarch64-neon-syntax=apple \ -; RUN: | FileCheck %s --check-prefixes=GISEL,FALLBACK +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -O0 -fast-isel | FileCheck %s --check-prefixes=CHECK,CHECK-FI +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI + +; CHECK-GI: warning: Instruction selection used fallback path for test_vcvt_bf16_f64 -; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_f64_f32) -; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_f64_f32) define <2 x double> @test_vcvt_f64_f32(<2 x float> %x) nounwind readnone ssp { ; CHECK-LABEL: test_vcvt_f64_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtl v0.2d, v0.2s ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_f64_f32: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl v0.2d, v0.2s -; GISEL-NEXT: ret %vcvt1.i = fpext <2 x float> %x to <2 x double> ret <2 x double> %vcvt1.i } -; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_high_f64_f32) -; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_high_f64_f32) define <2 x double> @test_vcvt_high_f64_f32(<4 x float> %x) nounwind readnone ssp { ; CHECK-LABEL: test_vcvt_high_f64_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtl2 v0.2d, v0.4s ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_high_f64_f32: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.2d, v0.4s -; GISEL-NEXT: ret %cvt_in = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> <i32 2, i32 3> %vcvt1.i = fpext <2 x float> %cvt_in to <2 x double> ret <2 x double> %vcvt1.i @@ -43,11 +29,6 @@ define <2 x double> @test_vcvt_high_v1f64_f32_bitcast(<4 x float> %x) nounwind r ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtl2 v0.2d, v0.4s ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_high_v1f64_f32_bitcast: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.2d, v0.4s -; GISEL-NEXT: ret %bc1 = bitcast <4 x float> %x to <2 x double> %ext = shufflevector <2 x double> %bc1, <2 x double> undef, <1 x i32> <i32 1> %bc2 = bitcast <1 x double> %ext to <2 x float> @@ -60,11 +41,6 @@ define <2 x double> @test_vcvt_high_v1i64_f32_bitcast(<2 x i64> %x) nounwind rea ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtl2 v0.2d, v0.4s ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_high_v1i64_f32_bitcast: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.2d, v0.4s -; GISEL-NEXT: ret %ext = shufflevector <2 x i64> %x, <2 x i64> undef, <1 x i32> <i32 1> %bc2 = bitcast <1 x i64> %ext to <2 x float> %r = fpext <2 x float> %bc2 to <2 x double> @@ -76,11 +52,6 @@ define <2 x double> @test_vcvt_high_v2i32_f32_bitcast(<4 x i32> %x) nounwind rea ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtl2 v0.2d, v0.4s ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_high_v2i32_f32_bitcast: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.2d, v0.4s -; GISEL-NEXT: ret %ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3> %bc2 = bitcast <2 x i32> %ext to <2 x float> %r = fpext <2 x float> %bc2 to <2 x double> @@ -92,11 +63,6 @@ define <2 x double> @test_vcvt_high_v4i16_f32_bitcast(<8 x i16> %x) nounwind rea ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtl2 v0.2d, v0.4s ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_high_v4i16_f32_bitcast: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.2d, v0.4s -; GISEL-NEXT: ret %ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %bc2 = bitcast <4 x i16> %ext to <2 x float> %r = fpext <2 x float> %bc2 to <2 x double> @@ -108,11 +74,6 @@ define <2 x double> @test_vcvt_high_v8i8_f32_bitcast(<16 x i8> %x) nounwind read ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtl2 v0.2d, v0.4s ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_high_v8i8_f32_bitcast: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.2d, v0.4s -; GISEL-NEXT: ret %ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> %bc2 = bitcast <8 x i8> %ext to <2 x float> %r = fpext <2 x float> %bc2 to <2 x double> @@ -124,11 +85,6 @@ define <4 x float> @test_vcvt_high_v1i64_f16_bitcast(<2 x i64> %x) nounwind read ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_high_v1i64_f16_bitcast: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.4s, v0.8h -; GISEL-NEXT: ret %ext = shufflevector <2 x i64> %x, <2 x i64> undef, <1 x i32> <i32 1> %bc2 = bitcast <1 x i64> %ext to <4 x half> %r = fpext <4 x half> %bc2 to <4 x float> @@ -140,11 +96,6 @@ define <4 x float> @test_vcvt_high_v2i32_f16_bitcast(<4 x i32> %x) nounwind read ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_high_v2i32_f16_bitcast: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.4s, v0.8h -; GISEL-NEXT: ret %ext = shufflevector <4 x i32> %x, <4 x i32> undef, <2 x i32> <i32 2, i32 3> %bc2 = bitcast <2 x i32> %ext to <4 x half> %r = fpext <4 x half> %bc2 to <4 x float> @@ -156,11 +107,6 @@ define <4 x float> @test_vcvt_high_v4i16_f16_bitcast(<8 x i16> %x) nounwind read ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_high_v4i16_f16_bitcast: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.4s, v0.8h -; GISEL-NEXT: ret %ext = shufflevector <8 x i16> %x, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> %bc2 = bitcast <4 x i16> %ext to <4 x half> %r = fpext <4 x half> %bc2 to <4 x float> @@ -172,134 +118,118 @@ define <4 x float> @test_vcvt_high_v8i8_f16_bitcast(<16 x i8> %x) nounwind readn ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_high_v8i8_f16_bitcast: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtl2 v0.4s, v0.8h -; GISEL-NEXT: ret %ext = shufflevector <16 x i8> %x, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> %bc2 = bitcast <8 x i8> %ext to <4 x half> %r = fpext <4 x half> %bc2 to <4 x float> ret <4 x float> %r } -; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_f32_f64) -; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_f32_f64) define <2 x float> @test_vcvt_f32_f64(<2 x double> %v) nounwind readnone ssp { ; CHECK-LABEL: test_vcvt_f32_f64: ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtn v0.2s, v0.2d ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvt_f32_f64: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtn v0.2s, v0.2d -; GISEL-NEXT: ret %vcvt1.i = fptrunc <2 x double> %v to <2 x float> ret <2 x float> %vcvt1.i } -; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_bf16_f64) -; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_bf16_f64) define <2 x bfloat> @test_vcvt_bf16_f64(<2 x double> %v) nounwind readnone ssp { -; GENERIC-LABEL: test_vcvt_bf16_f64: -; GENERIC: // %bb.0: -; GENERIC-NEXT: fcvtxn v0.2s, v0.2d -; GENERIC-NEXT: movi.4s v1, #1 -; GENERIC-NEXT: movi.4s v2, #127, msl #8 -; GENERIC-NEXT: ushr.4s v3, v0, #16 -; GENERIC-NEXT: add.4s v2, v0, v2 -; GENERIC-NEXT: and.16b v1, v3, v1 -; GENERIC-NEXT: fcmeq.4s v3, v0, v0 -; GENERIC-NEXT: orr.4s v0, #64, lsl #16 -; GENERIC-NEXT: add.4s v1, v1, v2 -; GENERIC-NEXT: bit.16b v0, v1, v3 -; GENERIC-NEXT: shrn.4h v0, v0, #16 -; GENERIC-NEXT: ret +; CHECK-SD-LABEL: test_vcvt_bf16_f64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvtxn v0.2s, v0.2d +; CHECK-SD-NEXT: movi.4s v1, #1 +; CHECK-SD-NEXT: movi.4s v2, #127, msl #8 +; CHECK-SD-NEXT: ushr.4s v3, v0, #16 +; CHECK-SD-NEXT: add.4s v2, v0, v2 +; CHECK-SD-NEXT: and.16b v1, v3, v1 +; CHECK-SD-NEXT: fcmeq.4s v3, v0, v0 +; CHECK-SD-NEXT: orr.4s v0, #64, lsl #16 +; CHECK-SD-NEXT: add.4s v1, v1, v2 +; CHECK-SD-NEXT: bit.16b v0, v1, v3 +; CHECK-SD-NEXT: shrn.4h v0, v0, #16 +; CHECK-SD-NEXT: ret ; -; FAST-LABEL: test_vcvt_bf16_f64: -; FAST: // %bb.0: -; FAST-NEXT: fcvtxn v1.2s, v0.2d -; FAST-NEXT: // implicit-def: $q0 -; FAST-NEXT: fmov d0, d1 -; FAST-NEXT: ushr.4s v1, v0, #16 -; FAST-NEXT: movi.4s v2, #1 -; FAST-NEXT: and.16b v1, v1, v2 -; FAST-NEXT: add.4s v1, v1, v0 -; FAST-NEXT: movi.4s v2, #127, msl #8 -; FAST-NEXT: add.4s v1, v1, v2 -; FAST-NEXT: mov.16b v2, v0 -; FAST-NEXT: orr.4s v2, #64, lsl #16 -; FAST-NEXT: fcmeq.4s v0, v0, v0 -; FAST-NEXT: bsl.16b v0, v1, v2 -; FAST-NEXT: shrn.4h v0, v0, #16 -; FAST-NEXT: ret +; CHECK-FI-LABEL: test_vcvt_bf16_f64: +; CHECK-FI: // %bb.0: +; CHECK-FI-NEXT: fcvtxn v1.2s, v0.2d +; CHECK-FI-NEXT: // implicit-def: $q0 +; CHECK-FI-NEXT: fmov d0, d1 +; CHECK-FI-NEXT: ushr.4s v1, v0, #16 +; CHECK-FI-NEXT: movi.4s v2, #1 +; CHECK-FI-NEXT: and.16b v1, v1, v2 +; CHECK-FI-NEXT: add.4s v1, v1, v0 +; CHECK-FI-NEXT: movi.4s v2, #127, msl #8 +; CHECK-FI-NEXT: add.4s v1, v1, v2 +; CHECK-FI-NEXT: mov.16b v2, v0 +; CHECK-FI-NEXT: orr.4s v2, #64, lsl #16 +; CHECK-FI-NEXT: fcmeq.4s v0, v0, v0 +; CHECK-FI-NEXT: bsl.16b v0, v1, v2 +; CHECK-FI-NEXT: shrn.4h v0, v0, #16 +; CHECK-FI-NEXT: ret ; -; GISEL-LABEL: test_vcvt_bf16_f64: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtxn v0.2s, v0.2d -; GISEL-NEXT: movi.4s v1, #1 -; GISEL-NEXT: movi.4s v2, #127, msl #8 -; GISEL-NEXT: ushr.4s v3, v0, #16 -; GISEL-NEXT: add.4s v2, v0, v2 -; GISEL-NEXT: and.16b v1, v3, v1 -; GISEL-NEXT: fcmeq.4s v3, v0, v0 -; GISEL-NEXT: orr.4s v0, #64, lsl #16 -; GISEL-NEXT: add.4s v1, v1, v2 -; GISEL-NEXT: bit.16b v0, v1, v3 -; GISEL-NEXT: shrn.4h v0, v0, #16 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: test_vcvt_bf16_f64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcvtxn v0.2s, v0.2d +; CHECK-GI-NEXT: movi.4s v1, #1 +; CHECK-GI-NEXT: movi.4s v2, #127, msl #8 +; CHECK-GI-NEXT: ushr.4s v3, v0, #16 +; CHECK-GI-NEXT: add.4s v2, v0, v2 +; CHECK-GI-NEXT: and.16b v1, v3, v1 +; CHECK-GI-NEXT: fcmeq.4s v3, v0, v0 +; CHECK-GI-NEXT: orr.4s v0, #64, lsl #16 +; CHECK-GI-NEXT: add.4s v1, v1, v2 +; CHECK-GI-NEXT: bit.16b v0, v1, v3 +; CHECK-GI-NEXT: shrn.4h v0, v0, #16 +; CHECK-GI-NEXT: ret %vcvt1.i = fptrunc <2 x double> %v to <2 x bfloat> ret <2 x bfloat> %vcvt1.i } define half @test_vcvt_f16_f32(<1 x float> %x) { -; GENERIC-LABEL: test_vcvt_f16_f32: -; GENERIC: // %bb.0: -; GENERIC-NEXT: // kill: def $d0 killed $d0 def $q0 -; GENERIC-NEXT: fcvt h0, s0 -; GENERIC-NEXT: ret +; CHECK-SD-LABEL: test_vcvt_f16_f32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: fcvt h0, s0 +; CHECK-SD-NEXT: ret ; -; FAST-LABEL: test_vcvt_f16_f32: -; FAST: // %bb.0: -; FAST-NEXT: fmov d1, d0 -; FAST-NEXT: // implicit-def: $q0 -; FAST-NEXT: fmov d0, d1 -; FAST-NEXT: // kill: def $s0 killed $s0 killed $q0 -; FAST-NEXT: fcvt h0, s0 -; FAST-NEXT: ret +; CHECK-FI-LABEL: test_vcvt_f16_f32: +; CHECK-FI: // %bb.0: +; CHECK-FI-NEXT: fmov d1, d0 +; CHECK-FI-NEXT: // implicit-def: $q0 +; CHECK-FI-NEXT: fmov d0, d1 +; CHECK-FI-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-FI-NEXT: fcvt h0, s0 +; CHECK-FI-NEXT: ret ; -; GISEL-LABEL: test_vcvt_f16_f32: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: test_vcvt_f16_f32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcvt h0, s0 +; CHECK-GI-NEXT: ret %tmp = fptrunc <1 x float> %x to <1 x half> %elt = extractelement <1 x half> %tmp, i32 0 ret half %elt } -; FALLBACK-NOT: remark{{.*}}G_FPEXT{{.*}}(in function: test_vcvt_high_f32_f64) -; FALLBACK-NOT: remark{{.*}}fpext{{.*}}(in function: test_vcvt_high_f32_f64) define <4 x float> @test_vcvt_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp { -; GENERIC-LABEL: test_vcvt_high_f32_f64: -; GENERIC: // %bb.0: -; GENERIC-NEXT: // kill: def $d0 killed $d0 def $q0 -; GENERIC-NEXT: fcvtn2 v0.4s, v1.2d -; GENERIC-NEXT: ret +; CHECK-SD-LABEL: test_vcvt_high_f32_f64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: fcvtn2 v0.4s, v1.2d +; CHECK-SD-NEXT: ret ; -; FAST-LABEL: test_vcvt_high_f32_f64: -; FAST: // %bb.0: -; FAST-NEXT: fmov d2, d0 -; FAST-NEXT: // implicit-def: $q0 -; FAST-NEXT: fmov d0, d2 -; FAST-NEXT: fcvtn2 v0.4s, v1.2d -; FAST-NEXT: ret +; CHECK-FI-LABEL: test_vcvt_high_f32_f64: +; CHECK-FI: // %bb.0: +; CHECK-FI-NEXT: fmov d2, d0 +; CHECK-FI-NEXT: // implicit-def: $q0 +; CHECK-FI-NEXT: fmov d0, d2 +; CHECK-FI-NEXT: fcvtn2 v0.4s, v1.2d +; CHECK-FI-NEXT: ret ; -; GISEL-LABEL: test_vcvt_high_f32_f64: -; GISEL: // %bb.0: -; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 -; GISEL-NEXT: fcvtn2 v0.4s, v1.2d -; GISEL-NEXT: ret +; CHECK-GI-LABEL: test_vcvt_high_f32_f64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: fcvtn2 v0.4s, v1.2d +; CHECK-GI-NEXT: ret %cvt = fptrunc <2 x double> %v to <2 x float> %vcvt2.i = shufflevector <2 x float> %x, <2 x float> %cvt, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ret <4 x float> %vcvt2.i @@ -310,99 +240,80 @@ define <2 x float> @test_vcvtx_f32_f64(<2 x double> %v) nounwind readnone ssp { ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtxn v0.2s, v0.2d ; CHECK-NEXT: ret -; -; GISEL-LABEL: test_vcvtx_f32_f64: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvtxn v0.2s, v0.2d -; GISEL-NEXT: ret %vcvtx1.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind ret <2 x float> %vcvtx1.i } define <4 x float> @test_vcvtx_high_f32_f64(<2 x float> %x, <2 x double> %v) nounwind readnone ssp { -; GENERIC-LABEL: test_vcvtx_high_f32_f64: -; GENERIC: // %bb.0: -; GENERIC-NEXT: // kill: def $d0 killed $d0 def $q0 -; GENERIC-NEXT: fcvtxn2 v0.4s, v1.2d -; GENERIC-NEXT: ret +; CHECK-SD-LABEL: test_vcvtx_high_f32_f64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-SD-NEXT: fcvtxn2 v0.4s, v1.2d +; CHECK-SD-NEXT: ret ; -; FAST-LABEL: test_vcvtx_high_f32_f64: -; FAST: // %bb.0: -; FAST-NEXT: fmov d2, d0 -; FAST-NEXT: // implicit-def: $q0 -; FAST-NEXT: fmov d0, d2 -; FAST-NEXT: fcvtxn2 v0.4s, v1.2d -; FAST-NEXT: ret +; CHECK-FI-LABEL: test_vcvtx_high_f32_f64: +; CHECK-FI: // %bb.0: +; CHECK-FI-NEXT: fmov d2, d0 +; CHECK-FI-NEXT: // implicit-def: $q0 +; CHECK-FI-NEXT: fmov d0, d2 +; CHECK-FI-NEXT: fcvtxn2 v0.4s, v1.2d +; CHECK-FI-NEXT: ret ; -; GISEL-LABEL: test_vcvtx_high_f32_f64: -; GISEL: // %bb.0: -; GISEL-NEXT: // kill: def $d0 killed $d0 def $q0 -; GISEL-NEXT: fcvtxn2 v0.4s, v1.2d -; GISEL-NEXT: ret +; CHECK-GI-LABEL: test_vcvtx_high_f32_f64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-GI-NEXT: fcvtxn2 v0.4s, v1.2d +; CHECK-GI-NEXT: ret %vcvtx2.i = tail call <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double> %v) nounwind %res = shufflevector <2 x float> %x, <2 x float> %vcvtx2.i, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ret <4 x float> %res } - -declare <2 x double> @llvm.aarch64.neon.vcvthighfp2df(<4 x float>) nounwind readnone -declare <2 x double> @llvm.aarch64.neon.vcvtfp2df(<2 x float>) nounwind readnone - -declare <2 x float> @llvm.aarch64.neon.vcvtdf2fp(<2 x double>) nounwind readnone -declare <4 x float> @llvm.aarch64.neon.vcvthighdf2fp(<2 x float>, <2 x double>) nounwind readnone - -declare <2 x float> @llvm.aarch64.neon.fcvtxn.v2f32.v2f64(<2 x double>) nounwind readnone - define i16 @to_half(float %in) { -; GENERIC-LABEL: to_half: -; GENERIC: // %bb.0: -; GENERIC-NEXT: fcvt h0, s0 -; GENERIC-NEXT: fmov w0, s0 -; GENERIC-NEXT: ret +; CHECK-SD-LABEL: to_half: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcvt h0, s0 +; CHECK-SD-NEXT: fmov w0, s0 +; CHECK-SD-NEXT: ret ; -; FAST-LABEL: to_half: -; FAST: // %bb.0: -; FAST-NEXT: fcvt h1, s0 -; FAST-NEXT: // implicit-def: $w0 -; FAST-NEXT: fmov s0, w0 -; FAST-NEXT: fmov s0, s1 -; FAST-NEXT: fmov w0, s0 -; FAST-NEXT: // kill: def $w1 killed $w0 -; FAST-NEXT: ret +; CHECK-FI-LABEL: to_half: +; CHECK-FI: // %bb.0: +; CHECK-FI-NEXT: fcvt h1, s0 +; CHECK-FI-NEXT: // implicit-def: $w0 +; CHECK-FI-NEXT: fmov s0, w0 +; CHECK-FI-NEXT: fmov s0, s1 +; CHECK-FI-NEXT: fmov w0, s0 +; CHECK-FI-NEXT: // kill: def $w1 killed $w0 +; CHECK-FI-NEXT: ret ; -; GISEL-LABEL: to_half: -; GISEL: // %bb.0: -; GISEL-NEXT: fcvt h0, s0 -; GISEL-NEXT: fmov w0, s0 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: to_half: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcvt h0, s0 +; CHECK-GI-NEXT: fmov w0, s0 +; CHECK-GI-NEXT: ret %res = call i16 @llvm.convert.to.fp16.f32(float %in) ret i16 %res } define float @from_half(i16 %in) { -; GENERIC-LABEL: from_half: -; GENERIC: // %bb.0: -; GENERIC-NEXT: fmov s0, w0 -; GENERIC-NEXT: fcvt s0, h0 -; GENERIC-NEXT: ret +; CHECK-SD-LABEL: from_half: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fmov s0, w0 +; CHECK-SD-NEXT: fcvt s0, h0 +; CHECK-SD-NEXT: ret ; -; FAST-LABEL: from_half: -; FAST: // %bb.0: -; FAST-NEXT: fmov s0, w0 -; FAST-NEXT: // kill: def $h0 killed $h0 killed $s0 -; FAST-NEXT: fcvt s0, h0 -; FAST-NEXT: ret +; CHECK-FI-LABEL: from_half: +; CHECK-FI: // %bb.0: +; CHECK-FI-NEXT: fmov s0, w0 +; CHECK-FI-NEXT: // kill: def $h0 killed $h0 killed $s0 +; CHECK-FI-NEXT: fcvt s0, h0 +; CHECK-FI-NEXT: ret ; -; GISEL-LABEL: from_half: -; GISEL: // %bb.0: -; GISEL-NEXT: fmov s0, w0 -; GISEL-NEXT: fcvt s0, h0 -; GISEL-NEXT: ret +; CHECK-GI-LABEL: from_half: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fmov s0, w0 +; CHECK-GI-NEXT: fcvt s0, h0 +; CHECK-GI-NEXT: ret %res = call float @llvm.convert.from.fp16.f32(i16 %in) ret float %res } - -declare float @llvm.convert.from.fp16.f32(i16) #1 -declare i16 @llvm.convert.to.fp16.f32(float) #1 -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; FALLBACK: {{.*}} diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir index 470a30fd..bd4e9a4 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-leading-zeros.mir @@ -37,9 +37,9 @@ body: | ; X86-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; X86-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[CTLZ]], [[C1]] ; X86-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s64) = G_CTLZ_ZERO_UNDEF [[UV1]](s32) + ; X86-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64) ; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[CTLZ_ZERO_UNDEF]](s64) - ; X86-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) ; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT1]](s32), [[UV2]], [[UV4]] ; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT1]](s32), [[UV3]], [[UV5]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) @@ -111,9 +111,9 @@ body: | ; X86-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 ; X86-NEXT: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[CTLZ]], [[C1]] ; X86-NEXT: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:_(s64) = G_CTLZ_ZERO_UNDEF [[UV1]](s32) + ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ADD]](s64) ; X86-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[CTLZ_ZERO_UNDEF]](s64) - ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) ; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV2]], [[UV4]] ; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV3]], [[UV5]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) diff --git a/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir index a7cbb35..6ab424e 100644 --- a/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir +++ b/llvm/test/CodeGen/X86/GlobalISel/legalize-select.mir @@ -33,9 +33,9 @@ body: | ; X86: [[DEF:%[0-9]+]]:_(s64) = IMPLICIT_DEF ; X86-NEXT: [[DEF1:%[0-9]+]]:_(s64) = IMPLICIT_DEF ; X86-NEXT: [[DEF2:%[0-9]+]]:_(s1) = IMPLICIT_DEF + ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1) ; X86-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](s64) ; X86-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](s64) - ; X86-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1) ; X86-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV]], [[UV2]] ; X86-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ZEXT]](s32), [[UV1]], [[UV3]] ; X86-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[SELECT1]](s32) @@ -115,9 +115,9 @@ body: | ; X64: [[DEF:%[0-9]+]]:_(s8) = IMPLICIT_DEF ; X64-NEXT: [[DEF1:%[0-9]+]]:_(s8) = IMPLICIT_DEF ; X64-NEXT: [[DEF2:%[0-9]+]]:_(s1) = IMPLICIT_DEF + ; X64-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1) ; X64-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[DEF1]](s8) ; X64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[DEF]](s8) - ; X64-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[DEF2]](s1) ; X64-NEXT: [[SELECT:%[0-9]+]]:_(s16) = G_SELECT [[ZEXT]](s32), [[ANYEXT]], [[ANYEXT1]] ; X64-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[SELECT]](s16) ; X64-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8) diff --git a/llvm/test/CodeGen/X86/fcmove.ll b/llvm/test/CodeGen/X86/fcmove.ll deleted file mode 100644 index 6bb0148..0000000 --- a/llvm/test/CodeGen/X86/fcmove.ll +++ /dev/null @@ -1,15 +0,0 @@ -; RUN: llc %s -o - -verify-machineinstrs | FileCheck %s - -target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-unknown" - -; Test that we can generate an fcmove, and also that it passes verification. - -; CHECK-LABEL: cmove_f -; CHECK: fcmove %st({{[0-7]}}), %st -define x86_fp80 @cmove_f(x86_fp80 %a, x86_fp80 %b, i32 %c) { - %test = icmp eq i32 %c, 0 - %add = fadd x86_fp80 %a, %b - %ret = select i1 %test, x86_fp80 %add, x86_fp80 %b - ret x86_fp80 %ret -} diff --git a/llvm/test/CodeGen/X86/isel-select-fcmov.ll b/llvm/test/CodeGen/X86/isel-select-fcmov.ll new file mode 100644 index 0000000..cb441b8 --- /dev/null +++ b/llvm/test/CodeGen/X86/isel-select-fcmov.ll @@ -0,0 +1,175 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -mtriple=i686-linux-gnu -mattr=+cmov -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=i686-linux-gnu -mattr=+cmov -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=X86-GISEL +; RUN: llc < %s -mtriple=i686-linux-gnu -mattr=+cmov -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel -fast-isel-abort=1 | FileCheck %s --check-prefix=X64 +; RUN: llc < %s -mtriple=x86_64-linux-gnu -global-isel -global-isel-abort=1 | FileCheck %s --check-prefix=X64-GISEL +; RUN: llc < %s -mtriple=x86_64-linux-gnu -fast-isel=0 -global-isel=0 | FileCheck %s --check-prefix=X64 + +; Test that we can generate an fcmove, and also that it passes verification. + +define x86_fp80 @cmove_cmp(x86_fp80 %a, x86_fp80 %b, i32 %c) { +; X86-LABEL: cmove_cmp: +; X86: # %bb.0: +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; X86-NEXT: fadd %st(1), %st +; X86-NEXT: fxch %st(1) +; X86-NEXT: fcmove %st(1), %st +; X86-NEXT: fstp %st(1) +; X86-NEXT: retl +; +; X86-GISEL-LABEL: cmove_cmp: +; X86-GISEL: # %bb.0: +; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp) +; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp) +; X86-GISEL-NEXT: xorl %eax, %eax +; X86-GISEL-NEXT: cmpl $0, {{[0-9]+}}(%esp) +; X86-GISEL-NEXT: sete %al +; X86-GISEL-NEXT: fadd %st, %st(1) +; X86-GISEL-NEXT: andl $1, %eax +; X86-GISEL-NEXT: testl %eax, %eax +; X86-GISEL-NEXT: fxch %st(1) +; X86-GISEL-NEXT: fcmove %st(1), %st +; X86-GISEL-NEXT: fstp %st(1) +; X86-GISEL-NEXT: retl +; +; X64-LABEL: cmove_cmp: +; X64: # %bb.0: +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: testl %edi, %edi +; X64-NEXT: fadd %st(1), %st +; X64-NEXT: fxch %st(1) +; X64-NEXT: fcmove %st(1), %st +; X64-NEXT: fstp %st(1) +; X64-NEXT: retq +; +; X64-GISEL-LABEL: cmove_cmp: +; X64-GISEL: # %bb.0: +; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-GISEL-NEXT: xorl %eax, %eax +; X64-GISEL-NEXT: cmpl $0, %edi +; X64-GISEL-NEXT: sete %al +; X64-GISEL-NEXT: fadd %st, %st(1) +; X64-GISEL-NEXT: andl $1, %eax +; X64-GISEL-NEXT: testl %eax, %eax +; X64-GISEL-NEXT: fxch %st(1) +; X64-GISEL-NEXT: fcmove %st(1), %st +; X64-GISEL-NEXT: fstp %st(1) +; X64-GISEL-NEXT: retq + %test = icmp eq i32 %c, 0 + %add = fadd x86_fp80 %a, %b + %ret = select i1 %test, x86_fp80 %add, x86_fp80 %b + ret x86_fp80 %ret +} + +define x86_fp80 @cmove_arg(x86_fp80 %a, x86_fp80 %b, i1 %test) { +; X86-LABEL: cmove_arg: +; X86: # %bb.0: +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fadd %st(1), %st +; X86-NEXT: testb $1, {{[0-9]+}}(%esp) +; X86-NEXT: fxch %st(1) +; X86-NEXT: fcmovne %st(1), %st +; X86-NEXT: fstp %st(1) +; X86-NEXT: retl +; +; X86-GISEL-LABEL: cmove_arg: +; X86-GISEL: # %bb.0: +; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp) +; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp) +; X86-GISEL-NEXT: fadd %st, %st(1) +; X86-GISEL-NEXT: movl $1, %eax +; X86-GISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: testl %eax, %eax +; X86-GISEL-NEXT: fxch %st(1) +; X86-GISEL-NEXT: fcmove %st(1), %st +; X86-GISEL-NEXT: fstp %st(1) +; X86-GISEL-NEXT: retl +; +; X64-LABEL: cmove_arg: +; X64: # %bb.0: +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fadd %st(1), %st +; X64-NEXT: testb $1, %dil +; X64-NEXT: fxch %st(1) +; X64-NEXT: fcmovne %st(1), %st +; X64-NEXT: fstp %st(1) +; X64-NEXT: retq +; +; X64-GISEL-LABEL: cmove_arg: +; X64-GISEL: # %bb.0: +; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-GISEL-NEXT: fadd %st, %st(1) +; X64-GISEL-NEXT: andl $1, %edi +; X64-GISEL-NEXT: testl %edi, %edi +; X64-GISEL-NEXT: fxch %st(1) +; X64-GISEL-NEXT: fcmove %st(1), %st +; X64-GISEL-NEXT: fstp %st(1) +; X64-GISEL-NEXT: retq + %add = fadd x86_fp80 %a, %b + %ret = select i1 %test, x86_fp80 %add, x86_fp80 %b + ret x86_fp80 %ret +} + +define x86_fp80 @cmove_load(x86_fp80 %a, x86_fp80 %b, ptr %p) { +; X86-LABEL: cmove_load: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: fadd %st(1), %st +; X86-NEXT: cmpb $0, (%eax) +; X86-NEXT: fxch %st(1) +; X86-NEXT: fcmovne %st(1), %st +; X86-NEXT: fstp %st(1) +; X86-NEXT: retl +; +; X86-GISEL-LABEL: cmove_load: +; X86-GISEL: # %bb.0: +; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp) +; X86-GISEL-NEXT: fldt {{[0-9]+}}(%esp) +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: fadd %st, %st(1) +; X86-GISEL-NEXT: movzbl (%eax), %eax +; X86-GISEL-NEXT: andl $1, %eax +; X86-GISEL-NEXT: testl %eax, %eax +; X86-GISEL-NEXT: fxch %st(1) +; X86-GISEL-NEXT: fcmove %st(1), %st +; X86-GISEL-NEXT: fstp %st(1) +; X86-GISEL-NEXT: retl +; +; X64-LABEL: cmove_load: +; X64: # %bb.0: +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: fadd %st(1), %st +; X64-NEXT: cmpb $0, (%rdi) +; X64-NEXT: fxch %st(1) +; X64-NEXT: fcmovne %st(1), %st +; X64-NEXT: fstp %st(1) +; X64-NEXT: retq +; +; X64-GISEL-LABEL: cmove_load: +; X64-GISEL: # %bb.0: +; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-GISEL-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-GISEL-NEXT: fadd %st, %st(1) +; X64-GISEL-NEXT: movzbl (%rdi), %eax +; X64-GISEL-NEXT: andl $1, %eax +; X64-GISEL-NEXT: testl %eax, %eax +; X64-GISEL-NEXT: fxch %st(1) +; X64-GISEL-NEXT: fcmove %st(1), %st +; X64-GISEL-NEXT: fstp %st(1) +; X64-GISEL-NEXT: retq + %test = load i1, ptr %p + %add = fadd x86_fp80 %a, %b + %ret = select i1 %test, x86_fp80 %add, x86_fp80 %b + ret x86_fp80 %ret +} diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll index 3279a50..7a08f3e 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll @@ -624,6 +624,52 @@ define void @PR48908(<4 x double> %v0, <4 x double> %v1, <4 x double> %v2, ptr n ret void } +define i32 @PR164107(<16 x i1> %0) { +; AVX1-LABEL: PR164107: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsllw $15, %xmm0, %xmm0 +; AVX1-NEXT: vpsraw $15, %xmm0, %xmm0 +; AVX1-NEXT: vpmovsxwq %xmm0, %xmm0 +; AVX1-NEXT: vmovd %xmm0, %eax +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX2-LABEL: PR164107: +; AVX2: # %bb.0: +; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0 +; AVX2-NEXT: vpsllw $15, %xmm0, %xmm0 +; AVX2-NEXT: vpsraw $15, %xmm0, %xmm0 +; AVX2-NEXT: vpmovsxwq %xmm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX2-NEXT: vmovd %xmm0, %eax +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: PR164107: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 +; AVX512-NEXT: vpslld $31, %zmm0, %zmm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k1 +; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 {%k1} {z} = -1 +; AVX512-NEXT: vpbroadcastq %xmm0, %zmm0 +; AVX512-NEXT: vptestmq %zmm0, %zmm0, %k1 +; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 {%k1} {z} = -1 +; AVX512-NEXT: vextracti32x4 $3, %zmm0, %xmm0 +; AVX512-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX512-NEXT: vpmovsxwq %xmm0, %zmm0 +; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX512-NEXT: vmovd %xmm0, %eax +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: ret{{[l|q]}} + %cmp = shufflevector <16 x i1> %0, <16 x i1> zeroinitializer, <16 x i32> zeroinitializer + %sext = sext <16 x i1> %cmp to <16 x i64> + %bc.1 = bitcast <16 x i64> %sext to <64 x i16> + %vecinit15.i = shufflevector <64 x i16> %bc.1, <64 x i16> zeroinitializer, <16 x i32> <i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56> + %conv16.i = sext <16 x i16> %vecinit15.i to <16 x i64> + %bc.2 = bitcast <16 x i64> %conv16.i to <32 x i32> + %conv22.i = extractelement <32 x i32> %bc.2, i64 4 + ret i32 %conv22.i +} + define <4 x i64> @concat_self_v4i64(<2 x i64> %x) { ; AVX1-LABEL: concat_self_v4i64: ; AVX1: # %bb.0: |