Diffstat (limited to 'llvm/test')
103 files changed, 8396 insertions, 1304 deletions
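The hunks below update LLVM's AArch64 SVE cost-model test to pass poison instead of undef as the dummy operands of the probed instructions. For context, a minimal sketch of how a cost-model test like this is driven by lit: the file's actual RUN lines are not part of the hunks shown, so the triple, attributes, and check prefix below are illustrative assumptions inferred from the CHECK lines.

; RUN: opt < %s -passes="print<cost-model>" -cost-kind=all -disable-output \
; RUN:   -mtriple=aarch64--linux-gnu -mattr=+sve 2>&1 \
; RUN:   | FileCheck %s --check-prefix=CHECK-VSCALE-1   ; illustrative, not this file's RUN line

The -cost-kind=all mode is what prints the combined RThru/CodeSize/Lat/SizeLat figures matched by the CHECK-...-NEXT lines in the hunks.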
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
index 609a23b..0976a10 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-intrinsics.ll
@@ -40,50 +40,50 @@ declare <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x
 define void @vector_insert_extract_idxzero_128b() #1 {
 ; CHECK-VSCALE-1-LABEL: 'vector_insert_extract_idxzero_128b'
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 0 for: %insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> undef, i64 0)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 0 for: %extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> undef, i64 0)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:48 CodeSize:32 Lat:48 SizeLat:48 for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> undef, <2 x float> undef, i64 0)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> undef, i64 0)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> undef, <vscale x 2 x float> undef, i64 0)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> undef, i64 0)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 0 for: %insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> poison, i64 0)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 0 for: %extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> poison, i64 0)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> poison, <vscale x 2 x i1> poison, i64 0)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> poison, i64 0)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:48 CodeSize:32 Lat:48 SizeLat:48 for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> poison, i64 0)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> poison, <2 x float> poison, i64 0)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> poison, i64 0)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> poison, <vscale x 2 x float> poison, i64 0)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> poison, i64 0)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-VSCALE-2-LABEL: 'vector_insert_extract_idxzero_128b'
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 0 for: %insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> undef, i64 0)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 0 for: %extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> undef, i64 0)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:32 Lat:48 SizeLat:48 for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> undef, <2 x float> undef, i64 0)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> undef, i64 0)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> undef, <vscale x 2 x float> undef, i64 0)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> undef, i64 0)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 0 for: %insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> poison, i64 0)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 0 for: %extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> poison, i64 0)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> poison, <vscale x 2 x i1> poison, i64 0)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> poison, i64 0)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:32 Lat:48 SizeLat:48 for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> poison, i64 0)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> poison, <2 x float> poison, i64 0)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> poison, i64 0)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> poison, <vscale x 2 x float> poison, i64 0)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> poison, i64 0)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; TYPE_BASED_ONLY-LABEL: 'vector_insert_extract_idxzero_128b'
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> undef, i64 0)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> undef, i64 0)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> undef, <2 x float> undef, i64 0)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> undef, i64 0)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> undef, <vscale x 2 x float> undef, i64 0)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> undef, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> poison, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> poison, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> poison, <vscale x 2 x i1> poison, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> poison, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> poison, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> poison, <2 x float> poison, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> poison, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> poison, <vscale x 2 x float> poison, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> poison, i64 0)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
-  %insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> undef, i64 0)
-  %extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> undef, i64 0)
-  %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.v2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0)
-  %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0)
-  %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0)
-  %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> undef, <2 x float> undef, i64 0)
-  %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> undef, i64 0)
-  %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> undef, <vscale x 2 x float> undef, i64 0)
-  %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> undef, i64 0)
+  %insert_legal_fixed_into_scalable = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> poison, <4 x float> poison, i64 0)
+  %extract_legal_fixed_from_scalable = call <2 x double> @llvm.vector.extract.v2f64.nxv2f64(<vscale x 2 x double> poison, i64 0)
+  %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.v2i1(<vscale x 16 x i1> poison, <vscale x 2 x i1> poison, i64 0)
+  %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> poison, i64 0)
+  %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> poison, i64 0)
+  %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> poison, <2 x float> poison, i64 0)
+  %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> poison, i64 0)
+  %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> poison, <vscale x 2 x float> poison, i64 0)
+  %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> poison, i64 0)
   ret void
 }
 declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float>, <4 x float>, i64)
@@ -97,50 +97,50 @@ declare <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x
 define void @vector_insert_extract_idxzero_256b() #2 {
 ; CHECK-VSCALE-1-LABEL: 'vector_insert_extract_idxzero_256b'
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 0 for: %insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> undef, <16 x i16> undef, i64 0)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 0 for: %extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> undef, i64 0)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:48 CodeSize:32 Lat:48 SizeLat:48 for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> undef, <2 x float> undef, i64 0)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> undef, i64 0)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> undef, <vscale x 2 x float> undef, i64 0)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> undef, i64 0)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 0 for: %insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> poison, <16 x i16> poison, i64 0)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 0 for: %extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> poison, i64 0)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> poison, <vscale x 2 x i1> poison, i64 0)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> poison, i64 0)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:48 CodeSize:32 Lat:48 SizeLat:48 for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> poison, i64 0)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> poison, <2 x float> poison, i64 0)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> poison, i64 0)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> poison, <vscale x 2 x float> poison, i64 0)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> poison, i64 0)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-VSCALE-2-LABEL: 'vector_insert_extract_idxzero_256b'
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 0 for: %insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> undef, <16 x i16> undef, i64 0)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 0 for: %extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> undef, i64 0)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:32 Lat:48 SizeLat:48 for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> undef, <2 x float> undef, i64 0)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> undef, i64 0)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> undef, <vscale x 2 x float> undef, i64 0)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> undef, i64 0)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 0 for: %insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> poison, <16 x i16> poison, i64 0)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 0 for: %extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> poison, i64 0)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> poison, <vscale x 2 x i1> poison, i64 0)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> poison, i64 0)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:48 CodeSize:32 Lat:48 SizeLat:48 for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> poison, i64 0)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> poison, <2 x float> poison, i64 0)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:12 CodeSize:6 Lat:12 SizeLat:12 for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> poison, i64 0)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> poison, <vscale x 2 x float> poison, i64 0)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> poison, i64 0)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; TYPE_BASED_ONLY-LABEL: 'vector_insert_extract_idxzero_256b'
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> undef, <16 x i16> undef, i64 0)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> undef, i64 0)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> undef, <2 x float> undef, i64 0)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> undef, i64 0)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> undef, <vscale x 2 x float> undef, i64 0)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> undef, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> poison, <16 x i16> poison, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float> poison, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.nxv2i1(<vscale x 16 x i1> poison, <vscale x 2 x i1> poison, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> poison, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> poison, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> poison, <2 x float> poison, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> poison, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> poison, <vscale x 2 x float> poison, i64 0)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> poison, i64 0)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
-  %insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> undef, <16 x i16> undef, i64 0)
-  %extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nx4f32(<vscale x 4 x float> undef, i64 0)
-  %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.v2i1(<vscale x 16 x i1> undef, <vscale x 2 x i1> undef, i64 0)
-  %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> undef, i64 0)
-  %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> undef, i64 0)
-  %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> undef, <2 x float> undef, i64 0)
-  %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> undef, i64 0)
-  %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> undef, <vscale x 2 x float> undef, i64 0)
-  %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> undef, i64 0)
+  %insert_legal_fixed_into_scalable = call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16> poison, <16 x i16> poison, i64 0)
+  %extract_legal_fixed_from_scalable = call <8 x float> @llvm.vector.extract.v8f32.nx4f32(<vscale x 4 x float> poison, i64 0)
+  %insert_nxv16i1_nxv2i1 = call <vscale x 16 x i1> @llvm.vector.insert.nxv16i1.v2i1(<vscale x 16 x i1> poison, <vscale x 2 x i1> poison, i64 0)
+  %extract_nxv4i1_nxv16i1 = call <vscale x 4 x i1> @llvm.vector.extract.nxv4i1.nxv16i1(<vscale x 16 x i1> poison, i64 0)
+  %extract_v8i1_nxv8i1 = call <8 x i1> @llvm.vector.extract.v8i1.nxv8i1(<vscale x 8 x i1> poison, i64 0)
+  %insert_v2f32_nxv2f32 = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v2f32(<vscale x 2 x float> poison, <2 x float> poison, i64 0)
+  %extract_v4f16_nxv4f16 = call <4 x half> @llvm.vector.extract.v4f16.nxv4f16(<vscale x 4 x half> poison, i64 0)
+  %insert_nxv2f32_nxv4f32 = call <vscale x 4 x float> @llvm.vector.insert.nxv4f32.nxv2f32(<vscale x 4 x float> poison, <vscale x 2 x float> poison, i64 0)
+  %extract_nxv4f32_nxv8f32 = call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> poison, i64 0)
   ret void
 }
 declare <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v16i16(<vscale x 8 x i16>, <16 x i16>, i64)
@@ -148,157 +148,157 @@ declare <8 x float> @llvm.vector.extract.v8f32.nxv4f32(<vscale x 4 x float>, i64
 define void @reductions(<vscale x 4 x i32> %v0, <vscale x 4 x i64> %v1, <vscale x 4 x float> %v2, <vscale x 4 x double> %v3) {
 ; CHECK-VSCALE-1-LABEL: 'reductions'
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> poison)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v1)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> poison)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v1)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> poison)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v1)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> poison)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v1)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> poison)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v1)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> poison)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v1)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> poison)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %smin_nxv4i64 = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v1)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> poison)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %umax_nxv4i32 = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v1)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> poison)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 3 for: %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> poison)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v2)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:5 SizeLat:3 for: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v3)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> poison)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of Invalid for: %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> poison)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3)
 ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; CHECK-VSCALE-2-LABEL: 'reductions'
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> poison)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v1)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> poison)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v1)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> poison)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v1)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> poison)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v1)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> poison)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v1)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> poison)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v1)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> poison)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %smin_nxv4i64 = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v1)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> poison)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %umax_nxv4i32 = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v1)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> poison)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 3 for: %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> poison)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v2)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:5 SizeLat:3 for: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v3)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> poison)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3)
-; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> undef)
+; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of Invalid for: %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> poison)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3)
 ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
 ; TYPE_BASED_ONLY-LABEL: 'reductions'
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> poison)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v0)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 3 for: %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v1)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> poison)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v0)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v1)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> poison)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v0)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 3 for: %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v1)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> poison)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v0)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 3 for: %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v1)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> poison)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v0)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 3 for: %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v1)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> poison)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v0)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 3 for: %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v1)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> poison)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v0)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 3 for: %smin_nxv4i64 = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v1)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> poison)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %umax_nxv4i32 = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v0)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 3 for: %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v1)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> poison)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 3 for: %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.000000e+00, <vscale x 1 x float> poison)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.000000e+00, <vscale x 4 x float> %v2)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:3 CodeSize:3 Lat:5 SizeLat:3 for: %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.000000e+00, <vscale x 4 x double> %v3)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> poison)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3)
-; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> undef)
+; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of Invalid for: %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> poison)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3)
 ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
 ;
-  %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> undef)
+  %add_nxv1i32 = call i32 @llvm.vector.reduce.add.nxv1i32(<vscale x 1 x i32> poison)
   %add_nxv4i32 = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> %v0)
   %add_nxv4i64 = call i64 @llvm.vector.reduce.add.nxv4i64(<vscale x 4 x i64> %v1)
-  %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> undef)
+  %mul_nxv1i32 = call i32 @llvm.vector.reduce.mul.nxv1i32(<vscale x 1 x i32> poison)
   %mul_nxv4i32 = call i32 @llvm.vector.reduce.mul.nxv4i32(<vscale x 4 x i32> %v0)
   %mul_nxv4i64 = call i64 @llvm.vector.reduce.mul.nxv4i64(<vscale x 4 x i64> %v1)
-  %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> undef)
+  %and_nxv1i32 = call i32 @llvm.vector.reduce.and.nxv1i32(<vscale x 1 x i32> poison)
   %and_nxv4i32 = call i32 @llvm.vector.reduce.and.nxv4i32(<vscale x 4 x i32> %v0)
   %and_nxv4i64 = call i64 @llvm.vector.reduce.and.nxv4i64(<vscale x 4 x i64> %v1)
-  %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> undef)
+  %or_nxv1i32 = call i32 @llvm.vector.reduce.or.nxv1i32(<vscale x 1 x i32> poison)
   %or_nxv4i32 = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> %v0)
   %or_nxv4i64 = call i64 @llvm.vector.reduce.or.nxv4i64(<vscale x 4 x i64> %v1)
-  %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> undef)
+  %xor_nxv1i32 = call i32 @llvm.vector.reduce.xor.nxv1i32(<vscale x 1 x i32> poison)
   %xor_nxv4i32 = call i32 @llvm.vector.reduce.xor.nxv4i32(<vscale x 4 x i32> %v0)
   %xor_nxv4i64 = call i64 @llvm.vector.reduce.xor.nxv4i64(<vscale x 4 x i64> %v1)
-  %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> undef)
+  %umin_nxv1i64 = call i64 @llvm.vector.reduce.umin.nxv1i64(<vscale x 1 x i64> poison)
   %umin_nxv4i32 = call i32 @llvm.vector.reduce.umin.nxv4i32(<vscale x 4 x i32> %v0)
   %umin_nxv4i64 = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %v1)
-  %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> undef)
+  %smin_nxv1i64 = call i64 @llvm.vector.reduce.smin.nxv1i64(<vscale x 1 x i64> poison)
   %smin_nxv4i32 = call i32 @llvm.vector.reduce.smin.nxv4i32(<vscale x 4 x i32> %v0)
   %smin_nxv4i64 = call i64 @llvm.vector.reduce.smin.nxv4i64(<vscale x 4 x i64> %v1)
-  %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> undef)
+  %umax_nxv1i64 = call i64 @llvm.vector.reduce.umax.nxv1i64(<vscale x 1 x i64> poison)
   %umax_nxv4i32 = call i32 @llvm.vector.reduce.umax.nxv4i32(<vscale x 4 x i32> %v0)
   %umax_nxv4i64 = call i64 @llvm.vector.reduce.umax.nxv4i64(<vscale x 4 x i64> %v1)
-  %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> undef)
+  %smax_nxv1i64 = call i64 @llvm.vector.reduce.smax.nxv1i64(<vscale x 1 x i64> poison)
   %smax_nxv4i32 = call i32 @llvm.vector.reduce.smax.nxv4i32(<vscale x 4 x i32> %v0)
   %smax_nxv4i64 = call i64 @llvm.vector.reduce.smax.nxv4i64(<vscale x 4 x i64> %v1)
-  %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.0, <vscale x 1 x float> undef)
+  %fadd_nxv1f32 = call fast float @llvm.vector.reduce.fadd.nxv1f32(float 0.0, <vscale x 1 x float> poison)
   %fadd_nxv4f32 = call fast float @llvm.vector.reduce.fadd.nxv4f32(float 0.0, <vscale x 4 x float> %v2)
   %fadd_nxv4f64 = call fast double @llvm.vector.reduce.fadd.nxv4f64(double 0.0, <vscale x 4 x double> %v3)
-  %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> undef)
+  %fmin_nxv1f32 = call fast float @llvm.vector.reduce.fmin.nxv1f32(<vscale x 1 x float> poison)
   %fmin_nxv4f32 = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> %v2)
   %fmin_nxv4f64 = call fast double @llvm.vector.reduce.fmin.nxv4f64(<vscale x 4 x double> %v3)
-  %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> undef)
+  %fmax_nxv1f32 = call fast float @llvm.vector.reduce.fmax.nxv1f32(<vscale x 1 x float> poison)
   %fmax_nxv4f32 = call fast float @llvm.vector.reduce.fmax.nxv4f32(<vscale x 4 x float> %v2)
   %fmax_nxv4f64 = call fast double @llvm.vector.reduce.fmax.nxv4f64(<vscale x 4 x double> %v3)
@@ -389,123 +389,123 @@ declare <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32>, i1)
 define void @vector_reverse() #0 {
 ; CHECK-VSCALE-1-LABEL: 'vector_reverse'
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.reverse.nxv32i8(<vscale x 32 x i8> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.reverse.nxv2i16(<vscale x 2 x i16> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.reverse.nxv4i16(<vscale x 4 x i16> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.reverse.nxv16i16(<vscale x 16 x i16> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.vector.reverse.nxv2f16(<vscale x 2 x half> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.vector.reverse.nxv4f16(<vscale x 4 x half> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.vector.reverse.nxv16f16(<vscale x 16 x half> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.vector.reverse.nxv2f32(<vscale x 2 x float> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.vector.reverse.nxv8f32(<vscale x 8 x float> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> undef)
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> undef)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> poison)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.reverse.nxv32i8(<vscale x 32 x i8> poison)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.reverse.nxv2i16(<vscale x 2 x i16> poison)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.reverse.nxv4i16(<vscale x 4 x i16> poison)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> poison)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.reverse.nxv16i16(<vscale x 16 x i16> poison)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> poison)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> poison)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> poison)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> poison)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.vector.reverse.nxv2f16(<vscale x 2 x half> poison)
+; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.vector.reverse.nxv4f16(<vscale x 4 x half> poison)
+;
CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.vector.reverse.nxv16f16(<vscale x 16 x half> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.vector.reverse.nxv2f32(<vscale x 2 x float> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.vector.reverse.nxv8f32(<vscale x 8 x float> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-VSCALE-2-LABEL: 'vector_reverse' -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.reverse.nxv32i8(<vscale x 32 x i8> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.reverse.nxv2i16(<vscale x 2 x i16> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.reverse.nxv4i16(<vscale x 4 x i16> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.reverse.nxv16i16(<vscale x 16 x i16> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: 
%reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.vector.reverse.nxv2f16(<vscale x 2 x half> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.vector.reverse.nxv4f16(<vscale x 4 x half> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.vector.reverse.nxv16f16(<vscale x 16 x half> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.vector.reverse.nxv2f32(<vscale x 2 x float> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.vector.reverse.nxv8f32(<vscale x 8 x float> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv32i8 = call <vscale x 32 x i8> 
@llvm.vector.reverse.nxv32i8(<vscale x 32 x i8> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.reverse.nxv2i16(<vscale x 2 x i16> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.reverse.nxv4i16(<vscale x 4 x i16> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.reverse.nxv16i16(<vscale x 16 x i16> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.vector.reverse.nxv2f16(<vscale x 2 x half> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.vector.reverse.nxv4f16(<vscale x 4 x half> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.vector.reverse.nxv16f16(<vscale x 16 x half> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.vector.reverse.nxv2f32(<vscale x 2 x float> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.vector.reverse.nxv8f32(<vscale x 8 x float> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i1 = call <vscale x 16 x i1> 
@llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> poison) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; TYPE_BASED_ONLY-LABEL: 'vector_reverse' -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.reverse.nxv32i8(<vscale x 32 x i8> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.reverse.nxv2i16(<vscale x 2 x i16> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.reverse.nxv4i16(<vscale x 4 x i16> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.reverse.nxv16i16(<vscale x 16 x i16> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.vector.reverse.nxv2f16(<vscale x 2 x half> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.vector.reverse.nxv4f16(<vscale x 4 x half> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.vector.reverse.nxv16f16(<vscale x 16 x half> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.vector.reverse.nxv2f32(<vscale x 2 x float> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.vector.reverse.nxv8f32(<vscale x 8 x float> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: 
Found costs of 2 for: %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.reverse.nxv32i8(<vscale x 32 x i8> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.reverse.nxv2i16(<vscale x 2 x i16> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.reverse.nxv4i16(<vscale x 4 x i16> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.reverse.nxv16i16(<vscale x 16 x i16> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.vector.reverse.nxv2f16(<vscale x 2 x half> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.vector.reverse.nxv4f16(<vscale x 4 x half> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16f16 = call 
<vscale x 16 x half> @llvm.vector.reverse.nxv16f16(<vscale x 16 x half> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.vector.reverse.nxv2f32(<vscale x 2 x float> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.vector.reverse.nxv8f32(<vscale x 8 x float> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> undef) - %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.reverse.nxv32i8(<vscale x 32 x i8> undef) - %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.reverse.nxv2i16(<vscale x 2 x i16> undef) - %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.reverse.nxv4i16(<vscale x 4 x i16> undef) - %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> undef) - %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.reverse.nxv16i16(<vscale x 16 x i16> undef) - %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> undef) - %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> undef) - %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> undef) - %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> undef) - %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.vector.reverse.nxv2f16(<vscale x 2 x half> undef) - %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.vector.reverse.nxv4f16(<vscale x 4 x half> undef) - %reverse_nxv8f16 = call <vscale x 8 x half> 
@llvm.vector.reverse.nxv8f16(<vscale x 8 x half> undef) - %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.vector.reverse.nxv16f16(<vscale x 16 x half> undef) - %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.vector.reverse.nxv2f32(<vscale x 2 x float> undef) - %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> undef) - %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.vector.reverse.nxv8f32(<vscale x 8 x float> undef) - %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> undef) - %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> undef) - %reverse_nxv2bf16 = call <vscale x 2 x bfloat> @llvm.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> undef) - %reverse_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> undef) - %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> undef) - %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> undef) - %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> undef) - %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> undef) - %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> undef) - %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> undef) + %reverse_nxv16i8 = call <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8> poison) + %reverse_nxv32i8 = call <vscale x 32 x i8> @llvm.vector.reverse.nxv32i8(<vscale x 32 x i8> poison) + %reverse_nxv2i16 = call <vscale x 2 x i16> @llvm.vector.reverse.nxv2i16(<vscale x 2 x i16> poison) + %reverse_nxv4i16 = call <vscale x 4 x i16> @llvm.vector.reverse.nxv4i16(<vscale x 4 x i16> poison) + %reverse_nxv8i16 = call <vscale x 8 x i16> @llvm.vector.reverse.nxv8i16(<vscale x 8 x i16> poison) + %reverse_nxv16i16 = call <vscale x 16 x i16> @llvm.vector.reverse.nxv16i16(<vscale x 16 x i16> poison) + %reverse_nxv4i32 = call <vscale x 4 x i32> @llvm.vector.reverse.nxv4i32(<vscale x 4 x i32> poison) + %reverse_nxv8i32 = call <vscale x 8 x i32> @llvm.vector.reverse.nxv8i32(<vscale x 8 x i32> poison) + %reverse_nxv2i64 = call <vscale x 2 x i64> @llvm.vector.reverse.nxv2i64(<vscale x 2 x i64> poison) + %reverse_nxv4i64 = call <vscale x 4 x i64> @llvm.vector.reverse.nxv4i64(<vscale x 4 x i64> poison) + %reverse_nxv2f16 = call <vscale x 2 x half> @llvm.vector.reverse.nxv2f16(<vscale x 2 x half> poison) + %reverse_nxv4f16 = call <vscale x 4 x half> @llvm.vector.reverse.nxv4f16(<vscale x 4 x half> poison) + %reverse_nxv8f16 = call <vscale x 8 x half> @llvm.vector.reverse.nxv8f16(<vscale x 8 x half> poison) + %reverse_nxv16f16 = call <vscale x 16 x half> @llvm.vector.reverse.nxv16f16(<vscale x 16 x half> poison) + %reverse_nxv2f32 = call <vscale x 2 x float> @llvm.vector.reverse.nxv2f32(<vscale x 2 x float> poison) + %reverse_nxv4f32 = call <vscale x 4 x float> @llvm.vector.reverse.nxv4f32(<vscale x 4 x float> poison) + %reverse_nxv8f32 = call <vscale x 8 x float> @llvm.vector.reverse.nxv8f32(<vscale x 8 x float> poison) + %reverse_nxv2f64 = call <vscale x 2 x double> @llvm.vector.reverse.nxv2f64(<vscale x 2 x double> poison) + %reverse_nxv4f64 = call <vscale x 4 x double> @llvm.vector.reverse.nxv4f64(<vscale x 4 x double> poison) + %reverse_nxv2bf16 = call <vscale x 2 x bfloat> 
@llvm.vector.reverse.nxv2bf16(<vscale x 2 x bfloat> poison) + %reverse_nxv4bf16 = call <vscale x 4 x bfloat> @llvm.vector.reverse.nxv4bf16(<vscale x 4 x bfloat> poison) + %reverse_nxv8bf16 = call <vscale x 8 x bfloat> @llvm.vector.reverse.nxv8bf16(<vscale x 8 x bfloat> poison) + %reverse_nxv16bf16 = call <vscale x 16 x bfloat> @llvm.vector.reverse.nxv16bf16(<vscale x 16 x bfloat> poison) + %reverse_nxv16i1 = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> poison) + %reverse_nxv8i1 = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> poison) + %reverse_nxv4i1 = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> poison) + %reverse_nxv2i1 = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> poison) ret void } declare <vscale x 16 x i8> @llvm.vector.reverse.nxv16i8(<vscale x 16 x i8>) @@ -912,158 +912,158 @@ declare <vscale x 4 x double> @llvm.vector.splice.nxv4f64(<vscale x 4 x double>, define void @get_lane_mask() #0 { ; CHECK-VSCALE-1-LABEL: 'get_lane_mask' -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef) 
-; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> 
@llvm.get.active.lane.mask.v32i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-VSCALE-2-LABEL: 'get_lane_mask' -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> 
@llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 poison, i32 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 poison, i64 poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; TYPE_BASED_ONLY-LABEL: 'get_lane_mask' -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs 
of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 poison, i64 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 poison, i64 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 poison, i64 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 poison, 
i32 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 2 for: %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 1 for: %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 poison, i64 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 poison, i64 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 32 for: %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 poison, i32 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 16 for: %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 poison, i32 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 8 for: %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 poison, i32 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 poison, i32 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 48 for: %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 poison, i64 poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 6 for: %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 undef, i64 undef) - %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 undef, i64 undef) - %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 undef, i64 undef) - %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 undef, i64 undef) + %mask_nxv16i1_i64 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 poison, i64 poison) + %mask_nxv8i1_i64 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 poison, i64 poison) + %mask_nxv4i1_i64 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 poison, i64 poison) + %mask_nxv2i1_i64 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 poison, i64 poison) - %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 undef, i32 undef) - %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 undef, i32 undef) - %mask_nxv4i1_i32 = call <vscale x 4 x i1> 
@llvm.get.active.lane.mask.nxv4i1.i32(i32 undef, i32 undef) - %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 undef, i32 undef) + %mask_nxv16i1_i32 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i32(i32 poison, i32 poison) + %mask_nxv8i1_i32 = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i32(i32 poison, i32 poison) + %mask_nxv4i1_i32 = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 poison, i32 poison) + %mask_nxv2i1_i32 = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i32(i32 poison, i32 poison) - %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 undef, i64 undef) - %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 undef, i16 undef) + %mask_nxv32i1_i64 = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i64(i64 poison, i64 poison) + %mask_nxv16i1_i16 = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i16(i16 poison, i16 poison) - %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 undef, i64 undef) - %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 undef, i64 undef) - %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 undef, i64 undef) - %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 undef, i64 undef) + %mask_v16i1_i64 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i64(i64 poison, i64 poison) + %mask_v8i1_i64 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 poison, i64 poison) + %mask_v4i1_i64 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 poison, i64 poison) + %mask_v2i1_i64 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 poison, i64 poison) - %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 undef, i32 undef) - %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 undef, i32 undef) - %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 undef, i32 undef) - %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 undef, i32 undef) + %mask_v16i1_i32 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 poison, i32 poison) + %mask_v8i1_i32 = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i32(i32 poison, i32 poison) + %mask_v4i1_i32 = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 poison, i32 poison) + %mask_v2i1_i32 = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 poison, i32 poison) - %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 undef, i64 undef) - %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 undef, i16 undef) + %mask_v32i1_i64 = call <32 x i1> @llvm.get.active.lane.mask.v32i1.i64(i64 poison, i64 poison) + %mask_v16i1_i16 = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i16(i16 poison, i16 poison) ret void } define void @fshr() #0 { ; CHECK-VSCALE-1-LABEL: 'fshr' -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs 
of 5 for: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i8> poison, <vscale x 16 x i8> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i16> poison, <vscale x 8 x i16> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> poison, <vscale x 4 x i32> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i64> poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-VSCALE-2-LABEL: 'fshr' -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i8> poison, <vscale x 16 x i8> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i16> poison, <vscale x 8 x i16> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> poison, <vscale x 4 x i32> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i64> poison) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; TYPE_BASED_ONLY-LABEL: 'fshr' -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %1 = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i8> poison, <vscale x 16 x i8> 
poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %2 = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i16> poison, <vscale x 8 x i16> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %3 = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> poison, <vscale x 4 x i32> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %4 = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i64> poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) - call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) - call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) - call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) + call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i8> poison, <vscale x 16 x i8> poison) + call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i16> poison, <vscale x 8 x i16> poison) + call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> poison, <vscale x 4 x i32> poison) + call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i64> poison) ret void } define void @fshl() #0 { ; CHECK-VSCALE-1-LABEL: 'fshl' -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i8> poison, <vscale x 16 x i8> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i16> poison, <vscale x 8 x i16> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> poison, <vscale x 4 x i32> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 5 for: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i64> poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-VSCALE-2-LABEL: 'fshl' -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) -; 
CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i8> poison, <vscale x 16 x i8> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i16> poison, <vscale x 8 x i16> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> poison, <vscale x 4 x i32> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 5 for: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i64> poison) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; TYPE_BASED_ONLY-LABEL: 'fshl' -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %1 = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i8> poison, <vscale x 16 x i8> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %2 = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i16> poison, <vscale x 8 x i16> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %3 = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> poison, <vscale x 4 x i32> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 7 for: %4 = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i64> poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef) - call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i16> undef, <vscale x 8 x i16> undef) - call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef) - call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i64> undef, <vscale x 2 x i64> undef) + call <vscale x 16 x i8> 
@llvm.fshl.nxv16i8(<vscale x 16 x i8> poison, <vscale x 16 x i8> poison, <vscale x 16 x i8> poison) + call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> poison, <vscale x 8 x i16> poison, <vscale x 8 x i16> poison) + call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> poison, <vscale x 4 x i32> poison, <vscale x 4 x i32> poison) + call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> poison, <vscale x 2 x i64> poison, <vscale x 2 x i64> poison) ret void } @@ -1362,48 +1362,48 @@ define void @histogram_nxv4i64(<vscale x 4 x ptr> %buckets, <vscale x 4 x i1> %m define void @match() #3 { ; CHECK-VSCALE-1-LABEL: 'match' -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> undef, <vscale x 16 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> undef, <vscale x 8 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> undef, <vscale x 4 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> undef, <vscale x 2 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 14 for: %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 14 for: %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef) -; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> poison, <vscale x 16 x i1> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 4 for: %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> poison, <vscale x 8 x i1> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> poison, <vscale x 4 x i1> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> poison, <vscale x 2 x i1> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 14 for: %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> poison, <16 x i8> 
poison, <16 x i1> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of 14 for: %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> poison, <8 x i16> poison, <8 x i1> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> poison, <4 x i32> poison, <4 x i1> poison) +; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> poison, <2 x i64> poison, <2 x i1> poison) ; CHECK-VSCALE-1-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-VSCALE-2-LABEL: 'match' -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> undef, <vscale x 16 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> undef, <vscale x 8 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> undef, <vscale x 4 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> undef, <vscale x 2 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 14 for: %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 14 for: %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef) -; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> poison, <vscale x 16 x i1> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 4 for: %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> poison, <vscale x 8 x i1> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> poison, <vscale x 4 x i1> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> poison, <vscale x 2 x i1> poison) +; 
CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 14 for: %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> poison, <16 x i8> poison, <16 x i1> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of 14 for: %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> poison, <8 x i16> poison, <8 x i1> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> poison, <4 x i32> poison, <4 x i1> poison) +; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> poison, <2 x i64> poison, <2 x i1> poison) ; CHECK-VSCALE-2-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; TYPE_BASED_ONLY-LABEL: 'match' -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> undef, <vscale x 16 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> undef, <vscale x 8 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> undef, <vscale x 4 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> undef, <vscale x 2 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 14 for: %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 14 for: %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef) -; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> poison, <vscale x 16 x i1> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 4 for: %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> poison, <vscale x 8 x i1> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> poison, <vscale x 4 x i1> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: 
%match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> poison, <vscale x 2 x i1> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 14 for: %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> poison, <16 x i8> poison, <16 x i1> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of 14 for: %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> poison, <8 x i16> poison, <8 x i1> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:29 CodeSize:21 Lat:29 SizeLat:29 for: %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> poison, <4 x i32> poison, <4 x i1> poison) +; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:15 CodeSize:11 Lat:15 SizeLat:15 for: %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> poison, <2 x i64> poison, <2 x i1> poison) ; TYPE_BASED_ONLY-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> undef, <vscale x 16 x i1> undef) - %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> undef, <vscale x 8 x i1> undef) - %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> undef, <vscale x 4 x i1> undef) - %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> undef, <vscale x 2 x i1> undef) + %match_nxv16i8_v16i8 = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> poison, <16 x i8> poison, <vscale x 16 x i1> poison) + %match_nxv8i16_v8i16 = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> poison, <8 x i16> poison, <vscale x 8 x i1> poison) + %match_nxv4i32_v4i32 = call <vscale x 4 x i1> @llvm.experimental.vector.match.nxv4i32.v4i32(<vscale x 4 x i32> poison, <4 x i32> poison, <vscale x 4 x i1> poison) + %match_nxv2i64_v2i64 = call <vscale x 2 x i1> @llvm.experimental.vector.match.nxv2i64.v2i64(<vscale x 2 x i64> poison, <2 x i64> poison, <vscale x 2 x i1> poison) - %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> undef, <16 x i8> undef, <16 x i1> undef) - %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> undef, <8 x i16> undef, <8 x i1> undef) - %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> undef, <4 x i32> undef, <4 x i1> undef) - %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> undef, <2 x i64> undef, <2 x i1> undef) + %match_v16i8_v16i8 = call <16 x i1> @llvm.experimental.vector.match.v16i8.v16i8(<16 x i8> poison, <16 x i8> poison, <16 x i1> poison) + %match_v8i16_v8i16 = call <8 x i1> @llvm.experimental.vector.match.v8i16.v8i16(<8 x i16> poison, <8 x i16> poison, <8 x i1> poison) + %match_v4i32_v4i32 = call <4 x i1> @llvm.experimental.vector.match.v4i32.v4i32(<4 x i32> poison, <4 x i32> poison, <4 x i1> poison) + %match_v2i64_v2i64 = call <2 x i1> @llvm.experimental.vector.match.v2i64.v2i64(<2 x i64> poison, <2 x i64> poison, <2 x i1> poison) ret void } diff --git 
a/llvm/test/Analysis/DependenceAnalysis/becount-couldnotcompute.ll b/llvm/test/Analysis/DependenceAnalysis/becount-couldnotcompute.ll new file mode 100644 index 0000000..49fbad3 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/becount-couldnotcompute.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 | FileCheck %s + +; Test for the function isKnownLessThan, which calculates a back-edge taken +; count that can be a CouldNotCompute SCEV. + +define void @test(i64 %conv, ptr %a) { +; CHECK-LABEL: 'test' +; CHECK-NEXT: Src: %ld = load i32, ptr %arrayidx12, align 4 --> Dst: %ld = load i32, ptr %arrayidx12, align 4 +; CHECK-NEXT: da analyze - none! +; +entry: + %sub = add i64 %conv, 1 + br label %loop + +loop: + %i = phi i64 [ %add26, %loop ], [ 0, %entry ] + %arrayidx12 = getelementptr i32, ptr %a, i64 %i + %ld = load i32, ptr %arrayidx12, align 4 + %add26 = add nsw i64 %sub, %i + br label %loop +} diff --git a/llvm/test/Analysis/ScalarEvolution/ptrtoaddr.ll b/llvm/test/Analysis/ScalarEvolution/ptrtoaddr.ll new file mode 100644 index 0000000..ebab9f0 --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/ptrtoaddr.ll @@ -0,0 +1,135 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s --data-layout="e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -S -disable-output -disable-verify "-passes=print<scalar-evolution>" 2>&1 | FileCheck --check-prefixes=ALL,X64 %s +; RUN: opt < %s --data-layout="e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128" -S -disable-output -disable-verify "-passes=print<scalar-evolution>" 2>&1 | FileCheck --check-prefixes=ALL,X32 %s + +declare void @useptr(ptr) + +define void @ptrtoaddr(ptr %in, ptr %out0, ptr %out1, ptr %out2, ptr %out3) { +; X64-LABEL: 'ptrtoaddr' +; X64-NEXT: Classifying expressions for: @ptrtoaddr +; X64-NEXT: %p0 = ptrtoaddr ptr %in to i64 +; X64-NEXT: --> %p0 U: full-set S: full-set +; X64-NEXT: %p1 = ptrtoaddr ptr %in to i32 +; X64-NEXT: --> %p1 U: full-set S: full-set +; X64-NEXT: %p2 = ptrtoaddr ptr %in to i16 +; X64-NEXT: --> %p2 U: full-set S: full-set +; X64-NEXT: %p3 = ptrtoaddr ptr %in to i128 +; X64-NEXT: --> %p3 U: full-set S: full-set +; X64-NEXT: Determining loop execution counts for: @ptrtoaddr +; +; X32-LABEL: 'ptrtoaddr' +; X32-NEXT: Classifying expressions for: @ptrtoaddr +; X32-NEXT: %p0 = ptrtoaddr ptr %in to i64 +; X32-NEXT: --> %p0 U: full-set S: full-set +; X32-NEXT: %p1 = ptrtoaddr ptr %in to i32 +; X32-NEXT: --> %p1 U: full-set S: full-set +; X32-NEXT: %p2 = ptrtoaddr ptr %in to i16 +; X32-NEXT: --> %p2 U: full-set S: full-set +; X32-NEXT: %p3 = ptrtoaddr ptr %in to i128 +; X32-NEXT: --> %p3 U: full-set S: full-set +; X32-NEXT: Determining loop execution counts for: @ptrtoaddr +; + %p0 = ptrtoaddr ptr %in to i64 + %p1 = ptrtoaddr ptr %in to i32 + %p2 = ptrtoaddr ptr %in to i16 + %p3 = ptrtoaddr ptr %in to i128 + store i64 %p0, ptr %out0 + store i32 %p1, ptr %out1 + store i16 %p2, ptr %out2 + store i128 %p3, ptr %out3 + ret void +} + +define void @ptrtoaddr_as1(ptr addrspace(1) %in, ptr %out0, ptr %out1, ptr %out2, ptr %out3) { +; X64-LABEL: 'ptrtoaddr_as1' +; X64-NEXT: Classifying expressions for: @ptrtoaddr_as1 +; X64-NEXT: %p0 = ptrtoaddr ptr addrspace(1) %in to i64 +; X64-NEXT: --> %p0 U: full-set S: full-set +; X64-NEXT: %p1 = ptrtoaddr ptr addrspace(1) %in to i32 +;
X64-NEXT: --> %p1 U: full-set S: full-set +; X64-NEXT: %p2 = ptrtoaddr ptr addrspace(1) %in to i16 +; X64-NEXT: --> %p2 U: full-set S: full-set +; X64-NEXT: %p3 = ptrtoaddr ptr addrspace(1) %in to i128 +; X64-NEXT: --> %p3 U: full-set S: full-set +; X64-NEXT: Determining loop execution counts for: @ptrtoaddr_as1 +; +; X32-LABEL: 'ptrtoaddr_as1' +; X32-NEXT: Classifying expressions for: @ptrtoaddr_as1 +; X32-NEXT: %p0 = ptrtoaddr ptr addrspace(1) %in to i64 +; X32-NEXT: --> %p0 U: full-set S: full-set +; X32-NEXT: %p1 = ptrtoaddr ptr addrspace(1) %in to i32 +; X32-NEXT: --> %p1 U: full-set S: full-set +; X32-NEXT: %p2 = ptrtoaddr ptr addrspace(1) %in to i16 +; X32-NEXT: --> %p2 U: full-set S: full-set +; X32-NEXT: %p3 = ptrtoaddr ptr addrspace(1) %in to i128 +; X32-NEXT: --> %p3 U: full-set S: full-set +; X32-NEXT: Determining loop execution counts for: @ptrtoaddr_as1 +; + %p0 = ptrtoaddr ptr addrspace(1) %in to i64 + %p1 = ptrtoaddr ptr addrspace(1) %in to i32 + %p2 = ptrtoaddr ptr addrspace(1) %in to i16 + %p3 = ptrtoaddr ptr addrspace(1) %in to i128 + store i64 %p0, ptr %out0 + store i32 %p1, ptr %out1 + store i16 %p2, ptr %out2 + store i128 %p3, ptr %out3 + ret void +} + +define void @ptrtoaddr_of_bitcast(ptr %in, ptr %out0) { +; X64-LABEL: 'ptrtoaddr_of_bitcast' +; X64-NEXT: Classifying expressions for: @ptrtoaddr_of_bitcast +; X64-NEXT: %in_casted = bitcast ptr %in to ptr +; X64-NEXT: --> %in U: full-set S: full-set +; X64-NEXT: %p0 = ptrtoaddr ptr %in_casted to i64 +; X64-NEXT: --> %p0 U: full-set S: full-set +; X64-NEXT: Determining loop execution counts for: @ptrtoaddr_of_bitcast +; +; X32-LABEL: 'ptrtoaddr_of_bitcast' +; X32-NEXT: Classifying expressions for: @ptrtoaddr_of_bitcast +; X32-NEXT: %in_casted = bitcast ptr %in to ptr +; X32-NEXT: --> %in U: full-set S: full-set +; X32-NEXT: %p0 = ptrtoaddr ptr %in_casted to i64 +; X32-NEXT: --> %p0 U: full-set S: full-set +; X32-NEXT: Determining loop execution counts for: @ptrtoaddr_of_bitcast +; + %in_casted = bitcast ptr %in to ptr + %p0 = ptrtoaddr ptr %in_casted to i64 + store i64 %p0, ptr %out0 + ret void +} + +define void @ptrtoaddr_of_nullptr(ptr %out0) { +; ALL-LABEL: 'ptrtoaddr_of_nullptr' +; ALL-NEXT: Classifying expressions for: @ptrtoaddr_of_nullptr +; ALL-NEXT: %p0 = ptrtoaddr ptr null to i64 +; ALL-NEXT: --> %p0 U: full-set S: full-set +; ALL-NEXT: Determining loop execution counts for: @ptrtoaddr_of_nullptr +; + %p0 = ptrtoaddr ptr null to i64 + store i64 %p0, ptr %out0 + ret void +} + +define void @ptrtoaddr_of_gep(ptr %in, ptr %out0) { +; X64-LABEL: 'ptrtoaddr_of_gep' +; X64-NEXT: Classifying expressions for: @ptrtoaddr_of_gep +; X64-NEXT: %in_adj = getelementptr inbounds i8, ptr %in, i64 42 +; X64-NEXT: --> (42 + %in) U: full-set S: full-set +; X64-NEXT: %p0 = ptrtoaddr ptr %in_adj to i64 +; X64-NEXT: --> %p0 U: full-set S: full-set +; X64-NEXT: Determining loop execution counts for: @ptrtoaddr_of_gep +; +; X32-LABEL: 'ptrtoaddr_of_gep' +; X32-NEXT: Classifying expressions for: @ptrtoaddr_of_gep +; X32-NEXT: %in_adj = getelementptr inbounds i8, ptr %in, i64 42 +; X32-NEXT: --> (42 + %in) U: full-set S: full-set +; X32-NEXT: %p0 = ptrtoaddr ptr %in_adj to i64 +; X32-NEXT: --> %p0 U: full-set S: full-set +; X32-NEXT: Determining loop execution counts for: @ptrtoaddr_of_gep +; + %in_adj = getelementptr inbounds i8, ptr %in, i64 42 + %p0 = ptrtoaddr ptr %in_adj to i64 + store i64 %p0, ptr %out0 + ret void +} diff --git a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll index 
acac2c9..0c1f37b 100644 --- a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll +++ b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll @@ -382,7 +382,7 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) { ; X64-NEXT: %i9 = ptrtoint ptr %i7 to i64 ; X64-NEXT: --> {(ptrtoint ptr %arg to i64),+,1}<nuw><%bb6> U: full-set S: full-set Exits: (-1 + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i10 = sub i64 %i9, %i4 -; X64-NEXT: --> {0,+,1}<nuw><%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,-1) S: [0,-1) Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10 ; X64-NEXT: --> {%arg2,+,1}<nw><%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg2) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i12 = load i8, ptr %i11, align 1 @@ -393,7 +393,7 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) { ; X64-NEXT: --> {(1 + %arg),+,1}<nuw><%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg) LoopDispositions: { %bb6: Computable } ; X64-NEXT: Determining loop execution counts for: @pr46786_c26_char ; X64-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) -; X64-NEXT: Loop %bb6: constant max backedge-taken count is i64 -1 +; X64-NEXT: Loop %bb6: constant max backedge-taken count is i64 -2 ; X64-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) ; X64-NEXT: Loop %bb6: Trip multiple is 1 ; @@ -406,9 +406,9 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) { ; X32-NEXT: %i8 = load i8, ptr %i7, align 1 ; X32-NEXT: --> %i8 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i9 = ptrtoint ptr %i7 to i64 -; X32-NEXT: --> {(zext i32 (ptrtoint ptr %arg to i32) to i64),+,1}<nuw><%bb6> U: [0,8589934591) S: [0,8589934591) Exits: ((zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) + (zext i32 (ptrtoint ptr %arg to i32) to i64)) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {(zext i32 (ptrtoint ptr %arg to i32) to i64),+,1}<nuw><%bb6> U: [0,8589934590) S: [0,8589934590) Exits: ((zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) + (zext i32 (ptrtoint ptr %arg to i32) to i64)) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i10 = sub i64 %i9, %i4 -; X32-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,4294967295) S: [0,4294967295) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10 ; X32-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg2) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i12 = load i8, ptr %i11, align 1 @@ -419,7 +419,7 @@ define void @pr46786_c26_char(ptr %arg, ptr 
%arg1, ptr %arg2) { ; X32-NEXT: --> {(1 + %arg),+,1}<nuw><%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg) LoopDispositions: { %bb6: Computable } ; X32-NEXT: Determining loop execution counts for: @pr46786_c26_char ; X32-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) -; X32-NEXT: Loop %bb6: constant max backedge-taken count is i32 -1 +; X32-NEXT: Loop %bb6: constant max backedge-taken count is i32 -2 ; X32-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) ; X32-NEXT: Loop %bb6: Trip multiple is 1 ; @@ -459,7 +459,7 @@ define void @pr46786_c26_char_cmp_ops_swapped(ptr %arg, ptr %arg1, ptr %arg2) { ; X64-NEXT: %i9 = ptrtoint ptr %i7 to i64 ; X64-NEXT: --> {(ptrtoint ptr %arg to i64),+,1}<nuw><%bb6> U: full-set S: full-set Exits: (-1 + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i10 = sub i64 %i9, %i4 -; X64-NEXT: --> {0,+,1}<nuw><%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable } +; X64-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,-1) S: [0,-1) Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10 ; X64-NEXT: --> {%arg2,+,1}<nw><%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg2) LoopDispositions: { %bb6: Computable } ; X64-NEXT: %i12 = load i8, ptr %i11, align 1 @@ -470,7 +470,7 @@ define void @pr46786_c26_char_cmp_ops_swapped(ptr %arg, ptr %arg1, ptr %arg2) { ; X64-NEXT: --> {(1 + %arg),+,1}<nuw><%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64) + %arg) LoopDispositions: { %bb6: Computable } ; X64-NEXT: Determining loop execution counts for: @pr46786_c26_char_cmp_ops_swapped ; X64-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) -; X64-NEXT: Loop %bb6: constant max backedge-taken count is i64 -1 +; X64-NEXT: Loop %bb6: constant max backedge-taken count is i64 -2 ; X64-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i64)) + (ptrtoint ptr %arg1 to i64)) ; X64-NEXT: Loop %bb6: Trip multiple is 1 ; @@ -483,9 +483,9 @@ define void @pr46786_c26_char_cmp_ops_swapped(ptr %arg, ptr %arg1, ptr %arg2) { ; X32-NEXT: %i8 = load i8, ptr %i7, align 1 ; X32-NEXT: --> %i8 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i9 = ptrtoint ptr %i7 to i64 -; X32-NEXT: --> {(zext i32 (ptrtoint ptr %arg to i32) to i64),+,1}<nuw><%bb6> U: [0,8589934591) S: [0,8589934591) Exits: ((zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) + (zext i32 (ptrtoint ptr %arg to i32) to i64)) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {(zext i32 (ptrtoint ptr %arg to i32) to i64),+,1}<nuw><%bb6> U: [0,8589934590) S: [0,8589934590) Exits: ((zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) + (zext i32 (ptrtoint ptr %arg to i32) to i64)) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i10 = sub i64 %i9, %i4 -; X32-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,4294967296) S: [0,4294967296) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + 
(ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {0,+,1}<nuw><%bb6> U: [0,4294967295) S: [0,4294967295) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10 ; X32-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg2) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i12 = load i8, ptr %i11, align 1 @@ -496,7 +496,7 @@ define void @pr46786_c26_char_cmp_ops_swapped(ptr %arg, ptr %arg1, ptr %arg2) { ; X32-NEXT: --> {(1 + %arg),+,1}<nuw><%bb6> U: full-set S: full-set Exits: ((-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg) LoopDispositions: { %bb6: Computable } ; X32-NEXT: Determining loop execution counts for: @pr46786_c26_char_cmp_ops_swapped ; X32-NEXT: Loop %bb6: backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) -; X32-NEXT: Loop %bb6: constant max backedge-taken count is i32 -1 +; X32-NEXT: Loop %bb6: constant max backedge-taken count is i32 -2 ; X32-NEXT: Loop %bb6: symbolic max backedge-taken count is (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) ; X32-NEXT: Loop %bb6: Trip multiple is 1 ; diff --git a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll index 7ba422d..a477465c 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/trip-multiple-guard-info.ll @@ -578,22 +578,22 @@ define void @test_ptr_aligned_by_2_and_4_via_assumption(ptr %start, ptr %end) { ; CHECK-LABEL: 'test_ptr_aligned_by_2_and_4_via_assumption' ; CHECK-NEXT: Classifying expressions for: @test_ptr_aligned_by_2_and_4_via_assumption ; CHECK-NEXT: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%start,+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4 -; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_ptr_aligned_by_2_and_4_via_assumption ; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count. ; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count. 
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 ; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903 ; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 ; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 2) ] @@ -615,9 +615,9 @@ define void @test_ptrs_aligned_by_4_via_assumption(ptr %start, ptr %end) { ; CHECK-LABEL: 'test_ptrs_aligned_by_4_via_assumption' ; CHECK-NEXT: Classifying expressions for: @test_ptrs_aligned_by_4_via_assumption ; CHECK-NEXT: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%start,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64) + %start) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4 -; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: ((-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64) + %start) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_ptrs_aligned_by_4_via_assumption ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4611686018427387903 @@ -644,9 +644,9 @@ define void @test_ptrs_aligned_by_8_via_assumption(ptr %start, ptr %end) { ; CHECK-LABEL: 'test_ptrs_aligned_by_8_via_assumption' ; CHECK-NEXT: Classifying expressions for: @test_ptrs_aligned_by_8_via_assumption ; CHECK-NEXT: %iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: ((4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%start,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: (-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr 
%end to i64) + %start) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4 -; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: (4 + (4 * ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4))<nuw> + %start) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(4 + %start)<nuw><nsw>,+,4}<%loop> U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: ((-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64) + %start) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_ptrs_aligned_by_8_via_assumption ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4611686018427387903 @@ -677,22 +677,22 @@ define void @test_ptr_aligned_by_4_via_assumption_multiple_loop_predecessors(ptr ; CHECK-NEXT: %c = call i1 @cond() ; CHECK-NEXT: --> %c U: full-set S: full-set ; CHECK-NEXT: %iv = phi ptr [ %start, %then ], [ %start, %else ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {%start,+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%start,+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = getelementptr i8, ptr %iv, i64 4 -; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(4 + %start),+,4}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <<Unknown>> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test_ptr_aligned_by_4_via_assumption_multiple_loop_predecessors ; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %loop: Unpredictable constant max backedge-taken count. ; CHECK-NEXT: Loop %loop: Unpredictable symbolic max backedge-taken count. 
; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 ; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4611686018427387903 ; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 ; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: Equal predicate: (zext i2 ((trunc i64 (ptrtoint ptr %end to i64) to i2) + (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2))) to i64) == 0 +; CHECK-NEXT: Equal predicate: (zext i2 (-1 * (trunc i64 (ptrtoint ptr %start to i64) to i2)) to i64) == 0 ; entry: call void @llvm.assume(i1 true) [ "align"(ptr %start, i64 2) ] diff --git a/llvm/test/Assembler/autoupgrade-lifetime-intrinsics.ll b/llvm/test/Assembler/autoupgrade-lifetime-intrinsics.ll index 377c002..49174d2 100644 --- a/llvm/test/Assembler/autoupgrade-lifetime-intrinsics.ll +++ b/llvm/test/Assembler/autoupgrade-lifetime-intrinsics.ll @@ -56,6 +56,45 @@ define void @remove_unanalyzable(ptr %p) { ret void } +define void @no_declaration() { +; CHECK-LABEL: define void @no_declaration() { +; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1, addrspace(2) +; CHECK-NEXT: call void @llvm.lifetime.start.p2(ptr addrspace(2) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p2(ptr addrspace(2) [[A]]) +; CHECK-NEXT: ret void +; + %a = alloca i8, addrspace(2) + call void @llvm.lifetime.start.p2(i64 1, ptr addrspace(2) %a) + call void @llvm.lifetime.end.p2(i64 1, ptr addrspace(2) %a) + ret void +} + +define void @no_suffix1() { +; CHECK-LABEL: define void @no_suffix1() { +; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1, addrspace(3) +; CHECK-NEXT: call void @llvm.lifetime.start.p3(ptr addrspace(3) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p3(ptr addrspace(3) [[A]]) +; CHECK-NEXT: ret void +; + %a = alloca i8, addrspace(3) + call void @llvm.lifetime.start(i64 1, ptr addrspace(3) %a) + call void @llvm.lifetime.end(i64 1, ptr addrspace(3) %a) + ret void +} + +define void @no_suffix2() { +; CHECK-LABEL: define void @no_suffix2() { +; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1, addrspace(4) +; CHECK-NEXT: call void @llvm.lifetime.start.p4(ptr addrspace(4) [[A]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p4(ptr addrspace(4) [[A]]) +; CHECK-NEXT: ret void +; + %a = alloca i8, addrspace(4) + call void @llvm.lifetime.start(i64 1, ptr addrspace(4) %a) + call void @llvm.lifetime.end(i64 1, ptr addrspace(4) %a) + ret void +} + declare void @llvm.lifetime.start.p0(i64, ptr) declare void @llvm.lifetime.end.p0(i64, ptr) declare void @llvm.lifetime.start.p1(i64, ptr addrspace(1)) diff --git a/llvm/test/Assembler/autoupgrade-wasm-intrinsics.ll b/llvm/test/Assembler/autoupgrade-wasm-intrinsics.ll index 012fa1d..e54efa4 100644 --- a/llvm/test/Assembler/autoupgrade-wasm-intrinsics.ll +++ b/llvm/test/Assembler/autoupgrade-wasm-intrinsics.ll @@ -46,7 +46,10 @@ define <4 
x float> @test_fms(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ret <4 x float> %res } -declare <16 x i8> @llvm.wasm.laneselect.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) +; This declaration is intentionally omitted to check that intrinsic upgrade +; also works without a declaration. +; declare <16 x i8> @llvm.wasm.laneselect.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) + declare <8 x i16> @llvm.wasm.dot.i8x16.i7x16.signed(<16 x i8>, <16 x i8>) declare <4 x i32> @llvm.wasm.dot.i8x16.i7x16.add.signed(<16 x i8>, <16 x i8>, <4 x i32>) declare <4 x float> @llvm.wasm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) diff --git a/llvm/test/Assembler/implicit-intrinsic-declaration-invalid3.ll b/llvm/test/Assembler/implicit-intrinsic-declaration-invalid3.ll index ad5a96a..4caee57 100644 --- a/llvm/test/Assembler/implicit-intrinsic-declaration-invalid3.ll +++ b/llvm/test/Assembler/implicit-intrinsic-declaration-invalid3.ll @@ -2,7 +2,7 @@ ; Use of unknown intrinsic without declaration should be rejected. -; CHECK: error: use of undefined value '@llvm.foobar' +; CHECK: error: unknown intrinsic 'llvm.foobar' define void @test() { call i8 @llvm.foobar(i8 0, i16 1) ret void diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-add.mir b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-add.mir new file mode 100644 index 0000000..824ada1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/knownbits-add.mir @@ -0,0 +1,278 @@ +# NOTE: Assertions have been autogenerated by utils/update_givaluetracking_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=aarch64 -passes="print<gisel-value-tracking>" -filetype=null %s 2>&1 | FileCheck %s + +--- +name: Cst +body: | + bb.1: + ; CHECK-LABEL: name: @Cst + ; CHECK-NEXT: %0:_ KnownBits:00000010 SignBits:6 + ; CHECK-NEXT: %1:_ KnownBits:00011000 SignBits:3 + ; CHECK-NEXT: %2:_ KnownBits:00011010 SignBits:3 + %0:_(s8) = G_CONSTANT i8 2 + %1:_(s8) = G_CONSTANT i8 24 + %2:_(s8) = G_ADD %0, %1 +... +--- +name: CstZero +body: | + bb.1: + ; CHECK-LABEL: name: @CstZero + ; CHECK-NEXT: %0:_ KnownBits:00000001 SignBits:7 + ; CHECK-NEXT: %1:_ KnownBits:11111111 SignBits:8 + ; CHECK-NEXT: %2:_ KnownBits:00000000 SignBits:8 + %0:_(s8) = G_CONSTANT i8 1 + %1:_(s8) = G_CONSTANT i8 255 + %2:_(s8) = G_ADD %0, %1 +... +--- +name: CstNegOne +body: | + bb.1: + ; CHECK-LABEL: name: @CstNegOne + ; CHECK-NEXT: %0:_ KnownBits:00000000 SignBits:8 + ; CHECK-NEXT: %1:_ KnownBits:11111111 SignBits:8 + ; CHECK-NEXT: %2:_ KnownBits:11111111 SignBits:8 + %0:_(s8) = G_CONSTANT i8 0 + %1:_(s8) = G_CONSTANT i8 255 + %2:_(s8) = G_ADD %0, %1 +... +--- +name: CstSeven +body: | + bb.1: + ; CHECK-LABEL: name: @CstSeven + ; CHECK-NEXT: %0:_ KnownBits:00001000 SignBits:4 + ; CHECK-NEXT: %1:_ KnownBits:11111111 SignBits:8 + ; CHECK-NEXT: %2:_ KnownBits:00000111 SignBits:5 + %0:_(s8) = G_CONSTANT i8 8 + %1:_(s8) = G_CONSTANT i8 255 + %2:_(s8) = G_ADD %0, %1 +... +--- +name: CstNeg +body: | + bb.1: + ; CHECK-LABEL: name: @CstNeg + ; CHECK-NEXT: %0:_ KnownBits:11100000 SignBits:3 + ; CHECK-NEXT: %1:_ KnownBits:00000010 SignBits:6 + ; CHECK-NEXT: %2:_ KnownBits:11100010 SignBits:3 + %0:_(s8) = G_CONSTANT i8 224 + %1:_(s8) = G_CONSTANT i8 2 + %2:_(s8) = G_ADD %0, %1 +... +--- +name: ScalarVar +body: | + bb.1: + ; CHECK-LABEL: name: @ScalarVar + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1 + %0:_(s8) = COPY $b0 + %1:_(s8) = COPY $b1 + %2:_(s8) = G_ADD %0, %1 +... 
+--- +name: ScalarRhsEarlyOut +body: | + bb.1: + ; CHECK-LABEL: name: @ScalarRhsEarlyOut + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:00000011 SignBits:6 + ; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1 + %0:_(s8) = COPY $b0 + %1:_(s8) = G_CONSTANT i8 3 + %2:_(s8) = G_ADD %0, %1 +... +--- +name: ScalarNonNegative +body: | + bb.1: + ; CHECK-LABEL: name: @ScalarNonNegative + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:00001111 SignBits:4 + ; CHECK-NEXT: %2:_ KnownBits:0000???? SignBits:4 + ; CHECK-NEXT: %3:_ KnownBits:11111111 SignBits:8 + ; CHECK-NEXT: %4:_ KnownBits:???????? SignBits:4 + %0:_(s8) = COPY $b0 + %1:_(s8) = G_CONSTANT i8 15 + %2:_(s8) = G_AND %0, %1 + %3:_(s8) = G_CONSTANT i8 255 + %4:_(s8) = G_ADD %2, %3 +... +--- +name: ScalarLhsEarlyOut +body: | + bb.1: + ; CHECK-LABEL: name: @ScalarLhsEarlyOut + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:00000011 SignBits:6 + ; CHECK-NEXT: %2:_ KnownBits:???????? SignBits:1 + %0:_(s8) = COPY $b0 + %1:_(s8) = G_CONSTANT i8 3 + %2:_(s8) = G_ADD %1, %0 +... +--- +name: ScalarPartKnown +body: | + bb.1: + ; CHECK-LABEL: name: @ScalarPartKnown + ; CHECK-NEXT: %0:_ KnownBits:???????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:00001111 SignBits:4 + ; CHECK-NEXT: %2:_ KnownBits:0000???? SignBits:4 + ; CHECK-NEXT: %3:_ KnownBits:00000101 SignBits:5 + ; CHECK-NEXT: %4:_ KnownBits:000????? SignBits:3 + %0:_(s8) = COPY $b0 + %1:_(s8) = G_CONSTANT i8 15 + %2:_(s8) = G_AND %0, %1 + %3:_(s8) = G_CONSTANT i8 5 + %4:_(s8) = G_ADD %2, %3 +... +--- +name: VectorCstZero +body: | + bb.1: + ; CHECK-LABEL: name: @VectorCstZero + ; CHECK-NEXT: %0:_ KnownBits:0000000000000001 SignBits:15 + ; CHECK-NEXT: %1:_ KnownBits:1111111111111111 SignBits:16 + ; CHECK-NEXT: %2:_ KnownBits:0000000000000001 SignBits:15 + ; CHECK-NEXT: %3:_ KnownBits:1111111111111111 SignBits:16 + ; CHECK-NEXT: %4:_ KnownBits:0000000000000000 SignBits:16 + %0:_(s16) = G_CONSTANT i16 1 + %1:_(s16) = G_CONSTANT i16 65535 + %2:_(<4 x s16>) = G_BUILD_VECTOR %0, %0, %0, %0 + %3:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1 + %4:_(<4 x s16>) = G_ADD %2, %3 +... +--- +name: VectorCstNegOne +body: | + bb.1: + ; CHECK-LABEL: name: @VectorCstNegOne + ; CHECK-NEXT: %0:_ KnownBits:0000000000000000 SignBits:16 + ; CHECK-NEXT: %1:_ KnownBits:1111111111111111 SignBits:16 + ; CHECK-NEXT: %2:_ KnownBits:0000000000000000 SignBits:16 + ; CHECK-NEXT: %3:_ KnownBits:1111111111111111 SignBits:16 + ; CHECK-NEXT: %4:_ KnownBits:1111111111111111 SignBits:16 + %0:_(s16) = G_CONSTANT i16 0 + %1:_(s16) = G_CONSTANT i16 65535 + %2:_(<4 x s16>) = G_BUILD_VECTOR %0, %0, %0, %0 + %3:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1 + %4:_(<4 x s16>) = G_ADD %2, %3 +... +--- +name: VectorVar +body: | + bb.1: + ; CHECK-LABEL: name: @VectorVar + ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %2:_ KnownBits:???????????????? SignBits:1 + %0:_(<4 x s16>) = COPY $d0 + %1:_(<4 x s16>) = COPY $d1 + %2:_(<4 x s16>) = G_ADD %0, %1 +... +--- +name: VectorRhsEarlyOut +body: | + bb.1: + ; CHECK-LABEL: name: @VectorRhsEarlyOut + ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:0000000000000011 SignBits:14 + ; CHECK-NEXT: %2:_ KnownBits:0000000000000011 SignBits:14 + ; CHECK-NEXT: %3:_ KnownBits:???????????????? 
SignBits:1 + %0:_(<4 x s16>) = COPY $d0 + %1:_(s16) = G_CONSTANT i16 3 + %2:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1 + %3:_(<4 x s16>) = G_ADD %2, %0 +... +--- +name: VectorNonNegative +body: | + bb.1: + ; CHECK-LABEL: name: @VectorNonNegative + ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:0000000011111111 SignBits:8 + ; CHECK-NEXT: %2:_ KnownBits:0000000011111111 SignBits:8 + ; CHECK-NEXT: %3:_ KnownBits:00000000???????? SignBits:8 + ; CHECK-NEXT: %4:_ KnownBits:1111111111111111 SignBits:16 + ; CHECK-NEXT: %5:_ KnownBits:1111111111111111 SignBits:16 + ; CHECK-NEXT: %6:_ KnownBits:???????????????? SignBits:8 + %0:_(<4 x s16>) = COPY $d0 + %1:_(s16) = G_CONSTANT i16 255 + %2:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1 + %3:_(<4 x s16>) = G_AND %0, %2 + %4:_(s16) = G_CONSTANT i16 65535 + %5:_(<4 x s16>) = G_BUILD_VECTOR %4, %4, %4, %4 + %6:_(<4 x s16>) = G_ADD %3, %5 +... +--- +name: VectorLhsEarlyOut +body: | + bb.1: + ; CHECK-LABEL: name: @VectorLhsEarlyOut + ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:0000000000000011 SignBits:14 + ; CHECK-NEXT: %2:_ KnownBits:0000000000000011 SignBits:14 + ; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1 + %0:_(<4 x s16>) = COPY $d0 + %1:_(s16) = G_CONSTANT i16 3 + %2:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1 + %3:_(<4 x s16>) = G_ADD %0, %2 +... +--- +name: VectorPartKnown +body: | + bb.1: + ; CHECK-LABEL: name: @VectorPartKnown + ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:0000000011111111 SignBits:8 + ; CHECK-NEXT: %2:_ KnownBits:0000000011111111 SignBits:8 + ; CHECK-NEXT: %3:_ KnownBits:00000000???????? SignBits:8 + ; CHECK-NEXT: %4:_ KnownBits:0000000000101010 SignBits:10 + ; CHECK-NEXT: %5:_ KnownBits:0000000001001010 SignBits:9 + ; CHECK-NEXT: %6:_ KnownBits:000000000??01010 SignBits:9 + ; CHECK-NEXT: %7:_ KnownBits:0000000????????? SignBits:7 + %0:_(<4 x s16>) = COPY $d0 + %1:_(s16) = G_CONSTANT i16 255 + %2:_(<4 x s16>) = G_BUILD_VECTOR %1, %1, %1, %1 + %3:_(<4 x s16>) = G_AND %0, %2 + %4:_(s16) = G_CONSTANT i16 42 + %5:_(s16) = G_CONSTANT i16 74 + %6:_(<4 x s16>) = G_BUILD_VECTOR %4, %5, %5, %4 + %7:_(<4 x s16>) = G_ADD %6, %3 +... +--- +name: VectorCst36 +body: | + bb.1: + ; CHECK-LABEL: name: @VectorCst36 + ; CHECK-NEXT: %0:_ KnownBits:0000000000000011 SignBits:14 + ; CHECK-NEXT: %1:_ KnownBits:0000000000000110 SignBits:13 + ; CHECK-NEXT: %2:_ KnownBits:0000000000000?1? SignBits:13 + ; CHECK-NEXT: %3:_ KnownBits:0000000000000?1? SignBits:13 + ; CHECK-NEXT: %4:_ KnownBits:000000000000???? SignBits:12 + %0:_(s16) = G_CONSTANT i16 3 + %1:_(s16) = G_CONSTANT i16 6 + %2:_(<4 x s16>) = G_BUILD_VECTOR %0, %1, %1, %0 + %3:_(<4 x s16>) = G_BUILD_VECTOR %0, %1, %1, %0 + %4:_(<4 x s16>) = G_ADD %2, %3 +... + +--- +name: VectorCst3unknown +body: | + bb.1: + ; CHECK-LABEL: name: @VectorCst3unknown + ; CHECK-NEXT: %0:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %1:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %2:_ KnownBits:0000000000000011 SignBits:14 + ; CHECK-NEXT: %3:_ KnownBits:???????????????? SignBits:1 + ; CHECK-NEXT: %4:_ KnownBits:???????????????? SignBits:1 + %0:_(<4 x s16>) = COPY $d0 + %1:_(s16) = COPY $h0 + %2:_(s16) = G_CONSTANT i16 3 + %3:_(<4 x s16>) = G_BUILD_VECTOR %1, %2, %2, %1 + %4:_(<4 x s16>) = G_ADD %0, %3 +... 
diff --git a/llvm/test/CodeGen/AArch64/combine-sdiv.ll b/llvm/test/CodeGen/AArch64/combine-sdiv.ll index dc88f94..cca190f 100644 --- a/llvm/test/CodeGen/AArch64/combine-sdiv.ll +++ b/llvm/test/CodeGen/AArch64/combine-sdiv.ll @@ -1774,3 +1774,88 @@ define i128 @combine_i128_sdiv_const100(i128 %x) { %1 = sdiv i128 %x, 100 ret i128 %1 } + +; The following only becomes an sdiv_by_one after type legalisation, after which +; the splatted scalar constant has a different type to the splat vector. This +; test verifies DAGCombiner does not care about this type difference. +define <16 x i16> @combine_vec_sdiv_by_one_obfuscated(<16 x i16> %x) "target-features"="+sve" { +; CHECK-SD-LABEL: combine_vec_sdiv_by_one_obfuscated: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: combine_vec_sdiv_by_one_obfuscated: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: movi v2.2d, #0000000000000000 +; CHECK-GI-NEXT: movi v3.8h, #1 +; CHECK-GI-NEXT: smov w8, v0.h[0] +; CHECK-GI-NEXT: mov v3.h[0], v2.h[0] +; CHECK-GI-NEXT: smov w9, v3.h[0] +; CHECK-GI-NEXT: smov w16, v3.h[7] +; CHECK-GI-NEXT: sdiv w14, w8, w9 +; CHECK-GI-NEXT: smov w8, v0.h[1] +; CHECK-GI-NEXT: smov w9, v3.h[1] +; CHECK-GI-NEXT: sdiv w15, w8, w9 +; CHECK-GI-NEXT: smov w8, v0.h[2] +; CHECK-GI-NEXT: smov w9, v3.h[2] +; CHECK-GI-NEXT: sdiv w13, w8, w9 +; CHECK-GI-NEXT: smov w8, v0.h[3] +; CHECK-GI-NEXT: smov w9, v3.h[3] +; CHECK-GI-NEXT: sdiv w12, w8, w9 +; CHECK-GI-NEXT: smov w8, v0.h[4] +; CHECK-GI-NEXT: smov w9, v3.h[4] +; CHECK-GI-NEXT: sdiv w11, w8, w9 +; CHECK-GI-NEXT: smov w8, v0.h[5] +; CHECK-GI-NEXT: smov w9, v3.h[5] +; CHECK-GI-NEXT: sdiv w10, w8, w9 +; CHECK-GI-NEXT: smov w8, v0.h[6] +; CHECK-GI-NEXT: smov w9, v3.h[6] +; CHECK-GI-NEXT: movi v3.8h, #1 +; CHECK-GI-NEXT: smov w17, v3.h[0] +; CHECK-GI-NEXT: smov w18, v3.h[1] +; CHECK-GI-NEXT: smov w0, v3.h[2] +; CHECK-GI-NEXT: smov w1, v3.h[3] +; CHECK-GI-NEXT: smov w2, v3.h[4] +; CHECK-GI-NEXT: smov w3, v3.h[5] +; CHECK-GI-NEXT: sdiv w8, w8, w9 +; CHECK-GI-NEXT: smov w9, v0.h[7] +; CHECK-GI-NEXT: fmov s0, w14 +; CHECK-GI-NEXT: mov v0.h[1], w15 +; CHECK-GI-NEXT: smov w15, v1.h[6] +; CHECK-GI-NEXT: mov v0.h[2], w13 +; CHECK-GI-NEXT: sdiv w9, w9, w16 +; CHECK-GI-NEXT: smov w16, v1.h[0] +; CHECK-GI-NEXT: mov v0.h[3], w12 +; CHECK-GI-NEXT: smov w12, v1.h[7] +; CHECK-GI-NEXT: mov v0.h[4], w11 +; CHECK-GI-NEXT: sdiv w16, w16, w17 +; CHECK-GI-NEXT: smov w17, v1.h[1] +; CHECK-GI-NEXT: mov v0.h[5], w10 +; CHECK-GI-NEXT: mov v0.h[6], w8 +; CHECK-GI-NEXT: sdiv w17, w17, w18 +; CHECK-GI-NEXT: smov w18, v1.h[2] +; CHECK-GI-NEXT: fmov s2, w16 +; CHECK-GI-NEXT: smov w16, v3.h[6] +; CHECK-GI-NEXT: mov v0.h[7], w9 +; CHECK-GI-NEXT: sdiv w18, w18, w0 +; CHECK-GI-NEXT: smov w0, v1.h[3] +; CHECK-GI-NEXT: mov v2.h[1], w17 +; CHECK-GI-NEXT: sdiv w0, w0, w1 +; CHECK-GI-NEXT: smov w1, v1.h[4] +; CHECK-GI-NEXT: mov v2.h[2], w18 +; CHECK-GI-NEXT: sdiv w1, w1, w2 +; CHECK-GI-NEXT: smov w2, v1.h[5] +; CHECK-GI-NEXT: mov v2.h[3], w0 +; CHECK-GI-NEXT: sdiv w14, w2, w3 +; CHECK-GI-NEXT: mov v2.h[4], w1 +; CHECK-GI-NEXT: sdiv w13, w15, w16 +; CHECK-GI-NEXT: smov w15, v3.h[7] +; CHECK-GI-NEXT: mov v2.h[5], w14 +; CHECK-GI-NEXT: sdiv w10, w12, w15 +; CHECK-GI-NEXT: mov v2.h[6], w13 +; CHECK-GI-NEXT: mov v2.h[7], w10 +; CHECK-GI-NEXT: mov v1.16b, v2.16b +; CHECK-GI-NEXT: ret + %zero_and_ones = shufflevector <16 x i16> zeroinitializer, <16 x i16> splat (i16 1), <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> + %div = sdiv <16 x i16> 
%x, %zero_and_ones + ret <16 x i16> %div +} diff --git a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll index b6dee97e..b8d6c88 100644 --- a/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll +++ b/llvm/test/CodeGen/AArch64/sme-za-exceptions.ll @@ -732,6 +732,247 @@ exit: ret void } +; This example corresponds to: +; +; __arm_agnostic("sme_za_state") void try_catch_agnostic_za_invoke() +; { +; try { +; agnostic_za_call(); +; } catch(...) { +; } +; } +; +; In this example we preserve all SME state enabled by PSTATE.ZA using +; `__arm_sme_save` before agnostic_za_call(). This is because ZA state should +; be preserved on all normal returns from an agnostic-ZA function. That means +; we need to make sure ZA state is saved in case agnostic_za_call() throws, and +; we need to restore ZA state after unwinding to the catch block.
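+; +; A rough sketch of the resulting call placement (an illustrative comment that +; paraphrases the CHECK lines below; it is not autogenerated and not a fixed +; ABI contract): +; +;   %buf = stack buffer of __arm_sme_state_size() bytes +;   call __arm_sme_save(%buf)    ; before the call that may throw +;   call agnostic_za_call() +;   call __arm_sme_restore(%buf) ; on the normal return path, and again +;                                ; after unwinding to the catch block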
+ +define void @try_catch_agnostic_za_invoke() "aarch64_za_state_agnostic" personality ptr @__gxx_personality_v0 { +; CHECK-LABEL: try_catch_agnostic_za_invoke: +; CHECK: .Lfunc_begin5: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: .cfi_personality 156, DW.ref.__gxx_personality_v0 +; CHECK-NEXT: .cfi_lsda 28, .Lexception5 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: bl __arm_sme_state_size +; CHECK-NEXT: sub sp, sp, x0 +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: .Ltmp15: // EH_LABEL +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: bl __arm_sme_save +; CHECK-NEXT: bl agnostic_za_call +; CHECK-NEXT: .Ltmp16: // EH_LABEL +; CHECK-NEXT: .LBB5_1: // %exit +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: bl __arm_sme_restore +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB5_2: // %catch +; CHECK-NEXT: .Ltmp17: // EH_LABEL +; CHECK-NEXT: bl __cxa_begin_catch +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: b .LBB5_1 +; +; CHECK-SDAG-LABEL: try_catch_agnostic_za_invoke: +; CHECK-SDAG: .Lfunc_begin5: +; CHECK-SDAG-NEXT: .cfi_startproc +; CHECK-SDAG-NEXT: .cfi_personality 156, DW.ref.__gxx_personality_v0 +; CHECK-SDAG-NEXT: .cfi_lsda 28, .Lexception5 +; CHECK-SDAG-NEXT: // %bb.0: // %entry +; CHECK-SDAG-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-SDAG-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-SDAG-NEXT: mov x29, sp +; CHECK-SDAG-NEXT: .cfi_def_cfa w29, 32 +; CHECK-SDAG-NEXT: .cfi_offset w19, -16 +; CHECK-SDAG-NEXT: .cfi_offset w30, -24 +; CHECK-SDAG-NEXT: .cfi_offset w29, -32 +; CHECK-SDAG-NEXT: bl __arm_sme_state_size +; CHECK-SDAG-NEXT: sub sp, sp, x0 +; CHECK-SDAG-NEXT: mov x19, sp +; CHECK-SDAG-NEXT: .Ltmp15: // EH_LABEL +; CHECK-SDAG-NEXT: mov x0, x19 +; CHECK-SDAG-NEXT: bl __arm_sme_save +; CHECK-SDAG-NEXT: bl agnostic_za_call +; CHECK-SDAG-NEXT: mov x0, x19 +; CHECK-SDAG-NEXT: bl __arm_sme_restore +; CHECK-SDAG-NEXT: .Ltmp16: // EH_LABEL +; CHECK-SDAG-NEXT: .LBB5_1: // %exit +; CHECK-SDAG-NEXT: mov sp, x29 +; CHECK-SDAG-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-SDAG-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-SDAG-NEXT: ret +; CHECK-SDAG-NEXT: .LBB5_2: // %catch +; CHECK-SDAG-NEXT: .Ltmp17: // EH_LABEL +; CHECK-SDAG-NEXT: mov x1, x0 +; CHECK-SDAG-NEXT: mov x0, x19 +; CHECK-SDAG-NEXT: bl __arm_sme_restore +; CHECK-SDAG-NEXT: mov x0, x19 +; CHECK-SDAG-NEXT: bl __arm_sme_save +; CHECK-SDAG-NEXT: mov x0, x1 +; CHECK-SDAG-NEXT: bl __cxa_begin_catch +; CHECK-SDAG-NEXT: mov x0, x19 +; CHECK-SDAG-NEXT: bl __arm_sme_restore +; CHECK-SDAG-NEXT: mov x0, x19 +; CHECK-SDAG-NEXT: bl __arm_sme_save +; CHECK-SDAG-NEXT: bl __cxa_end_catch +; CHECK-SDAG-NEXT: mov x0, x19 +; CHECK-SDAG-NEXT: bl __arm_sme_restore +; CHECK-SDAG-NEXT: b .LBB5_1 +entry: + invoke void @agnostic_za_call() + to label %exit unwind label %catch + +catch: + %eh_info = landingpad { ptr, i32 } + catch ptr null + %exception_ptr = extractvalue { ptr, i32 } %eh_info, 0 + tail call ptr @__cxa_begin_catch(ptr %exception_ptr) + tail call void @__cxa_end_catch() + br label %exit + +exit: + ret void +} + +; This is the same as `try_catch_agnostic_za_invoke`, but shows that a lazy save +; would also need to be committed in a shared-ZA function calling an agnostic-ZA function. +define void @try_catch_inout_za_agnostic_za_callee() "aarch64_inout_za" personality ptr @__gxx_personality_v0 { +; CHECK-LABEL: try_catch_inout_za_agnostic_za_callee: +; CHECK: .Lfunc_begin6: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: .cfi_personality 156, DW.ref.__gxx_personality_v0 +; CHECK-NEXT: .cfi_lsda 28, .Lexception6 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-16]!
// 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: msub x9, x8, x8, x9 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stp x9, x8, [x29, #-16] +; CHECK-NEXT: .Ltmp18: // EH_LABEL +; CHECK-NEXT: sub x8, x29, #16 +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: bl agnostic_za_call +; CHECK-NEXT: .Ltmp19: // EH_LABEL +; CHECK-NEXT: .LBB6_1: // %exit +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: cbnz x8, .LBB6_3 +; CHECK-NEXT: // %bb.2: // %exit +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB6_3: // %exit +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB6_4: // %catch +; CHECK-NEXT: .Ltmp20: // EH_LABEL +; CHECK-NEXT: bl __cxa_begin_catch +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: b .LBB6_1 +; +; CHECK-SDAG-LABEL: try_catch_inout_za_agnostic_za_callee: +; CHECK-SDAG: .Lfunc_begin6: +; CHECK-SDAG-NEXT: .cfi_startproc +; CHECK-SDAG-NEXT: .cfi_personality 156, DW.ref.__gxx_personality_v0 +; CHECK-SDAG-NEXT: .cfi_lsda 28, .Lexception6 +; CHECK-SDAG-NEXT: // %bb.0: // %entry +; CHECK-SDAG-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-SDAG-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-SDAG-NEXT: mov x29, sp +; CHECK-SDAG-NEXT: sub sp, sp, #16 +; CHECK-SDAG-NEXT: .cfi_def_cfa w29, 32 +; CHECK-SDAG-NEXT: .cfi_offset w19, -16 +; CHECK-SDAG-NEXT: .cfi_offset w30, -24 +; CHECK-SDAG-NEXT: .cfi_offset w29, -32 +; CHECK-SDAG-NEXT: rdsvl x8, #1 +; CHECK-SDAG-NEXT: mov x9, sp +; CHECK-SDAG-NEXT: msub x9, x8, x8, x9 +; CHECK-SDAG-NEXT: mov sp, x9 +; CHECK-SDAG-NEXT: stp x9, x8, [x29, #-16] +; CHECK-SDAG-NEXT: .Ltmp18: // EH_LABEL +; CHECK-SDAG-NEXT: sub x19, x29, #16 +; CHECK-SDAG-NEXT: msr TPIDR2_EL0, x19 +; CHECK-SDAG-NEXT: bl agnostic_za_call +; CHECK-SDAG-NEXT: smstart za +; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-SDAG-NEXT: sub x0, x29, #16 +; CHECK-SDAG-NEXT: cbnz x8, .LBB6_2 +; CHECK-SDAG-NEXT: // %bb.1: // %entry +; CHECK-SDAG-NEXT: bl __arm_tpidr2_restore +; CHECK-SDAG-NEXT: .LBB6_2: // %entry +; CHECK-SDAG-NEXT: msr TPIDR2_EL0, xzr +; CHECK-SDAG-NEXT: .Ltmp19: // EH_LABEL +; CHECK-SDAG-NEXT: .LBB6_3: // %exit +; CHECK-SDAG-NEXT: mov sp, x29 +; CHECK-SDAG-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-SDAG-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-SDAG-NEXT: ret +; CHECK-SDAG-NEXT: .LBB6_4: // %catch +; CHECK-SDAG-NEXT: .Ltmp20: // EH_LABEL +; CHECK-SDAG-NEXT: mov x1, x0 +; CHECK-SDAG-NEXT: smstart za +; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-SDAG-NEXT: sub x0, x29, #16 +; CHECK-SDAG-NEXT: cbnz x8, .LBB6_6 +; CHECK-SDAG-NEXT: // %bb.5: // %catch +; CHECK-SDAG-NEXT: bl __arm_tpidr2_restore +; CHECK-SDAG-NEXT: .LBB6_6: // %catch +; CHECK-SDAG-NEXT: mov x0, x1 +; CHECK-SDAG-NEXT: msr TPIDR2_EL0, xzr +; CHECK-SDAG-NEXT: msr TPIDR2_EL0, x19 +; CHECK-SDAG-NEXT: bl __cxa_begin_catch +; CHECK-SDAG-NEXT: smstart za +; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-SDAG-NEXT: sub x0, x29, #16 +; CHECK-SDAG-NEXT: cbnz x8, .LBB6_8 +; CHECK-SDAG-NEXT: // %bb.7: // %catch +; CHECK-SDAG-NEXT: bl __arm_tpidr2_restore +; CHECK-SDAG-NEXT: .LBB6_8: // %catch +; CHECK-SDAG-NEXT: msr TPIDR2_EL0, xzr +; CHECK-SDAG-NEXT: msr TPIDR2_EL0, x19 +; CHECK-SDAG-NEXT: bl 
__cxa_end_catch +; CHECK-SDAG-NEXT: smstart za +; CHECK-SDAG-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-SDAG-NEXT: sub x0, x29, #16 +; CHECK-SDAG-NEXT: cbnz x8, .LBB6_10 +; CHECK-SDAG-NEXT: // %bb.9: // %catch +; CHECK-SDAG-NEXT: bl __arm_tpidr2_restore +; CHECK-SDAG-NEXT: .LBB6_10: // %catch +; CHECK-SDAG-NEXT: msr TPIDR2_EL0, xzr +; CHECK-SDAG-NEXT: b .LBB6_3 +entry: + invoke void @agnostic_za_call() + to label %exit unwind label %catch + +catch: + %eh_info = landingpad { ptr, i32 } + catch ptr null + %exception_ptr = extractvalue { ptr, i32 } %eh_info, 0 + tail call ptr @__cxa_begin_catch(ptr %exception_ptr) + tail call void @__cxa_end_catch() + br label %exit + +exit: + ret void +} + declare ptr @__cxa_allocate_exception(i64) declare void @__cxa_throw(ptr, ptr, ptr) declare ptr @__cxa_begin_catch(ptr) @@ -742,3 +983,4 @@ declare void @may_throw() declare void @shared_za_call() "aarch64_inout_za" declare void @noexcept_shared_za_call() "aarch64_inout_za" declare void @shared_zt0_call() "aarch64_inout_zt0" +declare void @agnostic_za_call() "aarch64_za_state_agnostic" diff --git a/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll b/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll index bea0310..70224fc 100644 --- a/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll +++ b/llvm/test/CodeGen/DirectX/ContainerData/PSVResources.ll @@ -94,6 +94,18 @@ define void @main() #0 { %uav2_2 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) @llvm.dx.resource.handlefrombinding.tdx.TypedBuffer_f32_1_0( i32 4, i32 0, i32 10, i32 5, ptr null) + + ; RWBuffer<float4> UnboundedArray[] : register(u10, space5) +; CHECK: - Type: UAVTyped +; CHECK: Space: 5 +; CHECK: LowerBound: 10 +; CHECK: UpperBound: 4294967295 +; CHECK: Kind: TypedBuffer +; CHECK: Flags: +; CHECK: UsedByAtomic64: false + ; RWBuffer<float4> Buf = BufferArray[100]; + %uav3 = call target("dx.TypedBuffer", <4 x float>, 1, 0, 0) + @llvm.dx.resource.handlefrombinding(i32 5, i32 10, i32 -1, i32 100, ptr null) ret void } diff --git a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll index bf31ccb..559cc53 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/vselect.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/vselect.ll @@ -32,6 +32,40 @@ define void @select_v32i8(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v32i8_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v32i8_1: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI2_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI2_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %sel = select <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <32 x i8> %v0, <32 x i8> %v1 + store <32 x i8> %sel, ptr %res + ret void +} + +define void @select_v32i8_2(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v32i8_2: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI3_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: 
xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <32 x i8>, ptr %a0 + %v1 = load <32 x i8>, ptr %a1 + %sel = select <32 x i1> <i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <32 x i8> %v0, <32 x i8> %v1 + store <32 x i8> %sel, ptr %res + ret void +} + define void @select_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v16i16: ; CHECK: # %bb.0: @@ -49,6 +83,40 @@ define void @select_v16i16(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v16i16_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v16i16_1: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI5_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI5_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %sel = select <16 x i1> <i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i16> %v0, <16 x i16> %v1 + store <16 x i16> %sel, ptr %res + ret void +} + +define void @select_v16i16_2(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v16i16_2: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI6_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI6_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i16>, ptr %a0 + %v1 = load <16 x i16>, ptr %a1 + %sel = select <16 x i1> <i1 false, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x i16> %v0, <16 x i16> %v1 + store <16 x i16> %sel, ptr %res + ret void +} + define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v8i32: ; CHECK: # %bb.0: @@ -65,19 +133,70 @@ define void @select_v8i32(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v8i32_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v8i32_1: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI8_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI8_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i32>, ptr %a0 + %v1 = load <8 x i32>, ptr %a1 + %sel = select <8 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <8 x i32> %v0, <8 x i32> %v1 + store <8 x i32> %sel, ptr %res + ret void +} + +define void @select_v8f32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI9_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x float>, ptr %a0 + %v1 = load <8 x float>, ptr %a1 + %sel = select <8 x i1> <i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false>, <8 x float> %v0, <8 x 
float> %v1 + store <8 x float> %sel, ptr %res + ret void +} + define void @select_v4i64(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v4i64: ; CHECK: # %bb.0: ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI4_0) -; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI4_0) +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI10_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI10_0) ; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret %v0 = load <4 x i64>, ptr %a0 %v1 = load <4 x i64>, ptr %a1 - %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i64> %v0, <4 x i64> %v1 + %sel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> %v0, <4 x i64> %v1 store <4 x i64> %sel, ptr %res ret void } + +define void @select_v4f64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvld $xr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI11_0) +; CHECK-NEXT: xvld $xr2, $a1, %pc_lo12(.LCPI11_0) +; CHECK-NEXT: xvbitsel.v $xr0, $xr1, $xr0, $xr2 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x double>, ptr %a0 + %v1 = load <4 x double>, ptr %a1 + %sel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v0, <4 x double> %v1 + store <4 x double> %sel, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll index 8f25a6b..25c4f09 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/vselect.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/vselect.ll @@ -16,6 +16,20 @@ define void @select_v16i8_imm(ptr %res, ptr %a0) nounwind { ret void } +define void @select_v16i8_imm_1(ptr %res, ptr %a0) nounwind { +; CHECK-LABEL: select_v16i8_imm_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vrepli.h $vr1, -256 +; CHECK-NEXT: vbitseli.b $vr1, $vr0, 1 +; CHECK-NEXT: vst $vr1, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %sel = select <16 x i1> <i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true>, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8> %v0 + store <16 x i8> %sel, ptr %res + ret void +} + define void @select_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v16i8: ; CHECK: # %bb.0: @@ -32,6 +46,40 @@ define void @select_v16i8(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v16i8_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v16i8_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI3_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI3_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %sel = select <16 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> %v0, <16 x i8> %v1 + store <16 x i8> %sel, ptr %res + ret void +} + +define void @select_v16i8_2(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v16i8_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI4_0) +; 
CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI4_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <16 x i8>, ptr %a0 + %v1 = load <16 x i8>, ptr %a1 + %sel = select <16 x i1> <i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false>, <16 x i8> %v0, <16 x i8> %v1 + store <16 x i8> %sel, ptr %res + ret void +} + define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v8i16: ; CHECK: # %bb.0: @@ -49,6 +97,40 @@ define void @select_v8i16(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v8i16_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v8i16_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI6_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI6_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %sel = select <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %v0, <8 x i16> %v1 + store <8 x i16> %sel, ptr %res + ret void +} + +define void @select_v8i16_2(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v8i16_2: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI7_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI7_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <8 x i16>, ptr %a0 + %v1 = load <8 x i16>, ptr %a1 + %sel = select <8 x i1> <i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i16> %v0, <8 x i16> %v1 + store <8 x i16> %sel, ptr %res + ret void +} + define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v4i32: ; CHECK: # %bb.0: @@ -65,13 +147,47 @@ define void @select_v4i32(ptr %res, ptr %a0, ptr %a1) nounwind { ret void } +define void @select_v4i32_1(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v4i32_1: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI9_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI9_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x i32>, ptr %a0 + %v1 = load <4 x i32>, ptr %a1 + %sel = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> %v0, <4 x i32> %v1 + store <4 x i32> %sel, ptr %res + ret void +} + +define void @select_v4f32(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI10_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI10_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <4 x float>, ptr %a0 + %v1 = load <4 x float>, ptr %a1 + %sel = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x float> %v0, <4 x float> %v1 + store <4 x float> %sel, ptr %res + ret void +} + define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { ; CHECK-LABEL: select_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI4_0) -; CHECK-NEXT: 
vld $vr2, $a1, %pc_lo12(.LCPI4_0) +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI11_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI11_0) ; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret @@ -81,3 +197,20 @@ define void @select_v2i64(ptr %res, ptr %a0, ptr %a1) nounwind { store <2 x i64> %sel, ptr %res ret void } + +define void @select_v2f64(ptr %res, ptr %a0, ptr %a1) nounwind { +; CHECK-LABEL: select_v2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vld $vr1, $a2, 0 +; CHECK-NEXT: pcalau12i $a1, %pc_hi20(.LCPI12_0) +; CHECK-NEXT: vld $vr2, $a1, %pc_lo12(.LCPI12_0) +; CHECK-NEXT: vbitsel.v $vr0, $vr1, $vr0, $vr2 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret + %v0 = load <2 x double>, ptr %a0 + %v1 = load <2 x double>, ptr %a1 + %sel = select <2 x i1> <i1 false, i1 true>, <2 x double> %v0, <2 x double> %v1 + store <2 x double> %sel, ptr %res + ret void +} diff --git a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll b/llvm/test/CodeGen/PowerPC/compare-vector-with-zero.ll index 0f7e0c7..1325abf 100644 --- a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll +++ b/llvm/test/CodeGen/PowerPC/compare-vector-with-zero.ll @@ -95,3 +95,80 @@ declare i4 @llvm.ctpop.i4(i4) #1 !6 = !{!"short", !7, i64 0} !7 = !{!"omnipotent char", !8, i64 0} !8 = !{!"Simple C/C++ TBAA"} + +; Function to lock down changes for floating-point vector comparisons +define range(i32 0, 5) i32 @cols_needed(ptr %colauths){ +; POWERPC_64LE-LABEL: cols_needed: +; POWERPC_64LE: # %bb.0: # %entry +; POWERPC_64LE-NEXT: lxv vs0, 0(r3) +; POWERPC_64LE-NEXT: xxlxor vs1, vs1, vs1 +; POWERPC_64LE-NEXT: li r4, 4 +; POWERPC_64LE-NEXT: li r3, 0 +; POWERPC_64LE-NEXT: xvcmpeqsp vs0, vs0, vs1 +; POWERPC_64LE-NEXT: xxlnor v2, vs0, vs0 +; POWERPC_64LE-NEXT: vextuwrx r4, r4, v2 +; POWERPC_64LE-NEXT: vextuwrx r3, r3, v2 +; POWERPC_64LE-NEXT: rlwinm r4, r4, 1, 30, 30 +; POWERPC_64LE-NEXT: sub r3, r4, r3 +; POWERPC_64LE-NEXT: mfvsrwz r4, v2 +; POWERPC_64LE-NEXT: rlwinm r4, r4, 2, 29, 29 +; POWERPC_64LE-NEXT: or r3, r3, r4 +; POWERPC_64LE-NEXT: li r4, 12 +; POWERPC_64LE-NEXT: vextuwrx r4, r4, v2 +; POWERPC_64LE-NEXT: slwi r4, r4, 3 +; POWERPC_64LE-NEXT: or r3, r3, r4 +; POWERPC_64LE-NEXT: clrlwi r3, r3, 28 +; POWERPC_64LE-NEXT: stb r3, -1(r1) +; POWERPC_64LE-NEXT: lbz r3, -1(r1) +; POWERPC_64LE-NEXT: popcntd r3, r3 +; POWERPC_64LE-NEXT: blr +; +; POWERPC_64-LABEL: cols_needed: +; POWERPC_64: # %bb.0: # %entry +; POWERPC_64-NEXT: lxv vs0, 0(r3) +; POWERPC_64-NEXT: xxlxor vs1, vs1, vs1 +; POWERPC_64-NEXT: li r4, 8 +; POWERPC_64-NEXT: xvcmpeqsp vs0, vs0, vs1 +; POWERPC_64-NEXT: xxlnor v2, vs0, vs0 +; POWERPC_64-NEXT: vextuwlx r4, r4, v2 +; POWERPC_64-NEXT: mfvsrwz r3, v2 +; POWERPC_64-NEXT: rlwinm r4, r4, 1, 30, 30 +; POWERPC_64-NEXT: rlwimi r4, r3, 2, 29, 29 +; POWERPC_64-NEXT: li r3, 0 +; POWERPC_64-NEXT: vextuwlx r3, r3, v2 +; POWERPC_64-NEXT: rlwimi r4, r3, 3, 0, 28 +; POWERPC_64-NEXT: li r3, 12 +; POWERPC_64-NEXT: vextuwlx r3, r3, v2 +; POWERPC_64-NEXT: sub r3, r4, r3 +; POWERPC_64-NEXT: clrlwi r3, r3, 28 +; POWERPC_64-NEXT: stb r3, -1(r1) +; POWERPC_64-NEXT: lbz r3, -1(r1) +; POWERPC_64-NEXT: popcntd r3, r3 +; POWERPC_64-NEXT: blr +; +; POWERPC_32-LABEL: cols_needed: +; POWERPC_32: # %bb.0: # %entry +; POWERPC_32-NEXT: lxv vs0, 0(r3) +; POWERPC_32-NEXT: xxlxor vs1, vs1, vs1 +; POWERPC_32-NEXT: xvcmpeqsp vs0, vs0, vs1 +; POWERPC_32-NEXT: xxlnor vs0, vs0, vs0 +; POWERPC_32-NEXT: stxv vs0, -32(r1) +; POWERPC_32-NEXT: lwz r3, -24(r1) +; POWERPC_32-NEXT: lwz r4, -28(r1) +;
POWERPC_32-NEXT: rlwinm r3, r3, 1, 30, 30 +; POWERPC_32-NEXT: rlwimi r3, r4, 2, 29, 29 +; POWERPC_32-NEXT: lwz r4, -32(r1) +; POWERPC_32-NEXT: rlwimi r3, r4, 3, 0, 28 +; POWERPC_32-NEXT: lwz r4, -20(r1) +; POWERPC_32-NEXT: sub r3, r3, r4 +; POWERPC_32-NEXT: clrlwi r3, r3, 28 +; POWERPC_32-NEXT: popcntw r3, r3 +; POWERPC_32-NEXT: blr +entry: + %0 = load <4 x float>, ptr %colauths, align 4, !tbaa !5 + %1 = fcmp une <4 x float> %0, zeroinitializer + %2 = bitcast <4 x i1> %1 to i4 + %3 = tail call range(i4 0, 5) i4 @llvm.ctpop.i4(i4 %2) + %4 = zext nneg i4 %3 to i32 + ret i32 %4 +} diff --git a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll index e71f59c..cad684e 100644 --- a/llvm/test/CodeGen/PowerPC/fmf-propagation.ll +++ b/llvm/test/CodeGen/PowerPC/fmf-propagation.ll @@ -325,24 +325,21 @@ define float @sqrt_afn_ieee(float %x) #0 { ; ; GLOBAL-LABEL: sqrt_afn_ieee: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: addis 3, 2, .LCPI11_1@toc@ha -; GLOBAL-NEXT: xsabsdp 0, 1 -; GLOBAL-NEXT: lfs 2, .LCPI11_1@toc@l(3) -; GLOBAL-NEXT: fcmpu 0, 0, 2 -; GLOBAL-NEXT: xxlxor 0, 0, 0 -; GLOBAL-NEXT: blt 0, .LBB11_2 -; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: vspltisw 2, -3 ; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha -; GLOBAL-NEXT: xvcvsxwdp 2, 34 -; GLOBAL-NEXT: xsmulsp 1, 1, 0 -; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xvcvsxwdp 3, 34 +; GLOBAL-NEXT: xsmulsp 2, 1, 0 +; GLOBAL-NEXT: xsabsdp 1, 1 +; GLOBAL-NEXT: xsmaddasp 3, 2, 0 ; GLOBAL-NEXT: lfs 0, .LCPI11_0@toc@l(3) -; GLOBAL-NEXT: xsmulsp 0, 1, 0 -; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB11_2: -; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: addis 3, 2, .LCPI11_1@toc@ha +; GLOBAL-NEXT: xsmulsp 0, 2, 0 +; GLOBAL-NEXT: lfs 2, .LCPI11_1@toc@l(3) +; GLOBAL-NEXT: xssubsp 1, 1, 2 +; GLOBAL-NEXT: xxlxor 2, 2, 2 +; GLOBAL-NEXT: xsmulsp 0, 0, 3 +; GLOBAL-NEXT: fsel 1, 1, 0, 2 ; GLOBAL-NEXT: blr %rt = call afn ninf float @llvm.sqrt.f32(float %x) ret float %rt @@ -393,21 +390,19 @@ define float @sqrt_afn_preserve_sign(float %x) #1 { ; ; GLOBAL-LABEL: sqrt_afn_preserve_sign: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: xxlxor 0, 0, 0 -; GLOBAL-NEXT: fcmpu 0, 1, 0 -; GLOBAL-NEXT: beq 0, .LBB13_2 -; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: vspltisw 2, -3 ; GLOBAL-NEXT: addis 3, 2, .LCPI13_0@toc@ha -; GLOBAL-NEXT: xvcvsxwdp 2, 34 -; GLOBAL-NEXT: xsmulsp 1, 1, 0 -; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xvcvsxwdp 3, 34 +; GLOBAL-NEXT: xsmulsp 2, 1, 0 +; GLOBAL-NEXT: xsmaddasp 3, 2, 0 ; GLOBAL-NEXT: lfs 0, .LCPI13_0@toc@l(3) -; GLOBAL-NEXT: xsmulsp 0, 1, 0 -; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB13_2: -; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: xsmulsp 0, 2, 0 +; GLOBAL-NEXT: xxlxor 2, 2, 2 +; GLOBAL-NEXT: xsmulsp 0, 0, 3 +; GLOBAL-NEXT: fsel 2, 1, 2, 0 +; GLOBAL-NEXT: xsnegdp 1, 1 +; GLOBAL-NEXT: fsel 1, 1, 2, 0 ; GLOBAL-NEXT: blr %rt = call afn ninf float @llvm.sqrt.f32(float %x) ret float %rt @@ -462,24 +457,21 @@ define float @sqrt_fast_ieee(float %x) #0 { ; ; GLOBAL-LABEL: sqrt_fast_ieee: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: addis 3, 2, .LCPI15_1@toc@ha -; GLOBAL-NEXT: xsabsdp 0, 1 -; GLOBAL-NEXT: lfs 2, .LCPI15_1@toc@l(3) -; GLOBAL-NEXT: fcmpu 0, 0, 2 -; GLOBAL-NEXT: xxlxor 0, 0, 0 -; GLOBAL-NEXT: blt 0, .LBB15_2 -; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: vspltisw 2, -3 ; GLOBAL-NEXT: addis 3, 2, .LCPI15_0@toc@ha -; GLOBAL-NEXT: xvcvsxwdp 2, 34 -; GLOBAL-NEXT: xsmulsp 1, 1, 0 -; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xvcvsxwdp 
3, 34 +; GLOBAL-NEXT: xsmulsp 2, 1, 0 +; GLOBAL-NEXT: xsabsdp 1, 1 +; GLOBAL-NEXT: xsmaddasp 3, 2, 0 ; GLOBAL-NEXT: lfs 0, .LCPI15_0@toc@l(3) -; GLOBAL-NEXT: xsmulsp 0, 1, 0 -; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB15_2: -; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: addis 3, 2, .LCPI15_1@toc@ha +; GLOBAL-NEXT: xsmulsp 0, 2, 0 +; GLOBAL-NEXT: lfs 2, .LCPI15_1@toc@l(3) +; GLOBAL-NEXT: xssubsp 1, 1, 2 +; GLOBAL-NEXT: xxlxor 2, 2, 2 +; GLOBAL-NEXT: xsmulsp 0, 0, 3 +; GLOBAL-NEXT: fsel 1, 1, 0, 2 ; GLOBAL-NEXT: blr %rt = call contract reassoc afn ninf float @llvm.sqrt.f32(float %x) ret float %rt @@ -517,21 +509,19 @@ define float @sqrt_fast_preserve_sign(float %x) #1 { ; ; GLOBAL-LABEL: sqrt_fast_preserve_sign: ; GLOBAL: # %bb.0: -; GLOBAL-NEXT: xxlxor 0, 0, 0 -; GLOBAL-NEXT: fcmpu 0, 1, 0 -; GLOBAL-NEXT: beq 0, .LBB16_2 -; GLOBAL-NEXT: # %bb.1: ; GLOBAL-NEXT: xsrsqrtesp 0, 1 ; GLOBAL-NEXT: vspltisw 2, -3 ; GLOBAL-NEXT: addis 3, 2, .LCPI16_0@toc@ha -; GLOBAL-NEXT: xvcvsxwdp 2, 34 -; GLOBAL-NEXT: xsmulsp 1, 1, 0 -; GLOBAL-NEXT: xsmaddasp 2, 1, 0 +; GLOBAL-NEXT: xvcvsxwdp 3, 34 +; GLOBAL-NEXT: xsmulsp 2, 1, 0 +; GLOBAL-NEXT: xsmaddasp 3, 2, 0 ; GLOBAL-NEXT: lfs 0, .LCPI16_0@toc@l(3) -; GLOBAL-NEXT: xsmulsp 0, 1, 0 -; GLOBAL-NEXT: xsmulsp 0, 0, 2 -; GLOBAL-NEXT: .LBB16_2: -; GLOBAL-NEXT: fmr 1, 0 +; GLOBAL-NEXT: xsmulsp 0, 2, 0 +; GLOBAL-NEXT: xxlxor 2, 2, 2 +; GLOBAL-NEXT: xsmulsp 0, 0, 3 +; GLOBAL-NEXT: fsel 2, 1, 2, 0 +; GLOBAL-NEXT: xsnegdp 1, 1 +; GLOBAL-NEXT: fsel 1, 1, 2, 0 ; GLOBAL-NEXT: blr %rt = call contract reassoc ninf afn float @llvm.sqrt.f32(float %x) ret float %rt diff --git a/llvm/test/CodeGen/PowerPC/lxvkq-vec-constant.ll b/llvm/test/CodeGen/PowerPC/lxvkq-vec-constant.ll new file mode 100644 index 0000000..0ee4524 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/lxvkq-vec-constant.ll @@ -0,0 +1,307 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 + +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=POWERPC64-LE-10 + +; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64-unknown-unknown \ +; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=POWERPC64-BE-10 + +; Test LXVKQ instruction generation for special vector constants matching 128 bit patterns: +; 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern) +; 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern) + +; ============================================================================= +; v2i64 tests - MSB set pattern (0x8000_0000_0000_0000_0000_0000_0000_0000) +; ============================================================================= + +; Big-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <-9223372036854775808, 0> +define dso_local noundef <2 x i64> @test_v2i64_msb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v2i64_msb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI0_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v2i64_msb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: lxvkq v2, 16 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <2 x i64> <i64 -9223372036854775808, i64 0> +} + +; Little-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <0, -9223372036854775808> +define dso_local noundef <2 x i64> @test_v2i64_msb_set_littleendian() 
local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v2i64_msb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: lxvkq v2, 16 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v2i64_msb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI1_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI1_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + ret <2 x i64> <i64 0, i64 -9223372036854775808> +} + +; ============================================================================= +; v4i32 tests - MSB set pattern (0x8000_0000_0000_0000_0000_0000_0000_0000) +; ============================================================================= + +; Big-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <-2147483648, 0, 0, 0> +define dso_local noundef <4 x i32> @test_v4i32_msb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v4i32_msb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI2_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v4i32_msb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: lxvkq v2, 16 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <4 x i32> <i32 -2147483648, i32 0, i32 0, i32 0> +} + +; Little-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <0, 0, 0, -2147483648> +define dso_local noundef <4 x i32> @test_v4i32_msb_set_littleendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v4i32_msb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: lxvkq v2, 16 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v4i32_msb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI3_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + ret <4 x i32> <i32 0, i32 0, i32 0, i32 -2147483648> +} + +; ============================================================================= +; v8i16 tests - MSB set pattern (0x8000_0000_0000_0000_0000_0000_0000_0000) +; ============================================================================= + +; Big-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <-32768, 0, 0, 0, 0, 0, 0, 0> +define dso_local noundef <8 x i16> @test_v8i16_msb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v8i16_msb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI4_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v8i16_msb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: lxvkq v2, 16 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <8 x i16> <i16 -32768, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0> +} + +; Little-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <0, 0, 0, 0, 0, 0, 0, -32768> +define dso_local noundef <8 x i16> @test_v8i16_msb_set_littleendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v8i16_msb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: lxvkq v2, 16 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v8i16_msb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI5_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + 
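; Illustrative note (not an autogenerated check): on little-endian subtargets +; element 7 of <8 x i16> supplies the most significant halfword, so this +; constant is the 128-bit MSB-set pattern 0x8000_0000_0000_0000_0000_0000_0000_0000 +; that LXVKQ can materialise directly. +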
ret <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 -32768> +} + +; ============================================================================= +; v16i8 tests - MSB set pattern (0x8000_0000_0000_0000_0000_0000_0000_0000) +; ============================================================================= + +; Big-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <-128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> +define dso_local noundef <16 x i8> @test_v16i8_msb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v16i8_msb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI6_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v16i8_msb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: lxvkq v2, 16 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <16 x i8> <i8 -128, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> +} + +; Little-Endian: 0x8000_0000_0000_0000_0000_0000_0000_0000 represents <0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128> +define dso_local noundef <16 x i8> @test_v16i8_msb_set_littleendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v16i8_msb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: lxvkq v2, 16 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v16i8_msb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI7_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI7_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + ret <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -128> +} + +; ============================================================================= +; v2i64 tests - LSB set pattern (0x0000_0000_0000_0000_0000_0000_0000_0001) +; ============================================================================= + +; Big-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <0, 1> +define dso_local noundef <2 x i64> @test_v2i64_lsb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v2i64_lsb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI8_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v2i64_lsb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: xxspltib v2, 255 +; POWERPC64-BE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <2 x i64> <i64 0, i64 1> +} + +; Little-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <1, 0> +define dso_local noundef <2 x i64> @test_v2i64_lsb_set_littleendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v2i64_lsb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: xxspltib v2, 255 +; POWERPC64-LE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v2i64_lsb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI9_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI9_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + ret <2 x i64> <i64 1, i64 0> +} + +; ============================================================================= +; v4i32 tests - LSB set pattern (0x0000_0000_0000_0000_0000_0000_0000_0001) +; 
============================================================================= + +; Big-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <0, 0, 0, 1> +define dso_local noundef <4 x i32> @test_v4i32_lsb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v4i32_lsb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI10_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v4i32_lsb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: xxspltib v2, 255 +; POWERPC64-BE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <4 x i32> <i32 0, i32 0, i32 0, i32 1> +} + +; Little-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <1, 0, 0, 0> +define dso_local noundef <4 x i32> @test_v4i32_lsb_set_littleendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v4i32_lsb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: xxspltib v2, 255 +; POWERPC64-LE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v4i32_lsb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI11_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI11_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + ret <4 x i32> <i32 1, i32 0, i32 0, i32 0> +} + +; ============================================================================= +; v8i16 tests - LSB set pattern (0x0000_0000_0000_0000_0000_0000_0000_0001) +; ============================================================================= + +; Big-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <0, 0, 0, 0, 0, 0, 0, 1> +define dso_local noundef <8 x i16> @test_v8i16_lsb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v8i16_lsb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI12_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v8i16_lsb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: xxspltib v2, 255 +; POWERPC64-BE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 1> +} + +; Little-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <1, 0, 0, 0, 0, 0, 0, 0> +define dso_local noundef <8 x i16> @test_v8i16_lsb_set_littleendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v8i16_lsb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: xxspltib v2, 255 +; POWERPC64-LE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v8i16_lsb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI13_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI13_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + ret <8 x i16> <i16 1, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0> +} + +; ============================================================================= +; v16i8 tests - LSB set pattern (0x0000_0000_0000_0000_0000_0000_0000_0001) +; ============================================================================= + +; Big-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1> +define dso_local noundef <16 x i8> @test_v16i8_lsb_set_bigendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: 
test_v16i8_lsb_set_bigendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: plxv v2, .LCPI14_0@PCREL(0), 1 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v16i8_lsb_set_bigendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: xxspltib v2, 255 +; POWERPC64-BE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-BE-10-NEXT: blr +entry: + ret <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1> +} + +; Little-Endian: 0x0000_0000_0000_0000_0000_0000_0000_0001 represents <1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0> +define dso_local noundef <16 x i8> @test_v16i8_lsb_set_littleendian() local_unnamed_addr { +; POWERPC64-LE-10-LABEL: test_v16i8_lsb_set_littleendian: +; POWERPC64-LE-10: # %bb.0: # %entry +; POWERPC64-LE-10-NEXT: xxspltib v2, 255 +; POWERPC64-LE-10-NEXT: vsrq v2, v2, v2 +; POWERPC64-LE-10-NEXT: blr +; +; POWERPC64-BE-10-LABEL: test_v16i8_lsb_set_littleendian: +; POWERPC64-BE-10: # %bb.0: # %entry +; POWERPC64-BE-10-NEXT: addis r3, r2, .LCPI15_0@toc@ha +; POWERPC64-BE-10-NEXT: addi r3, r3, .LCPI15_0@toc@l +; POWERPC64-BE-10-NEXT: lxv v2, 0(r3) +; POWERPC64-BE-10-NEXT: blr +entry: + ret <16 x i8> <i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0> +}
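+ +; Worked summary (an illustrative comment, not part of the autogenerated +; checks): reading each constant as one 128-bit value, the two patterns differ +; only in which element carries the set bit, e.g. for v2i64: +;   BE: MSB-set = <i64 -9223372036854775808, i64 0>, LSB-set = <i64 0, i64 1> +;   LE: MSB-set = <i64 0, i64 -9223372036854775808>, LSB-set = <i64 1, i64 0>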
\ No newline at end of file diff --git a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll index 0892210..d506d20 100644 --- a/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll +++ b/llvm/test/CodeGen/PowerPC/vector-reduce-add.ll @@ -1566,12 +1566,16 @@ define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 { ; PWR10BE-LABEL: v16i8tov16i64_sign: ; PWR10BE: # %bb.0: # %entry ; PWR10BE-NEXT: addis r3, r2, .LCPI23_0@toc@ha +; PWR10BE-NEXT: xxspltib v1, 255 ; PWR10BE-NEXT: addi r3, r3, .LCPI23_0@toc@l +; PWR10BE-NEXT: vsrq v1, v1, v1 ; PWR10BE-NEXT: lxv v3, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI23_1@toc@ha ; PWR10BE-NEXT: addi r3, r3, .LCPI23_1@toc@l +; PWR10BE-NEXT: vperm v1, v2, v2, v1 ; PWR10BE-NEXT: lxv v4, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI23_2@toc@ha +; PWR10BE-NEXT: vextsb2d v1, v1 ; PWR10BE-NEXT: vperm v3, v2, v2, v3 ; PWR10BE-NEXT: addi r3, r3, .LCPI23_2@toc@l ; PWR10BE-NEXT: vextsb2d v3, v3 @@ -1585,23 +1589,18 @@ define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 { ; PWR10BE-NEXT: vperm v5, v2, v2, v5 ; PWR10BE-NEXT: addi r3, r3, .LCPI23_4@toc@l ; PWR10BE-NEXT: vextsb2d v5, v5 -; PWR10BE-NEXT: lxv v1, 0(r3) +; PWR10BE-NEXT: lxv v6, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI23_5@toc@ha ; PWR10BE-NEXT: vperm v0, v2, v2, v0 ; PWR10BE-NEXT: addi r3, r3, .LCPI23_5@toc@l ; PWR10BE-NEXT: vextsb2d v0, v0 -; PWR10BE-NEXT: lxv v6, 0(r3) +; PWR10BE-NEXT: lxv v7, 0(r3) ; PWR10BE-NEXT: addis r3, r2, .LCPI23_6@toc@ha -; PWR10BE-NEXT: vperm v1, v2, v2, v1 +; PWR10BE-NEXT: vperm v6, v2, v2, v6 ; PWR10BE-NEXT: vaddudm v5, v0, v5 ; PWR10BE-NEXT: vaddudm v3, v4, v3 ; PWR10BE-NEXT: vaddudm v3, v3, v5 ; PWR10BE-NEXT: addi r3, r3, .LCPI23_6@toc@l -; PWR10BE-NEXT: vextsb2d v1, v1 -; PWR10BE-NEXT: lxv v7, 0(r3) -; PWR10BE-NEXT: addis r3, r2, .LCPI23_7@toc@ha -; PWR10BE-NEXT: vperm v6, v2, v2, v6 -; PWR10BE-NEXT: addi r3, r3, .LCPI23_7@toc@l ; PWR10BE-NEXT: vextsb2d v6, v6 ; PWR10BE-NEXT: lxv v8, 0(r3) ; PWR10BE-NEXT: vperm v7, v2, v2, v7 @@ -1609,7 +1608,7 @@ define dso_local i64 @v16i8tov16i64_sign(<16 x i8> %a) local_unnamed_addr #0 { ; PWR10BE-NEXT: vperm v2, v2, v2, v8 ; PWR10BE-NEXT: vextsb2d v2, v2 ; PWR10BE-NEXT: vaddudm v2, v2, v7 -; PWR10BE-NEXT: vaddudm v4, v6, v1 +; PWR10BE-NEXT: vaddudm v4, v1, v6 ; PWR10BE-NEXT: vaddudm v2, v4, v2 ; PWR10BE-NEXT: vaddudm v2, v2, v3 ; PWR10BE-NEXT: xxswapd v3, v2 diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll index 24a1724..ba7680b 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll @@ -15,11 +15,9 @@ define <4 x i32> @ternary_A_or_BC_eqv_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_or_BC_eqv_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 151 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -34,12 +32,10 @@ define <2 x i64> @ternary_A_or_BC_eqv_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6 ; CHECK-LABEL: ternary_A_or_BC_eqv_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, 
v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 151 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -54,11 +50,9 @@ define <16 x i8> @ternary_A_or_BC_eqv_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_or_BC_eqv_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 151 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -73,11 +67,9 @@ define <8 x i16> @ternary_A_or_BC_eqv_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_or_BC_eqv_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 151 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -92,11 +84,9 @@ define <4 x i32> @ternary_A_nor_BC_eqv_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 152 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -112,12 +102,10 @@ define <2 x i64> @ternary_A_nor_BC_eqv_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 152 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -133,11 +121,9 @@ define <16 x i8> @ternary_A_nor_BC_eqv_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnor vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 152 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -153,11 +139,9 @@ define <8 x i16> @ternary_A_nor_BC_eqv_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnor vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 152 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -173,10 +157,9 @@ define <4 x i32> @ternary_A_not_C_eqv_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_not_C_eqv_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v4, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, vs0, v3, 99 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 154 ; CHECK-NEXT: blr entry: %not = xor <4 x i32> %C, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation @@ -191,12 +174,10 @@ define <2 x i64> @ternary_A_not_C_eqv_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6 ; CHECK-LABEL: ternary_A_not_C_eqv_BC_2x64: ; 
CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v4, v4 -; CHECK-NEXT: xxleqv vs1, v4, v3 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 154 ; CHECK-NEXT: blr entry: %not = xor <2 x i64> %C, <i64 -1, i64 -1> ; Vector not operation @@ -211,11 +192,9 @@ define <16 x i8> @ternary_A_not_C_eqv_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_not_C_eqv_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnor vs0, v4, v4 -; CHECK-NEXT: xxleqv vs1, v4, v3 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 154 ; CHECK-NEXT: blr entry: %not = xor <16 x i8> %C, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ; Vector not operation @@ -230,11 +209,9 @@ define <8 x i16> @ternary_A_not_C_eqv_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_not_C_eqv_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnor vs0, v4, v4 -; CHECK-NEXT: xxleqv vs1, v4, v3 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 154 ; CHECK-NEXT: blr entry: %not = xor <8 x i16> %C, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> ; Vector not operation @@ -249,11 +226,9 @@ define <4 x i32> @ternary_A_nand_BC_eqv_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x ; CHECK-LABEL: ternary_A_nand_BC_eqv_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 158 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -269,12 +244,10 @@ define <2 x i64> @ternary_A_nand_BC_eqv_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x ; CHECK-LABEL: ternary_A_nand_BC_eqv_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 158 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -290,11 +263,9 @@ define <16 x i8> @ternary_A_nand_BC_eqv_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 ; CHECK-LABEL: ternary_A_nand_BC_eqv_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 158 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -310,11 +281,9 @@ define <8 x i16> @ternary_A_nand_BC_eqv_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x ; CHECK-LABEL: ternary_A_nand_BC_eqv_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 158 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nand.ll 
b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nand.ll index 7a6733d3..067b089 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nand.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nand.ll @@ -15,10 +15,9 @@ define <4 x i32> @ternary_A_B_nand_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> ; CHECK-LABEL: ternary_A_B_nand_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 227 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -32,11 +31,10 @@ define <2 x i64> @ternary_A_B_nand_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> ; CHECK-LABEL: ternary_A_B_nand_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 227 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -50,10 +48,9 @@ define <16 x i8> @ternary_A_B_nand_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_B_nand_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 227 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -67,10 +64,9 @@ define <8 x i16> @ternary_A_B_nand_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> ; CHECK-LABEL: ternary_A_B_nand_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 227 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C @@ -84,10 +80,9 @@ define <4 x i32> @ternary_A_C_nand_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> ; CHECK-LABEL: ternary_A_C_nand_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 229 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -101,11 +96,10 @@ define <2 x i64> @ternary_A_C_nand_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> ; CHECK-LABEL: ternary_A_C_nand_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 229 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -119,10 +113,9 @@ define <16 x i8> @ternary_A_C_nand_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_C_nand_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 229 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -136,10 +129,9 @@ define <8 x i16> @ternary_A_C_nand_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> ; CHECK-LABEL: ternary_A_C_nand_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, 
v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 229 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C @@ -153,11 +145,9 @@ define <4 x i32> @ternary_A_xor_BC_nand_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x ; CHECK-LABEL: ternary_A_xor_BC_nand_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 230 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -172,12 +162,10 @@ define <2 x i64> @ternary_A_xor_BC_nand_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x ; CHECK-LABEL: ternary_A_xor_BC_nand_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 230 ; CHECK-NEXT: blr entry: %xor = xor <2 x i64> %B, %C @@ -192,11 +180,9 @@ define <16 x i8> @ternary_A_xor_BC_nand_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 ; CHECK-LABEL: ternary_A_xor_BC_nand_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 230 ; CHECK-NEXT: blr entry: %xor = xor <16 x i8> %B, %C @@ -211,11 +197,9 @@ define <8 x i16> @ternary_A_xor_BC_nand_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x ; CHECK-LABEL: ternary_A_xor_BC_nand_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 230 ; CHECK-NEXT: blr entry: %xor = xor <8 x i16> %B, %C @@ -230,11 +214,9 @@ define <4 x i32> @ternary_A_or_BC_nand_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_or_BC_nand_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 231 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -249,12 +231,10 @@ define <2 x i64> @ternary_A_or_BC_nand_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_or_BC_nand_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 231 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -269,11 +249,9 @@ define <16 x i8> @ternary_A_or_BC_nand_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_or_BC_nand_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 231 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -288,11 +266,9 @@ define <8 x i16> @ternary_A_or_BC_nand_BC_8x16(<8 
x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_or_BC_nand_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 231 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -307,11 +283,9 @@ define <4 x i32> @ternary_A_eqv_BC_nand_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x ; CHECK-LABEL: ternary_A_eqv_BC_nand_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxleqv vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 233 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -327,12 +301,10 @@ define <2 x i64> @ternary_A_eqv_BC_nand_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x ; CHECK-LABEL: ternary_A_eqv_BC_nand_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxleqv vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 233 ; CHECK-NEXT: blr entry: %xor = xor <2 x i64> %B, %C @@ -348,11 +320,9 @@ define <16 x i8> @ternary_A_eqv_BC_nand_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 ; CHECK-LABEL: ternary_A_eqv_BC_nand_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxleqv vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 233 ; CHECK-NEXT: blr entry: %xor = xor <16 x i8> %B, %C @@ -368,11 +338,9 @@ define <8 x i16> @ternary_A_eqv_BC_nand_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x ; CHECK-LABEL: ternary_A_eqv_BC_nand_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxleqv vs0, v3, v4 -; CHECK-NEXT: xxlnand vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 233 ; CHECK-NEXT: blr entry: %xor = xor <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll index d635952..3695874 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll @@ -15,11 +15,9 @@ define <4 x i32> @ternary_A_and_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_and_BC_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 129 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -34,12 +32,10 @@ define <2 x i64> @ternary_A_and_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_and_BC_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 129 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -54,11 +50,9 @@ 
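The xxeval immediates selected above (227, 229, 230, 231, 233 in the nand file) read as 8-entry truth tables: each result bit of xxeval is the ternary boolean function applied to the corresponding bits of the three vector sources. A minimal sketch of the assumed encoding, with table index 4a+2b+c and the (0,0,0) entry landing in the most significant bit of the immediate (PowerPC big-endian bit order); xxeval_imm is a hypothetical helper for illustration, not an LLVM API:

# Build the 8-bit xxeval immediate for a ternary bit function f(a, b, c).
# Assumption: the entry for index i = 4a + 2b + c sits at bit (7 - i).
def xxeval_imm(f):
    imm = 0
    for idx in range(8):
        a, b, c = (idx >> 2) & 1, (idx >> 1) & 1, idx & 1
        imm = (imm << 1) | (f(a, b, c) & 1)
    return imm

nand = lambda b, c: 1 - (b & c)
assert xxeval_imm(lambda a, b, c: b if a else nand(b, c)) == 227              # A ? B : nand(B,C)
assert xxeval_imm(lambda a, b, c: c if a else nand(b, c)) == 229              # A ? C : nand(B,C)
assert xxeval_imm(lambda a, b, c: (b ^ c) if a else nand(b, c)) == 230        # A ? xor(B,C) : nand(B,C)
assert xxeval_imm(lambda a, b, c: (b | c) if a else nand(b, c)) == 231        # A ? or(B,C) : nand(B,C)
assert xxeval_imm(lambda a, b, c: (1 - (b ^ c)) if a else nand(b, c)) == 233  # A ? eqv(B,C) : nand(B,C)

Each assert reproduces an immediate checked in this file, which is why a single xxeval can replace the xxlnand/xxlxor/xxlor/xxleqv plus xxsel pairs the old output needed.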
define <16 x i8> @ternary_A_and_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_and_BC_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 129 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -73,11 +67,9 @@ define <8 x i16> @ternary_A_and_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_and_BC_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 129 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C @@ -92,10 +84,9 @@ define <4 x i32> @ternary_A_B_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK-LABEL: ternary_A_B_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 131 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -109,11 +100,10 @@ define <2 x i64> @ternary_A_B_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_B_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 131 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -127,10 +117,9 @@ define <16 x i8> @ternary_A_B_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_B_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 131 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -144,10 +133,9 @@ define <8 x i16> @ternary_A_B_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_B_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 131 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -161,10 +149,9 @@ define <4 x i32> @ternary_A_C_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK-LABEL: ternary_A_C_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 133 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -178,11 +165,10 @@ define <2 x i64> @ternary_A_C_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_C_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 133 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -196,10 
+182,9 @@ define <16 x i8> @ternary_A_C_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_C_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 133 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -213,10 +198,9 @@ define <8 x i16> @ternary_A_C_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_C_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnor vs0, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 133 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -230,11 +214,9 @@ define <4 x i32> @ternary_A_xor_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 134 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -249,12 +231,10 @@ define <2 x i64> @ternary_A_xor_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 134 ; CHECK-NEXT: blr entry: %xor = xor <2 x i64> %B, %C @@ -269,11 +249,9 @@ define <16 x i8> @ternary_A_xor_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 134 ; CHECK-NEXT: blr entry: %xor = xor <16 x i8> %B, %C @@ -288,11 +266,9 @@ define <8 x i16> @ternary_A_xor_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 134 ; CHECK-NEXT: blr entry: %xor = xor <8 x i16> %B, %C @@ -307,11 +283,9 @@ define <4 x i32> @ternary_A_not_C_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_not_C_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v4, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 138 ; CHECK-NEXT: blr entry: %not = xor <4 x i32> %C, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation @@ -326,12 +300,10 @@ define <2 x i64> @ternary_A_not_C_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6 ; CHECK-LABEL: ternary_A_not_C_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v4, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: 
xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 138 ; CHECK-NEXT: blr entry: %not = xor <2 x i64> %C, <i64 -1, i64 -1> ; Vector not operation @@ -346,11 +318,9 @@ define <16 x i8> @ternary_A_not_C_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_not_C_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnor vs0, v4, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 138 ; CHECK-NEXT: blr entry: %not = xor <16 x i8> %C, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ; Vector not operation @@ -365,11 +335,9 @@ define <8 x i16> @ternary_A_not_C_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_not_C_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnor vs0, v4, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 138 ; CHECK-NEXT: blr entry: %not = xor <8 x i16> %C, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> ; Vector not operation @@ -384,11 +352,9 @@ define <4 x i32> @ternary_A_not_B_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_not_B_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v3 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 140 ; CHECK-NEXT: blr entry: %not = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation @@ -403,12 +369,10 @@ define <2 x i64> @ternary_A_not_B_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6 ; CHECK-LABEL: ternary_A_not_B_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v3 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 140 ; CHECK-NEXT: blr entry: %not = xor <2 x i64> %B, <i64 -1, i64 -1> ; Vector not operation @@ -423,11 +387,9 @@ define <16 x i8> @ternary_A_not_B_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_not_B_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnor vs0, v3, v3 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 140 ; CHECK-NEXT: blr entry: %not = xor <16 x i8> %B, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ; Vector not operation @@ -442,11 +404,9 @@ define <8 x i16> @ternary_A_not_B_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_not_B_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnor vs0, v3, v3 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 140 ; CHECK-NEXT: blr entry: %not = xor <8 x i16> %B, <i16 -1, i16 -1, i16 -1, i16 
-1, i16 -1, i16 -1, i16 -1, i16 -1> ; Vector not operation @@ -461,11 +421,9 @@ define <4 x i32> @ternary_A_nand_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 142 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -481,12 +439,10 @@ define <2 x i64> @ternary_A_nand_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 142 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -502,11 +458,9 @@ define <16 x i8> @ternary_A_nand_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 142 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -522,11 +476,9 @@ define <8 x i16> @ternary_A_nand_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 142 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-b.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-b.ll index 6203a96..a67d9cf 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-b.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-b.ll @@ -15,11 +15,9 @@ define <4 x i32> @ternary_A_and_BC_not_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_and_BC_not_B_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 193 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -33,12 +31,10 @@ define <2 x i64> @ternary_A_and_BC_not_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6 ; CHECK-LABEL: ternary_A_and_BC_not_B_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 193 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -52,11 +48,9 @@ define <16 x i8> @ternary_A_and_BC_not_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_and_BC_not_B_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel 
v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 193 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -70,11 +64,9 @@ define <8 x i16> @ternary_A_and_BC_not_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_and_BC_not_B_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 193 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C @@ -88,11 +80,9 @@ define <4 x i32> @ternary_A_xor_BC_not_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_xor_BC_not_B_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 198 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -106,12 +96,10 @@ define <2 x i64> @ternary_A_xor_BC_not_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6 ; CHECK-LABEL: ternary_A_xor_BC_not_B_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 198 ; CHECK-NEXT: blr entry: %xor = xor <2 x i64> %B, %C @@ -125,11 +113,9 @@ define <16 x i8> @ternary_A_xor_BC_not_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_xor_BC_not_B_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 198 ; CHECK-NEXT: blr entry: %xor = xor <16 x i8> %B, %C @@ -143,11 +129,9 @@ define <8 x i16> @ternary_A_xor_BC_not_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_xor_BC_not_B_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 198 ; CHECK-NEXT: blr entry: %xor = xor <8 x i16> %B, %C @@ -161,11 +145,9 @@ define <4 x i32> @ternary_A_or_BC_not_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32 ; CHECK-LABEL: ternary_A_or_BC_not_B_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 199 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -179,12 +161,10 @@ define <2 x i64> @ternary_A_or_BC_not_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64 ; CHECK-LABEL: ternary_A_or_BC_not_B_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 199 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -198,11 +178,9 @@ define <16 x i8> @ternary_A_or_BC_not_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i ; 
CHECK-LABEL: ternary_A_or_BC_not_B_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 199 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -216,11 +194,9 @@ define <8 x i16> @ternary_A_or_BC_not_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16 ; CHECK-LABEL: ternary_A_or_BC_not_B_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 199 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -234,11 +210,9 @@ define <4 x i32> @ternary_A_nand_BC_not_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_nand_BC_not_B_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 206 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -253,12 +227,10 @@ define <2 x i64> @ternary_A_nand_BC_not_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_nand_BC_not_B_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 206 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -273,11 +245,9 @@ define <16 x i8> @ternary_A_nand_BC_not_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_nand_BC_not_B_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 206 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -292,11 +262,9 @@ define <8 x i16> @ternary_A_nand_BC_not_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_nand_BC_not_B_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 206 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-c.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-c.ll index 3479d94..98c1f28 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-c.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-not-c.ll @@ -15,11 +15,9 @@ define <4 x i32> @ternary_A_and_BC_not_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_and_BC_not_C_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 161 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -33,12 +31,10 @@ define <2 x i64> @ternary_A_and_BC_not_C_2x64(<2 x i1> %A, <2 x i64> 
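The nor-based and not-B-based selects above follow the same encoding, with only the false arm of the truth table changing. Continuing the sketch, under the same assumed helper and bit order:

nor = lambda b, c: 1 - (b | c)
assert xxeval_imm(lambda a, b, c: (b & c) if a else nor(b, c)) == 129   # A ? and(B,C) : nor(B,C)
assert xxeval_imm(lambda a, b, c: b if a else nor(b, c)) == 131         # A ? B : nor(B,C)
assert xxeval_imm(lambda a, b, c: (b & c) if a else (1 - b)) == 193     # A ? and(B,C) : not(B)
assert xxeval_imm(lambda a, b, c: (b | c) if a else (1 - b)) == 199     # A ? or(B,C) : not(B)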
%B, <2 x i6 ; CHECK-LABEL: ternary_A_and_BC_not_C_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 161 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -52,11 +48,9 @@ define <16 x i8> @ternary_A_and_BC_not_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_and_BC_not_C_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 161 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -70,11 +64,9 @@ define <8 x i16> @ternary_A_and_BC_not_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_and_BC_not_C_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 161 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C @@ -88,10 +80,9 @@ define <4 x i32> @ternary_A_B_not_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> %C ; CHECK-LABEL: ternary_A_B_not_C_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v4, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 163 ; CHECK-NEXT: blr entry: %not = xor <4 x i32> %C, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation @@ -104,11 +95,10 @@ define <2 x i64> @ternary_A_B_not_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> %C ; CHECK-LABEL: ternary_A_B_not_C_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v4, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 163 ; CHECK-NEXT: blr entry: %not = xor <2 x i64> %C, <i64 -1, i64 -1> ; Vector not operation @@ -121,10 +111,9 @@ define <16 x i8> @ternary_A_B_not_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> % ; CHECK-LABEL: ternary_A_B_not_C_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnor vs0, v4, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 163 ; CHECK-NEXT: blr entry: %not = xor <16 x i8> %C, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ; Vector not operation @@ -137,10 +126,9 @@ define <8 x i16> @ternary_A_B_not_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> %C ; CHECK-LABEL: ternary_A_B_not_C_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnor vs0, v4, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 163 ; CHECK-NEXT: blr entry: %not = xor <8 x i16> %C, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> ; Vector not operation @@ -153,11 +141,9 @@ define <4 x i32> @ternary_A_xor_BC_not_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_xor_BC_not_C_4x32: ; CHECK: # %bb.0: 
# %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 166 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -171,12 +157,10 @@ define <2 x i64> @ternary_A_xor_BC_not_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6 ; CHECK-LABEL: ternary_A_xor_BC_not_C_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 166 ; CHECK-NEXT: blr entry: %xor = xor <2 x i64> %B, %C @@ -190,11 +174,9 @@ define <16 x i8> @ternary_A_xor_BC_not_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_xor_BC_not_C_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 166 ; CHECK-NEXT: blr entry: %xor = xor <16 x i8> %B, %C @@ -208,11 +190,9 @@ define <8 x i16> @ternary_A_xor_BC_not_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_xor_BC_not_C_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 166 ; CHECK-NEXT: blr entry: %xor = xor <8 x i16> %B, %C @@ -226,11 +206,9 @@ define <4 x i32> @ternary_A_or_BC_not_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32 ; CHECK-LABEL: ternary_A_or_BC_not_C_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 167 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -244,12 +222,10 @@ define <2 x i64> @ternary_A_or_BC_not_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64 ; CHECK-LABEL: ternary_A_or_BC_not_C_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 167 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -263,11 +239,9 @@ define <16 x i8> @ternary_A_or_BC_not_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i ; CHECK-LABEL: ternary_A_or_BC_not_C_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 167 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -281,11 +255,9 @@ define <8 x i16> @ternary_A_or_BC_not_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16 ; CHECK-LABEL: ternary_A_or_BC_not_C_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlor vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: 
xxeval v2, v2, v3, v4, 167 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -299,11 +271,9 @@ define <4 x i32> @ternary_A_not_B_not_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32 ; CHECK-LABEL: ternary_A_not_B_not_C_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v3 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 172 ; CHECK-NEXT: blr entry: %not_b = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation @@ -317,12 +287,10 @@ define <2 x i64> @ternary_A_not_B_not_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64 ; CHECK-LABEL: ternary_A_not_B_not_C_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v3 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 172 ; CHECK-NEXT: blr entry: %not_b = xor <2 x i64> %B, <i64 -1, i64 -1> ; Vector not operation @@ -336,11 +304,9 @@ define <16 x i8> @ternary_A_not_B_not_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i ; CHECK-LABEL: ternary_A_not_B_not_C_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnor vs0, v3, v3 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 172 ; CHECK-NEXT: blr entry: %not_b = xor <16 x i8> %B, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ; Vector not operation @@ -354,11 +320,9 @@ define <8 x i16> @ternary_A_not_B_not_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16 ; CHECK-LABEL: ternary_A_not_B_not_C_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnor vs0, v3, v3 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 172 ; CHECK-NEXT: blr entry: %not_b = xor <8 x i16> %B, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> ; Vector not operation @@ -372,11 +336,9 @@ define <4 x i32> @ternary_A_nand_BC_not_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_nand_BC_not_C_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslw v2, v2, v5 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 174 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -391,12 +353,10 @@ define <2 x i64> @ternary_A_nand_BC_not_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_nand_BC_not_C_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 174 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -411,11 +371,9 @@ define <16 x i8> @ternary_A_nand_BC_not_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_nand_BC_not_C_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: 
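The not-C variants complete the pattern; under the same assumed encoding, swapping the false arm to not(C) gives:

not_c = lambda c: 1 - c
assert xxeval_imm(lambda a, b, c: (b & c) if a else not_c(c)) == 161    # A ? and(B,C) : not(C)
assert xxeval_imm(lambda a, b, c: b if a else not_c(c)) == 163          # A ? B : not(C)
assert xxeval_imm(lambda a, b, c: (1 - b) if a else not_c(c)) == 172    # A ? not(B) : not(C)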
vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 174 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -430,11 +388,9 @@ define <8 x i16> @ternary_A_nand_BC_not_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_nand_BC_not_C_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxlnor vs1, v4, v4 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 174 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/RISCV/min-max.ll b/llvm/test/CodeGen/RISCV/min-max.ll index acde8ad..e7f6899 100644 --- a/llvm/test/CodeGen/RISCV/min-max.ll +++ b/llvm/test/CodeGen/RISCV/min-max.ll @@ -5,6 +5,12 @@ ; RUN: FileCheck %s --check-prefixes=ZBB,RV32ZBB ; RUN: llc < %s -mtriple=riscv64 -mattr=+zbb | \ ; RUN: FileCheck %s --check-prefixes=ZBB,RV64ZBB +; RUN: llc -mtriple=riscv32 -mattr=+experimental-xqcicm,+experimental-xqcics,+experimental-xqcicli,+zca,+short-forward-branch-opt,+conditional-cmv-fusion -verify-machineinstrs < %s | \ +; RUN: FileCheck %s --check-prefixes=XQCI +; RUN: llc < %s -mtriple=riscv32 -mattr=+short-forward-branch-opt | \ +; RUN: FileCheck %s --check-prefixes=RV32I-SFB +; RUN: llc < %s -mtriple=riscv64 -mattr=+short-forward-branch-opt | \ +; RUN: FileCheck %s --check-prefixes=RV64I-SFB ; Basic tests. @@ -23,6 +29,27 @@ define signext i8 @smax_i8(i8 signext %a, i8 signext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: max a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: smax_i8: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.mvge a0, a1, a0, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smax_i8: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: blt a1, a0, .LBB0_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB0_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smax_i8: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: blt a1, a0, .LBB0_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB0_2: +; RV64I-SFB-NEXT: ret %c = call i8 @llvm.smax.i8(i8 %a, i8 %b) ret i8 %c } @@ -42,6 +69,27 @@ define signext i16 @smax_i16(i16 signext %a, i16 signext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: max a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: smax_i16: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.mvge a0, a1, a0, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smax_i16: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: blt a1, a0, .LBB1_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB1_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smax_i16: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: blt a1, a0, .LBB1_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB1_2: +; RV64I-SFB-NEXT: ret %c = call i16 @llvm.smax.i16(i16 %a, i16 %b) ret i16 %c } @@ -61,6 +109,27 @@ define signext i32 @smax_i32(i32 signext %a, i32 signext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: max a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: smax_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.mvge a0, a1, a0, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smax_i32: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: blt a1, a0, .LBB2_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB2_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smax_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: blt a1, a0, .LBB2_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB2_2: +; RV64I-SFB-NEXT: ret %c = call 
i32 @llvm.smax.i32(i32 %a, i32 %b) ret i32 %c } @@ -112,6 +181,41 @@ define i64 @smax_i64(i64 %a, i64 %b) { ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: max a0, a0, a1 ; RV64ZBB-NEXT: ret +; +; XQCI-LABEL: smax_i64: +; XQCI: # %bb.0: +; XQCI-NEXT: sltu a4, a2, a0 +; XQCI-NEXT: slt a5, a3, a1 +; XQCI-NEXT: qc.mveq a5, a1, a3, a4 +; XQCI-NEXT: qc.mveqi a0, a5, 0, a2 +; XQCI-NEXT: qc.mveqi a1, a5, 0, a3 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smax_i64: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: sltu a4, a2, a0 +; RV32I-SFB-NEXT: slt a5, a3, a1 +; RV32I-SFB-NEXT: bne a1, a3, .LBB3_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a5, a4 +; RV32I-SFB-NEXT: .LBB3_2: +; RV32I-SFB-NEXT: bnez a5, .LBB3_4 +; RV32I-SFB-NEXT: # %bb.3: +; RV32I-SFB-NEXT: mv a0, a2 +; RV32I-SFB-NEXT: .LBB3_4: +; RV32I-SFB-NEXT: bnez a5, .LBB3_6 +; RV32I-SFB-NEXT: # %bb.5: +; RV32I-SFB-NEXT: mv a1, a3 +; RV32I-SFB-NEXT: .LBB3_6: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smax_i64: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: blt a1, a0, .LBB3_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB3_2: +; RV64I-SFB-NEXT: ret %c = call i64 @llvm.smax.i64(i64 %a, i64 %b) ret i64 %c } @@ -131,6 +235,27 @@ define signext i8 @smin_i8(i8 signext %a, i8 signext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: min a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: smin_i8: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.mvge a0, a0, a1, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smin_i8: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: blt a0, a1, .LBB4_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB4_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smin_i8: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: blt a0, a1, .LBB4_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB4_2: +; RV64I-SFB-NEXT: ret %c = call i8 @llvm.smin.i8(i8 %a, i8 %b) ret i8 %c } @@ -150,6 +275,27 @@ define signext i16 @smin_i16(i16 signext %a, i16 signext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: min a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: smin_i16: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.mvge a0, a0, a1, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smin_i16: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: blt a0, a1, .LBB5_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB5_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smin_i16: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: blt a0, a1, .LBB5_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB5_2: +; RV64I-SFB-NEXT: ret %c = call i16 @llvm.smin.i16(i16 %a, i16 %b) ret i16 %c } @@ -169,6 +315,27 @@ define signext i32 @smin_i32(i32 signext %a, i32 signext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: min a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: smin_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.mvge a0, a0, a1, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smin_i32: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: blt a0, a1, .LBB6_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB6_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smin_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: blt a0, a1, .LBB6_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB6_2: +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.smin.i32(i32 %a, i32 %b) ret i32 %c } @@ -220,6 +387,41 @@ define i64 @smin_i64(i64 %a, i64 %b) { ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: min a0, a0, a1 ; RV64ZBB-NEXT: ret +; +; XQCI-LABEL: smin_i64: +; XQCI: # %bb.0: +; XQCI-NEXT: sltu a4, a0, a2 +; XQCI-NEXT: slt a5, a1, a3 +; XQCI-NEXT: 
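The new XQCI run line exercises the Xqcicm conditional-move extension. The apparent semantics, consistent with every smax/smin pattern above, are qc.mvge rd, rs1, rs2, rs3: write rs3 to rd when rs1 >= rs2 as signed values (qc.mvgeu for unsigned), otherwise leave rd untouched. A small model under that assumption:

# Model of the assumed qc.mvge semantics; regs maps register names to values.
def qc_mvge(regs, rd, rs1, rs2, rs3):
    if regs[rs1] >= regs[rs2]:        # signed compare in hardware
        regs[rd] = regs[rs3]

# smax as emitted above: qc.mvge a0, a1, a0, a1
# takes a1 when a1 >= a0, so a0 ends up holding max(a0, a1).
regs = {"a0": 3, "a1": 7}
qc_mvge(regs, "a0", "a1", "a0", "a1")
assert regs["a0"] == 7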
qc.mveq a5, a1, a3, a4 +; XQCI-NEXT: qc.mveqi a0, a5, 0, a2 +; XQCI-NEXT: qc.mveqi a1, a5, 0, a3 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smin_i64: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: sltu a4, a0, a2 +; RV32I-SFB-NEXT: slt a5, a1, a3 +; RV32I-SFB-NEXT: bne a1, a3, .LBB7_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a5, a4 +; RV32I-SFB-NEXT: .LBB7_2: +; RV32I-SFB-NEXT: bnez a5, .LBB7_4 +; RV32I-SFB-NEXT: # %bb.3: +; RV32I-SFB-NEXT: mv a0, a2 +; RV32I-SFB-NEXT: .LBB7_4: +; RV32I-SFB-NEXT: bnez a5, .LBB7_6 +; RV32I-SFB-NEXT: # %bb.5: +; RV32I-SFB-NEXT: mv a1, a3 +; RV32I-SFB-NEXT: .LBB7_6: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smin_i64: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: blt a0, a1, .LBB7_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB7_2: +; RV64I-SFB-NEXT: ret %c = call i64 @llvm.smin.i64(i64 %a, i64 %b) ret i64 %c } @@ -239,6 +441,27 @@ define i8 @umax_i8(i8 zeroext %a, i8 zeroext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: maxu a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: umax_i8: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.mvgeu a0, a1, a0, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umax_i8: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: bltu a1, a0, .LBB8_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB8_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umax_i8: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: bltu a1, a0, .LBB8_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB8_2: +; RV64I-SFB-NEXT: ret %c = call i8 @llvm.umax.i8(i8 %a, i8 %b) ret i8 %c } @@ -258,6 +481,27 @@ define i16 @umax_i16(i16 zeroext %a, i16 zeroext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: maxu a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: umax_i16: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.mvgeu a0, a1, a0, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umax_i16: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: bltu a1, a0, .LBB9_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB9_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umax_i16: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: bltu a1, a0, .LBB9_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB9_2: +; RV64I-SFB-NEXT: ret %c = call i16 @llvm.umax.i16(i16 %a, i16 %b) ret i16 %c } @@ -277,6 +521,27 @@ define signext i32 @umax_i32(i32 signext %a, i32 signext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: maxu a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: umax_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.mvgeu a0, a1, a0, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umax_i32: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: bltu a1, a0, .LBB10_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB10_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umax_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: bltu a1, a0, .LBB10_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB10_2: +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.umax.i32(i32 %a, i32 %b) ret i32 %c } @@ -328,6 +593,41 @@ define i64 @umax_i64(i64 %a, i64 %b) { ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: maxu a0, a0, a1 ; RV64ZBB-NEXT: ret +; +; XQCI-LABEL: umax_i64: +; XQCI: # %bb.0: +; XQCI-NEXT: sltu a4, a2, a0 +; XQCI-NEXT: sltu a5, a3, a1 +; XQCI-NEXT: qc.mveq a5, a1, a3, a4 +; XQCI-NEXT: qc.mveqi a0, a5, 0, a2 +; XQCI-NEXT: qc.mveqi a1, a5, 0, a3 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umax_i64: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: sltu a4, a2, a0 +; RV32I-SFB-NEXT: sltu a5, a3, a1 +; RV32I-SFB-NEXT: bne a1, a3, .LBB11_2 +; RV32I-SFB-NEXT: 
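For i64 on RV32 there is no single wide compare, so the sequences above split it: an unsigned compare of the low words, a signed or unsigned compare of the high words, and the low-word verdict is used only when the high words tie. A sketch of the umax_i64 shape, assuming a0/a1 hold the low/high words of %a and a2/a3 those of %b, with the qc.mv semantics modeled above:

def umax_i64_rv32(a0, a1, a2, a3):
    a4 = int(a2 < a0)        # sltu a4, a2, a0: low words, unsigned
    a5 = int(a3 < a1)        # sltu a5, a3, a1: high words, unsigned
    if a1 == a3:             # qc.mveq a5, a1, a3, a4: high words tie
        a5 = a4
    if a5 == 0:              # qc.mveqi a0, a5, 0, a2 / qc.mveqi a1, a5, 0, a3
        a0, a1 = a2, a3      # %b is not smaller, take it
    return a0, a1            # low and high words of max(%a, %b)

assert umax_i64_rv32(1, 0, 0, 1) == (0, 1)   # 2^32 beats 1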
# %bb.1: +; RV32I-SFB-NEXT: mv a5, a4 +; RV32I-SFB-NEXT: .LBB11_2: +; RV32I-SFB-NEXT: bnez a5, .LBB11_4 +; RV32I-SFB-NEXT: # %bb.3: +; RV32I-SFB-NEXT: mv a0, a2 +; RV32I-SFB-NEXT: .LBB11_4: +; RV32I-SFB-NEXT: bnez a5, .LBB11_6 +; RV32I-SFB-NEXT: # %bb.5: +; RV32I-SFB-NEXT: mv a1, a3 +; RV32I-SFB-NEXT: .LBB11_6: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umax_i64: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: bltu a1, a0, .LBB11_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB11_2: +; RV64I-SFB-NEXT: ret %c = call i64 @llvm.umax.i64(i64 %a, i64 %b) ret i64 %c } @@ -347,6 +647,27 @@ define zeroext i8 @umin_i8(i8 zeroext %a, i8 zeroext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: minu a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: umin_i8: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.mvgeu a0, a0, a1, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umin_i8: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: bltu a0, a1, .LBB12_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB12_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umin_i8: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: bltu a0, a1, .LBB12_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB12_2: +; RV64I-SFB-NEXT: ret %c = call i8 @llvm.umin.i8(i8 %a, i8 %b) ret i8 %c } @@ -366,6 +687,27 @@ define zeroext i16 @umin_i16(i16 zeroext %a, i16 zeroext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: minu a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: umin_i16: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.mvgeu a0, a0, a1, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umin_i16: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: bltu a0, a1, .LBB13_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB13_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umin_i16: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: bltu a0, a1, .LBB13_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB13_2: +; RV64I-SFB-NEXT: ret %c = call i16 @llvm.umin.i16(i16 %a, i16 %b) ret i16 %c } @@ -385,6 +727,27 @@ define signext i32 @umin_i32(i32 signext %a, i32 signext %b) { ; ZBB: # %bb.0: ; ZBB-NEXT: minu a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: umin_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.mvgeu a0, a0, a1, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umin_i32: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: bltu a0, a1, .LBB14_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB14_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umin_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: bltu a0, a1, .LBB14_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB14_2: +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.umin.i32(i32 %a, i32 %b) ret i32 %c } @@ -436,6 +799,41 @@ define i64 @umin_i64(i64 %a, i64 %b) { ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: minu a0, a0, a1 ; RV64ZBB-NEXT: ret +; +; XQCI-LABEL: umin_i64: +; XQCI: # %bb.0: +; XQCI-NEXT: sltu a4, a0, a2 +; XQCI-NEXT: sltu a5, a1, a3 +; XQCI-NEXT: qc.mveq a5, a1, a3, a4 +; XQCI-NEXT: qc.mveqi a0, a5, 0, a2 +; XQCI-NEXT: qc.mveqi a1, a5, 0, a3 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umin_i64: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: sltu a4, a0, a2 +; RV32I-SFB-NEXT: sltu a5, a1, a3 +; RV32I-SFB-NEXT: bne a1, a3, .LBB15_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a5, a4 +; RV32I-SFB-NEXT: .LBB15_2: +; RV32I-SFB-NEXT: bnez a5, .LBB15_4 +; RV32I-SFB-NEXT: # %bb.3: +; RV32I-SFB-NEXT: mv a0, a2 +; RV32I-SFB-NEXT: .LBB15_4: +; RV32I-SFB-NEXT: bnez a5, .LBB15_6 +; RV32I-SFB-NEXT: # %bb.5: +; 
RV32I-SFB-NEXT: mv a1, a3 +; RV32I-SFB-NEXT: .LBB15_6: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umin_i64: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: bltu a0, a1, .LBB15_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB15_2: +; RV64I-SFB-NEXT: ret %c = call i64 @llvm.umin.i64(i64 %a, i64 %b) ret i64 %c } @@ -450,6 +848,18 @@ define signext i32 @smin_same_op_i32(i32 signext %a) { ; ZBB-LABEL: smin_same_op_i32: ; ZBB: # %bb.0: ; ZBB-NEXT: ret +; +; XQCI-LABEL: smin_same_op_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smin_same_op_i32: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smin_same_op_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.smin.i32(i32 %a, i32 %a) ret i32 %c } @@ -462,6 +872,18 @@ define signext i32 @smax_same_op_i32(i32 signext %a) { ; ZBB-LABEL: smax_same_op_i32: ; ZBB: # %bb.0: ; ZBB-NEXT: ret +; +; XQCI-LABEL: smax_same_op_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smax_same_op_i32: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smax_same_op_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.smax.i32(i32 %a, i32 %a) ret i32 %c } @@ -474,6 +896,18 @@ define signext i32 @umin_same_op_i32(i32 signext %a) { ; ZBB-LABEL: umin_same_op_i32: ; ZBB: # %bb.0: ; ZBB-NEXT: ret +; +; XQCI-LABEL: umin_same_op_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umin_same_op_i32: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umin_same_op_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.umin.i32(i32 %a, i32 %a) ret i32 %c } @@ -486,6 +920,18 @@ define signext i32 @umax_same_op_i32(i32 signext %a) { ; ZBB-LABEL: umax_same_op_i32: ; ZBB: # %bb.0: ; ZBB-NEXT: ret +; +; XQCI-LABEL: umax_same_op_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umax_same_op_i32: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umax_same_op_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.umax.i32(i32 %a, i32 %a) ret i32 %c } @@ -510,6 +956,19 @@ define signext i32 @smin_undef_i32() { ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: li a0, 0 ; RV64ZBB-NEXT: ret +; +; XQCI-LABEL: smin_undef_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smin_undef_i32: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smin_undef_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: li a0, 0 +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.smin.i32(i32 undef, i32 undef) ret i32 %c } @@ -532,6 +991,19 @@ define signext i32 @smax_undef_i32() { ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: li a0, 0 ; RV64ZBB-NEXT: ret +; +; XQCI-LABEL: smax_undef_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smax_undef_i32: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smax_undef_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: li a0, 0 +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.smax.i32(i32 undef, i32 undef) ret i32 %c } @@ -554,6 +1026,19 @@ define signext i32 @umin_undef_i32() { ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: li a0, 0 ; RV64ZBB-NEXT: ret +; +; XQCI-LABEL: umin_undef_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umin_undef_i32: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umin_undef_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: li a0, 0 +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.umin.i32(i32 undef, i32 undef) ret i32 %c } @@ -576,6 +1061,19 @@ define signext i32 @umax_undef_i32() { ; 
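The RV32I-SFB/RV64I-SFB outputs keep each select as a conditional branch over a single mv instead of materializing the condition in a register; that is the shape +short-forward-branch-opt targets, since short-forward-branch cores can fuse the pair into what is effectively a predicated move. The branch tests the inverse of the select condition:

# Shape of the smax lowering checked above: the branch skips one move.
def smax_sfb(a0, a1):
    # blt a1, a0, .LBB_2: skip the move when a0 already holds the max
    if not (a1 < a0):
        a0 = a1              # mv a0, a1
    return a0                # .LBB_2: ret

assert smax_sfb(-5, 9) == 9 and smax_sfb(9, -5) == 9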
RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: li a0, 0 ; RV64ZBB-NEXT: ret +; +; XQCI-LABEL: umax_undef_i32: +; XQCI: # %bb.0: +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umax_undef_i32: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umax_undef_i32: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: li a0, 0 +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.umax.i32(i32 undef, i32 undef) ret i32 %c } @@ -595,6 +1093,29 @@ define signext i32 @smax_i32_pos_constant(i32 signext %a) { ; ZBB-NEXT: li a1, 10 ; ZBB-NEXT: max a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: smax_i32_pos_constant: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.lilti a0, a0, 11, 10 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smax_i32_pos_constant: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: li a1, 10 +; RV32I-SFB-NEXT: blt a1, a0, .LBB24_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB24_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smax_i32_pos_constant: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: li a1, 10 +; RV64I-SFB-NEXT: blt a1, a0, .LBB24_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB24_2: +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.smax.i32(i32 %a, i32 10) ret i32 %c } @@ -616,6 +1137,33 @@ define signext i32 @smax_i32_pos_constant_trailing_zeros(i32 signext %a) { ; ZBB-NEXT: li a1, 16 ; ZBB-NEXT: max a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: smax_i32_pos_constant_trailing_zeros: +; XQCI: # %bb.0: +; XQCI-NEXT: andi a1, a0, -8 +; XQCI-NEXT: li a0, 16 +; XQCI-NEXT: qc.mvlt a0, a0, a1, a1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smax_i32_pos_constant_trailing_zeros: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: andi a1, a0, -8 +; RV32I-SFB-NEXT: li a0, 16 +; RV32I-SFB-NEXT: bge a0, a1, .LBB25_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB25_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smax_i32_pos_constant_trailing_zeros: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: andi a1, a0, -8 +; RV64I-SFB-NEXT: li a0, 16 +; RV64I-SFB-NEXT: bge a0, a1, .LBB25_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB25_2: +; RV64I-SFB-NEXT: ret %b = and i32 %a, -8 %c = call i32 @llvm.smax.i32(i32 %b, i32 16) %d = and i32 %c, -4 @@ -635,6 +1183,29 @@ define signext i32 @smin_i32_negone(i32 signext %a) { ; ZBB-NEXT: li a1, -1 ; ZBB-NEXT: min a0, a0, a1 ; ZBB-NEXT: ret +; +; XQCI-LABEL: smin_i32_negone: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.ligei a0, a0, 0, -1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smin_i32_negone: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: li a1, -1 +; RV32I-SFB-NEXT: bltz a0, .LBB26_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a1 +; RV32I-SFB-NEXT: .LBB26_2: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smin_i32_negone: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: li a1, -1 +; RV64I-SFB-NEXT: bltz a0, .LBB26_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB26_2: +; RV64I-SFB-NEXT: ret %c = call i32 @llvm.smin.i32(i32 %a, i32 -1) ret i32 %c } @@ -672,6 +1243,34 @@ define i64 @smin_i64_negone(i64 %a) { ; RV64ZBB-NEXT: li a1, -1 ; RV64ZBB-NEXT: min a0, a0, a1 ; RV64ZBB-NEXT: ret +; +; XQCI-LABEL: smin_i64_negone: +; XQCI: # %bb.0: +; XQCI-NEXT: qc.ligei a0, a1, 0, -1 +; XQCI-NEXT: qc.ligei a1, a1, 0, -1 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: smin_i64_negone: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: li a2, -1 +; RV32I-SFB-NEXT: bltz a1, .LBB27_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a0, a2 +; RV32I-SFB-NEXT: .LBB27_2: +; RV32I-SFB-NEXT: bltz a1, .LBB27_4 +; 
RV32I-SFB-NEXT: # %bb.3: +; RV32I-SFB-NEXT: mv a1, a2 +; RV32I-SFB-NEXT: .LBB27_4: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: smin_i64_negone: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: li a1, -1 +; RV64I-SFB-NEXT: bltz a0, .LBB27_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB27_2: +; RV64I-SFB-NEXT: ret %c = call i64 @llvm.smin.i64(i64 %a, i64 -1) ret i64 %c } @@ -720,6 +1319,41 @@ define i64 @umax_i64_one(i64 %a, i64 %b) { ; RV64ZBB-NEXT: li a1, 1 ; RV64ZBB-NEXT: maxu a0, a0, a1 ; RV64ZBB-NEXT: ret +; +; XQCI-LABEL: umax_i64_one: +; XQCI: # %bb.0: +; XQCI-NEXT: mv a2, a1 +; XQCI-NEXT: qc.selectinei a2, 0, a0, 1 +; XQCI-NEXT: qc.liltui a0, a0, 2, 1 +; XQCI-NEXT: qc.mvnei a0, a1, 0, a2 +; XQCI-NEXT: ret +; +; RV32I-SFB-LABEL: umax_i64_one: +; RV32I-SFB: # %bb.0: +; RV32I-SFB-NEXT: li a2, 1 +; RV32I-SFB-NEXT: li a3, 1 +; RV32I-SFB-NEXT: beqz a1, .LBB28_2 +; RV32I-SFB-NEXT: # %bb.1: +; RV32I-SFB-NEXT: mv a3, a0 +; RV32I-SFB-NEXT: .LBB28_2: +; RV32I-SFB-NEXT: bnez a0, .LBB28_4 +; RV32I-SFB-NEXT: # %bb.3: +; RV32I-SFB-NEXT: mv a0, a2 +; RV32I-SFB-NEXT: .LBB28_4: +; RV32I-SFB-NEXT: beqz a1, .LBB28_6 +; RV32I-SFB-NEXT: # %bb.5: +; RV32I-SFB-NEXT: mv a0, a3 +; RV32I-SFB-NEXT: .LBB28_6: +; RV32I-SFB-NEXT: ret +; +; RV64I-SFB-LABEL: umax_i64_one: +; RV64I-SFB: # %bb.0: +; RV64I-SFB-NEXT: li a1, 1 +; RV64I-SFB-NEXT: bnez a0, .LBB28_2 +; RV64I-SFB-NEXT: # %bb.1: +; RV64I-SFB-NEXT: mv a0, a1 +; RV64I-SFB-NEXT: .LBB28_2: +; RV64I-SFB-NEXT: ret %c = call i64 @llvm.umax.i64(i64 %a, i64 1) ret i64 %c } diff --git a/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.ll b/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.ll new file mode 100644 index 0000000..c19e93d --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.ll @@ -0,0 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -O1 -mtriple=riscv64 -mattr=+v < %s | FileCheck %s + +define i32 @pr134424(i64 %input_value, i32 %base_value, i1 %cond_flag1, i1 %cond_flag2, i1 %cond_flag3) { +; CHECK-LABEL: pr134424: +; CHECK: # %bb.0: # %for.body.us.preheader.i +; CHECK-NEXT: andi a3, a3, 1 +; CHECK-NEXT: andi a5, a2, 1 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmv.v.x v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, tu, ma +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 14 +; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: bnez a5, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %for.body.us.preheader.i +; CHECK-NEXT: li a2, 1 +; CHECK-NEXT: .LBB0_2: # %for.body.us.preheader.i +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: andi a4, a4, 1 +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: bnez a3, .LBB0_4 +; CHECK-NEXT: # %bb.3: # %for.body.us.preheader.i +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: .LBB0_4: # %for.body.us.preheader.i +; CHECK-NEXT: vmsle.vi v0, v8, 0 +; CHECK-NEXT: sext.w a2, a2 +; CHECK-NEXT: bnez a4, .LBB0_6 +; CHECK-NEXT: # %bb.5: # %for.body.us.preheader.i +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: .LBB0_6: # %for.body.us.preheader.i +; CHECK-NEXT: sext.w a0, a0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vredmin.vs v8, v8, v8 +; CHECK-NEXT: vmv.x.s a3, v8 +; CHECK-NEXT: sext.w a1, a1 +; CHECK-NEXT: bge a3, a2, .LBB0_11 +; CHECK-NEXT: # %bb.7: # 
%for.body.us.preheader.i +; CHECK-NEXT: bge a0, a1, .LBB0_12 +; CHECK-NEXT: .LBB0_8: # %for.body.us.preheader.i +; CHECK-NEXT: blt a3, a0, .LBB0_10 +; CHECK-NEXT: .LBB0_9: # %for.body.us.preheader.i +; CHECK-NEXT: mv a3, a0 +; CHECK-NEXT: .LBB0_10: # %for.body.us.preheader.i +; CHECK-NEXT: sw a3, 0(zero) +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_11: # %for.body.us.preheader.i +; CHECK-NEXT: mv a3, a2 +; CHECK-NEXT: blt a0, a1, .LBB0_8 +; CHECK-NEXT: .LBB0_12: # %for.body.us.preheader.i +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: bge a3, a0, .LBB0_9 +; CHECK-NEXT: j .LBB0_10 +for.body.us.preheader.i: + %partial_vector = insertelement <4 x i64> zeroinitializer, i64 %input_value, i64 1 + %comparison_vector = shufflevector <4 x i64> %partial_vector, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 1, i32 1> + %comparison_result = icmp sle <4 x i64> %comparison_vector, zeroinitializer + %selected_value1 = select i1 %cond_flag1, i32 %base_value, i32 1 + %selected_value2 = select i1 %cond_flag2, i32 %base_value, i32 1 + %selected_value3 = select i1 %cond_flag3, i32 %base_value, i32 1 + %bool_to_int = zext <4 x i1> %comparison_result to <4 x i32> + %extended_vector = shufflevector <4 x i32> %bool_to_int, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison> + %vector_min = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %extended_vector) + %min1 = call i32 @llvm.smin.i32(i32 %vector_min, i32 %selected_value1) + %min2 = call i32 @llvm.smin.i32(i32 %selected_value2, i32 %selected_value3) + %final_min = call i32 @llvm.smin.i32(i32 %min1, i32 %min2) + store i32 %final_min, ptr null, align 4 + ret i32 0 +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.mir b/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.mir new file mode 100644 index 0000000..aeab8f6 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/regcoal-liveinterval-pruning-crash.mir @@ -0,0 +1,57 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=register-coalescer -o - %s | FileCheck %s + +--- +name: pr71023 +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: pr71023 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $x10, $v8, $v10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead [[DEF:%[0-9]+]]:gpr = IMPLICIT_DEF + ; CHECK-NEXT: undef [[PseudoVMV_V_I_M1_:%[0-9]+]].sub_vrm1_2:vrn8m1 = PseudoVMV_V_I_M1 undef [[PseudoVMV_V_I_M1_]].sub_vrm1_2, 0, -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + ; CHECK-NEXT: [[PseudoVMV_V_I_M1_:%[0-9]+]].sub_vrm1_6:vrn8m1 = COPY undef [[PseudoVMV_V_I_M1_]].sub_vrm1_2 + ; CHECK-NEXT: BNE undef [[DEF]], $x0, %bb.3 + ; CHECK-NEXT: PseudoBR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: BNE undef [[DEF]], $x0, %bb.3 + ; CHECK-NEXT: PseudoBR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: dead [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF + ; CHECK-NEXT: early-clobber [[PseudoVMV_V_I_M1_]].sub_vrm1_0:vrn8m1 = PseudoVRGATHER_VI_M1 undef [[PseudoVMV_V_I_M1_]].sub_vrm1_0, [[PseudoVMV_V_I_M1_]].sub_vrm1_2, 0, 0, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoVSSEG6E8_V_M1_MASK 
[[PseudoVMV_V_I_M1_]].sub_vrm1_0_sub_vrm1_1_sub_vrm1_2_sub_vrm1_3_sub_vrm1_4_sub_vrm1_5, undef [[DEF]], killed undef $v0, 0, 3 /* e8 */, implicit $vl, implicit $vtype :: (store unknown-size, align 1) + ; CHECK-NEXT: PseudoRET + bb.0: + successors: %bb.3(0x40000000), %bb.1(0x40000000) + liveins: $x10, $v8, $v10 + %0:gpr = IMPLICIT_DEF + %1:vrnov0 = PseudoVMV_V_I_M1 undef %1, 0, -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + %2:vrnov0 = IMPLICIT_DEF + undef %3.sub_vrm1_0:vrn6m1nov0 = COPY undef %1 + %3.sub_vrm1_3:vrn6m1nov0 = COPY %2 + %3.sub_vrm1_4:vrn6m1nov0 = COPY undef %1 + BNE undef %0, $x0, %bb.3 + PseudoBR %bb.1 + bb.1: + successors: %bb.3(0x40000000), %bb.2(0x40000000) + BNE killed undef %0, $x0, %bb.3 + PseudoBR %bb.2 + bb.2: + successors: %bb.3(0x80000000) + bb.3: + %4:vr = IMPLICIT_DEF + early-clobber %4:vr = PseudoVRGATHER_VI_M1 undef %4, killed %1, 0, 0, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + undef %5.sub_vrm1_0:vrn6m1 = COPY killed %4 + %5.sub_vrm1_5:vrn6m1 = COPY killed %2 + PseudoVSSEG6E8_V_M1_MASK killed %5, undef %0, killed undef $v0, 0, 3 /* e8 */, implicit $vl, implicit $vtype :: (store unknown-size, align 1) + PseudoRET +... diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive-O0-ATM-ATK.ll b/llvm/test/CodeGen/RISCV/rvv/sifive-O0-ATM-ATK.ll new file mode 100644 index 0000000..d9a49a1 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive-O0-ATM-ATK.ll @@ -0,0 +1,18 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -mattr=+v -O0 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-RV64 + +define void @matmul() { +; CHECK-RV64-LABEL: matmul: +; CHECK-RV64: # %bb.0: # %entry +; CHECK-RV64-NEXT: li a0, 0 +; CHECK-RV64-NEXT: vsetvli zero, a0, 512 +; CHECK-RV64-NEXT: sf.vsettm zero, a0 +; CHECK-RV64-NEXT: sf.vtzero.t mt0 +; CHECK-RV64-NEXT: ret +entry: + call void @llvm.riscv.sf.vtzero.t.i64(i64 0, i64 0, i64 0, i64 3, i64 1) + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare void @llvm.riscv.sf.vtzero.t.i64(i64 immarg, i64, i64, i64 immarg, i64 immarg) #0 diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e4m3.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e4m3.ll new file mode 100644 index 0000000..9b9a849 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e4m3.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.e4m3.e4m3.iXLen.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_e4m3_e4m3_w4_u8m8_u8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_e4m3_e4m3_w4_u8m8_u8m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.e4m3.e4m3 mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.e4m3.e4m3.iXLen.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e5m2.ll 
b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e5m2.ll new file mode 100644 index 0000000..b63974f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e4m3_e5m2.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.e4m3.e5m2.iXLen.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_e4m3_e5m2_w4_u8m8_u8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_e4m3_e5m2_w4_u8m8_u8m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.e4m3.e5m2 mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.e4m3.e5m2.iXLen.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e4m3.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e4m3.ll new file mode 100644 index 0000000..62d629b1 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e4m3.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.e5m2.e4m3.iXLen.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_e5m2_e4m3_w4_u8m8_u8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_e5m2_e4m3_w4_u8m8_u8m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.e5m2.e4m3 mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.e5m2.e4m3.iXLen.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e5m2.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e5m2.ll new file mode 100644 index 0000000..7a90c97 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_e5m2_e5m2.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.e5m2.e5m2.iXLen.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_e5m2_e5m2_w4_u8m8_u8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_e5m2_e5m2_w4_u8m8_u8m8: +; 
CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.e5m2.e5m2 mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.e5m2.e5m2.iXLen.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_f_f.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_f_f.ll new file mode 100644 index 0000000..29451c6 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_f_f.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+xsfmm32a32f -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+xsfmm32a32f -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.f.f.iXLen.nxv32f16(iXLen, <vscale x 32 x half>, <vscale x 32 x half>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_f_f_w2_f16m8(iXLen %mtd, <vscale x 32 x half> %v1, <vscale x 32 x half> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_f_f_w2_f16m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e16, w2 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.f.f mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.f.f.iXLen.nxv32f16(iXLen 0, <vscale x 32 x half> %v1, <vscale x 32 x half> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 2) + ret void +} + +declare void @llvm.riscv.sf.mm.f.f.iXLen.nxv16f32(iXLen, <vscale x 16 x float>, <vscale x 16 x float>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_f_f_w1_f32m8(iXLen %mtd, <vscale x 16 x float> %v1, <vscale x 16 x float> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_f_f_w1_f32m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e32, w1 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.f.f mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.f.f.iXLen.nxv16f32(iXLen 0, <vscale x 16 x float> %v1, <vscale x 16 x float> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 1) + ret void +} + +declare void @llvm.riscv.sf.mm.f.f.iXLen.nxv8f64(iXLen, <vscale x 8 x double>, <vscale x 8 x double>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_f_f_w1_f64m8(iXLen %mtd, <vscale x 8 x double> %v1, <vscale x 8 x double> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_f_f_w1_f64m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e64, w1 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.f.f mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.f.f.iXLen.nxv8f64(iXLen 0, <vscale x 8 x double> %v1, <vscale x 8 x double> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 1) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_s.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_s.ll new file mode 100644 index 0000000..6a4b29f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_s.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 
-mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.s.s.iXLen.nxv64i8.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_s_s_w4_i8m8_i8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_s_s_w4_i8m8_i8m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.s.s mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.s.s.iXLen.nxv64i8.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_u.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_u.ll new file mode 100644 index 0000000..79239b0 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_s_u.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.s.u.iXLen.nxv64i8.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_s_u_w4_i8m8_i8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_s_u_w4_i8m8_i8m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.s.u mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.s.u.iXLen.nxv64i8.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_s.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_s.ll new file mode 100644 index 0000000..b0d039b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_s.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.u.s.iXLen.nxv64i8.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_u_s_w4_i8m8_i8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_u_s_w4_i8m8_i8m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.u.s mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.u.s.iXLen.nxv64i8.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + 
ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_u.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_u.ll new file mode 100644 index 0000000..913c277 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_mm_u_u.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+xsfmm32a8i \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.mm.u.u.iXLen.nxv64i8.nxv64i8(iXLen, <vscale x 64 x i8>, <vscale x 64 x i8>, iXLen, iXLen, iXLen, iXLen) + +define void @test_sf_mm_u_u_w4_i8m8_i8m8(iXLen %mtd, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk) { +; CHECK-LABEL: test_sf_mm_u_u_w4_i8m8_i8m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a1 +; CHECK-NEXT: sf.vsettk zero, a3 +; CHECK-NEXT: sf.mm.u.u mt0, v8, v16 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.mm.u.u.iXLen.nxv64i8.nxv64i8(iXLen 0, <vscale x 64 x i8> %v1, <vscale x 64 x i8> %v2, iXLen %tm, iXLen %tn, iXLen %tk, iXLen 4) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte16.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte16.ll new file mode 100644 index 0000000..8048dec --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte16.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vlte16.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vlte16(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vlte16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e16, w1 +; CHECK-NEXT: sf.vlte16 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vlte16.iXLen(iXLen %tss, ptr %base, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte32.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte32.ll new file mode 100644 index 0000000..a526dc8 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte32.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: 
-mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vlte32.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vlte32(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vlte32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e32, w1 +; CHECK-NEXT: sf.vlte32 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vlte32.iXLen(iXLen %tss, ptr %base, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte64.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte64.ll new file mode 100644 index 0000000..ed0c48a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte64.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vlte64.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vlte64(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vlte64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e64, w1 +; CHECK-NEXT: sf.vlte64 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vlte64.iXLen(iXLen %tss, ptr %base, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte8.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte8.ll new file mode 100644 index 0000000..67b3ed2 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vlte8.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vlte8.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vlte8(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vlte8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w1 +; CHECK-NEXT: sf.vlte8 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vlte8.iXLen(iXLen %tss, ptr %base, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettk.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettk.ll new file mode 100644 index 0000000..4da37fa --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettk.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare iXLen @llvm.riscv.sf.vsettk.iXLen(iXLen, iXLen, iXLen) + +define iXLen @test_sf_vsettk(iXLen %tk) { +; CHECK-LABEL: test_sf_vsettk: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a1, zero, e16, w2 +; CHECK-NEXT: sf.vsettk a0, a0 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettk.iXLen(iXLen %tk, iXLen 1, iXLen 2) + ret iXLen %0 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettm.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettm.ll new file mode 100644 index 0000000..143c26c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettm.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare iXLen @llvm.riscv.sf.vsettm.iXLen(iXLen, iXLen, iXLen) + +define iXLen @test_sf_vsettm(iXLen %tm) { +; CHECK-LABEL: test_sf_vsettm: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a1, zero, e8, w4 +; CHECK-NEXT: sf.vsettm a0, a0 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettm.iXLen(iXLen %tm, iXLen 0, iXLen 3) + ret iXLen %0 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettnt.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettnt.ll new file mode 100644 index 0000000..48fa1bc8 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vsettnt.ll @@ -0,0 +1,72 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen, iXLen, iXLen) + +define iXLen @test_sf_vsettnt_e8w1(iXLen %tn) { +; CHECK-LABEL: 
test_sf_vsettnt_e8w1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a0, a0, e8, w1 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 0, iXLen 1) + ret iXLen %0 +} + +define iXLen @test_sf_vsettnt_e8w2(iXLen %tn) { +; CHECK-LABEL: test_sf_vsettnt_e8w2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a0, a0, e8, w2 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 0, iXLen 2) + ret iXLen %0 +} + +define iXLen @test_sf_vsettnt_e8w4(iXLen %tn) { +; CHECK-LABEL: test_sf_vsettnt_e8w4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a0, a0, e8, w4 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 0, iXLen 3) + ret iXLen %0 +} + +define iXLen @test_sf_vsettnt_e16w1(iXLen %tn) { +; CHECK-LABEL: test_sf_vsettnt_e16w1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a0, a0, e16, w1 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 1, iXLen 1) + ret iXLen %0 +} + +define iXLen @test_sf_vsettnt_e16w2(iXLen %tn) { +; CHECK-LABEL: test_sf_vsettnt_e16w2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a0, a0, e16, w2 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 1, iXLen 2) + ret iXLen %0 +} + +define iXLen @test_sf_vsettnt_e16w4(iXLen %tn) { +; CHECK-LABEL: test_sf_vsettnt_e16w4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt a0, a0, e16, w4 +; CHECK-NEXT: ret + entry: + %0 = call iXLen @llvm.riscv.sf.vsettnt.iXLen(iXLen %tn, iXLen 1, iXLen 3) + ret iXLen %0 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste16.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste16.ll new file mode 100644 index 0000000..7a76151 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste16.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vste16.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vste16(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vste16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e16, w1 +; CHECK-NEXT: sf.vste16 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vste16.iXLen(iXLen %tss, ptr %base, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste32.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste32.ll new file mode 100644 index 0000000..8ff6e6a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste32.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i 
-mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vste32.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vste32(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vste32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e32, w1 +; CHECK-NEXT: sf.vste32 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vste32.iXLen(iXLen %tss, ptr %base, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste64.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste64.ll new file mode 100644 index 0000000..53990e4 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste64.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vste64.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vste64(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vste64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e64, w1 +; CHECK-NEXT: sf.vste64 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vste64.iXLen(iXLen %tss, ptr %base, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste8.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste8.ll new file mode 100644 index 0000000..09b7259 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vste8.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vste8.iXLen(iXLen, ptr, iXLen) + +define dso_local void @test_sf_vste8(iXLen %tss, ptr %base, iXLen %vl) { +; CHECK-LABEL: test_sf_vste8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a2, e8, w1 +; CHECK-NEXT: sf.vste8 a0, (a1) +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vste8.iXLen(iXLen %tss, ptr %base, iXLen %vl) 
+ ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtdiscard.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtdiscard.ll new file mode 100644 index 0000000..394eb60 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtdiscard.ll @@ -0,0 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vtdiscard() + +define dso_local void @test_sf_vtdiscard() { +; CHECK-LABEL: test_sf_vtdiscard: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vtdiscard +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtdiscard() + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_t_v.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_t_v.ll new file mode 100644 index 0000000..66c9d26 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_t_v.ll @@ -0,0 +1,114 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vtmv.t.v.nxv32bf16.iXLen(iXLen, <vscale x 32 x bfloat>, iXLen) + +define void @test_sf_vtmv_t_v_bf16m8(iXLen %tss, <vscale x 32 x bfloat> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_bf16m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtmv.t.v.nxv32bf16.iXLen(iXLen %tss, <vscale x 32 x bfloat> %src, iXLen %vl) + ret void +} + +declare void @llvm.riscv.sf.vtmv.t.v.nxv32f16.iXLen(iXLen, <vscale x 32 x half>, iXLen) + +define void @test_sf_vtmv_t_v_f16(iXLen %tss, <vscale x 32 x half> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtmv.t.v.nxv32f16.iXLen(iXLen %tss, <vscale x 32 x half> %src, iXLen %vl) + ret void +} + +declare void @llvm.riscv.sf.vtmv.t.v.nxv16f32.iXLen(iXLen, <vscale x 16 x float>, iXLen) + +define void @test_sf_vtmv_t_v_f32(iXLen %tss, <vscale x 16 x float> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e32, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void 
@llvm.riscv.sf.vtmv.t.v.nxv16f32.iXLen(iXLen %tss, <vscale x 16 x float> %src, iXLen %vl) + ret void +} + +declare void @llvm.riscv.sf.vtmv.t.v.nxv8f64.iXLen(iXLen, <vscale x 8 x double>, iXLen) + +define void @test_sf_vtmv_t_v_f64(iXLen %tss, <vscale x 8 x double> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e64, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtmv.t.v.nxv8f64.iXLen(iXLen %tss, <vscale x 8 x double> %src, iXLen %vl) + ret void +} + +declare void @llvm.riscv.sf.vtmv.t.v.nxv64i8.iXLen(iXLen, <vscale x 64 x i8>, iXLen) + +define void @test_sf_vtmv_t_v_i8(iXLen %tss, <vscale x 64 x i8> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e8, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtmv.t.v.nxv64i8.iXLen(iXLen %tss, <vscale x 64 x i8> %src, iXLen %vl) + ret void +} + +declare void @llvm.riscv.sf.vtmv.t.v.nxv32i16.iXLen(iXLen, <vscale x 32 x i16>, iXLen) + +define void @test_sf_vtmv_t_v_i16(iXLen %tss, <vscale x 32 x i16> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtmv.t.v.nxv32i16.iXLen(iXLen %tss, <vscale x 32 x i16> %src, iXLen %vl) + ret void +} + +declare void @llvm.riscv.sf.vtmv.t.v.nxv16i32.iXLen(iXLen, <vscale x 16 x i32>, iXLen) + +define void @test_sf_vtmv_t_v_i32(iXLen %tss, <vscale x 16 x i32> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e32, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtmv.t.v.nxv16i32.iXLen(iXLen %tss, <vscale x 16 x i32> %src, iXLen %vl) + ret void +} + +declare void @llvm.riscv.sf.vtmv.t.v.nxv8i64.iXLen(iXLen, <vscale x 8 x i64>, iXLen) + +define void @test_sf_vtmv_t_v_i64(iXLen %tss, <vscale x 8 x i64> %src, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_t_v_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e64, w1 +; CHECK-NEXT: sf.vtmv.t.v a0, v8 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtmv.t.v.nxv8i64.iXLen(iXLen %tss, <vscale x 8 x i64> %src, iXLen %vl) + ret void +} diff --git a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_v_t.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_v_t.ll new file mode 100644 index 0000000..0dcc2ab --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtmv_v_t.ll @@ -0,0 +1,114 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare <vscale x 32 x bfloat> @llvm.riscv.sf.vtmv.v.t.nxv32bf16.iXLen(iXLen, iXLen) + +define <vscale x 32 x bfloat> @test_sf_vtmv_v_t_bf16m8(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: 
test_sf_vtmv_v_t_bf16m8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 32 x bfloat> @llvm.riscv.sf.vtmv.v.t.nxv32bf16.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 32 x bfloat> %0 +} + +declare <vscale x 32 x half> @llvm.riscv.sf.vtmv.v.t.nxv32f16.iXLen(iXLen, iXLen) + +define <vscale x 32 x half> @test_sf_vtmv_v_t_f16(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_v_t_f16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 32 x half> @llvm.riscv.sf.vtmv.v.t.nxv32f16.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 32 x half> %0 +} + +declare <vscale x 16 x float> @llvm.riscv.sf.vtmv.v.t.nxv16f32.iXLen(iXLen, iXLen) + +define <vscale x 16 x float> @test_sf_vtmv_v_t_f32(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_v_t_f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e32, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 16 x float> @llvm.riscv.sf.vtmv.v.t.nxv16f32.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 16 x float> %0 +} + +declare <vscale x 8 x double> @llvm.riscv.sf.vtmv.v.t.nxv8f64.iXLen(iXLen, iXLen) + +define <vscale x 8 x double> @test_sf_vtmv_v_t_f64(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_v_t_f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e64, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 8 x double> @llvm.riscv.sf.vtmv.v.t.nxv8f64.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 8 x double> %0 +} + +declare <vscale x 64 x i8> @llvm.riscv.sf.vtmv.v.t.nxv64i8.iXLen(iXLen, iXLen) + +define <vscale x 64 x i8> @test_sf_vtmv_v_t_i8(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_v_t_i8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e8, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 64 x i8> @llvm.riscv.sf.vtmv.v.t.nxv64i8.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 64 x i8> %0 +} + +declare <vscale x 32 x i16> @llvm.riscv.sf.vtmv.v.t.nxv32i16.iXLen(iXLen, iXLen) + +define <vscale x 32 x i16> @test_sf_vtmv_v_t_i16(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_v_t_i16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e16, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 32 x i16> @llvm.riscv.sf.vtmv.v.t.nxv32i16.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 32 x i16> %0 +} + +declare <vscale x 16 x i32> @llvm.riscv.sf.vtmv.v.t.nxv16i32.iXLen(iXLen, iXLen) + +define <vscale x 16 x i32> @test_sf_vtmv_v_t_i32(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_v_t_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e32, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 16 x i32> @llvm.riscv.sf.vtmv.v.t.nxv16i32.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 16 x i32> %0 +} + +declare <vscale x 8 x i64> @llvm.riscv.sf.vtmv.v.t.nxv8i64.iXLen(iXLen, iXLen) + +define <vscale x 8 x i64> @test_sf_vtmv_v_t_i64(iXLen %tss, iXLen %vl) { +; CHECK-LABEL: test_sf_vtmv_v_t_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e64, w1 +; CHECK-NEXT: sf.vtmv.v.t v8, a0 +; CHECK-NEXT: ret + entry: + %0 = call <vscale x 8 x i64> @llvm.riscv.sf.vtmv.v.t.nxv8i64.iXLen(iXLen %tss, iXLen %vl) + ret <vscale x 8 x i64> %0 +} diff --git 
a/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtzero_t.ll b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtzero_t.ll new file mode 100644 index 0000000..bbccb02 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/sifive_sf_vtzero_t.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: -mattr=+zvfh -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+experimental-zvfbfmin -mattr=+xsfmmbase \ +; RUN: -mattr=+xsfmm32a -mattr=+xsfmm32a8f -mattr=+xsfmm32a4i -mattr=+xsfmm64a64f \ +; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK + +declare void @llvm.riscv.sf.vtzero.t.iXLen(iXLen, iXLen, iXLen, iXLen, iXLen) +define void @test_sf_vtzero_t(iXLen %tm, iXLen %tn) { +; CHECK-LABEL: test_sf_vtzero_t: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: sf.vsettnt zero, a1, e8, w4 +; CHECK-NEXT: sf.vsettm zero, a0 +; CHECK-NEXT: sf.vtzero.t mt0 +; CHECK-NEXT: ret + entry: + call void @llvm.riscv.sf.vtzero.t.iXLen(iXLen 0, iXLen %tm, iXLen %tn, iXLen 3, iXLen 4) + ret void +} + diff --git a/llvm/test/CodeGen/SPIRV/FCmpFalse.ll b/llvm/test/CodeGen/SPIRV/FCmpFalse.ll new file mode 100644 index 0000000..55d64196 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/FCmpFalse.ll @@ -0,0 +1,10 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: %[[#FalseVal:]] = OpConstantFalse %[[#]] +; CHECK: OpReturnValue %[[#FalseVal]] + +define spir_func i1 @f(float %0) { + %2 = fcmp false float %0, %0 + ret i1 %2 +} diff --git a/llvm/test/CodeGen/SPIRV/FCmpFalse_Vec.ll b/llvm/test/CodeGen/SPIRV/FCmpFalse_Vec.ll new file mode 100644 index 0000000..c410b64 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/FCmpFalse_Vec.ll @@ -0,0 +1,13 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: %[[#BoolTy:]] = OpTypeBool +; CHECK: %[[#VecTy:]] = OpTypeVector %[[#BoolTy]] 4 +; CHECK: %[[#False:]] = OpConstantFalse %[[#BoolTy]] +; CHECK: %[[#Composite:]] = OpConstantComposite %[[#VecTy]] %[[#False]] %[[#False]] %[[#False]] %[[#False]] +; CHECK: OpReturnValue %[[#Composite]] + +define spir_func <4 x i1> @test(<4 x float> %a) { + %compare = fcmp false <4 x float> %a, %a + ret <4 x i1> %compare +} diff --git a/llvm/test/CodeGen/SPIRV/builtin_duplicate.ll b/llvm/test/CodeGen/SPIRV/builtin_duplicate.ll new file mode 100644 index 0000000..8786554 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/builtin_duplicate.ll @@ -0,0 +1,20 @@ +;; This test checks that we generate a single builtin variable for the following +;; LLVM IR.
+;; @__spirv_BuiltInLocalInvocationId - A global variable +;; %builtin_call = tail call i64 @_Z12get_local_idj(i32 0) - A function call + +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpName %[[#]] "__spirv_BuiltInLocalInvocationId" +; CHECK-NOT: OpName %[[#]] "__spirv_BuiltInLocalInvocationId.1" + +@__spirv_BuiltInLocalInvocationId = external dso_local local_unnamed_addr addrspace(1) constant <3 x i64>, align 32 + +declare spir_func i64 @_Z12get_local_idj(i32) local_unnamed_addr + +define spir_kernel void @test(i32 %a) { +entry: + %builtin_call = tail call i64 @_Z12get_local_idj(i32 0) + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/complex-constexpr.ll b/llvm/test/CodeGen/SPIRV/complex-constexpr.ll new file mode 100644 index 0000000..e2c1d00 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/complex-constexpr.ll @@ -0,0 +1,21 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +@.str.1 = private unnamed_addr addrspace(1) constant [1 x i8] zeroinitializer, align 1 + +define linkonce_odr hidden spir_func void @test() { +entry: +; CHECK: %[[#MinusOne:]] = OpConstant %[[#]] 18446744073709551615 +; CHECK: %[[#Ptr:]] = OpConvertUToPtr %[[#]] %[[#MinusOne]] +; CHECK: %[[#PtrCast:]] = OpPtrCastToGeneric %[[#]] %[[#]] +; CHECK: %[[#]] = OpFunctionCall %[[#]] %[[#]] %[[#PtrCast]] %[[#Ptr]] + + %cast = bitcast ptr addrspace(4) inttoptr (i64 -1 to ptr addrspace(4)) to ptr addrspace(4) + call spir_func void @bar(ptr addrspace(4) addrspacecast (ptr addrspace(1) @.str.1 to ptr addrspace(4)), ptr addrspace(4) %cast) + ret void +} + +define linkonce_odr hidden spir_func void @bar(ptr addrspace(4) %begin, ptr addrspace(4) %end) { +entry: + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/dominator-order.ll b/llvm/test/CodeGen/SPIRV/dominator-order.ll new file mode 100644 index 0000000..2ecdddc --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/dominator-order.ll @@ -0,0 +1,25 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; This test checks that basic blocks are reordered in SPIR-V so that dominators +; are emitted ahead of their dominated blocks as required by the SPIR-V 
+ +; CHECK-DAG: OpName %[[#ENTRY:]] "entry" +; CHECK-DAG: OpName %[[#FOR_BODY137_LR_PH:]] "for.body137.lr.ph" +; CHECK-DAG: OpName %[[#FOR_BODY:]] "for.body" + +; CHECK: %[[#ENTRY]] = OpLabel +; CHECK: %[[#FOR_BODY]] = OpLabel +; CHECK: %[[#FOR_BODY137_LR_PH]] = OpLabel + +define spir_kernel void @test(ptr addrspace(1) %arg, i1 %cond) { +entry: + br label %for.body + +for.body137.lr.ph: ; preds = %for.body + ret void + +for.body: ; preds = %for.body, %entry + br i1 %cond, label %for.body, label %for.body137.lr.ph +} diff --git a/llvm/test/CodeGen/SPIRV/llvm-compiler-used.ll b/llvm/test/CodeGen/SPIRV/llvm-compiler-used.ll new file mode 100644 index 0000000..ddc2585 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/llvm-compiler-used.ll @@ -0,0 +1,19 @@ +; RUN: llc -verify-machineinstrs -mtriple=spirv-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -mtriple=spirv-unknown-unknown %s -o - -filetype=obj | spirv-val %} +; RUN: llc -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %} + +; Verify that llvm.compiler.used is not lowered. +; CHECK: OpName %{{[0-9]+}} "unused" +; CHECK-NOT: OpName %{{[0-9]+}} "llvm.compiler.used" + +; Check that the type of llvm.compiler.used is not emitted too. +; CHECK-NOT: OpTypeArray + +@unused = private addrspace(3) global i32 0 +@llvm.compiler.used = appending addrspace(2) global [1 x ptr addrspace (4)] [ptr addrspace(4) addrspacecast (ptr addrspace(3) @unused to ptr addrspace(4))] + +define spir_func void @foo() { +entry: + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/llvm-intrinsics/fake_use.ll b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/fake_use.ll new file mode 100644 index 0000000..5370b51 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/llvm-intrinsics/fake_use.ll @@ -0,0 +1,13 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: OpCapability Addresses +; CHECK-DAG: OpName %[[#]] "foo" + +declare void @llvm.fake.use(...) + +define spir_kernel void @foo(ptr addrspace(1) %a) { +entry: + call void (...) 
@llvm.fake.use(ptr addrspace(1) %a) + ret void +} diff --git a/llvm/test/CodeGen/SPIRV/transcoding/AtomicCompareExchange_cl20.ll b/llvm/test/CodeGen/SPIRV/transcoding/AtomicCompareExchange_cl20.ll new file mode 100644 index 0000000..8357373 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/transcoding/AtomicCompareExchange_cl20.ll @@ -0,0 +1,84 @@ +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64v1.2-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-NOT: OpCapability Int64Atomics + +; CHECK-DAG: %[[#int:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#int8:]] = OpTypeInt 8 0 +; CHECK-DAG: %[[#DeviceScope:]] = OpConstant %[[#int]] 1 +; CHECK-DAG: %[[#SequentiallyConsistent_MS:]] = OpConstant %[[#int]] 16 +; CHECK-DAG: %[[#int_ptr:]] = OpTypePointer Generic %[[#int]] +; CHECK-DAG: %[[#int_ptr8:]] = OpTypePointer Generic %[[#int8]] +; CHECK-DAG: %[[#bool:]] = OpTypeBool + +define spir_func void @test(ptr addrspace(4) %object, ptr addrspace(4) %expected, i32 %desired) { + +; CHECK: %[[#object:]] = OpFunctionParameter %[[#int_ptr8]] +; CHECK: %[[#expected:]] = OpFunctionParameter %[[#int_ptr8]] +; CHECK: %[[#desired:]] = OpFunctionParameter %[[#int]] + +entry: + %object.addr = alloca ptr addrspace(4), align 4 + %expected.addr = alloca ptr addrspace(4), align 4 + %desired.addr = alloca i32, align 4 + %strong_res = alloca i8, align 1 + %res = alloca i8, align 1 + %weak_res = alloca i8, align 1 + store ptr addrspace(4) %object, ptr %object.addr, align 4 + store ptr addrspace(4) %expected, ptr %expected.addr, align 4 + store i32 %desired, ptr %desired.addr, align 4 + %0 = load ptr addrspace(4), ptr %object.addr, align 4 + %1 = load ptr addrspace(4), ptr %expected.addr, align 4 + %2 = load i32, ptr %desired.addr, align 4 + +; CHECK-DAG: OpStore %[[#object_addr:]] %[[#object]] +; CHECK-DAG: OpStore %[[#expected_addr:]] %[[#expected]] +; CHECK-DAG: OpStore %[[#desired_addr:]] %[[#desired]] + +; CHECK: %[[#Pointer:]] = OpLoad %[[#int_ptr]] %[[#]] +; CHECK: %[[#exp:]] = OpLoad %[[#int_ptr]] %[[#]] +; CHECK: %[[#Value:]] = OpLoad %[[#int]] %[[#desired_addr]] +; CHECK: %[[#Comparator:]] = OpLoad %[[#int]] %[[#exp]] + +; CHECK: %[[#Result:]] = OpAtomicCompareExchange %[[#int]] %[[#]] %[[#DeviceScope]] %[[#SequentiallyConsistent_MS]] %[[#SequentiallyConsistent_MS]] %[[#Value]] %[[#Comparator]] + %call = call spir_func zeroext i1 @_Z30atomic_compare_exchange_strongPVU3AS4U7_AtomiciPU3AS4ii(ptr addrspace(4) %0, ptr addrspace(4) %1, i32 %2) + +; CHECK-NEXT: OpStore %[[#exp]] %[[#Result]] +; CHECK-NEXT: %[[#CallRes:]] = OpIEqual %[[#bool]] %[[#Result]] %[[#Comparator]] +; CHECK-NOT: %[[#Result]] + + %frombool = zext i1 %call to i8 + store i8 %frombool, ptr %strong_res, align 1 + %3 = load i8, ptr %strong_res, align 1 + %tobool = trunc i8 %3 to i1 + %lnot = xor i1 %tobool, true + %frombool1 = zext i1 %lnot to i8 + store i8 %frombool1, ptr %res, align 1 + %4 = load ptr addrspace(4), ptr %object.addr, align 4 + %5 = load ptr addrspace(4), ptr %expected.addr, align 4 + %6 = load i32, ptr %desired.addr, align 4 + +; CHECK: %[[#Pointer:]] = OpLoad %[[#int_ptr]] %[[#]] +; CHECK: %[[#exp:]] = OpLoad %[[#int_ptr]] %[[#]] +; CHECK: %[[#Value:]] = OpLoad %[[#int]] %[[#desired_addr]] +; CHECK: %[[#ComparatorWeak:]] = OpLoad %[[#int]] %[[#exp]] + +; CHECK: %[[#Result:]] = OpAtomicCompareExchangeWeak %[[#int]] %[[#]] %[[#DeviceScope]] %[[#SequentiallyConsistent_MS]] %[[#SequentiallyConsistent_MS]] %[[#Value]] %[[#ComparatorWeak]] + 
%call2 = call spir_func zeroext i1 @_Z28atomic_compare_exchange_weakPVU3AS4U7_AtomiciPU3AS4ii(ptr addrspace(4) %4, ptr addrspace(4) %5, i32 %6) + +; CHECK-NEXT: OpStore %[[#exp]] %[[#Result]] +; CHECK-NEXT: %[[#CallRes:]] = OpIEqual %[[#bool]] %[[#Result]] %[[#ComparatorWeak]] +; CHECK-NOT: %[[#Result]] + + %frombool3 = zext i1 %call2 to i8 + store i8 %frombool3, ptr %weak_res, align 1 + %7 = load i8, ptr %weak_res, align 1 + %tobool4 = trunc i8 %7 to i1 + %lnot5 = xor i1 %tobool4, true + %frombool6 = zext i1 %lnot5 to i8 + store i8 %frombool6, ptr %res, align 1 + ret void +} + +declare spir_func zeroext i1 @_Z30atomic_compare_exchange_strongPVU3AS4U7_AtomiciPU3AS4ii(ptr addrspace(4), ptr addrspace(4), i32) #1 +declare spir_func zeroext i1 @_Z28atomic_compare_exchange_weakPVU3AS4U7_AtomiciPU3AS4ii(ptr addrspace(4), ptr addrspace(4), i32) #1 diff --git a/llvm/test/CodeGen/SystemZ/htm-intrinsics.ll b/llvm/test/CodeGen/SystemZ/htm-intrinsics.ll index c6ee804..07fbed9 100644 --- a/llvm/test/CodeGen/SystemZ/htm-intrinsics.ll +++ b/llvm/test/CodeGen/SystemZ/htm-intrinsics.ll @@ -90,7 +90,7 @@ define i32 @test_tbegin_nofloat4(i32 %pad, ptr %ptr) { ; CHECK: tbegin 0, 65292 ; CHECK: ipm %r2 ; CHECK: srl %r2, 28 -; CHECK: ciblh %r2, 2, 0(%r14) +; CHECK: bnhr %r14 ; CHECK: mvhi 0(%r3), 0 ; CHECK: br %r14 %res = call i32 @llvm.s390.tbegin.nofloat(ptr null, i32 65292) @@ -219,7 +219,7 @@ define i32 @test_tend2(i32 %pad, ptr %ptr) { ; CHECK: tend ; CHECK: ipm %r2 ; CHECK: srl %r2, 28 -; CHECK: ciblh %r2, 2, 0(%r14) +; CHECK: bnhr %r14 ; CHECK: mvhi 0(%r3), 0 ; CHECK: br %r14 %res = call i32 @llvm.s390.tend() diff --git a/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-01.ll b/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-01.ll new file mode 100644 index 0000000..6b8746e --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-01.ll @@ -0,0 +1,738 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -O2 | FileCheck %s +; Test implementation of combining br_ccmask for flag output operand, and +; optimizing ipm sequence using conditional branches. + +declare void @dummy() + +; Check a case where the cc is used as an integer. +; Just (srl (ipm)) sequence without optimization. +define i32 @test(ptr %a) { +; CHECK-LABEL: test: +; CHECK: # %bb.0: +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ipm %r2 +; CHECK-NEXT: srl %r2, 28 +; CHECK-NEXT: br %r14 + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + ret i32 %cc +} + +; Test-1(f1_0_*). Test all 14 valid combinations, where cc is being used for +; branching. + +; Check (cc == 0). +define void @f1_0_eq_0(ptr %a) { +; CHECK-LABEL: f1_0_eq_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jge dummy@PLT +; CHECK-NEXT: .LBB1_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp eq i32 %cc, 0 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc != 0). 
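+; Given the (cc < 4) range assumption established by llvm.assume, the
+; (cc != 0) test is expressed below as the unsigned comparison (cc u> 0).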
+define void @f1_0_ne_0(ptr %a) { +; CHECK-LABEL: f1_0_ne_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgne dummy@PLT +; CHECK-NEXT: .LBB2_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp ugt i32 %cc, 0 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 1). +define void @f1_0_eq_1(ptr %a) { +; CHECK-LABEL: f1_0_eq_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgl dummy@PLT +; CHECK-NEXT: .LBB3_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp eq i32 %cc, 1 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc != 1). +define void @f1_0_ne_1(ptr %a) { +; CHECK-LABEL: f1_0_ne_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnl dummy@PLT +; CHECK-NEXT: .LBB4_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp ne i32 %cc, 1 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 2). +define void @f1_0_eq_2(ptr %a) { +; CHECK-LABEL: f1_0_eq_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgh dummy@PLT +; CHECK-NEXT: .LBB5_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp eq i32 %cc, 2 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc != 2). +define void @f1_0_ne_2(ptr %a) { +; CHECK-LABEL: f1_0_ne_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnh dummy@PLT +; CHECK-NEXT: .LBB6_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp ne i32 %cc, 2 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 3). 
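+; CC value 3 corresponds to the overflow condition, so the expected branch
+; mnemonic below is jgo.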
+define void @f1_0_eq_3(ptr %a) { +; CHECK-LABEL: f1_0_eq_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgo dummy@PLT +; CHECK-NEXT: .LBB7_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp eq i32 %cc, 3 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc != 3). +define void @f1_0_ne_3(ptr %a) { +; CHECK-LABEL: f1_0_ne_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgno dummy@PLT +; CHECK-NEXT: .LBB8_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp ult i32 %cc, 3 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 0|1). +define void @f1_0_01(ptr %a) { +; CHECK-LABEL: f1_0_01: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgle dummy@PLT +; CHECK-NEXT: .LBB9_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp = icmp ult i32 %cc, 2 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 0|2). +define void @f1_0_02(ptr %a) { +; CHECK-LABEL: f1_0_02: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jghe dummy@PLT +; CHECK-NEXT: .LBB10_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cmp = icmp eq i32 %and, 0 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 0|3). +define void @f1_0_03(ptr %a) { +; CHECK-LABEL: f1_0_03: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnlh dummy@PLT +; CHECK-NEXT: .LBB11_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmp0 = icmp ne i32 %cc, 0 + %cmp3 = icmp ne i32 %cc, 3 + %cmp.inv = and i1 %cmp0, %cmp3 + br i1 %cmp.inv, label %exit, label %branch +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check (cc == 1|2). 
+define void @f1_0_12(ptr %a) {
+; CHECK-LABEL: f1_0_12:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jglh dummy@PLT
+; CHECK-NEXT: .LBB12_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %cmpeq1 = icmp eq i32 %cc, 1
+ %cmpeq2 = icmp eq i32 %cc, 2
+ %cmp = or i1 %cmpeq1, %cmpeq2
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check (cc == 1|3).
+define void @f1_0_13(ptr %a) {
+; CHECK-LABEL: f1_0_13:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgnhe dummy@PLT
+; CHECK-NEXT: .LBB13_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %cmpeq1 = icmp eq i32 %cc, 1
+ %cmpeq3 = icmp eq i32 %cc, 3
+ %cmp = or i1 %cmpeq1, %cmpeq3
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check (cc == 2|3).
+define void @f1_0_23(ptr %a) {
+; CHECK-LABEL: f1_0_23:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgnle dummy@PLT
+; CHECK-NEXT: .LBB14_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %cmp = icmp ugt i32 %cc, 1
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Test-2(f1_1_*/f1_2_*/f1_3_*/f1_4_*).
+; Test mixed patterns involving Binary Ops.
+
+; Check 'add' for (cc != 0).
+define void @f1_1_1(ptr %a) {
+; CHECK-LABEL: f1_1_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgne dummy@PLT
+; CHECK-NEXT: .LBB15_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %add = add nsw i32 %cc, -1
+ %cmp = icmp ult i32 %add, 3
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check 'add' for (cc == 1|2).
+define void @f1_1_2(ptr %a) {
+; CHECK-LABEL: f1_1_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jglh dummy@PLT
+; CHECK-NEXT: .LBB16_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %add = add nsw i32 %cc, -1
+ %cmp = icmp ult i32 %add, 2
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check 'add' for (cc == 1|2).
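+; Here the inverse condition (cc == 0|3) is computed as ((cc - 3) u< -2) and
+; the branch targets are swapped; this should still fold to the same jglh.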
+define void @f1_1_3(ptr %a) { +; CHECK-LABEL: f1_1_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jglh dummy@PLT +; CHECK-NEXT: .LBB17_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -3 + %cmp.inv = icmp ult i32 %add, -2 + br i1 %cmp.inv, label %exit, label %branch +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check 'and' with one operand cc and other select_ccmask(cc !=1). +define void @f1_2_1(ptr %a) { +; CHECK-LABEL: f1_2_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnl dummy@PLT +; CHECK-NEXT: .LBB18_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpne0 = icmp ne i32 %andcc, 0 + %cmpne3 = icmp ne i32 %cc, 3 + %cmp.inv = and i1 %cmpne3, %cmpne0 + br i1 %cmp.inv, label %exit, label %branch +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check 'and' with both operands select_ccmask(cc != 2). +define void @f1_2_2(ptr %a) { +; CHECK-LABEL: f1_2_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnh dummy@PLT +; CHECK-NEXT: .LBB19_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %ugt1 = icmp samesign ugt i32 %cc, 1 + %cmpne3 = icmp ne i32 %cc, 3 + %and.cond.inv = and i1 %ugt1, %cmpne3 + br i1 %and.cond.inv, label %exit, label %branch +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check 'and/tm' for (cc == 0|2). +define void @f1_2_3(ptr %a) { +; CHECK-LABEL: f1_2_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jghe dummy@PLT +; CHECK-NEXT: .LBB20_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cmp = icmp eq i32 %and, 0 + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check 'and/tm' for (cc == 1|3). 
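+; (cc & 1) != 0 holds exactly for cc == 1|3, i.e. a test of the low CC bit,
+; which maps to the nhe (not high-or-equal) mask below.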
+define void @f1_2_4(ptr %a) { +; CHECK-LABEL: f1_2_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnhe dummy@PLT +; CHECK-NEXT: .LBB21_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cmp = icmp eq i32 %and, 0 + br i1 %cmp, label %exit, label %branch +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check 'icmp' with one operand 'and' and other 'select_ccmask'(cc != 1). +define void @f1_2_5(ptr %a) { +; CHECK-LABEL: f1_2_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnl dummy@PLT +; CHECK-NEXT: .LBB22_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne3 = icmp ne i32 %cc, 3 + %cmp = xor i1 %cmpne3, %trunc + br i1 %cmp, label %branch, label %exit +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check nested 'xor' cc with select_ccmask(cc != 1). +define void @f1_3_1(ptr %a) { +; CHECK-LABEL: f1_3_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnl dummy@PLT +; CHECK-NEXT: .LBB23_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmpeq0 = icmp eq i32 %cc, 0 + %cmpeq2 = icmp eq i32 %cc, 2 + %xor = xor i1 %cmpeq0, %cmpeq2 + %cmpne3 = icmp ne i32 %cc, 3 + %cmp.inv = xor i1 %cmpne3, %xor + br i1 %cmp.inv, label %exit, label %branch +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=1). +define void @f1_3_2(ptr %a) { +; CHECK-LABEL: f1_3_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: jgnl dummy@PLT +; CHECK-NEXT: .LBB24_1: # %exit +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpeq3 = icmp eq i32 %cc, 3 + %cmp.inv = xor i1 %cmpeq3, %trunc + br i1 %cmp.inv, label %exit, label %branch +branch: + tail call void @dummy() + br label %exit +exit: + ret void +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=2). 
+define void @f1_3_3(ptr %a) {
+; CHECK-LABEL: f1_3_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgnh dummy@PLT
+; CHECK-NEXT: .LBB25_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %trunc = trunc i32 %cc to i1
+ %cmpne0 = icmp ne i32 %cc, 0
+ %cmp.cond.inv = xor i1 %cmpne0, %trunc
+ br i1 %cmp.cond.inv, label %exit, label %branch
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check 'or' with both operands select_ccmask, one with TM and other with
+; ICMP(cc == 1).
+define void @f1_4_1(ptr %a) {
+; CHECK-LABEL: f1_4_1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgl dummy@PLT
+; CHECK-NEXT: .LBB26_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %andcc = and i32 %cc, 1
+ %cmpeq0 = icmp eq i32 %andcc, 0
+ %cmpeq3 = icmp eq i32 %cc, 3
+ %cmp.cond.inv = or i1 %cmpeq3, %cmpeq0
+ br i1 %cmp.cond.inv, label %exit, label %branch
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check 'or' for (cc == 0|1).
+define void @f1_4_2(ptr %a) {
+; CHECK-LABEL: f1_4_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgle dummy@PLT
+; CHECK-NEXT: .LBB27_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %or = or disjoint i32 %cc, -4
+ %cmp.inv = icmp samesign ugt i32 %or, -3
+ br i1 %cmp.inv, label %exit, label %branch
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
+; Check 'or' for (cc == 0|1).
+define void @f1_4_3(ptr %a) {
+; CHECK-LABEL: f1_4_3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: #APP
+; CHECK-NEXT: alsi 0(%r2), -1
+; CHECK-EMPTY:
+; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: jgle dummy@PLT
+; CHECK-NEXT: .LBB28_1: # %exit
+; CHECK-NEXT: br %r14
+entry:
+ %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a)
+ %tmp = icmp ult i32 %cc, 4
+ tail call void @llvm.assume(i1 %tmp)
+ %or = or disjoint i32 %cc, -4
+ %cmp = icmp samesign ult i32 %or, -2
+ br i1 %cmp, label %branch, label %exit
+branch:
+ tail call void @dummy()
+ br label %exit
+exit:
+ ret void
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-02.ll b/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-02.ll new file mode 100644 index 0000000..b9b9a4b --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/inline-asm-flag-output-02.ll @@ -0,0 +1,1665 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -O2 | FileCheck %s
+; Test implementation of combining select_ccmask for flag output operand and
+; optimizing ipm sequence using conditional branches.
+
+; Test-1(f2_0_*): Both TrueVal and FalseVal non-const (14 valid CCMask combinations).
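+; Unlike inline-asm-flag-output-01.ll, the CC consumer here is a select
+; rather than a branch, so the expected lowering is a conditional return on
+; the corresponding CC mask (e.g. ber %r14 for cc == 0) with the other
+; select operand materialized on the fall-through path.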
+ +; Check (cc == 0). +define i64 @f2_0_eq_0(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_eq_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: ber %r14 +; CHECK-NEXT: .LBB0_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp eq i32 %cc, 0 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc != 0). +define i64 @f2_0_ne_0(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_ne_0: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bner %r14 +; CHECK-NEXT: .LBB1_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp ugt i32 %cc, 0 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc == 1). +define i64 @f2_0_eq_1(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_eq_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB2_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp eq i32 %cc, 1 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc != 1). +define i64 @f2_0_ne_1(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_ne_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB3_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp ne i32 %cc, 1 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc == 2). +define i64 @f2_0_eq_2(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_eq_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB4_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp eq i32 %cc, 2 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc != 2). 
+define i64 @f2_0_ne_2(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_ne_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnhr %r14 +; CHECK-NEXT: .LBB5_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp ne i32 %cc, 2 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc == 3). +define i64 @f2_0_eq_3(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_eq_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bor %r14 +; CHECK-NEXT: .LBB6_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp eq i32 %cc, 3 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc != 3). +define i64 @f2_0_ne_3(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_ne_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnor %r14 +; CHECK-NEXT: .LBB7_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp ult i32 %cc, 3 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc == 0|1). +define i64 @f2_0_01(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_01: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bler %r14 +; CHECK-NEXT: .LBB8_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp ult i32 %cc, 2 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc == 0|2). +define i64 @f2_0_02(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_02: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB9_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %and = and i32 %cc, 1 + %cond = icmp eq i32 %and, 0 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check (cc == 0|3). 
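+; The condition is tested through its inverse (cc == 1|2), so the select
+; operands arrive swapped (%y, %x) and the expected conditional return is
+; blhr.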
+define i64 @f2_0_03(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_0_03: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blhr %r14 +; CHECK-NEXT: .LBB10_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cmp0 = icmp ne i32 %cc, 0 + %cmp3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %cmp0, %cmp3 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check (cc == 1|2). +define i64 @f2_0_12(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_0_12: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnlhr %r14 +; CHECK-NEXT: .LBB11_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %add = add nsw i32 %cc, -3 + %cond.inv = icmp ult i32 %add, -2 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check (cc == 1|3). +define i64 @f2_0_13(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_0_13: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB12_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %and = and i32 %cc, 1 + %cond.inv = icmp eq i32 %and, 0 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check (cc == 2|3). +define i64 @f2_0_23(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_0_23: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnler %r14 +; CHECK-NEXT: .LBB13_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %cond = icmp ugt i32 %cc, 1 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Test-2(f2_1_*/f2_2_*/f2_3_*/f2_4_*). +; Both TrueVal and FalseVal are non-const with mixed patterns involving +; Binary Ops. + +; Check 'add' for (cc != 0). +define i64 @f2_1_1(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_1_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bner %r14 +; CHECK-NEXT: .LBB14_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 3 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check 'add' for (cc == 1|2). 
+define i64 @f2_1_2(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_1_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blhr %r14 +; CHECK-NEXT: .LBB15_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 2 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check 'add' for (cc == 1|2). +define i64 @f2_1_3(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_1_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnlhr %r14 +; CHECK-NEXT: .LBB16_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -3 + %cond.inv = icmp ult i32 %add, -2 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check 'and' with one operand cc and other select_ccmask(cc !=1). +define i64 @f2_2_1(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_2_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB17_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpne0 = icmp ne i32 %andcc, 0 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %cmpne3, %cmpne0 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check 'and' with both operands select_ccmask(cc != 2). +define i64 @f2_2_2(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_2_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB18_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %ugt1 = icmp samesign ugt i32 %cc, 1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %ugt1, %cmpne3 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check 'and/tm' for (cc == 0|2). +define i64 @f2_2_3(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_2_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB19_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond = icmp eq i32 %and, 0 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Check 'and/tm' for (cc == 1|3). 
+define i64 @f2_2_4(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_2_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB20_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond.inv = icmp eq i32 %and, 0 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check 'icmp' with one operand 'and' and other 'select_ccmask'(cc != 1). +define i64 @f2_2_5(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_2_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB21_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond = xor i1 %cmpne3, %trunc + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + + +; Check nested 'xor' cc with select_ccmask(cc != 1). +define i64 @f2_3_1(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_3_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB22_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmpeq0 = icmp eq i32 %cc, 0 + %cmpeq2 = icmp eq i32 %cc, 2 + %xor = xor i1 %cmpeq0, %cmpeq2 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = xor i1 %cmpne3, %xor + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=1). +define i64 @f2_3_2(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_3_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB23_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpeq3 = icmp eq i32 %cc, 3 + %cond.inv = xor i1 %cmpeq3, %trunc + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=2). 
+define i64 @f2_3_3(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_3_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB24_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne0 = icmp ne i32 %cc, 0 + %cond.inv = xor i1 %cmpne0, %trunc + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check 'or' with both operands select_ccmask with TM and ICMP(cc == 1). +define i64 @f2_4_1(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_4_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB25_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpeq0 = icmp eq i32 %andcc, 0 + %cmpeq3 = icmp eq i32 %cc, 3 + %cond.inv = or i1 %cmpeq3, %cmpeq0 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check 'or' for (cc == 0|1). +define i64 @f2_4_2(i64 %y, i64 %x, ptr %a) { +; CHECK-LABEL: f2_4_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnler %r14 +; CHECK-NEXT: .LBB26_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cond.inv = icmp samesign ugt i32 %or, -3 + %res = select i1 %cond.inv, i64 %y, i64 %x + ret i64 %res +} + +; Check 'or' for (cc == 0|1). +define i64 @f2_4_3(i64 %x, i64 %y, ptr %a) { +; CHECK-LABEL: f2_4_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r4), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bler %r14 +; CHECK-NEXT: .LBB27_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cond = icmp samesign ult i32 %or, -2 + %res = select i1 %cond, i64 %x, i64 %y + ret i64 %res +} + +; Test-3(f3_1_*/f3_2_*/f3_3_*/f3_4_*). +; TrueVal is non-const and FalseVal is const with mixed patterns involving +; Binary Ops. + +; Check 'add' for (cc != 0). 
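+; With a constant FalseVal, the fall-through path materializes the constant
+; directly (lghi %r2, 5) instead of copying a register operand.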
+define i64 @f3_1_1(i64 %x, ptr %a) { +; CHECK-LABEL: f3_1_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bner %r14 +; CHECK-NEXT: .LBB28_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 3 + %res = select i1 %cond, i64 %x, i64 5 + ret i64 %res +} + +; Check 'add' for (cc == 1|2). +define i64 @f3_1_2(i64 %x, ptr %a) { +; CHECK-LABEL: f3_1_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blhr %r14 +; CHECK-NEXT: .LBB29_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 2 + %res = select i1 %cond, i64 %x, i64 5 + ret i64 %res +} + +; Check 'add' for (cc == 1|2). +define i64 @f3_1_3(ptr %a, i64 %x) { +; CHECK-LABEL: f3_1_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bnlhr %r14 +; CHECK-NEXT: .LBB30_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -3 + %cond.inv = icmp ult i32 %add, -2 + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check 'and' with one operand cc and other select_ccmask(cc !=1). +define i64 @f3_2_1(ptr %a, i64 %x) { +; CHECK-LABEL: f3_2_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB31_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpne0 = icmp ne i32 %andcc, 0 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %cmpne3, %cmpne0 + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check 'and' with both operands select_ccmask(cc != 2). +define i64 @f3_2_2(ptr %a, i64 %x) { +; CHECK-LABEL: f3_2_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB32_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %ugt1 = icmp samesign ugt i32 %cc, 1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %ugt1, %cmpne3 + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check 'and/tm' for (cc == 0|2). 
+define i64 @f3_2_3(i64 %x, ptr %a) { +; CHECK-LABEL: f3_2_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB33_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond = icmp eq i32 %and, 0 + %res = select i1 %cond, i64 %x, i64 5 + ret i64 %res +} + +; Check 'and/tm' for (cc == 1|3). +define i64 @f3_2_4(ptr %a, i64 %x) { +; CHECK-LABEL: f3_2_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB34_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond.inv = icmp eq i32 %and, 0 + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check 'icmp' with one operand 'and' and other 'select_ccmask'(cc != 1). +define i64 @f3_2_5(i64 %x, ptr %a) { +; CHECK-LABEL: f3_2_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB35_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond = xor i1 %cmpne3, %trunc + %res = select i1 %cond, i64 %x, i64 5 + ret i64 %res +} + + +; Check nested 'xor' cc with select_ccmask(cc != 1). +define i64 @f3_3_1(ptr %a, i64 %x) { +; CHECK-LABEL: f3_3_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB36_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmpeq0 = icmp eq i32 %cc, 0 + %cmpeq2 = icmp eq i32 %cc, 2 + %xor = xor i1 %cmpeq0, %cmpeq2 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = xor i1 %cmpne3, %xor + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=1). 
+define i64 @f3_3_2(ptr %a, i64 %x) { +; CHECK-LABEL: f3_3_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB37_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpeq3 = icmp eq i32 %cc, 3 + %cond.inv = xor i1 %cmpeq3, %trunc + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=2). +define i64 @f3_3_3(ptr %a, i64 %x) { +; CHECK-LABEL: f3_3_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB38_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne0 = icmp ne i32 %cc, 0 + %cond.inv = xor i1 %cmpne0, %trunc + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check 'or' with both operands select_ccmask with TM and ICMP(cc == 1). +define i64 @f3_4_1(ptr %a, i64 %x) { +; CHECK-LABEL: f3_4_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB39_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpeq0 = icmp eq i32 %andcc, 0 + %cmpeq3 = icmp eq i32 %cc, 3 + %cond.inv = or i1 %cmpeq3, %cmpeq0 + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check 'or' for (cc == 0|1). +define i64 @f3_4_2(ptr %a, i64 %x) { +; CHECK-LABEL: f3_4_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bnler %r14 +; CHECK-NEXT: .LBB40_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cond.inv = icmp samesign ugt i32 %or, -3 + %res = select i1 %cond.inv, i64 5, i64 %x + ret i64 %res +} + +; Check 'or' for (cc == 0|1). 
+define i64 @f3_4_3(i64 %x, ptr %a) { +; CHECK-LABEL: f3_4_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bler %r14 +; CHECK-NEXT: .LBB41_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cond = icmp samesign ult i32 %or, -2 + %res = select i1 %cond, i64 %x, i64 5 + ret i64 %res +} + + +; Test-4(f4_1_*/f4_2_*/f4_3_*/f4_4_*). +; TrueVal is const and FalseVal is non-const with mixed patterns involving +; Binary Ops. + +; Check 'add' for (cc != 0). +define i64 @f4_1_1(ptr %a, i64 %y) { +; CHECK-LABEL: f4_1_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bner %r14 +; CHECK-NEXT: .LBB42_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 3 + %res = select i1 %cond, i64 15, i64 %y + ret i64 %res +} + +; Check 'add' for (cc == 1|2). +define i64 @f4_1_2(ptr %a, i64 %y) { +; CHECK-LABEL: f4_1_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: blhr %r14 +; CHECK-NEXT: .LBB43_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 2 + %res = select i1 %cond, i64 15, i64 %y + ret i64 %res +} + +; Check 'add' for (cc == 1|2). +define i64 @f4_1_3(i64 %y, ptr %a) { +; CHECK-LABEL: f4_1_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnlhr %r14 +; CHECK-NEXT: .LBB44_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -3 + %cond.inv = icmp ult i32 %add, -2 + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check 'and' with one operand cc and other select_ccmask(cc !=1). 
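+; The inverse condition selects the register operand, so %y is returned on
+; the taken path (blr, cc == 1) and the constant 15 is materialized on the
+; fall-through.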
+define i64 @f4_2_1(i64 %y, ptr %a) { +; CHECK-LABEL: f4_2_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB45_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpne0 = icmp ne i32 %andcc, 0 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %cmpne3, %cmpne0 + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check 'and' with both operands select_ccmask(cc != 2). +define i64 @f4_2_2(i64 %y, ptr %a) { +; CHECK-LABEL: f4_2_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB46_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %ugt1 = icmp samesign ugt i32 %cc, 1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %ugt1, %cmpne3 + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check 'and/tm' for (cc == 0|2). +define i64 @f4_2_3(ptr %a, i64 %y) { +; CHECK-LABEL: f4_2_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB47_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond = icmp eq i32 %and, 0 + %res = select i1 %cond, i64 15, i64 %y + ret i64 %res +} + +; Check 'and/tm' for (cc == 1|3). +define i64 @f4_2_4(i64 %y, ptr %a) { +; CHECK-LABEL: f4_2_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB48_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond.inv = icmp eq i32 %and, 0 + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check 'icmp' with one operand 'and' and the other 'select_ccmask'(cc != 1).
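+; (Annotation, assuming cc in [0,3]: %trunc below is the low bit of cc, so
+; xor'ing it with cc != 3 is false only for cc == 1, i.e. the expression is
+; the plain cc != 1 mask.)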
+define i64 @f4_2_5(ptr %a, i64 %y) { +; CHECK-LABEL: f4_2_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB49_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond = xor i1 %cmpne3, %trunc + %res = select i1 %cond, i64 15, i64 %y + ret i64 %res +} + + +; Check nested 'xor' cc with select_ccmask(cc != 1). +define i64 @f4_3_1(i64 %y, ptr %a) { +; CHECK-LABEL: f4_3_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB50_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmpeq0 = icmp eq i32 %cc, 0 + %cmpeq2 = icmp eq i32 %cc, 2 + %xor = xor i1 %cmpeq0, %cmpeq2 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = xor i1 %cmpne3, %xor + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=1). +define i64 @f4_3_2(i64 %y, ptr %a) { +; CHECK-LABEL: f4_3_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB51_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpeq3 = icmp eq i32 %cc, 3 + %cond.inv = xor i1 %cmpeq3, %trunc + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc !=2). +define i64 @f4_3_3(i64 %y, ptr %a) { +; CHECK-LABEL: f4_3_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB52_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne0 = icmp ne i32 %cc, 0 + %cond.inv = xor i1 %cmpne0, %trunc + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check 'or' with both operands select_ccmask with TM and ICMP(cc == 1). 
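+; (Annotation, assuming cc in [0,3]: (cc & 1) == 0 covers {0,2} and cc == 3
+; contributes 3, so the 'or' computes cc != 1; it feeds the inverted select
+; arm, hence the select is effectively keyed on cc == 1.)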
+define i64 @f4_4_1(i64 %y, ptr %a) { +; CHECK-LABEL: f4_4_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB53_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpeq0 = icmp eq i32 %andcc, 0 + %cmpeq3 = icmp eq i32 %cc, 3 + %cond.inv = or i1 %cmpeq3, %cmpeq0 + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check 'or' for (cc == 0|1). +define i64 @f4_4_2(i64 %y, ptr %a) { +; CHECK-LABEL: f4_4_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r3), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: bnler %r14 +; CHECK-NEXT: .LBB54_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cond.inv = icmp samesign ugt i32 %or, -3 + %res = select i1 %cond.inv, i64 %y, i64 15 + ret i64 %res +} + +; Check 'or' for (cc == 0|1). +define i64 @f4_4_3(ptr %a, i64 %y) { +; CHECK-LABEL: f4_4_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bler %r14 +; CHECK-NEXT: .LBB55_1: # %entry +; CHECK-NEXT: lgr %r2, %r3 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cond = icmp samesign ult i32 %or, -2 + %res = select i1 %cond, i64 15, i64 %y + ret i64 %res +} + +; Test-5(f5_1_*/f5_2_*/f5_3_*/f5_4_*). +; Both TrueVal and FalseVal are const with mixed patterns involving +; Binary Ops. + + +; Check 'add' for (cc != 0). +define i64 @f5_1_1(ptr %a) { +; CHECK-LABEL: f5_1_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bner %r14 +; CHECK-NEXT: .LBB56_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 3 + %res = select i1 %cond, i64 15, i64 5 + ret i64 %res +} + +; Check 'add' for (cc == 1|2). +define i64 @f5_1_2(ptr %a) { +; CHECK-LABEL: f5_1_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: blhr %r14 +; CHECK-NEXT: .LBB57_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -1 + %cond = icmp ult i32 %add, 2 + %res = select i1 %cond, i64 15, i64 5 + ret i64 %res +} + +; Check 'add' for (cc == 1|2).
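+; (Annotation, assuming cc in [0,3]: cc - 3 runs through
+; {0xFFFFFFFD, 0xFFFFFFFE, 0xFFFFFFFF, 0}, so the unsigned compare against
+; -2 accepts only cc == 0 and cc == 3; with the select arms swapped this
+; realizes the cc == 1|2 mask.)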
+define i64 @f5_1_3(ptr %a) { +; CHECK-LABEL: f5_1_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bnlhr %r14 +; CHECK-NEXT: .LBB58_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %add = add nsw i32 %cc, -3 + %cond.inv = icmp ult i32 %add, -2 + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check 'and' with one operand cc and the other select_ccmask(cc != 1). +define i64 @f5_2_1(ptr %a) { +; CHECK-LABEL: f5_2_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB59_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpne0 = icmp ne i32 %andcc, 0 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %cmpne3, %cmpne0 + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check 'and' with both operands select_ccmask(cc != 2). +define i64 @f5_2_2(ptr %a) { +; CHECK-LABEL: f5_2_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB60_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %ugt1 = icmp samesign ugt i32 %cc, 1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = and i1 %ugt1, %cmpne3 + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check 'and/tm' for (cc == 0|2). +define i64 @f5_2_3(ptr %a) { +; CHECK-LABEL: f5_2_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB61_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond = icmp eq i32 %and, 0 + %res = select i1 %cond, i64 15, i64 5 + ret i64 %res +} + +; Check 'and/tm' for (cc == 1|3).
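+; (Annotation, assuming cc in [0,3]: cc & 1 separates the odd values {1,3}
+; from the even values {0,2}, which is the single bit a 'tm'-style test can
+; examine directly.)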
+define i64 @f5_2_4(ptr %a) { +; CHECK-LABEL: f5_2_4: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB62_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %and = and i32 %cc, 1 + %cond.inv = icmp eq i32 %and, 0 + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check 'icmp' with one operand 'and' and the other 'select_ccmask'(cc != 1). +define i64 @f5_2_5(ptr %a) { +; CHECK-LABEL: f5_2_5: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB63_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne3 = icmp ne i32 %cc, 3 + %cond = xor i1 %cmpne3, %trunc + %res = select i1 %cond, i64 15, i64 5 + ret i64 %res +} + + +; Check nested 'xor' cc with select_ccmask(cc != 1). +define i64 @f5_3_1(ptr %a) { +; CHECK-LABEL: f5_3_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB64_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %cmpeq0 = icmp eq i32 %cc, 0 + %cmpeq2 = icmp eq i32 %cc, 2 + %xor = xor i1 %cmpeq0, %cmpeq2 + %cmpne3 = icmp ne i32 %cc, 3 + %cond.inv = xor i1 %cmpne3, %xor + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc != 1). +define i64 @f5_3_2(ptr %a) { +; CHECK-LABEL: f5_3_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: blr %r14 +; CHECK-NEXT: .LBB65_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpeq3 = icmp eq i32 %cc, 3 + %cond.inv = xor i1 %cmpeq3, %trunc + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check branching on 'tm' and 'xor' with one operand cc and the other +; select_ccmask(cc != 2).
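+; (Annotation, assuming cc in [0,3]: xor'ing cc != 0 with the low bit of cc
+; is true only for cc == 2, so the inverted select below reduces to the
+; cc != 2 mask.)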
+define i64 @f5_3_3(ptr %a) { +; CHECK-LABEL: f5_3_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bhr %r14 +; CHECK-NEXT: .LBB66_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %trunc = trunc i32 %cc to i1 + %cmpne0 = icmp ne i32 %cc, 0 + %cond.inv = xor i1 %cmpne0, %trunc + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check 'or' with both operands select_ccmask with TM and ICMP(cc == 1). +define i64 @f5_4_1(ptr %a) { +; CHECK-LABEL: f5_4_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bnlr %r14 +; CHECK-NEXT: .LBB67_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %andcc = and i32 %cc, 1 + %cmpeq0 = icmp eq i32 %andcc, 0 + %cmpeq3 = icmp eq i32 %cc, 3 + %cond.inv = or i1 %cmpeq3, %cmpeq0 + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check 'or' for (cc == 0|1). +define i64 @f5_4_2(ptr %a) { +; CHECK-LABEL: f5_4_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: bnler %r14 +; CHECK-NEXT: .LBB68_1: # %entry +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cond.inv = icmp samesign ugt i32 %or, -3 + %res = select i1 %cond.inv, i64 5, i64 15 + ret i64 %res +} + +; Check 'or' for (cc == 0|1). +define i64 @f5_4_3(ptr %a) { +; CHECK-LABEL: f5_4_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bler %r14 +; CHECK-NEXT: .LBB69_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %tmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %tmp) + %or = or disjoint i32 %cc, -4 + %cond = icmp samesign ult i32 %or, -2 + %res = select i1 %cond, i64 15, i64 5 + ret i64 %res +} + +; Nested select_ccmask with TrueVal and FalseVal swapped with each other. 
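+; (Annotation, assuming cc in [0,3]: the inner select yields 15 whenever
+; cc != 3, and the outer one keeps that value only when (cc & 1) == 0, so
+; the nest flattens to res = 15 for cc in {0,2} and res = 5 otherwise.)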
+define i64 @f6_1(ptr %a) { +; CHECK-LABEL: f6_1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: #APP +; CHECK-NEXT: alsi 0(%r2), -1 +; CHECK-EMPTY: +; CHECK-NEXT: #NO_APP +; CHECK-NEXT: lghi %r2, 15 +; CHECK-NEXT: bher %r14 +; CHECK-NEXT: .LBB70_1: # %entry +; CHECK-NEXT: lghi %r2, 5 +; CHECK-NEXT: br %r14 +entry: + %cc = tail call i32 asm sideeffect "alsi $1,-1\0A", "={@cc},=*QS,*QS,~{memory}"(ptr elementtype(i32) %a, ptr elementtype(i32) %a) + %cmp = icmp ult i32 %cc, 4 + tail call void @llvm.assume(i1 %cmp) + %andcc = and i32 %cc, 1 + %cmpeq0 = icmp eq i32 %andcc, 0 + %cmpeq3 = icmp eq i32 %cc, 3 + %select = select i1 %cmpeq3, i64 5, i64 15 + %res = select i1 %cmpeq0, i64 %select, i64 5 + ret i64 %res +} + diff --git a/llvm/test/CodeGen/WebAssembly/bulk-memory.ll b/llvm/test/CodeGen/WebAssembly/bulk-memory.ll index ae170d7..d949068 100644 --- a/llvm/test/CodeGen/WebAssembly/bulk-memory.ll +++ b/llvm/test/CodeGen/WebAssembly/bulk-memory.ll @@ -104,6 +104,31 @@ define void @memset_i32(ptr %dest, i8 %val, i32 %len) { ret void } +; CHECK-LABEL: memcpy_0: +; CHECK-NEXT: .functype memcpy_0 (i32, i32) -> () +; CHECK-NEXT: return +define void @memcpy_0(ptr %dest, ptr %src) { + call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 0, i1 0) + ret void +} + +; CHECK-LABEL: memmove_0: +; CHECK-NEXT: .functype memmove_0 (i32, i32) -> () +; CHECK-NEXT: return +define void @memmove_0(ptr %dest, ptr %src) { + call void @llvm.memmove.p0.p0.i32(ptr %dest, ptr %src, i32 0, i1 0) + ret void +} + +; CHECK-LABEL: memset_0: +; NO-BULK-MEM-NOT: memory.fill +; BULK-MEM-NEXT: .functype memset_0 (i32, i32) -> () +; BULK-MEM-NEXT: return +define void @memset_0(ptr %dest, i8 %val) { + call void @llvm.memset.p0.i32(ptr %dest, i8 %val, i32 0, i1 0) + ret void +} + ; CHECK-LABEL: memcpy_1: ; CHECK-NEXT: .functype memcpy_1 (i32, i32) -> () ; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1) @@ -137,14 +162,8 @@ define void @memset_1(ptr %dest, i8 %val) { ; CHECK-LABEL: memcpy_1024: ; NO-BULK-MEM-NOT: memory.copy ; BULK-MEM-NEXT: .functype memcpy_1024 (i32, i32) -> () -; BULK-MEM-NEXT: block ; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: i32.eqz $push[[L1:[0-9]+]]=, $pop[[L0]] -; BULK-MEM-NEXT: br_if 0, $pop[[L1]] -; BULK-MEM-NEXT: i32.const $push[[L2:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L2]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]] ; BULK-MEM-NEXT: return define void @memcpy_1024(ptr %dest, ptr %src) { call void @llvm.memcpy.p0.p0.i32(ptr %dest, ptr %src, i32 1024, i1 0) @@ -154,14 +173,8 @@ define void @memcpy_1024(ptr %dest, ptr %src) { ; CHECK-LABEL: memmove_1024: ; NO-BULK-MEM-NOT: memory.copy ; BULK-MEM-NEXT: .functype memmove_1024 (i32, i32) -> () -; BULK-MEM-NEXT: block ; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: i32.eqz $push[[L1:[0-9]+]]=, $pop[[L0]] -; BULK-MEM-NEXT: br_if 0, $pop[[L1]] -; BULK-MEM-NEXT: i32.const $push[[L2:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L2]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]] ; BULK-MEM-NEXT: return define void @memmove_1024(ptr %dest, ptr %src) { call void @llvm.memmove.p0.p0.i32(ptr %dest, ptr %src, i32 1024, i1 0) @@ -171,14 +184,8 @@ define void @memmove_1024(ptr %dest, ptr %src) { ; CHECK-LABEL: memset_1024: ; NO-BULK-MEM-NOT: memory.fill ; BULK-MEM-NEXT: .functype memset_1024 (i32, i32) -> () -; BULK-MEM-NEXT: block ; 
BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: i32.eqz $push[[L1:[0-9]+]]=, $pop[[L0]] -; BULK-MEM-NEXT: br_if 0, $pop[[L1]] -; BULK-MEM-NEXT: i32.const $push[[L2:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L2]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L0]] ; BULK-MEM-NEXT: return define void @memset_1024(ptr %dest, i8 %val) { call void @llvm.memset.p0.i32(ptr %dest, i8 %val, i32 1024, i1 0) @@ -201,17 +208,11 @@ define void @memset_1024(ptr %dest, i8 %val) { ; BULK-MEM-NEXT: .functype memcpy_alloca_src (i32) -> () ; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer ; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112 -; BULK-MEM-NEXT: i32.sub $[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 100 -; BULK-MEM-NEXT: i32.eqz $push[[L4:[0-9]+]]=, $pop[[L3]] -; BULK-MEM-NEXT: br_if 0, $pop[[L4]] -; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 12 -; BULK-MEM-NEXT: i32.add $push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]] -; BULK-MEM-NEXT: i32.const $push[[L7:[0-9]+]]=, 100 -; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L6]], $pop[[L7]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] +; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12 +; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100 +; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]] ; BULK-MEM-NEXT: return define void @memcpy_alloca_src(ptr %dst) { %a = alloca [100 x i8] @@ -224,17 +225,11 @@ define void @memcpy_alloca_src(ptr %dst) { ; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i32) -> () ; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer ; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112 -; BULK-MEM-NEXT: i32.sub $[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 100 -; BULK-MEM-NEXT: i32.eqz $push[[L4:[0-9]+]]=, $pop[[L3]] -; BULK-MEM-NEXT: br_if 0, $pop[[L4]] -; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 12 -; BULK-MEM-NEXT: i32.add $push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]] -; BULK-MEM-NEXT: i32.const $push[[L7:[0-9]+]]=, 100 -; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L6]], $0, $pop[[L7]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] +; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12 +; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100 +; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]] ; BULK-MEM-NEXT: return define void @memcpy_alloca_dst(ptr %src) { %a = alloca [100 x i8] @@ -247,17 +242,11 @@ define void @memcpy_alloca_dst(ptr %src) { ; BULK-MEM-NEXT: .functype memset_alloca (i32) -> () ; BULK-MEM-NEXT: global.get $push[[L0:[0-9]+]]=, __stack_pointer ; BULK-MEM-NEXT: i32.const $push[[L1:[0-9]+]]=, 112 -; BULK-MEM-NEXT: i32.sub $1=, $pop[[L0]], $pop[[L1]] -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i32.const $push[[L2:[0-9]+]]=, 100 -; BULK-MEM-NEXT: i32.eqz $push[[L3:[0-9]+]]=, $pop[[L2]] -; BULK-MEM-NEXT: br_if 0, $pop[[L3]] -; BULK-MEM-NEXT: i32.const $push[[L4:[0-9]+]]=, 12 -; BULK-MEM-NEXT: i32.add $push[[L5:[0-9]+]]=, $1, $pop[[L4]] -; BULK-MEM-NEXT: i32.const $push[[L6:[0-9]+]]=, 100 -; BULK-MEM-NEXT: memory.fill 0, $pop[[L5]], $0, $pop[[L6]] -; BULK-MEM-NEXT: 
.LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: i32.sub $push[[L2:[0-9]+]]=, $pop[[L0]], $pop[[L1]] +; BULK-MEM-NEXT: i32.const $push[[L3:[0-9]+]]=, 12 +; BULK-MEM-NEXT: i32.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i32.const $push[[L5:[0-9]+]]=, 100 +; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]] ; BULK-MEM-NEXT: return define void @memset_alloca(i8 %val) { %a = alloca [100 x i8] diff --git a/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll b/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll index 0cf8493..d0206a3 100644 --- a/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll +++ b/llvm/test/CodeGen/WebAssembly/bulk-memory64.ll @@ -110,6 +110,31 @@ define void @memset_i32(ptr %dest, i8 %val, i64 %len) { ret void } +; CHECK-LABEL: memcpy_0: +; CHECK-NEXT: .functype memcpy_0 (i64, i64) -> () +; CHECK-NEXT: return +define void @memcpy_0(ptr %dest, ptr %src) { + call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 0, i1 0) + ret void +} + +; CHECK-LABEL: memmove_0: +; CHECK-NEXT: .functype memmove_0 (i64, i64) -> () +; CHECK-NEXT: return +define void @memmove_0(ptr %dest, ptr %src) { + call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 0, i1 0) + ret void +} + +; CHECK-LABEL: memset_0: +; NO-BULK-MEM-NOT: memory.fill +; BULK-MEM-NEXT: .functype memset_0 (i64, i32) -> () +; BULK-MEM-NEXT: return +define void @memset_0(ptr %dest, i8 %val) { + call void @llvm.memset.p0.i64(ptr %dest, i8 %val, i64 0, i1 0) + ret void +} + ; CHECK-LABEL: memcpy_1: ; CHECK-NEXT: .functype memcpy_1 (i64, i64) -> () ; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1) @@ -143,14 +168,8 @@ define void @memset_1(ptr %dest, i8 %val) { ; CHECK-LABEL: memcpy_1024: ; NO-BULK-MEM-NOT: memory.copy ; BULK-MEM-NEXT: .functype memcpy_1024 (i64, i64) -> () -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: i64.eqz $push0=, $pop[[L1]] -; BULK-MEM-NEXT: br_if 0, $pop0 ; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024 ; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block ; BULK-MEM-NEXT: return define void @memcpy_1024(ptr %dest, ptr %src) { call void @llvm.memcpy.p0.p0.i64(ptr %dest, ptr %src, i64 1024, i1 0) @@ -160,14 +179,8 @@ define void @memcpy_1024(ptr %dest, ptr %src) { ; CHECK-LABEL: memmove_1024: ; NO-BULK-MEM-NOT: memory.copy ; BULK-MEM-NEXT: .functype memmove_1024 (i64, i64) -> () -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: i64.eqz $push0=, $pop[[L1]] -; BULK-MEM-NEXT: br_if 0, $pop0 ; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024 ; BULK-MEM-NEXT: memory.copy 0, 0, $0, $1, $pop[[L0]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block ; BULK-MEM-NEXT: return define void @memmove_1024(ptr %dest, ptr %src) { call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 1024, i1 0) @@ -177,14 +190,8 @@ define void @memmove_1024(ptr %dest, ptr %src) { ; CHECK-LABEL: memset_1024: ; NO-BULK-MEM-NOT: memory.fill ; BULK-MEM-NEXT: .functype memset_1024 (i64, i32) -> () -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i64.const $push[[L1:[0-9]+]]=, 1024 -; BULK-MEM-NEXT: i64.eqz $push0=, $pop[[L1]] -; BULK-MEM-NEXT: br_if 0, $pop0 ; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 1024 ; BULK-MEM-NEXT: memory.fill 0, $0, $1, $pop[[L0]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block ; BULK-MEM-NEXT: return define void @memset_1024(ptr %dest, i8 %val) { call void @llvm.memset.p0.i64(ptr %dest, i8 %val, i64 1024, i1 0) 
@@ -207,17 +214,11 @@ define void @memset_1024(ptr %dest, i8 %val) { ; BULK-MEM-NEXT: .functype memcpy_alloca_src (i64) -> () ; BULK-MEM-NEXT: global.get $push[[L1:[0-9]+]]=, __stack_pointer ; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 112 -; BULK-MEM-NEXT: i64.sub $[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]] -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 100 -; BULK-MEM-NEXT: i64.eqz $push[[L4:[0-9]+]]=, $pop[[L3]] -; BULK-MEM-NEXT: br_if 0, $pop[[L4]] -; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 12 -; BULK-MEM-NEXT: i64.add $push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]] -; BULK-MEM-NEXT: i64.const $push[[L7:[0-9]+]]=, 100 -; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L6]], $pop[[L7]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]] +; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12 +; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100 +; BULK-MEM-NEXT: memory.copy 0, 0, $0, $pop[[L4]], $pop[[L5]] ; BULK-MEM-NEXT: return define void @memcpy_alloca_src(ptr %dst) { %a = alloca [100 x i8] @@ -230,17 +231,11 @@ define void @memcpy_alloca_src(ptr %dst) { ; BULK-MEM-NEXT: .functype memcpy_alloca_dst (i64) -> () ; BULK-MEM-NEXT: global.get $push[[L1:[0-9]+]]=, __stack_pointer ; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 112 -; BULK-MEM-NEXT: i64.sub $[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]] -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 100 -; BULK-MEM-NEXT: i64.eqz $push[[L4:[0-9]+]]=, $pop[[L3]] -; BULK-MEM-NEXT: br_if 0, $pop[[L4]] -; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 12 -; BULK-MEM-NEXT: i64.add $push[[L6:[0-9]+]]=, $[[L2]], $pop[[L5]] -; BULK-MEM-NEXT: i64.const $push[[L7:[0-9]+]]=, 100 -; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L6]], $0, $pop[[L7]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]] +; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12 +; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100 +; BULK-MEM-NEXT: memory.copy 0, 0, $pop[[L4]], $0, $pop[[L5]] ; BULK-MEM-NEXT: return define void @memcpy_alloca_dst(ptr %src) { %a = alloca [100 x i8] @@ -253,17 +248,11 @@ define void @memcpy_alloca_dst(ptr %src) { ; BULK-MEM-NEXT: .functype memset_alloca (i32) -> () ; BULK-MEM-NEXT: global.get $push[[L1:[0-9]+]]=, __stack_pointer ; BULK-MEM-NEXT: i64.const $push[[L0:[0-9]+]]=, 112 -; BULK-MEM-NEXT: i64.sub $1=, $pop[[L1]], $pop[[L0]] -; BULK-MEM-NEXT: block -; BULK-MEM-NEXT: i64.const $push[[L2:[0-9]+]]=, 100 -; BULK-MEM-NEXT: i64.eqz $push[[L3:[0-9]+]]=, $pop[[L2]] -; BULK-MEM-NEXT: br_if 0, $pop[[L3]] -; BULK-MEM-NEXT: i64.const $push[[L4:[0-9]+]]=, 12 -; BULK-MEM-NEXT: i64.add $push[[L5:[0-9]+]]=, $1, $pop[[L4]] -; BULK-MEM-NEXT: i64.const $push[[L6:[0-9]+]]=, 100 -; BULK-MEM-NEXT: memory.fill 0, $pop[[L5]], $0, $pop[[L6]] -; BULK-MEM-NEXT: .LBB{{.*}}: -; BULK-MEM-NEXT: end_block +; BULK-MEM-NEXT: i64.sub $push[[L2:[0-9]+]]=, $pop[[L1]], $pop[[L0]] +; BULK-MEM-NEXT: i64.const $push[[L3:[0-9]+]]=, 12 +; BULK-MEM-NEXT: i64.add $push[[L4:[0-9]+]]=, $pop[[L2]], $pop[[L3]] +; BULK-MEM-NEXT: i64.const $push[[L5:[0-9]+]]=, 100 +; BULK-MEM-NEXT: memory.fill 0, $pop[[L4]], $0, $pop[[L5]] ; BULK-MEM-NEXT: return define void @memset_alloca(i8 %val) { %a = alloca [100 x i8] diff --git a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll 
b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll index 28b4541..7bdc4e1 100644 --- a/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll +++ b/llvm/test/CodeGen/X86/2007-08-09-IllegalX86-64Asm.ll @@ -44,7 +44,7 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi ; CHECK-NEXT: callq __ubyte_convert_to_ctype ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: js LBB0_6 +; CHECK-NEXT: js LBB0_4 ; CHECK-NEXT: ## %bb.1: ## %cond_next.i ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsi ; CHECK-NEXT: movq %rbx, %rdi @@ -53,84 +53,81 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: sarl $31, %ecx ; CHECK-NEXT: andl %eax, %ecx ; CHECK-NEXT: cmpl $-2, %ecx -; CHECK-NEXT: je LBB0_10 +; CHECK-NEXT: je LBB0_8 ; CHECK-NEXT: ## %bb.2: ## %cond_next.i ; CHECK-NEXT: cmpl $-1, %ecx -; CHECK-NEXT: jne LBB0_3 -; CHECK-NEXT: LBB0_8: ## %bb4 +; CHECK-NEXT: jne LBB0_6 +; CHECK-NEXT: LBB0_3: ## %bb4 ; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %rax ; CHECK-NEXT: movq (%rax), %rax ; CHECK-NEXT: movq 16(%rax), %rax -; CHECK-NEXT: jmp LBB0_9 -; CHECK-NEXT: LBB0_6: ## %_ubyte_convert2_to_ctypes.exit +; CHECK-NEXT: jmp LBB0_10 +; CHECK-NEXT: LBB0_4: ## %_ubyte_convert2_to_ctypes.exit ; CHECK-NEXT: cmpl $-2, %eax -; CHECK-NEXT: je LBB0_10 -; CHECK-NEXT: ## %bb.7: ## %_ubyte_convert2_to_ctypes.exit -; CHECK-NEXT: cmpl $-1, %eax ; CHECK-NEXT: je LBB0_8 -; CHECK-NEXT: LBB0_3: ## %bb35 +; CHECK-NEXT: ## %bb.5: ## %_ubyte_convert2_to_ctypes.exit +; CHECK-NEXT: cmpl $-1, %eax +; CHECK-NEXT: je LBB0_3 +; CHECK-NEXT: LBB0_6: ## %bb35 ; CHECK-NEXT: movq _PyUFunc_API@GOTPCREL(%rip), %r14 ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: callq *216(%rax) ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: je LBB0_4 -; CHECK-NEXT: ## %bb.12: ## %cond_false.i -; CHECK-NEXT: setne %dil +; CHECK-NEXT: je LBB0_11 +; CHECK-NEXT: ## %bb.7: ## %cond_false.i ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi ; CHECK-NEXT: movzbl %sil, %ecx ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: divb %dl ; CHECK-NEXT: movl %eax, %r15d ; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: setne %al -; CHECK-NEXT: testb %dil, %al -; CHECK-NEXT: jne LBB0_5 -; CHECK-NEXT: LBB0_13: ## %cond_true.i200 -; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: jne LBB0_15 -; CHECK-NEXT: ## %bb.14: ## %cond_true14.i -; CHECK-NEXT: movl $4, %edi -; CHECK-NEXT: callq _feraiseexcept -; CHECK-NEXT: LBB0_15: ## %ubyte_ctype_remainder.exit -; CHECK-NEXT: xorl %ebx, %ebx -; CHECK-NEXT: jmp LBB0_16 -; CHECK-NEXT: LBB0_10: ## %bb17 +; CHECK-NEXT: jne LBB0_12 +; CHECK-NEXT: jmp LBB0_14 +; CHECK-NEXT: LBB0_8: ## %bb17 ; CHECK-NEXT: callq _PyErr_Occurred ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: jne LBB0_23 -; CHECK-NEXT: ## %bb.11: ## %cond_next +; CHECK-NEXT: jne LBB0_27 +; CHECK-NEXT: ## %bb.9: ## %cond_next ; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %rax ; CHECK-NEXT: movq (%rax), %rax ; CHECK-NEXT: movq 80(%rax), %rax -; CHECK-NEXT: LBB0_9: ## %bb4 +; CHECK-NEXT: LBB0_10: ## %bb4 ; CHECK-NEXT: movq 96(%rax), %rax ; CHECK-NEXT: movq %r14, %rdi ; CHECK-NEXT: movq %rbx, %rsi ; CHECK-NEXT: callq *40(%rax) -; CHECK-NEXT: jmp LBB0_24 -; CHECK-NEXT: LBB0_4: ## %cond_true.i +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_11: ## %cond_true.i ; CHECK-NEXT: movl $4, %edi ; CHECK-NEXT: callq _feraiseexcept ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %edx ; CHECK-NEXT: movzbl {{[0-9]+}}(%rsp), %esi +; CHECK-NEXT: xorl %r15d, %r15d ; CHECK-NEXT: testb %sil, %sil -; CHECK-NEXT: sete %al +; CHECK-NEXT: je LBB0_14 +; CHECK-NEXT: 
LBB0_12: ## %cond_false.i ; CHECK-NEXT: testb %dl, %dl -; CHECK-NEXT: sete %cl -; CHECK-NEXT: xorl %r15d, %r15d -; CHECK-NEXT: orb %al, %cl -; CHECK-NEXT: jne LBB0_13 -; CHECK-NEXT: LBB0_5: ## %cond_next17.i +; CHECK-NEXT: je LBB0_14 +; CHECK-NEXT: ## %bb.13: ## %cond_next17.i ; CHECK-NEXT: movzbl %sil, %eax ; CHECK-NEXT: divb %dl ; CHECK-NEXT: movzbl %ah, %ebx -; CHECK-NEXT: LBB0_16: ## %ubyte_ctype_remainder.exit +; CHECK-NEXT: jmp LBB0_18 +; CHECK-NEXT: LBB0_14: ## %cond_true.i200 +; CHECK-NEXT: testb %dl, %dl +; CHECK-NEXT: jne LBB0_17 +; CHECK-NEXT: ## %bb.16: ## %cond_true14.i +; CHECK-NEXT: movl $4, %edi +; CHECK-NEXT: callq _feraiseexcept +; CHECK-NEXT: LBB0_17: ## %ubyte_ctype_remainder.exit +; CHECK-NEXT: xorl %ebx, %ebx +; CHECK-NEXT: LBB0_18: ## %ubyte_ctype_remainder.exit ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: callq *224(%rax) ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: je LBB0_19 -; CHECK-NEXT: ## %bb.17: ## %cond_true61 +; CHECK-NEXT: je LBB0_21 +; CHECK-NEXT: ## %bb.19: ## %cond_true61 ; CHECK-NEXT: movl %eax, %ebp ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: movq _.str5@GOTPCREL(%rip), %rdi @@ -139,8 +136,8 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: callq *200(%rax) ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: js LBB0_23 -; CHECK-NEXT: ## %bb.18: ## %cond_next73 +; CHECK-NEXT: js LBB0_27 +; CHECK-NEXT: ## %bb.20: ## %cond_next73 ; CHECK-NEXT: movl $1, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movq (%r14), %rax ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rsi @@ -149,13 +146,13 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: movl %ebp, %edx ; CHECK-NEXT: callq *232(%rax) ; CHECK-NEXT: testl %eax, %eax -; CHECK-NEXT: jne LBB0_23 -; CHECK-NEXT: LBB0_19: ## %cond_next89 +; CHECK-NEXT: jne LBB0_27 +; CHECK-NEXT: LBB0_21: ## %cond_next89 ; CHECK-NEXT: movl $2, %edi ; CHECK-NEXT: callq _PyTuple_New ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: je LBB0_23 -; CHECK-NEXT: ## %bb.20: ## %cond_next97 +; CHECK-NEXT: je LBB0_27 +; CHECK-NEXT: ## %bb.22: ## %cond_next97 ; CHECK-NEXT: movq %rax, %r14 ; CHECK-NEXT: movq _PyArray_API@GOTPCREL(%rip), %r12 ; CHECK-NEXT: movq (%r12), %rax @@ -163,8 +160,8 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: callq *304(%rdi) ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: je LBB0_21 -; CHECK-NEXT: ## %bb.25: ## %cond_next135 +; CHECK-NEXT: je LBB0_25 +; CHECK-NEXT: ## %bb.23: ## %cond_next135 ; CHECK-NEXT: movb %r15b, 16(%rax) ; CHECK-NEXT: movq %rax, 24(%r14) ; CHECK-NEXT: movq (%r12), %rax @@ -172,22 +169,22 @@ define ptr @ubyte_divmod(ptr %a, ptr %b) { ; CHECK-NEXT: xorl %esi, %esi ; CHECK-NEXT: callq *304(%rdi) ; CHECK-NEXT: testq %rax, %rax -; CHECK-NEXT: je LBB0_21 -; CHECK-NEXT: ## %bb.26: ## %cond_next182 +; CHECK-NEXT: je LBB0_25 +; CHECK-NEXT: ## %bb.24: ## %cond_next182 ; CHECK-NEXT: movb %bl, 16(%rax) ; CHECK-NEXT: movq %rax, 32(%r14) ; CHECK-NEXT: movq %r14, %rax -; CHECK-NEXT: jmp LBB0_24 -; CHECK-NEXT: LBB0_21: ## %cond_true113 +; CHECK-NEXT: jmp LBB0_28 +; CHECK-NEXT: LBB0_25: ## %cond_true113 ; CHECK-NEXT: decq (%r14) -; CHECK-NEXT: jne LBB0_23 -; CHECK-NEXT: ## %bb.22: ## %cond_true126 +; CHECK-NEXT: jne LBB0_27 +; CHECK-NEXT: ## %bb.26: ## %cond_true126 ; CHECK-NEXT: movq 8(%r14), %rax ; CHECK-NEXT: movq %r14, %rdi ; CHECK-NEXT: callq *48(%rax) -; CHECK-NEXT: LBB0_23: ## %UnifiedReturnBlock +; CHECK-NEXT: LBB0_27: ## %UnifiedReturnBlock ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: LBB0_24: ## %UnifiedReturnBlock +; CHECK-NEXT: 
LBB0_28: ## %UnifiedReturnBlock ; CHECK-NEXT: addq $32, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %r12 diff --git a/llvm/test/CodeGen/X86/cpus-intel.ll b/llvm/test/CodeGen/X86/cpus-intel.ll index 40c38c2..71253c8 100644 --- a/llvm/test/CodeGen/X86/cpus-intel.ll +++ b/llvm/test/CodeGen/X86/cpus-intel.ll @@ -38,6 +38,7 @@ ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=lunarlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=gracemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=pantherlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=wildcatlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=clearwaterforest 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=diamondrapids 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty @@ -104,6 +105,7 @@ ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=lunarlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=gracemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=pantherlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty +; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=wildcatlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=clearwaterforest 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=diamondrapids 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty diff --git a/llvm/test/CodeGen/X86/isel-fpclass.ll b/llvm/test/CodeGen/X86/isel-fpclass.ll index df04b67..c2b7068 100644 --- a/llvm/test/CodeGen/X86/isel-fpclass.ll +++ b/llvm/test/CodeGen/X86/isel-fpclass.ll @@ -1,10 +1,10 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefixes=X86 +; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefixes=X86,X86-SDAGISEL ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefixes=X64,X64-SDAGISEL ; RUN: llc < %s -mtriple=i686-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X86-FASTISEL ; RUN: llc < %s -mtriple=x86_64-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X64,X64-FASTISEL -; RUN: llc < %s -mtriple=i686-linux -global-isel -global-isel-abort=2 | FileCheck %s -check-prefixes=X86 -; RUN: llc < %s -mtriple=x86_64-linux -global-isel -global-isel-abort=2 | FileCheck %s -check-prefixes=X64,X64-GISEL +; RUN: llc < %s -mtriple=i686-linux -global-isel -global-isel-abort=1 | FileCheck %s -check-prefixes=X86,X86-GISEL +; RUN: llc < %s -mtriple=x86_64-linux -global-isel -global-isel-abort=1 | FileCheck %s -check-prefixes=X64-GISEL define i1 @isnone_f(float %x) nounwind { ; X86-LABEL: isnone_f: @@ -23,6 +23,11 @@ define i1 @isnone_f(float %x) nounwind { ; X86-FASTISEL-NEXT: fstp %st(0) ; X86-FASTISEL-NEXT: xorl %eax, %eax ; X86-FASTISEL-NEXT: retl +; +; X64-GISEL-LABEL: 
isnone_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: xorl %eax, %eax +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 0) ret i1 %0 @@ -45,22 +50,27 @@ define i1 @isany_f(float %x) nounwind { ; X86-FASTISEL-NEXT: fstp %st(0) ; X86-FASTISEL-NEXT: movb $1, %al ; X86-FASTISEL-NEXT: retl +; +; X64-GISEL-LABEL: isany_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movb $1, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1023) ret i1 %0 } define i1 @issignaling_f(float %x) nounwind { -; X86-LABEL: issignaling_f: -; X86: # %bb.0: -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; X86-NEXT: setl %cl -; X86-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 -; X86-NEXT: setge %al -; X86-NEXT: andb %cl, %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: issignaling_f: +; X86-SDAGISEL: # %bb.0: +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-SDAGISEL-NEXT: setl %cl +; X86-SDAGISEL-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001 +; X86-SDAGISEL-NEXT: setge %al +; X86-SDAGISEL-NEXT: andb %cl, %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: issignaling_f: ; X64: # %bb.0: @@ -87,18 +97,44 @@ define i1 @issignaling_f(float %x) nounwind { ; X86-FASTISEL-NEXT: andb %cl, %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: issignaling_f: +; X86-GISEL: # %bb.0: +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: seta %dl +; X86-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-GISEL-NEXT: setb %al +; X86-GISEL-NEXT: andb %dl, %al +; X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: issignaling_f: +; X64-GISEL: # %bb.0: +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: seta %dl +; X64-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-GISEL-NEXT: setb %al +; X64-GISEL-NEXT: andb %dl, %al +; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq %a0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1) ; "snan" ret i1 %a0 } define i1 @isquiet_f(float %x) nounwind { -; X86-LABEL: isquiet_f: -; X86: # %bb.0: # %entry -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; X86-NEXT: setge %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: isquiet_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-SDAGISEL-NEXT: setge %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: isquiet_f: ; X64: # %bb.0: # %entry @@ -119,19 +155,39 @@ define i1 @issignaling_f(float %x) nounwind { ; X86-FASTISEL-NEXT: setge %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: isquiet_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-GISEL-NEXT: xorl 
%ecx, %ecx +; X86-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-GISEL-NEXT: setae %al +; X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: isquiet_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-GISEL-NEXT: setae %al +; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 2) ; "qnan" ret i1 %0 } define i1 @not_isquiet_f(float %x) nounwind { -; X86-LABEL: not_isquiet_f: -; X86: # %bb.0: # %entry -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 -; X86-NEXT: setl %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: not_isquiet_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-SDAGISEL-NEXT: setl %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: not_isquiet_f: ; X64: # %bb.0: # %entry @@ -152,19 +208,57 @@ define i1 @not_isquiet_f(float %x) nounwind { ; X86-FASTISEL-NEXT: setl %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: not_isquiet_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: setb %dl +; X86-GISEL-NEXT: orb %cl, %dl +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: sete %cl +; X86-GISEL-NEXT: orb %dl, %cl +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: seta %dl +; X86-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X86-GISEL-NEXT: setb %al +; X86-GISEL-NEXT: andb %dl, %al +; X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: not_isquiet_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: setb %dl +; X64-GISEL-NEXT: orb %cl, %dl +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: sete %cl +; X64-GISEL-NEXT: orb %dl, %cl +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: seta %dl +; X64-GISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000 +; X64-GISEL-NEXT: setb %al +; X64-GISEL-NEXT: andb %dl, %al +; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1021) ; ~"qnan" ret i1 %0 } define i1 @isinf_f(float %x) nounwind { -; X86-LABEL: isinf_f: -; X86: # %bb.0: # %entry -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; X86-NEXT: sete %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: isinf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: sete %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: isinf_f: ; X64: # %bb.0: # %entry @@ 
-185,19 +279,39 @@ define i1 @isinf_f(float %x) nounwind { ; X86-FASTISEL-NEXT: sete %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: isinf_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: sete %al +; X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: isinf_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: sete %al +; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf" ret i1 %0 } define i1 @not_isinf_f(float %x) nounwind { -; X86-LABEL: not_isinf_f: -; X86: # %bb.0: # %entry -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; X86-NEXT: setne %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: not_isinf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: setne %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: not_isinf_f: ; X64: # %bb.0: # %entry @@ -218,17 +332,43 @@ define i1 @not_isinf_f(float %x) nounwind { ; X86-FASTISEL-NEXT: setne %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: not_isinf_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: setb %dl +; X86-GISEL-NEXT: orb %cl, %dl +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: seta %al +; X86-GISEL-NEXT: orb %dl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: not_isinf_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: setb %dl +; X64-GISEL-NEXT: orb %cl, %dl +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: seta %al +; X64-GISEL-NEXT: orb %dl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 507) ; ~0x204 = "~inf" ret i1 %0 } define i1 @is_plus_inf_f(float %x) nounwind { -; X86-LABEL: is_plus_inf_f: -; X86: # %bb.0: # %entry -; X86-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 -; X86-NEXT: sete %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: is_plus_inf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: sete %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: is_plus_inf_f: ; X64: # %bb.0: # %entry @@ -246,17 +386,34 @@ define i1 @is_plus_inf_f(float %x) nounwind { ; X86-FASTISEL-NEXT: sete %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: is_plus_inf_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, 
{{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-GISEL-NEXT: sete %al +; X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: is_plus_inf_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: sete %al +; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 512) ; 0x200 = "+inf" ret i1 %0 } define i1 @is_minus_inf_f(float %x) nounwind { -; X86-LABEL: is_minus_inf_f: -; X86: # %bb.0: # %entry -; X86-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 -; X86-NEXT: sete %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: is_minus_inf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 +; X86-SDAGISEL-NEXT: sete %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: is_minus_inf_f: ; X64: # %bb.0: # %entry @@ -274,17 +431,34 @@ define i1 @is_minus_inf_f(float %x) nounwind { ; X86-FASTISEL-NEXT: sete %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: is_minus_inf_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 +; X86-GISEL-NEXT: sete %al +; X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: is_minus_inf_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000 +; X64-GISEL-NEXT: sete %al +; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 4) ; "-inf" ret i1 %0 } define i1 @not_is_minus_inf_f(float %x) nounwind { -; X86-LABEL: not_is_minus_inf_f: -; X86: # %bb.0: # %entry -; X86-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 -; X86-NEXT: setne %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: not_is_minus_inf_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000 +; X86-SDAGISEL-NEXT: setne %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: not_is_minus_inf_f: ; X64: # %bb.0: # %entry @@ -302,19 +476,55 @@ define i1 @not_is_minus_inf_f(float %x) nounwind { ; X86-FASTISEL-NEXT: setne %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: not_is_minus_inf_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: pushl %ebx +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: movl %eax, %ecx +; X86-GISEL-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF +; X86-GISEL-NEXT: xorl %edx, %edx +; X86-GISEL-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000 +; X86-GISEL-NEXT: setb %bl +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: sete %ah +; X86-GISEL-NEXT: orb %dl, %ah +; X86-GISEL-NEXT: orb %bl, %ah +; X86-GISEL-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000 +; X86-GISEL-NEXT: seta %al +; X86-GISEL-NEXT: orb %ah, %al +; X86-GISEL-NEXT: popl %ebx +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: not_is_minus_inf_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: movl %eax, %ecx +; X64-GISEL-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %edx, %edx +; X64-GISEL-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000 +; X64-GISEL-NEXT: setb %sil +; X64-GISEL-NEXT: orb %dl, %sil +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: sete 
%dl +; X64-GISEL-NEXT: cmpl $2139095040, %ecx # imm = 0x7F800000 +; X64-GISEL-NEXT: seta %al +; X64-GISEL-NEXT: orb %dl, %al +; X64-GISEL-NEXT: orb %sil, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1019) ; ~"-inf" ret i1 %0 } define i1 @isfinite_f(float %x) nounwind { -; X86-LABEL: isfinite_f: -; X86: # %bb.0: # %entry -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; X86-NEXT: setl %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: isfinite_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: setl %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: isfinite_f: ; X64: # %bb.0: # %entry @@ -335,19 +545,39 @@ define i1 @isfinite_f(float %x) nounwind { ; X86-FASTISEL-NEXT: setl %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: isfinite_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: setb %al +; X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: isfinite_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: setb %al +; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite" ret i1 %0 } define i1 @not_isfinite_f(float %x) nounwind { -; X86-LABEL: not_isfinite_f: -; X86: # %bb.0: # %entry -; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 -; X86-NEXT: setge %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: not_isfinite_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: setge %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: not_isfinite_f: ; X64: # %bb.0: # %entry @@ -368,17 +598,43 @@ define i1 @not_isfinite_f(float %x) nounwind { ; X86-FASTISEL-NEXT: setge %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: not_isfinite_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: sete %dl +; X86-GISEL-NEXT: orb %cl, %dl +; X86-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X86-GISEL-NEXT: seta %al +; X86-GISEL-NEXT: orb %dl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: not_isfinite_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: sete %dl +; X64-GISEL-NEXT: orb %cl, %dl +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; 
X64-GISEL-NEXT: seta %al +; X64-GISEL-NEXT: orb %dl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 519) ; ~0x1f8 = "~finite" ret i1 %0 } define i1 @is_plus_finite_f(float %x) nounwind { -; X86-LABEL: is_plus_finite_f: -; X86: # %bb.0: # %entry -; X86-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 -; X86-NEXT: setb %al -; X86-NEXT: retl +; X86-SDAGISEL-LABEL: is_plus_finite_f: +; X86-SDAGISEL: # %bb.0: # %entry +; X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-SDAGISEL-NEXT: setb %al +; X86-SDAGISEL-NEXT: retl ; ; X64-LABEL: is_plus_finite_f: ; X64: # %bb.0: # %entry @@ -396,6 +652,23 @@ define i1 @is_plus_finite_f(float %x) nounwind { ; X86-FASTISEL-NEXT: setb %al ; X86-FASTISEL-NEXT: popl %ecx ; X86-FASTISEL-NEXT: retl +; +; X86-GISEL-LABEL: is_plus_finite_f: +; X86-GISEL: # %bb.0: # %entry +; X86-GISEL-NEXT: xorl %ecx, %ecx +; X86-GISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000 +; X86-GISEL-NEXT: setb %al +; X86-GISEL-NEXT: orb %cl, %al +; X86-GISEL-NEXT: retl +; +; X64-GISEL-LABEL: is_plus_finite_f: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: xorl %ecx, %ecx +; X64-GISEL-NEXT: movd %xmm0, %eax +; X64-GISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000 +; X64-GISEL-NEXT: setb %al +; X64-GISEL-NEXT: orb %cl, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 448) ; 0x1c0 = "+finite" ret i1 %0 @@ -418,6 +691,11 @@ define i1 @isnone_d(double %x) nounwind { ; X86-FASTISEL-NEXT: fstp %st(0) ; X86-FASTISEL-NEXT: xorl %eax, %eax ; X86-FASTISEL-NEXT: retl +; +; X64-GISEL-LABEL: isnone_d: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: xorl %eax, %eax +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 0) ret i1 %0 @@ -440,6 +718,11 @@ define i1 @isany_d(double %x) nounwind { ; X86-FASTISEL-NEXT: fstp %st(0) ; X86-FASTISEL-NEXT: movb $1, %al ; X86-FASTISEL-NEXT: retl +; +; X64-GISEL-LABEL: isany_d: +; X64-GISEL: # %bb.0: # %entry +; X64-GISEL-NEXT: movb $1, %al +; X64-GISEL-NEXT: retq entry: %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 1023) ret i1 %0 diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll index 4cde581..caec02e 100644 --- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll +++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll @@ -4765,6 +4765,66 @@ define void @scaleidx_scatter_outofrange(<8 x float> %value, ptr %base, <8 x i32 } declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32 immarg, <8 x i1>) +define <16 x i32> @pr163023_sext(ptr %a0, <16 x i32> %a1) { +; X64-LABEL: pr163023_sext: +; X64: # %bb.0: +; X64-NEXT: kxnorw %k0, %k0, %k1 +; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X64-NEXT: vpgatherdd (%rdi,%zmm0), %zmm1 {%k1} +; X64-NEXT: vmovdqa64 %zmm1, %zmm0 +; X64-NEXT: retq +; +; X86-LABEL: pr163023_sext: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kxnorw %k0, %k0, %k1 +; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X86-NEXT: vpgatherdd (%eax,%zmm0), %zmm1 {%k1} +; X86-NEXT: vmovdqa64 %zmm1, %zmm0 +; X86-NEXT: retl + %addr.p = ptrtoint ptr %a0 to i64 + %addr.v = insertelement <1 x i64> poison, i64 %addr.p, i64 0 + %addr.splat = shufflevector <1 x i64> %addr.v, <1 x i64> poison, <16 x i32> zeroinitializer + %ofs = sext <16 x i32> %a1 to <16 x i64> + %addr = add nuw <16 x i64> %addr.splat, %ofs + %ptr = inttoptr <16 x i64> %addr to <16 x ptr> + %gather = call <16 x i32> 
@llvm.masked.gather.v16i32.v16p0(<16 x ptr> %ptr, i32 4, <16 x i1> splat (i1 true), <16 x i32> poison) + ret <16 x i32> %gather +} + +define <16 x i32> @pr163023_zext(ptr %a0, <16 x i32> %a1) { +; X64-LABEL: pr163023_zext: +; X64: # %bb.0: +; X64-NEXT: vpmovzxdq {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero +; X64-NEXT: vextracti64x4 $1, %zmm0, %ymm0 +; X64-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero +; X64-NEXT: kxnorw %k0, %k0, %k1 +; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; X64-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; X64-NEXT: kxnorw %k0, %k0, %k2 +; X64-NEXT: vpgatherqd (%rdi,%zmm0), %ymm3 {%k2} +; X64-NEXT: vpgatherqd (%rdi,%zmm1), %ymm2 {%k1} +; X64-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm0 +; X64-NEXT: retq +; +; X86-LABEL: pr163023_zext: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: kxnorw %k0, %k0, %k1 +; X86-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; X86-NEXT: vpgatherdd (%eax,%zmm0), %zmm1 {%k1} +; X86-NEXT: vmovdqa64 %zmm1, %zmm0 +; X86-NEXT: retl + %addr.p = ptrtoint ptr %a0 to i64 + %addr.v = insertelement <1 x i64> poison, i64 %addr.p, i64 0 + %addr.splat = shufflevector <1 x i64> %addr.v, <1 x i64> poison, <16 x i32> zeroinitializer + %ofs = zext <16 x i32> %a1 to <16 x i64> + %addr = add nuw <16 x i64> %addr.splat, %ofs + %ptr = inttoptr <16 x i64> %addr to <16 x ptr> + %gather = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> %ptr, i32 4, <16 x i1> splat (i1 true), <16 x i32> poison) + ret <16 x i32> %gather +} + ; ; PR45906 ; This used to cause fast-isel to generate bad copy instructions that would diff --git a/llvm/test/CodeGen/X86/pr160612.ll b/llvm/test/CodeGen/X86/pr160612.ll new file mode 100644 index 0000000..6572c42 --- /dev/null +++ b/llvm/test/CodeGen/X86/pr160612.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -O2 | FileCheck %s + +; Test for issue #160612: OR conditions in branches should use multiple branches +; instead of materializing booleans with SETCC when no special optimizations apply. + +declare void @subroutine_foo() +declare void @subroutine_bar() + +; Original issue: (x == 0 || y == 0) was generating SETCC + TEST + BRANCH +; instead of using two conditional branches directly. +define void @func_a(i32 noundef %x, i32 noundef %y) { +; CHECK-LABEL: func_a: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je subroutine_foo@PLT # TAILCALL +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: testl %esi, %esi +; CHECK-NEXT: jne subroutine_bar@PLT # TAILCALL +; CHECK-NEXT: # %bb.2: # %if.then +; CHECK-NEXT: jmp subroutine_foo@PLT # TAILCALL +entry: + %cmp = icmp eq i32 %x, 0 + %cmp1 = icmp eq i32 %y, 0 + %or.cond = or i1 %cmp, %cmp1 + br i1 %or.cond, label %if.then, label %if.else + +if.then: + tail call void @subroutine_foo() + br label %if.end + +if.else: + tail call void @subroutine_bar() + br label %if.end + +if.end: + ret void +} + +; Reference implementation that already generated optimal code. +; This should continue to generate the same optimal code.
+define void @func_b(i32 noundef %x, i32 noundef %y) { +; CHECK-LABEL: func_b: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: je subroutine_foo@PLT # TAILCALL +; CHECK-NEXT: # %bb.1: # %if.else +; CHECK-NEXT: testl %esi, %esi +; CHECK-NEXT: je subroutine_foo@PLT # TAILCALL +; CHECK-NEXT: # %bb.2: # %if.else3 +; CHECK-NEXT: jmp subroutine_bar@PLT # TAILCALL +entry: + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %if.then, label %if.else + +if.then: + tail call void @subroutine_foo() + br label %if.end4 + +if.else: + %cmp1 = icmp eq i32 %y, 0 + br i1 %cmp1, label %if.then2, label %if.else3 + +if.then2: + tail call void @subroutine_foo() + br label %if.end4 + +if.else3: + tail call void @subroutine_bar() + br label %if.end4 + +if.end4: + ret void +} diff --git a/llvm/test/CodeGen/X86/setcc-wide-types.ll b/llvm/test/CodeGen/X86/setcc-wide-types.ll index 69abf6e..d018c53 100644 --- a/llvm/test/CodeGen/X86/setcc-wide-types.ll +++ b/llvm/test/CodeGen/X86/setcc-wide-types.ll @@ -1493,15 +1493,23 @@ define i1 @allbits_i128_load_arg(ptr %w) { } define i1 @anybits_i256_load_arg(ptr %w) { -; ANY-LABEL: anybits_i256_load_arg: -; ANY: # %bb.0: -; ANY-NEXT: movq (%rdi), %rax -; ANY-NEXT: movq 8(%rdi), %rcx -; ANY-NEXT: orq 24(%rdi), %rcx -; ANY-NEXT: orq 16(%rdi), %rax -; ANY-NEXT: orq %rcx, %rax -; ANY-NEXT: setne %al -; ANY-NEXT: retq +; SSE-LABEL: anybits_i256_load_arg: +; SSE: # %bb.0: +; SSE-NEXT: movq (%rdi), %rax +; SSE-NEXT: movq 8(%rdi), %rcx +; SSE-NEXT: orq 24(%rdi), %rcx +; SSE-NEXT: orq 16(%rdi), %rax +; SSE-NEXT: orq %rcx, %rax +; SSE-NEXT: setne %al +; SSE-NEXT: retq +; +; AVXANY-LABEL: anybits_i256_load_arg: +; AVXANY: # %bb.0: +; AVXANY-NEXT: vmovdqu (%rdi), %ymm0 +; AVXANY-NEXT: vptest %ymm0, %ymm0 +; AVXANY-NEXT: setne %al +; AVXANY-NEXT: vzeroupper +; AVXANY-NEXT: retq %ld = load i256, ptr %w %cmp = icmp ne i256 %ld, 0 ret i1 %cmp @@ -1552,21 +1560,30 @@ define i1 @allbits_i256_load_arg(ptr %w) { } define i1 @anybits_i512_load_arg(ptr %w) { -; ANY-LABEL: anybits_i512_load_arg: -; ANY: # %bb.0: -; ANY-NEXT: movq 16(%rdi), %rax -; ANY-NEXT: movq (%rdi), %rcx -; ANY-NEXT: movq 8(%rdi), %rdx -; ANY-NEXT: movq 24(%rdi), %rsi -; ANY-NEXT: orq 56(%rdi), %rsi -; ANY-NEXT: orq 40(%rdi), %rdx -; ANY-NEXT: orq %rsi, %rdx -; ANY-NEXT: orq 48(%rdi), %rax -; ANY-NEXT: orq 32(%rdi), %rcx -; ANY-NEXT: orq %rax, %rcx -; ANY-NEXT: orq %rdx, %rcx -; ANY-NEXT: setne %al -; ANY-NEXT: retq +; NO512-LABEL: anybits_i512_load_arg: +; NO512: # %bb.0: +; NO512-NEXT: movq 16(%rdi), %rax +; NO512-NEXT: movq (%rdi), %rcx +; NO512-NEXT: movq 8(%rdi), %rdx +; NO512-NEXT: movq 24(%rdi), %rsi +; NO512-NEXT: orq 56(%rdi), %rsi +; NO512-NEXT: orq 40(%rdi), %rdx +; NO512-NEXT: orq %rsi, %rdx +; NO512-NEXT: orq 48(%rdi), %rax +; NO512-NEXT: orq 32(%rdi), %rcx +; NO512-NEXT: orq %rax, %rcx +; NO512-NEXT: orq %rdx, %rcx +; NO512-NEXT: setne %al +; NO512-NEXT: retq +; +; AVX512-LABEL: anybits_i512_load_arg: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0 +; AVX512-NEXT: vptestmd %zmm0, %zmm0, %k0 +; AVX512-NEXT: kortestw %k0, %k0 +; AVX512-NEXT: setne %al +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq %ld = load i512, ptr %w %cmp = icmp ne i512 %ld, 0 ret i1 %cmp diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll index 42617c1..18588aa 100644 --- a/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll +++ b/llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll @@ -24,7 +24,7 @@ define float @sqrt_ieee_ninf(float %f) #0 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:fr32 = COPY $xmm0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF - ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]] + ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = ninf afn VRSQRTSSr killed [[DEF]], [[COPY]] ; CHECK-NEXT: [[VMULSSrr:%[0-9]+]]:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr ; CHECK-NEXT: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool) ; CHECK-NEXT: [[VFMADD213SSr:%[0-9]+]]:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed [[VMULSSrr]], [[VMOVSSrm_alt]], implicit $mxcsr @@ -71,7 +71,7 @@ define float @sqrt_daz_ninf(float %f) #1 { ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0 ; CHECK-NEXT: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF - ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]] + ; CHECK-NEXT: [[VRSQRTSSr:%[0-9]+]]:fr32 = ninf afn VRSQRTSSr killed [[DEF]], [[COPY]] ; CHECK-NEXT: [[VMULSSrr:%[0-9]+]]:fr32 = ninf afn nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr ; CHECK-NEXT: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s32) from constant-pool) ; CHECK-NEXT: [[VFMADD213SSr:%[0-9]+]]:fr32 = ninf afn nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed [[VMULSSrr]], [[VMOVSSrm_alt]], implicit $mxcsr diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll b/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll index b2064b1..02d4d88 100644 --- a/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll +++ b/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll @@ -181,40 +181,38 @@ define zeroext i1 @segmentedStack(ptr readonly %vk1, ptr readonly %vk2, i64 %key ; CHECK-LABEL: segmentedStack: ; CHECK: ## %bb.0: ; CHECK-NEXT: cmpq %gs:816, %rsp -; CHECK-NEXT: jbe LBB3_6 +; CHECK-NEXT: jbe LBB3_7 ; CHECK-NEXT: LBB3_1: ## %entry ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: testq %rdi, %rdi -; CHECK-NEXT: sete %al -; CHECK-NEXT: testq %rsi, %rsi -; CHECK-NEXT: sete %cl -; CHECK-NEXT: orb %al, %cl ; CHECK-NEXT: movq %rdi, %rax ; CHECK-NEXT: orq %rsi, %rax ; CHECK-NEXT: sete %al -; CHECK-NEXT: testb %cl, %cl -; CHECK-NEXT: jne LBB3_4 -; CHECK-NEXT: ## %bb.2: ## %if.end4.i +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: je LBB3_5 +; CHECK-NEXT: ## %bb.2: ## %entry +; CHECK-NEXT: testq %rsi, %rsi +; CHECK-NEXT: je LBB3_5 +; CHECK-NEXT: ## %bb.3: ## %if.end4.i ; CHECK-NEXT: movq 8(%rdi), %rdx ; CHECK-NEXT: cmpq 8(%rsi), %rdx -; CHECK-NEXT: jne LBB3_5 -; CHECK-NEXT: ## %bb.3: ## %land.rhs.i.i +; CHECK-NEXT: jne LBB3_6 +; CHECK-NEXT: ## %bb.4: ## %land.rhs.i.i ; CHECK-NEXT: movq (%rsi), %rsi ; CHECK-NEXT: movq (%rdi), %rdi ; CHECK-NEXT: callq _memcmp ; CHECK-NEXT: testl %eax, %eax ; CHECK-NEXT: sete %al -; CHECK-NEXT: LBB3_4: ## %__go_ptr_strings_equal.exit +; CHECK-NEXT: LBB3_5: ## %__go_ptr_strings_equal.exit ; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq -; CHECK-NEXT: LBB3_5: +; CHECK-NEXT: LBB3_6: ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: ## kill: def $al killed $al killed $eax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq -; CHECK-NEXT: LBB3_6: +; CHECK-NEXT: LBB3_7: ; CHECK-NEXT: movl $8, %r10d ; CHECK-NEXT: movl $0, %r11d ; CHECK-NEXT: callq ___morestack @@ -224,43 +222,41 @@ define zeroext i1 @segmentedStack(ptr readonly %vk1, ptr readonly %vk2, i64 %key ; NOCOMPACTUNWIND-LABEL: segmentedStack: ; NOCOMPACTUNWIND: # %bb.0: ; NOCOMPACTUNWIND-NEXT: cmpq %fs:112, %rsp -; 
NOCOMPACTUNWIND-NEXT: jbe .LBB3_6 +; NOCOMPACTUNWIND-NEXT: jbe .LBB3_7 ; NOCOMPACTUNWIND-NEXT: .LBB3_1: # %entry ; NOCOMPACTUNWIND-NEXT: pushq %rax ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 16 -; NOCOMPACTUNWIND-NEXT: testq %rdi, %rdi -; NOCOMPACTUNWIND-NEXT: sete %al -; NOCOMPACTUNWIND-NEXT: testq %rsi, %rsi -; NOCOMPACTUNWIND-NEXT: sete %cl -; NOCOMPACTUNWIND-NEXT: orb %al, %cl ; NOCOMPACTUNWIND-NEXT: movq %rdi, %rax ; NOCOMPACTUNWIND-NEXT: orq %rsi, %rax ; NOCOMPACTUNWIND-NEXT: sete %al -; NOCOMPACTUNWIND-NEXT: testb %cl, %cl -; NOCOMPACTUNWIND-NEXT: jne .LBB3_4 -; NOCOMPACTUNWIND-NEXT: # %bb.2: # %if.end4.i +; NOCOMPACTUNWIND-NEXT: testq %rdi, %rdi +; NOCOMPACTUNWIND-NEXT: je .LBB3_5 +; NOCOMPACTUNWIND-NEXT: # %bb.2: # %entry +; NOCOMPACTUNWIND-NEXT: testq %rsi, %rsi +; NOCOMPACTUNWIND-NEXT: je .LBB3_5 +; NOCOMPACTUNWIND-NEXT: # %bb.3: # %if.end4.i ; NOCOMPACTUNWIND-NEXT: movq 8(%rdi), %rdx ; NOCOMPACTUNWIND-NEXT: cmpq 8(%rsi), %rdx -; NOCOMPACTUNWIND-NEXT: jne .LBB3_5 -; NOCOMPACTUNWIND-NEXT: # %bb.3: # %land.rhs.i.i +; NOCOMPACTUNWIND-NEXT: jne .LBB3_6 +; NOCOMPACTUNWIND-NEXT: # %bb.4: # %land.rhs.i.i ; NOCOMPACTUNWIND-NEXT: movq (%rsi), %rsi ; NOCOMPACTUNWIND-NEXT: movq (%rdi), %rdi ; NOCOMPACTUNWIND-NEXT: callq memcmp@PLT ; NOCOMPACTUNWIND-NEXT: testl %eax, %eax ; NOCOMPACTUNWIND-NEXT: sete %al -; NOCOMPACTUNWIND-NEXT: .LBB3_4: # %__go_ptr_strings_equal.exit +; NOCOMPACTUNWIND-NEXT: .LBB3_5: # %__go_ptr_strings_equal.exit ; NOCOMPACTUNWIND-NEXT: # kill: def $al killed $al killed $eax ; NOCOMPACTUNWIND-NEXT: popq %rcx ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 8 ; NOCOMPACTUNWIND-NEXT: retq -; NOCOMPACTUNWIND-NEXT: .LBB3_5: +; NOCOMPACTUNWIND-NEXT: .LBB3_6: ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 16 ; NOCOMPACTUNWIND-NEXT: xorl %eax, %eax ; NOCOMPACTUNWIND-NEXT: # kill: def $al killed $al killed $eax ; NOCOMPACTUNWIND-NEXT: popq %rcx ; NOCOMPACTUNWIND-NEXT: .cfi_def_cfa_offset 8 ; NOCOMPACTUNWIND-NEXT: retq -; NOCOMPACTUNWIND-NEXT: .LBB3_6: +; NOCOMPACTUNWIND-NEXT: .LBB3_7: ; NOCOMPACTUNWIND-NEXT: movl $8, %r10d ; NOCOMPACTUNWIND-NEXT: movl $0, %r11d ; NOCOMPACTUNWIND-NEXT: callq __morestack diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan-funclet.ll b/llvm/test/Instrumentation/AddressSanitizer/asan-funclet.ll index ae8b2b3..2a7216f 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/asan-funclet.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/asan-funclet.ll @@ -23,7 +23,7 @@ declare i32 @dummyPersonality(...) 
define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr @__CxxFrameHandler3 { ; CHECK-INLINE-LABEL: define void @FuncletPersonality( -; CHECK-INLINE-SAME: ptr [[PTRPARAM:%.*]]) #[[ATTR3:[0-9]+]] personality ptr @__CxxFrameHandler3 { +; CHECK-INLINE-SAME: ptr [[PTRPARAM:%.*]]) #[[ATTR2:[0-9]+]] personality ptr @__CxxFrameHandler3 { ; CHECK-INLINE-NEXT: entry: ; CHECK-INLINE-NEXT: [[TMP0:%.*]] = alloca i64, align 32 ; CHECK-INLINE-NEXT: store i64 0, ptr [[TMP0]], align 8 @@ -37,33 +37,26 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-INLINE-NEXT: br label [[TMP6]] ; CHECK-INLINE: 6: ; CHECK-INLINE-NEXT: [[TMP7:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP5]], [[TMP4]] ] +; CHECK-INLINE-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-INLINE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 ; CHECK-INLINE-NEXT: br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP11:%.*]] -; CHECK-INLINE: 9: +; CHECK-INLINE: 10: ; CHECK-INLINE-NEXT: [[MYALLOCA:%.*]] = alloca i8, i64 8544, align 32 -; CHECK-INLINE-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 ; CHECK-INLINE-NEXT: br label [[TMP11]] ; CHECK-INLINE: 11: -; CHECK-INLINE-NEXT: [[TMP12:%.*]] = phi i64 [ [[TMP7]], [[TMP6]] ], [ [[TMP10]], [[TMP9]] ] -; CHECK-INLINE-NEXT: store i64 [[TMP12]], ptr [[ASAN_LOCAL_STACK_BASE]], align 8 -; CHECK-INLINE-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], 32 -; CHECK-INLINE-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr -; CHECK-INLINE-NEXT: [[TMP15:%.*]] = add i64 [[TMP12]], 8480 -; CHECK-INLINE-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr -; CHECK-INLINE-NEXT: [[TMP17:%.*]] = add i64 [[TMP12]], 8496 -; CHECK-INLINE-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr -; CHECK-INLINE-NEXT: [[TMP19:%.*]] = add i64 [[TMP12]], 8512 -; CHECK-INLINE-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP19]] to ptr -; CHECK-INLINE-NEXT: [[TMP21:%.*]] = add i64 [[TMP12]], 8528 -; CHECK-INLINE-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP21]] to ptr -; CHECK-INLINE-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP12]] to ptr +; CHECK-INLINE-NEXT: [[TMP23:%.*]] = phi ptr [ [[TMP10]], [[TMP6]] ], [ [[MYALLOCA]], [[TMP9]] ] +; CHECK-INLINE-NEXT: store ptr [[TMP23]], ptr [[ASAN_LOCAL_STACK_BASE]], align 8 +; CHECK-INLINE-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP23]], i64 32 +; CHECK-INLINE-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP23]], i64 8480 +; CHECK-INLINE-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP23]], i64 8496 +; CHECK-INLINE-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP23]], i64 8512 +; CHECK-INLINE-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[TMP23]], i64 8528 ; CHECK-INLINE-NEXT: store i64 1102416563, ptr [[TMP23]], align 8 -; CHECK-INLINE-NEXT: [[TMP24:%.*]] = add i64 [[TMP12]], 8 -; CHECK-INLINE-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP24]] to ptr +; CHECK-INLINE-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[TMP23]], i64 8 ; CHECK-INLINE-NEXT: store i64 ptrtoint (ptr @___asan_gen_stack to i64), ptr [[TMP25]], align 8 -; CHECK-INLINE-NEXT: [[TMP26:%.*]] = add i64 [[TMP12]], 16 -; CHECK-INLINE-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP26]] to ptr +; CHECK-INLINE-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP23]], i64 16 ; CHECK-INLINE-NEXT: store i64 ptrtoint (ptr @FuncletPersonality to i64), ptr [[TMP27]], align 8 +; CHECK-INLINE-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[TMP23]] to i64 ; CHECK-INLINE-NEXT: [[TMP28:%.*]] = lshr i64 [[TMP12]], 3 ; CHECK-INLINE-NEXT: [[TMP29:%.*]] = add i64 [[TMP28]], [[TMP1]] ; CHECK-INLINE-NEXT: call void 
@__asan_set_shadow_f1(i64 [[TMP29]], i64 4) @@ -87,21 +80,22 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-INLINE-NEXT: call void @__asan_set_shadow_f3(i64 [[TMP38]], i64 1) ; CHECK-INLINE-NEXT: [[TMP39:%.*]] = add i64 [[TMP29]], 1066 ; CHECK-INLINE-NEXT: call void @__asan_set_shadow_04(i64 [[TMP39]], i64 1) +; CHECK-INLINE-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP22]] to i64 ; CHECK-INLINE-NEXT: [[TMP40:%.*]] = lshr i64 [[TMP21]], 3 ; CHECK-INLINE-NEXT: [[TMP41:%.*]] = add i64 [[TMP40]], [[TMP1]] ; CHECK-INLINE-NEXT: [[TMP42:%.*]] = inttoptr i64 [[TMP41]] to ptr ; CHECK-INLINE-NEXT: [[TMP43:%.*]] = load i8, ptr [[TMP42]], align 1 ; CHECK-INLINE-NEXT: [[TMP44:%.*]] = icmp ne i8 [[TMP43]], 0 ; CHECK-INLINE-NEXT: br i1 [[TMP44]], label [[TMP45:%.*]], label [[TMP50:%.*]], !prof [[PROF1:![0-9]+]] -; CHECK-INLINE: 45: +; CHECK-INLINE: 39: ; CHECK-INLINE-NEXT: [[TMP46:%.*]] = and i64 [[TMP21]], 7 ; CHECK-INLINE-NEXT: [[TMP47:%.*]] = trunc i64 [[TMP46]] to i8 ; CHECK-INLINE-NEXT: [[TMP48:%.*]] = icmp sge i8 [[TMP47]], [[TMP43]] ; CHECK-INLINE-NEXT: br i1 [[TMP48]], label [[TMP49:%.*]], label [[TMP50]] -; CHECK-INLINE: 49: +; CHECK-INLINE: 43: ; CHECK-INLINE-NEXT: call void @__asan_report_store1(i64 [[TMP21]]) #[[ATTR7:[0-9]+]] ; CHECK-INLINE-NEXT: unreachable -; CHECK-INLINE: 50: +; CHECK-INLINE: 44: ; CHECK-INLINE-NEXT: store volatile i8 0, ptr [[TMP22]], align 1 ; CHECK-INLINE-NEXT: [[TMP51:%.*]] = add i64 [[TMP29]], 1066 ; CHECK-INLINE-NEXT: call void @__asan_set_shadow_f8(i64 [[TMP51]], i64 1) @@ -125,10 +119,10 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-INLINE-NEXT: [[TMP65:%.*]] = load i8, ptr [[TMP64]], align 1 ; CHECK-INLINE-NEXT: [[TMP66:%.*]] = icmp ne i8 [[TMP65]], 0 ; CHECK-INLINE-NEXT: br i1 [[TMP66]], label [[TMP67:%.*]], label [[TMP68:%.*]] -; CHECK-INLINE: 67: +; CHECK-INLINE: 61: ; CHECK-INLINE-NEXT: call void @__asan_report_store8(i64 [[TMP59]]) #[[ATTR7]] ; CHECK-INLINE-NEXT: unreachable -; CHECK-INLINE: 68: +; CHECK-INLINE: 62: ; CHECK-INLINE-NEXT: store volatile i64 0, ptr [[TMP61]], align 8 ; CHECK-INLINE-NEXT: [[TMPCOPYI64:%.*]] = load i64, ptr [[TMP61]], align 8 ; CHECK-INLINE-NEXT: [[TMP69:%.*]] = and i64 [[TMPCOPYI64]], 31 @@ -150,15 +144,15 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-INLINE-NEXT: [[TMP83:%.*]] = load i8, ptr [[TMP82]], align 1 ; CHECK-INLINE-NEXT: [[TMP84:%.*]] = icmp ne i8 [[TMP83]], 0 ; CHECK-INLINE-NEXT: br i1 [[TMP84]], label [[TMP85:%.*]], label [[TMP90:%.*]], !prof [[PROF1]] -; CHECK-INLINE: 85: +; CHECK-INLINE: 79: ; CHECK-INLINE-NEXT: [[TMP86:%.*]] = and i64 [[TMP77]], 7 ; CHECK-INLINE-NEXT: [[TMP87:%.*]] = trunc i64 [[TMP86]] to i8 ; CHECK-INLINE-NEXT: [[TMP88:%.*]] = icmp sge i8 [[TMP87]], [[TMP83]] ; CHECK-INLINE-NEXT: br i1 [[TMP88]], label [[TMP89:%.*]], label [[TMP90]] -; CHECK-INLINE: 89: +; CHECK-INLINE: 83: ; CHECK-INLINE-NEXT: call void @__asan_report_store1(i64 [[TMP77]]) #[[ATTR7]] ; CHECK-INLINE-NEXT: unreachable -; CHECK-INLINE: 90: +; CHECK-INLINE: 84: ; CHECK-INLINE-NEXT: store volatile i8 0, ptr [[TMP79]], align 1 ; CHECK-INLINE-NEXT: invoke void @MayThrowFunc() ; CHECK-INLINE-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[EHCLEANUP:%.*]] @@ -170,15 +164,15 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-INLINE-NEXT: store i64 1172321806, ptr [[TMP23]], align 8 ; CHECK-INLINE-NEXT: [[TMP93:%.*]] = icmp ne i64 [[TMP7]], 0 ; CHECK-INLINE-NEXT: br i1 
[[TMP93]], label [[TMP94:%.*]], label [[TMP95:%.*]] -; CHECK-INLINE: 94: +; CHECK-INLINE: 88: ; CHECK-INLINE-NEXT: call void @__asan_stack_free_8(i64 [[TMP7]], i64 8544) ; CHECK-INLINE-NEXT: br label [[TMP97:%.*]] -; CHECK-INLINE: 95: +; CHECK-INLINE: 89: ; CHECK-INLINE-NEXT: call void @__asan_set_shadow_00(i64 [[TMP29]], i64 4) ; CHECK-INLINE-NEXT: [[TMP96:%.*]] = add i64 [[TMP29]], 1028 ; CHECK-INLINE-NEXT: call void @__asan_set_shadow_00(i64 [[TMP96]], i64 40) ; CHECK-INLINE-NEXT: br label [[TMP97]] -; CHECK-INLINE: 97: +; CHECK-INLINE: 91: ; CHECK-INLINE-NEXT: ret void ; CHECK-INLINE: ehcleanup: ; CHECK-INLINE-NEXT: [[TMP98:%.*]] = cleanuppad within none [] @@ -189,23 +183,27 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-INLINE-NEXT: [[TMP102:%.*]] = load i8, ptr [[TMP101]], align 1 ; CHECK-INLINE-NEXT: [[TMP103:%.*]] = icmp ne i8 [[TMP102]], 0 ; CHECK-INLINE-NEXT: br i1 [[TMP103]], label [[TMP104:%.*]], label [[TMP109:%.*]], !prof [[PROF1]] -; CHECK-INLINE: 104: +; CHECK-INLINE: 98: ; CHECK-INLINE-NEXT: [[TMP105:%.*]] = and i64 [[TMP54]], 7 ; CHECK-INLINE-NEXT: [[TMP106:%.*]] = trunc i64 [[TMP105]] to i8 ; CHECK-INLINE-NEXT: [[TMP107:%.*]] = icmp sge i8 [[TMP106]], [[TMP102]] ; CHECK-INLINE-NEXT: br i1 [[TMP107]], label [[TMP108:%.*]], label [[TMP109]] -; CHECK-INLINE: 108: +; CHECK-INLINE: 102: ; CHECK-INLINE-NEXT: call void @__asan_report_store1(i64 [[TMP54]]) #[[ATTR7]] [ "funclet"(token [[TMP98]]) ] ; CHECK-INLINE-NEXT: unreachable -; CHECK-INLINE: 109: +; CHECK-INLINE: 103: ; CHECK-INLINE-NEXT: store volatile i8 0, ptr [[TMP56]], align 1 ; CHECK-INLINE-NEXT: call void @__asan_poison_stack_memory(i64 [[TMP54]], i64 4) [ "funclet"(token [[TMP98]]) ] ; CHECK-INLINE-NEXT: call void @DeInit(ptr [[TMP14]]) [ "funclet"(token [[TMP98]]) ] ; CHECK-INLINE-NEXT: [[TMP110:%.*]] = call ptr @__asan_memset(ptr [[TMP16]], i32 0, i64 4) [ "funclet"(token [[TMP98]]) ] ; CHECK-INLINE-NEXT: [[TMP111:%.*]] = call ptr @__asan_memcpy(ptr [[TMP18]], ptr [[TMP16]], i64 4) [ "funclet"(token [[TMP98]]) ] ; CHECK-INLINE-NEXT: [[TMP112:%.*]] = call ptr @__asan_memmove(ptr [[TMP20]], ptr [[TMP16]], i64 4) [ "funclet"(token [[TMP98]]) ] +; CHECK-INLINE-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP16]] to i64 +; CHECK-INLINE-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP18]] to i64 ; CHECK-INLINE-NEXT: call void @__sanitizer_ptr_cmp(i64 [[TMP15]], i64 [[TMP17]]) [ "funclet"(token [[TMP98]]) ] -; CHECK-INLINE-NEXT: call void @__sanitizer_ptr_sub(i64 [[TMP15]], i64 [[TMP17]]) [ "funclet"(token [[TMP98]]) ] +; CHECK-INLINE-NEXT: [[ADDR1:%.*]] = ptrtoint ptr [[TMP16]] to i64 +; CHECK-INLINE-NEXT: [[ADDR2:%.*]] = ptrtoint ptr [[TMP18]] to i64 +; CHECK-INLINE-NEXT: call void @__sanitizer_ptr_sub(i64 [[ADDR1]], i64 [[ADDR2]]) [ "funclet"(token [[TMP98]]) ] ; CHECK-INLINE-NEXT: [[TMP113:%.*]] = ptrtoint ptr [[PTRPARAM]] to i64 ; CHECK-INLINE-NEXT: [[TMP114:%.*]] = add i64 [[TMP113]], 7 ; CHECK-INLINE-NEXT: [[TMP115:%.*]] = inttoptr i64 [[TMP114]] to ptr @@ -216,27 +214,27 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-INLINE-NEXT: [[TMP120:%.*]] = load i8, ptr [[TMP119]], align 1 ; CHECK-INLINE-NEXT: [[TMP121:%.*]] = icmp ne i8 [[TMP120]], 0 ; CHECK-INLINE-NEXT: br i1 [[TMP121]], label [[TMP122:%.*]], label [[TMP127:%.*]], !prof [[PROF1]] -; CHECK-INLINE: 122: +; CHECK-INLINE: 118: ; CHECK-INLINE-NEXT: [[TMP123:%.*]] = and i64 [[TMP116]], 7 ; CHECK-INLINE-NEXT: [[TMP124:%.*]] = trunc i64 [[TMP123]] to i8 ; CHECK-INLINE-NEXT: [[TMP125:%.*]] = 
icmp sge i8 [[TMP124]], [[TMP120]] ; CHECK-INLINE-NEXT: br i1 [[TMP125]], label [[TMP126:%.*]], label [[TMP127]] -; CHECK-INLINE: 126: +; CHECK-INLINE: 122: ; CHECK-INLINE-NEXT: call void @__asan_report_store_n(i64 [[TMP116]], i64 8) #[[ATTR7]] [ "funclet"(token [[TMP98]]) ] ; CHECK-INLINE-NEXT: unreachable -; CHECK-INLINE: 127: +; CHECK-INLINE: 123: ; CHECK-INLINE-NEXT: [[TMP128:%.*]] = lshr i64 [[TMP114]], 3 ; CHECK-INLINE-NEXT: [[TMP129:%.*]] = add i64 [[TMP128]], [[TMP1]] ; CHECK-INLINE-NEXT: [[TMP130:%.*]] = inttoptr i64 [[TMP129]] to ptr ; CHECK-INLINE-NEXT: [[TMP131:%.*]] = load i8, ptr [[TMP130]], align 1 ; CHECK-INLINE-NEXT: [[TMP132:%.*]] = icmp ne i8 [[TMP131]], 0 ; CHECK-INLINE-NEXT: br i1 [[TMP132]], label [[TMP133:%.*]], label [[EHEXIT:%.*]], !prof [[PROF1]] -; CHECK-INLINE: 133: +; CHECK-INLINE: 129: ; CHECK-INLINE-NEXT: [[TMP134:%.*]] = and i64 [[TMP114]], 7 ; CHECK-INLINE-NEXT: [[TMP135:%.*]] = trunc i64 [[TMP134]] to i8 ; CHECK-INLINE-NEXT: [[TMP136:%.*]] = icmp sge i8 [[TMP135]], [[TMP131]] ; CHECK-INLINE-NEXT: br i1 [[TMP136]], label [[TMP137:%.*]], label [[EHEXIT]] -; CHECK-INLINE: 137: +; CHECK-INLINE: 133: ; CHECK-INLINE-NEXT: call void @__asan_report_store_n(i64 [[TMP114]], i64 8) #[[ATTR7]] [ "funclet"(token [[TMP98]]) ] ; CHECK-INLINE-NEXT: unreachable ; CHECK-INLINE: ehexit: @@ -249,19 +247,19 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-INLINE-NEXT: store i64 1172321806, ptr [[TMP23]], align 8 ; CHECK-INLINE-NEXT: [[TMP142:%.*]] = icmp ne i64 [[TMP7]], 0 ; CHECK-INLINE-NEXT: br i1 [[TMP142]], label [[TMP143:%.*]], label [[TMP144:%.*]] -; CHECK-INLINE: 143: +; CHECK-INLINE: 139: ; CHECK-INLINE-NEXT: call void @__asan_stack_free_8(i64 [[TMP7]], i64 8544) [ "funclet"(token [[TMP98]]) ] ; CHECK-INLINE-NEXT: br label [[TMP146:%.*]] -; CHECK-INLINE: 144: +; CHECK-INLINE: 140: ; CHECK-INLINE-NEXT: call void @__asan_set_shadow_00(i64 [[TMP29]], i64 4) [ "funclet"(token [[TMP98]]) ] ; CHECK-INLINE-NEXT: [[TMP145:%.*]] = add i64 [[TMP29]], 1028 ; CHECK-INLINE-NEXT: call void @__asan_set_shadow_00(i64 [[TMP145]], i64 40) [ "funclet"(token [[TMP98]]) ] ; CHECK-INLINE-NEXT: br label [[TMP146]] -; CHECK-INLINE: 146: +; CHECK-INLINE: 142: ; CHECK-INLINE-NEXT: cleanupret from [[TMP98]] unwind to caller ; ; CHECK-OUTLINE-LABEL: define void @FuncletPersonality( -; CHECK-OUTLINE-SAME: ptr [[PTRPARAM:%.*]]) #[[ATTR3:[0-9]+]] personality ptr @__CxxFrameHandler3 { +; CHECK-OUTLINE-SAME: ptr [[PTRPARAM:%.*]]) #[[ATTR2:[0-9]+]] personality ptr @__CxxFrameHandler3 { ; CHECK-OUTLINE-NEXT: entry: ; CHECK-OUTLINE-NEXT: [[TMP0:%.*]] = alloca i64, align 32 ; CHECK-OUTLINE-NEXT: store i64 0, ptr [[TMP0]], align 8 @@ -275,37 +273,28 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-OUTLINE-NEXT: br label [[TMP6]] ; CHECK-OUTLINE: 6: ; CHECK-OUTLINE-NEXT: [[TMP7:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP5]], [[TMP4]] ] +; CHECK-OUTLINE-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP7]] to ptr ; CHECK-OUTLINE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0 ; CHECK-OUTLINE-NEXT: br i1 [[TMP8]], label [[TMP9:%.*]], label [[TMP11:%.*]] -; CHECK-OUTLINE: 9: +; CHECK-OUTLINE: 10: ; CHECK-OUTLINE-NEXT: [[MYALLOCA:%.*]] = alloca i8, i64 8608, align 32 -; CHECK-OUTLINE-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 ; CHECK-OUTLINE-NEXT: br label [[TMP11]] ; CHECK-OUTLINE: 11: -; CHECK-OUTLINE-NEXT: [[TMP12:%.*]] = phi i64 [ [[TMP7]], [[TMP6]] ], [ [[TMP10]], [[TMP9]] ] -; CHECK-OUTLINE-NEXT: store i64 [[TMP12]], ptr 
[[ASAN_LOCAL_STACK_BASE]], align 8 -; CHECK-OUTLINE-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], 32 -; CHECK-OUTLINE-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr -; CHECK-OUTLINE-NEXT: [[TMP15:%.*]] = add i64 [[TMP12]], 8480 -; CHECK-OUTLINE-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr -; CHECK-OUTLINE-NEXT: [[TMP17:%.*]] = add i64 [[TMP12]], 8496 -; CHECK-OUTLINE-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr -; CHECK-OUTLINE-NEXT: [[TMP19:%.*]] = add i64 [[TMP12]], 8512 -; CHECK-OUTLINE-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP19]] to ptr -; CHECK-OUTLINE-NEXT: [[TMP21:%.*]] = add i64 [[TMP12]], 8528 -; CHECK-OUTLINE-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP21]] to ptr -; CHECK-OUTLINE-NEXT: [[TMP23:%.*]] = add i64 [[TMP12]], 8544 -; CHECK-OUTLINE-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr -; CHECK-OUTLINE-NEXT: [[TMP25:%.*]] = add i64 [[TMP12]], 8560 -; CHECK-OUTLINE-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP25]] to ptr -; CHECK-OUTLINE-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP12]] to ptr +; CHECK-OUTLINE-NEXT: [[TMP27:%.*]] = phi ptr [ [[TMP10]], [[TMP6]] ], [ [[MYALLOCA]], [[TMP9]] ] +; CHECK-OUTLINE-NEXT: store ptr [[TMP27]], ptr [[ASAN_LOCAL_STACK_BASE]], align 8 +; CHECK-OUTLINE-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP27]], i64 32 +; CHECK-OUTLINE-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP27]], i64 8480 +; CHECK-OUTLINE-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP27]], i64 8496 +; CHECK-OUTLINE-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[TMP27]], i64 8512 +; CHECK-OUTLINE-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[TMP27]], i64 8528 +; CHECK-OUTLINE-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP27]], i64 8544 +; CHECK-OUTLINE-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[TMP27]], i64 8560 ; CHECK-OUTLINE-NEXT: store i64 1102416563, ptr [[TMP27]], align 8 -; CHECK-OUTLINE-NEXT: [[TMP28:%.*]] = add i64 [[TMP12]], 8 -; CHECK-OUTLINE-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP28]] to ptr +; CHECK-OUTLINE-NEXT: [[TMP29:%.*]] = getelementptr i8, ptr [[TMP27]], i64 8 ; CHECK-OUTLINE-NEXT: store i64 ptrtoint (ptr @___asan_gen_stack to i64), ptr [[TMP29]], align 8 -; CHECK-OUTLINE-NEXT: [[TMP30:%.*]] = add i64 [[TMP12]], 16 -; CHECK-OUTLINE-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP30]] to ptr +; CHECK-OUTLINE-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[TMP27]], i64 16 ; CHECK-OUTLINE-NEXT: store i64 ptrtoint (ptr @FuncletPersonality to i64), ptr [[TMP31]], align 8 +; CHECK-OUTLINE-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[TMP27]] to i64 ; CHECK-OUTLINE-NEXT: [[TMP32:%.*]] = lshr i64 [[TMP12]], 3 ; CHECK-OUTLINE-NEXT: [[TMP33:%.*]] = add i64 [[TMP32]], [[TMP1]] ; CHECK-OUTLINE-NEXT: call void @__asan_set_shadow_f1(i64 [[TMP33]], i64 4) @@ -335,10 +324,12 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-OUTLINE-NEXT: call void @__asan_set_shadow_f3(i64 [[TMP45]], i64 5) ; CHECK-OUTLINE-NEXT: [[TMP46:%.*]] = add i64 [[TMP33]], 1066 ; CHECK-OUTLINE-NEXT: call void @__asan_set_shadow_04(i64 [[TMP46]], i64 1) +; CHECK-OUTLINE-NEXT: [[TMP21:%.*]] = ptrtoint ptr [[TMP22]] to i64 ; CHECK-OUTLINE-NEXT: call void @__asan_store1(i64 [[TMP21]]) ; CHECK-OUTLINE-NEXT: store volatile i8 0, ptr [[TMP22]], align 1 ; CHECK-OUTLINE-NEXT: [[TMP47:%.*]] = add i64 [[TMP33]], 1066 ; CHECK-OUTLINE-NEXT: call void @__asan_set_shadow_f8(i64 [[TMP47]], i64 1) +; CHECK-OUTLINE-NEXT: [[TMP25:%.*]] = ptrtoint ptr [[TMP26]] to i64 ; CHECK-OUTLINE-NEXT: call void @__asan_store8(i64 [[TMP25]]) ; CHECK-OUTLINE-NEXT: store volatile i64 0, ptr 
[[TMP26]], align 8 ; CHECK-OUTLINE-NEXT: [[TMPCOPYI64:%.*]] = load i64, ptr [[TMP26]], align 8 @@ -367,22 +358,23 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-OUTLINE-NEXT: store i64 1172321806, ptr [[TMP27]], align 8 ; CHECK-OUTLINE-NEXT: [[TMP61:%.*]] = icmp ne i64 [[TMP7]], 0 ; CHECK-OUTLINE-NEXT: br i1 [[TMP61]], label [[TMP62:%.*]], label [[TMP63:%.*]] -; CHECK-OUTLINE: 62: +; CHECK-OUTLINE: 55: ; CHECK-OUTLINE-NEXT: call void @__asan_stack_free_8(i64 [[TMP7]], i64 8608) ; CHECK-OUTLINE-NEXT: br label [[TMP66:%.*]] -; CHECK-OUTLINE: 63: +; CHECK-OUTLINE: 56: ; CHECK-OUTLINE-NEXT: call void @__asan_set_shadow_00(i64 [[TMP33]], i64 4) ; CHECK-OUTLINE-NEXT: [[TMP64:%.*]] = add i64 [[TMP33]], 1028 ; CHECK-OUTLINE-NEXT: call void @__asan_set_shadow_00(i64 [[TMP64]], i64 42) ; CHECK-OUTLINE-NEXT: [[TMP65:%.*]] = add i64 [[TMP33]], 1071 ; CHECK-OUTLINE-NEXT: call void @__asan_set_shadow_00(i64 [[TMP65]], i64 5) ; CHECK-OUTLINE-NEXT: br label [[TMP66]] -; CHECK-OUTLINE: 66: +; CHECK-OUTLINE: 59: ; CHECK-OUTLINE-NEXT: ret void ; CHECK-OUTLINE: ehcleanup: ; CHECK-OUTLINE-NEXT: [[TMP67:%.*]] = cleanuppad within none [] ; CHECK-OUTLINE-NEXT: [[TMP68:%.*]] = add i64 [[TMP33]], 1068 ; CHECK-OUTLINE-NEXT: call void @__asan_set_shadow_04(i64 [[TMP68]], i64 1) [ "funclet"(token [[TMP67]]) ] +; CHECK-OUTLINE-NEXT: [[TMP23:%.*]] = ptrtoint ptr [[TMP24]] to i64 ; CHECK-OUTLINE-NEXT: call void @__asan_store1(i64 [[TMP23]]) [ "funclet"(token [[TMP67]]) ] ; CHECK-OUTLINE-NEXT: store volatile i8 0, ptr [[TMP24]], align 1 ; CHECK-OUTLINE-NEXT: [[TMP69:%.*]] = add i64 [[TMP33]], 1068 @@ -391,8 +383,12 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-OUTLINE-NEXT: [[TMP70:%.*]] = call ptr @__asan_memset(ptr [[TMP16]], i32 0, i64 4) [ "funclet"(token [[TMP67]]) ] ; CHECK-OUTLINE-NEXT: [[TMP71:%.*]] = call ptr @__asan_memcpy(ptr [[TMP18]], ptr [[TMP16]], i64 4) [ "funclet"(token [[TMP67]]) ] ; CHECK-OUTLINE-NEXT: [[TMP72:%.*]] = call ptr @__asan_memmove(ptr [[TMP20]], ptr [[TMP16]], i64 4) [ "funclet"(token [[TMP67]]) ] +; CHECK-OUTLINE-NEXT: [[TMP15:%.*]] = ptrtoint ptr [[TMP16]] to i64 +; CHECK-OUTLINE-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[TMP18]] to i64 ; CHECK-OUTLINE-NEXT: call void @__sanitizer_ptr_cmp(i64 [[TMP15]], i64 [[TMP17]]) [ "funclet"(token [[TMP67]]) ] -; CHECK-OUTLINE-NEXT: call void @__sanitizer_ptr_sub(i64 [[TMP15]], i64 [[TMP17]]) [ "funclet"(token [[TMP67]]) ] +; CHECK-OUTLINE-NEXT: [[ADDR1:%.*]] = ptrtoint ptr [[TMP16]] to i64 +; CHECK-OUTLINE-NEXT: [[ADDR2:%.*]] = ptrtoint ptr [[TMP18]] to i64 +; CHECK-OUTLINE-NEXT: call void @__sanitizer_ptr_sub(i64 [[ADDR1]], i64 [[ADDR2]]) [ "funclet"(token [[TMP67]]) ] ; CHECK-OUTLINE-NEXT: [[TMP73:%.*]] = ptrtoint ptr [[PTRPARAM]] to i64 ; CHECK-OUTLINE-NEXT: call void @__asan_storeN(i64 [[TMP73]], i64 8) [ "funclet"(token [[TMP67]]) ] ; CHECK-OUTLINE-NEXT: store i64 0, ptr [[PTRPARAM]], align 1 @@ -404,17 +400,17 @@ define void @FuncletPersonality(ptr %ptrParam) sanitize_address personality ptr ; CHECK-OUTLINE-NEXT: store i64 1172321806, ptr [[TMP27]], align 8 ; CHECK-OUTLINE-NEXT: [[TMP78:%.*]] = icmp ne i64 [[TMP7]], 0 ; CHECK-OUTLINE-NEXT: br i1 [[TMP78]], label [[TMP79:%.*]], label [[TMP80:%.*]] -; CHECK-OUTLINE: 79: +; CHECK-OUTLINE: 75: ; CHECK-OUTLINE-NEXT: call void @__asan_stack_free_8(i64 [[TMP7]], i64 8608) [ "funclet"(token [[TMP67]]) ] ; CHECK-OUTLINE-NEXT: br label [[TMP83:%.*]] -; CHECK-OUTLINE: 80: +; CHECK-OUTLINE: 76: ; CHECK-OUTLINE-NEXT: call 
void @__asan_set_shadow_00(i64 [[TMP33]], i64 4) [ "funclet"(token [[TMP67]]) ] ; CHECK-OUTLINE-NEXT: [[TMP81:%.*]] = add i64 [[TMP33]], 1028 ; CHECK-OUTLINE-NEXT: call void @__asan_set_shadow_00(i64 [[TMP81]], i64 42) [ "funclet"(token [[TMP67]]) ] ; CHECK-OUTLINE-NEXT: [[TMP82:%.*]] = add i64 [[TMP33]], 1071 ; CHECK-OUTLINE-NEXT: call void @__asan_set_shadow_00(i64 [[TMP82]], i64 5) [ "funclet"(token [[TMP67]]) ] ; CHECK-OUTLINE-NEXT: br label [[TMP83]] -; CHECK-OUTLINE: 83: +; CHECK-OUTLINE: 79: ; CHECK-OUTLINE-NEXT: cleanupret from [[TMP67]] unwind to caller ; @@ -487,7 +483,7 @@ nopredecessor: ; Non-Windows personality: ensure no funclet gets attached to the asan runtime call. define void @OtherPersonality(ptr %ptrParam) sanitize_address personality ptr @dummyPersonality { ; CHECK-LABEL: define void @OtherPersonality( -; CHECK-SAME: ptr [[PTRPARAM:%.*]]) #[[ATTR3:[0-9]+]] personality ptr @dummyPersonality { +; CHECK-SAME: ptr [[PTRPARAM:%.*]]) #[[ATTR2:[0-9]+]] personality ptr @dummyPersonality { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr @__asan_shadow_memory_dynamic_address, align 8 ; CHECK-NEXT: [[ASAN_LOCAL_STACK_BASE:%.*]] = alloca i64, align 8 @@ -499,25 +495,22 @@ define void @OtherPersonality(ptr %ptrParam) sanitize_address personality ptr @d ; CHECK-NEXT: br label [[TMP5]] ; CHECK: 5: ; CHECK-NEXT: [[TMP6:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP4]], [[TMP3]] ] +; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[TMP6]], 0 ; CHECK-NEXT: br i1 [[TMP7]], label [[TMP8:%.*]], label [[TMP10:%.*]] -; CHECK: 8: +; CHECK: 9: ; CHECK-NEXT: [[MYALLOCA:%.*]] = alloca i8, i64 64, align 32 -; CHECK-NEXT: [[TMP9:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 ; CHECK-NEXT: br label [[TMP10]] ; CHECK: 10: -; CHECK-NEXT: [[TMP11:%.*]] = phi i64 [ [[TMP6]], [[TMP5]] ], [ [[TMP9]], [[TMP8]] ] -; CHECK-NEXT: store i64 [[TMP11]], ptr [[ASAN_LOCAL_STACK_BASE]], align 8 -; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 32 -; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr -; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP11]] to ptr +; CHECK-NEXT: [[TMP14:%.*]] = phi ptr [ [[TMP9]], [[TMP5]] ], [ [[MYALLOCA]], [[TMP8]] ] +; CHECK-NEXT: store ptr [[TMP14]], ptr [[ASAN_LOCAL_STACK_BASE]], align 8 +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP14]], i64 32 ; CHECK-NEXT: store i64 1102416563, ptr [[TMP14]], align 8 -; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP11]], 8 -; CHECK-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[TMP14]], i64 8 ; CHECK-NEXT: store i64 ptrtoint (ptr @___asan_gen_stack.1 to i64), ptr [[TMP16]], align 8 -; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[TMP11]], 16 -; CHECK-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP14]], i64 16 ; CHECK-NEXT: store i64 ptrtoint (ptr @OtherPersonality to i64), ptr [[TMP18]], align 8 +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP14]] to i64 ; CHECK-NEXT: [[TMP19:%.*]] = lshr i64 [[TMP11]], 3 ; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[TMP19]], [[TMP0]] ; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[TMP20]], 0 @@ -532,14 +525,14 @@ define void @OtherPersonality(ptr %ptrParam) sanitize_address personality ptr @d ; CHECK-NEXT: store i64 1172321806, ptr [[TMP14]], align 8 ; CHECK-NEXT: [[TMP24:%.*]] = icmp ne i64 [[TMP6]], 0 ; CHECK-NEXT: br i1 [[TMP24]], label [[TMP25:%.*]], label [[TMP26:%.*]] -; CHECK: 25: +; CHECK: 22: ; CHECK-NEXT: call void
@__asan_stack_free_0(i64 [[TMP6]], i64 64) ; CHECK-NEXT: br label [[TMP28:%.*]] -; CHECK: 26: +; CHECK: 23: ; CHECK-NEXT: [[TMP27:%.*]] = add i64 [[TMP20]], 0 ; CHECK-NEXT: call void @__asan_set_shadow_00(i64 [[TMP27]], i64 8) ; CHECK-NEXT: br label [[TMP28]] -; CHECK: 28: +; CHECK: 25: ; CHECK-NEXT: ret void ; CHECK: ehcleanup: ; CHECK-NEXT: [[TMP29:%.*]] = cleanuppad within none [] @@ -547,14 +540,14 @@ define void @OtherPersonality(ptr %ptrParam) sanitize_address personality ptr @d ; CHECK-NEXT: store i64 1172321806, ptr [[TMP14]], align 8 ; CHECK-NEXT: [[TMP31:%.*]] = icmp ne i64 [[TMP6]], 0 ; CHECK-NEXT: br i1 [[TMP31]], label [[TMP32:%.*]], label [[TMP33:%.*]] -; CHECK: 32: +; CHECK: 29: ; CHECK-NEXT: call void @__asan_stack_free_0(i64 [[TMP6]], i64 64) ; CHECK-NEXT: br label [[TMP35:%.*]] -; CHECK: 33: +; CHECK: 30: ; CHECK-NEXT: [[TMP34:%.*]] = add i64 [[TMP20]], 0 ; CHECK-NEXT: call void @__asan_set_shadow_00(i64 [[TMP34]], i64 8) ; CHECK-NEXT: br label [[TMP35]] -; CHECK: 35: +; CHECK: 32: ; CHECK-NEXT: cleanupret from [[TMP29]] unwind to caller ; entry: diff --git a/llvm/test/Instrumentation/AddressSanitizer/fake-stack.ll b/llvm/test/Instrumentation/AddressSanitizer/fake-stack.ll index 3cccabb1..1b00cd8 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/fake-stack.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/fake-stack.ll @@ -20,24 +20,20 @@ define void @Simple() uwtable sanitize_address { ; NEVER-LABEL: @Simple( ; NEVER-NEXT: entry: ; NEVER-NEXT: [[MYALLOCA:%.*]] = alloca i8, i64 64, align 32 -; NEVER-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 -; NEVER-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 32 -; NEVER-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr -; NEVER-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP0]] to ptr -; NEVER-NEXT: store i64 1102416563, ptr [[TMP3]], align 8 -; NEVER-NEXT: [[TMP4:%.*]] = add i64 [[TMP0]], 8 -; NEVER-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; NEVER-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 32 +; NEVER-NEXT: store i64 1102416563, ptr [[MYALLOCA]], align 8 +; NEVER-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 8 ; NEVER-NEXT: store i64 ptrtoint (ptr @___asan_gen_stack to i64), ptr [[TMP5]], align 8 -; NEVER-NEXT: [[TMP6:%.*]] = add i64 [[TMP0]], 16 -; NEVER-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; NEVER-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 16 ; NEVER-NEXT: store i64 ptrtoint (ptr @Simple to i64), ptr [[TMP7]], align 8 +; NEVER-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 ; NEVER-NEXT: [[TMP8:%.*]] = lshr i64 [[TMP0]], 3 ; NEVER-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 2147450880 ; NEVER-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 0 ; NEVER-NEXT: [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr ; NEVER-NEXT: store i64 -868083113472691727, ptr [[TMP11]], align 1 ; NEVER-NEXT: call void @Foo(ptr [[TMP2]]) -; NEVER-NEXT: store i64 1172321806, ptr [[TMP3]], align 8 +; NEVER-NEXT: store i64 1172321806, ptr [[MYALLOCA]], align 8 ; NEVER-NEXT: [[TMP12:%.*]] = add i64 [[TMP9]], 0 ; NEVER-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr ; NEVER-NEXT: store i64 0, ptr [[TMP13]], align 1 @@ -54,25 +50,22 @@ define void @Simple() uwtable sanitize_address { ; RUNTIME-NEXT: br label [[TMP4]] ; RUNTIME: 4: ; RUNTIME-NEXT: [[TMP5:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP3]], [[TMP2]] ] +; RUNTIME-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP5]] to ptr ; RUNTIME-NEXT: [[TMP6:%.*]] = icmp eq i64 [[TMP5]], 0 ; RUNTIME-NEXT: br i1 [[TMP6]], label [[TMP7:%.*]], label 
[[TMP9:%.*]] -; RUNTIME: 7: +; RUNTIME: 8: ; RUNTIME-NEXT: [[MYALLOCA:%.*]] = alloca i8, i64 64, align 32 -; RUNTIME-NEXT: [[TMP8:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 ; RUNTIME-NEXT: br label [[TMP9]] ; RUNTIME: 9: -; RUNTIME-NEXT: [[TMP10:%.*]] = phi i64 [ [[TMP5]], [[TMP4]] ], [ [[TMP8]], [[TMP7]] ] -; RUNTIME-NEXT: store i64 [[TMP10]], ptr [[ASAN_LOCAL_STACK_BASE]], align 8 -; RUNTIME-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 32 -; RUNTIME-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr -; RUNTIME-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP10]] to ptr +; RUNTIME-NEXT: [[TMP13:%.*]] = phi ptr [ [[TMP8]], [[TMP4]] ], [ [[MYALLOCA]], [[TMP7]] ] +; RUNTIME-NEXT: store ptr [[TMP13]], ptr [[ASAN_LOCAL_STACK_BASE]], align 8 +; RUNTIME-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP13]], i64 32 ; RUNTIME-NEXT: store i64 1102416563, ptr [[TMP13]], align 8 -; RUNTIME-NEXT: [[TMP14:%.*]] = add i64 [[TMP10]], 8 -; RUNTIME-NEXT: [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr +; RUNTIME-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP13]], i64 8 ; RUNTIME-NEXT: store i64 ptrtoint (ptr @___asan_gen_stack to i64), ptr [[TMP15]], align 8 -; RUNTIME-NEXT: [[TMP16:%.*]] = add i64 [[TMP10]], 16 -; RUNTIME-NEXT: [[TMP17:%.*]] = inttoptr i64 [[TMP16]] to ptr +; RUNTIME-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP13]], i64 16 ; RUNTIME-NEXT: store i64 ptrtoint (ptr @Simple to i64), ptr [[TMP17]], align 8 +; RUNTIME-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[TMP13]] to i64 ; RUNTIME-NEXT: [[TMP18:%.*]] = lshr i64 [[TMP10]], 3 ; RUNTIME-NEXT: [[TMP19:%.*]] = add i64 [[TMP18]], 2147450880 ; RUNTIME-NEXT: [[TMP20:%.*]] = add i64 [[TMP19]], 0 @@ -82,47 +75,43 @@ define void @Simple() uwtable sanitize_address { ; RUNTIME-NEXT: store i64 1172321806, ptr [[TMP13]], align 8 ; RUNTIME-NEXT: [[TMP22:%.*]] = icmp ne i64 [[TMP5]], 0 ; RUNTIME-NEXT: br i1 [[TMP22]], label [[TMP23:%.*]], label [[TMP30:%.*]] -; RUNTIME: 23: +; RUNTIME: 20: ; RUNTIME-NEXT: [[TMP24:%.*]] = add i64 [[TMP19]], 0 ; RUNTIME-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP24]] to ptr ; RUNTIME-NEXT: store i64 -723401728380766731, ptr [[TMP25]], align 1 -; RUNTIME-NEXT: [[TMP26:%.*]] = add i64 [[TMP5]], 56 -; RUNTIME-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP26]] to ptr +; RUNTIME-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP8]], i64 56 ; RUNTIME-NEXT: [[TMP28:%.*]] = load i64, ptr [[TMP27]], align 8 ; RUNTIME-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP28]] to ptr ; RUNTIME-NEXT: store i8 0, ptr [[TMP29]], align 1 ; RUNTIME-NEXT: br label [[TMP33:%.*]] -; RUNTIME: 30: +; RUNTIME: 26: ; RUNTIME-NEXT: [[TMP31:%.*]] = add i64 [[TMP19]], 0 ; RUNTIME-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP31]] to ptr ; RUNTIME-NEXT: store i64 0, ptr [[TMP32]], align 1 ; RUNTIME-NEXT: br label [[TMP33]] -; RUNTIME: 33: +; RUNTIME: 29: ; RUNTIME-NEXT: ret void ; ; ALWAYS-LABEL: @Simple( ; ALWAYS-NEXT: entry: ; ALWAYS-NEXT: [[ASAN_LOCAL_STACK_BASE:%.*]] = alloca i64, align 8 ; ALWAYS-NEXT: [[TMP0:%.*]] = call i64 @__asan_stack_malloc_always_0(i64 64) +; ALWAYS-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP0]] to ptr ; ALWAYS-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 0 ; ALWAYS-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP4:%.*]] -; ALWAYS: 2: +; ALWAYS: 3: ; ALWAYS-NEXT: [[MYALLOCA:%.*]] = alloca i8, i64 64, align 32 -; ALWAYS-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 ; ALWAYS-NEXT: br label [[TMP4]] ; ALWAYS: 4: -; ALWAYS-NEXT: [[TMP5:%.*]] = phi i64 [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP3]], [[TMP2]] ] -; ALWAYS-NEXT: store i64 [[TMP5]], ptr 
[[ASAN_LOCAL_STACK_BASE]], align 8 -; ALWAYS-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 32 -; ALWAYS-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr -; ALWAYS-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP5]] to ptr +; ALWAYS-NEXT: [[TMP8:%.*]] = phi ptr [ [[TMP3]], [[ENTRY:%.*]] ], [ [[MYALLOCA]], [[TMP2]] ] +; ALWAYS-NEXT: store ptr [[TMP8]], ptr [[ASAN_LOCAL_STACK_BASE]], align 8 +; ALWAYS-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP8]], i64 32 ; ALWAYS-NEXT: store i64 1102416563, ptr [[TMP8]], align 8 -; ALWAYS-NEXT: [[TMP9:%.*]] = add i64 [[TMP5]], 8 -; ALWAYS-NEXT: [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr +; ALWAYS-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP8]], i64 8 ; ALWAYS-NEXT: store i64 ptrtoint (ptr @___asan_gen_stack to i64), ptr [[TMP10]], align 8 -; ALWAYS-NEXT: [[TMP11:%.*]] = add i64 [[TMP5]], 16 -; ALWAYS-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr +; ALWAYS-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP8]], i64 16 ; ALWAYS-NEXT: store i64 ptrtoint (ptr @Simple to i64), ptr [[TMP12]], align 8 +; ALWAYS-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[TMP8]] to i64 ; ALWAYS-NEXT: [[TMP13:%.*]] = lshr i64 [[TMP5]], 3 ; ALWAYS-NEXT: [[TMP14:%.*]] = add i64 [[TMP13]], 2147450880 ; ALWAYS-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], 0 @@ -132,22 +121,21 @@ define void @Simple() uwtable sanitize_address { ; ALWAYS-NEXT: store i64 1172321806, ptr [[TMP8]], align 8 ; ALWAYS-NEXT: [[TMP17:%.*]] = icmp ne i64 [[TMP0]], 0 ; ALWAYS-NEXT: br i1 [[TMP17]], label [[TMP18:%.*]], label [[TMP25:%.*]] -; ALWAYS: 18: +; ALWAYS: 15: ; ALWAYS-NEXT: [[TMP19:%.*]] = add i64 [[TMP14]], 0 ; ALWAYS-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP19]] to ptr ; ALWAYS-NEXT: store i64 -723401728380766731, ptr [[TMP20]], align 1 -; ALWAYS-NEXT: [[TMP21:%.*]] = add i64 [[TMP0]], 56 -; ALWAYS-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP21]] to ptr +; ALWAYS-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[TMP3]], i64 56 ; ALWAYS-NEXT: [[TMP23:%.*]] = load i64, ptr [[TMP22]], align 8 ; ALWAYS-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr ; ALWAYS-NEXT: store i8 0, ptr [[TMP24]], align 1 ; ALWAYS-NEXT: br label [[TMP28:%.*]] -; ALWAYS: 25: +; ALWAYS: 21: ; ALWAYS-NEXT: [[TMP26:%.*]] = add i64 [[TMP14]], 0 ; ALWAYS-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP26]] to ptr ; ALWAYS-NEXT: store i64 0, ptr [[TMP27]], align 1 ; ALWAYS-NEXT: br label [[TMP28]] -; ALWAYS: 28: +; ALWAYS: 24: ; ALWAYS-NEXT: ret void ; entry: @@ -160,17 +148,13 @@ define void @Huge() uwtable sanitize_address { ; CHECK-LABEL: @Huge( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[MYALLOCA:%.*]] = alloca i8, i64 100288, align 32 -; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 32 -; CHECK-NEXT: [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr -; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP0]] to ptr -; CHECK-NEXT: store i64 1102416563, ptr [[TMP3]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[TMP0]], 8 -; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 32 +; CHECK-NEXT: store i64 1102416563, ptr [[MYALLOCA]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 8 ; CHECK-NEXT: store i64 ptrtoint (ptr @___asan_gen_stack.1 to i64), ptr [[TMP5]], align 8 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP0]], 16 -; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 16 ; CHECK-NEXT: store i64 ptrtoint (ptr @Huge to i64), ptr [[TMP7]], 
align 8 +; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 ; CHECK-NEXT: [[TMP8:%.*]] = lshr i64 [[TMP0]], 3 ; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 2147450880 ; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 0 @@ -189,7 +173,7 @@ define void @Huge() uwtable sanitize_address { ; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr ; CHECK-NEXT: store i64 -868082074056920077, ptr [[TMP19]], align 1 ; CHECK-NEXT: call void @Foo(ptr [[TMP2]]) -; CHECK-NEXT: store i64 1172321806, ptr [[TMP3]], align 8 +; CHECK-NEXT: store i64 1172321806, ptr [[MYALLOCA]], align 8 ; CHECK-NEXT: [[TMP20:%.*]] = add i64 [[TMP9]], 0 ; CHECK-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP20]] to ptr ; CHECK-NEXT: store i32 0, ptr [[TMP21]], align 1 diff --git a/llvm/test/Instrumentation/AddressSanitizer/lifetime.ll b/llvm/test/Instrumentation/AddressSanitizer/lifetime.ll index d1e0180..82e114e 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/lifetime.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/lifetime.ll @@ -16,17 +16,13 @@ define void @lifetime() sanitize_address { ; CHECK-DEFAULT-NEXT: [[TMP1:%.*]] = alloca i64, align 32 ; CHECK-DEFAULT-NEXT: store i64 0, ptr [[TMP1]], align 8 ; CHECK-DEFAULT-NEXT: [[MYALLOCA:%.*]] = alloca i8, i64 64, align 32 -; CHECK-DEFAULT-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 -; CHECK-DEFAULT-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 32 -; CHECK-DEFAULT-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr -; CHECK-DEFAULT-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP2]] to ptr -; CHECK-DEFAULT-NEXT: store i64 1102416563, ptr [[TMP5]], align 8 -; CHECK-DEFAULT-NEXT: [[TMP6:%.*]] = add i64 [[TMP2]], 8 -; CHECK-DEFAULT-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 32 +; CHECK-DEFAULT-NEXT: store i64 1102416563, ptr [[MYALLOCA]], align 8 +; CHECK-DEFAULT-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 8 ; CHECK-DEFAULT-NEXT: store i64 ptrtoint (ptr @___asan_gen_stack to i64), ptr [[TMP7]], align 8 -; CHECK-DEFAULT-NEXT: [[TMP8:%.*]] = add i64 [[TMP2]], 16 -; CHECK-DEFAULT-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr +; CHECK-DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 16 ; CHECK-DEFAULT-NEXT: store i64 ptrtoint (ptr @lifetime to i64), ptr [[TMP9]], align 8 +; CHECK-DEFAULT-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 ; CHECK-DEFAULT-NEXT: [[TMP10:%.*]] = lshr i64 [[TMP2]], 3 ; CHECK-DEFAULT-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 2147450880 ; CHECK-DEFAULT-NEXT: [[TMP12:%.*]] = add i64 [[TMP11]], 0 @@ -41,16 +37,16 @@ define void @lifetime() sanitize_address { ; CHECK-DEFAULT-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr ; CHECK-DEFAULT-NEXT: [[TMP20:%.*]] = load i8, ptr [[TMP19]], align 1 ; CHECK-DEFAULT-NEXT: [[TMP21:%.*]] = icmp ne i8 [[TMP20]], 0 -; CHECK-DEFAULT-NEXT: br i1 [[TMP21]], label %[[BB22:.*]], label %[[BB27:.*]], !prof [[PROF1:![0-9]+]] -; CHECK-DEFAULT: [[BB22]]: +; CHECK-DEFAULT-NEXT: br i1 [[TMP21]], label %[[BB18:.*]], label %[[BB23:.*]], !prof [[PROF1:![0-9]+]] +; CHECK-DEFAULT: [[BB18]]: ; CHECK-DEFAULT-NEXT: [[TMP23:%.*]] = and i64 [[TMP16]], 7 ; CHECK-DEFAULT-NEXT: [[TMP24:%.*]] = trunc i64 [[TMP23]] to i8 ; CHECK-DEFAULT-NEXT: [[TMP25:%.*]] = icmp sge i8 [[TMP24]], [[TMP20]] -; CHECK-DEFAULT-NEXT: br i1 [[TMP25]], label %[[BB26:.*]], label %[[BB27]] -; CHECK-DEFAULT: [[BB26]]: +; CHECK-DEFAULT-NEXT: br i1 [[TMP25]], label %[[BB22:.*]], label %[[BB23]] +; CHECK-DEFAULT: [[BB22]]: ; 
CHECK-DEFAULT-NEXT: call void @__asan_report_store1(i64 [[TMP16]]) #[[ATTR4:[0-9]+]] ; CHECK-DEFAULT-NEXT: unreachable -; CHECK-DEFAULT: [[BB27]]: +; CHECK-DEFAULT: [[BB23]]: ; CHECK-DEFAULT-NEXT: store volatile i8 0, ptr [[TMP4]], align 1 ; CHECK-DEFAULT-NEXT: [[TMP28:%.*]] = add i64 [[TMP11]], 4 ; CHECK-DEFAULT-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP28]] to ptr @@ -73,16 +69,16 @@ define void @lifetime() sanitize_address { ; CHECK-DEFAULT-NEXT: [[TMP41:%.*]] = inttoptr i64 [[TMP40]] to ptr ; CHECK-DEFAULT-NEXT: [[TMP42:%.*]] = load i8, ptr [[TMP41]], align 1 ; CHECK-DEFAULT-NEXT: [[TMP43:%.*]] = icmp ne i8 [[TMP42]], 0 -; CHECK-DEFAULT-NEXT: br i1 [[TMP43]], label %[[BB44:.*]], label %[[BB49:.*]], !prof [[PROF1]] -; CHECK-DEFAULT: [[BB44]]: +; CHECK-DEFAULT-NEXT: br i1 [[TMP43]], label %[[BB40:.*]], label %[[BB45:.*]], !prof [[PROF1]] +; CHECK-DEFAULT: [[BB40]]: ; CHECK-DEFAULT-NEXT: [[TMP45:%.*]] = and i64 [[TMP38]], 7 ; CHECK-DEFAULT-NEXT: [[TMP46:%.*]] = trunc i64 [[TMP45]] to i8 ; CHECK-DEFAULT-NEXT: [[TMP47:%.*]] = icmp sge i8 [[TMP46]], [[TMP42]] -; CHECK-DEFAULT-NEXT: br i1 [[TMP47]], label %[[BB48:.*]], label %[[BB49]] -; CHECK-DEFAULT: [[BB48]]: +; CHECK-DEFAULT-NEXT: br i1 [[TMP47]], label %[[BB44:.*]], label %[[BB45]] +; CHECK-DEFAULT: [[BB44]]: ; CHECK-DEFAULT-NEXT: call void @__asan_report_store1(i64 [[TMP38]]) #[[ATTR4]] ; CHECK-DEFAULT-NEXT: unreachable -; CHECK-DEFAULT: [[BB49]]: +; CHECK-DEFAULT: [[BB45]]: ; CHECK-DEFAULT-NEXT: store volatile i8 0, ptr [[TMP36]], align 1 ; CHECK-DEFAULT-NEXT: [[TMP50:%.*]] = ptrtoint ptr [[TMP36]] to i64 ; CHECK-DEFAULT-NEXT: call void @__asan_poison_stack_memory(i64 [[TMP50]], i64 40) @@ -95,16 +91,16 @@ define void @lifetime() sanitize_address { ; CHECK-DEFAULT-NEXT: [[TMP56:%.*]] = inttoptr i64 [[TMP55]] to ptr ; CHECK-DEFAULT-NEXT: [[TMP57:%.*]] = load i8, ptr [[TMP56]], align 1 ; CHECK-DEFAULT-NEXT: [[TMP58:%.*]] = icmp ne i8 [[TMP57]], 0 -; CHECK-DEFAULT-NEXT: br i1 [[TMP58]], label %[[BB59:.*]], label %[[BB64:.*]], !prof [[PROF1]] -; CHECK-DEFAULT: [[BB59]]: +; CHECK-DEFAULT-NEXT: br i1 [[TMP58]], label %[[BB55:.*]], label %[[BB60:.*]], !prof [[PROF1]] +; CHECK-DEFAULT: [[BB55]]: ; CHECK-DEFAULT-NEXT: [[TMP60:%.*]] = and i64 [[TMP53]], 7 ; CHECK-DEFAULT-NEXT: [[TMP61:%.*]] = trunc i64 [[TMP60]] to i8 ; CHECK-DEFAULT-NEXT: [[TMP62:%.*]] = icmp sge i8 [[TMP61]], [[TMP57]] -; CHECK-DEFAULT-NEXT: br i1 [[TMP62]], label %[[BB63:.*]], label %[[BB64]] -; CHECK-DEFAULT: [[BB63]]: +; CHECK-DEFAULT-NEXT: br i1 [[TMP62]], label %[[BB59:.*]], label %[[BB60]] +; CHECK-DEFAULT: [[BB59]]: ; CHECK-DEFAULT-NEXT: call void @__asan_report_store1(i64 [[TMP53]]) #[[ATTR4]] ; CHECK-DEFAULT-NEXT: unreachable -; CHECK-DEFAULT: [[BB64]]: +; CHECK-DEFAULT: [[BB60]]: ; CHECK-DEFAULT-NEXT: store volatile i8 0, ptr [[TMP4]], align 1 ; CHECK-DEFAULT-NEXT: [[TMP65:%.*]] = add i64 [[TMP11]], 4 ; CHECK-DEFAULT-NEXT: [[TMP66:%.*]] = inttoptr i64 [[TMP65]] to ptr @@ -112,7 +108,7 @@ define void @lifetime() sanitize_address { ; CHECK-DEFAULT-NEXT: [[TMP67:%.*]] = ptrtoint ptr [[TMP1]] to i64 ; CHECK-DEFAULT-NEXT: [[TMP68:%.*]] = load i64, ptr [[TMP1]], align 8 ; CHECK-DEFAULT-NEXT: call void @__asan_allocas_unpoison(i64 [[TMP68]], i64 [[TMP67]]) -; CHECK-DEFAULT-NEXT: store i64 1172321806, ptr [[TMP5]], align 8 +; CHECK-DEFAULT-NEXT: store i64 1172321806, ptr [[MYALLOCA]], align 8 ; CHECK-DEFAULT-NEXT: [[TMP69:%.*]] = add i64 [[TMP11]], 0 ; CHECK-DEFAULT-NEXT: [[TMP70:%.*]] = inttoptr i64 [[TMP69]] to ptr ; CHECK-DEFAULT-NEXT: store i64 0, ptr [[TMP70]], align 1 @@ 
-121,17 +117,13 @@ define void @lifetime() sanitize_address { ; CHECK-NO-DYNAMIC-LABEL: define void @lifetime( ; CHECK-NO-DYNAMIC-SAME: ) #[[ATTR0:[0-9]+]] { ; CHECK-NO-DYNAMIC-NEXT: [[MYALLOCA:%.*]] = alloca i8, i64 64, align 32 -; CHECK-NO-DYNAMIC-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 -; CHECK-NO-DYNAMIC-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 32 -; CHECK-NO-DYNAMIC-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr -; CHECK-NO-DYNAMIC-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP1]] to ptr -; CHECK-NO-DYNAMIC-NEXT: store i64 1102416563, ptr [[TMP4]], align 8 -; CHECK-NO-DYNAMIC-NEXT: [[TMP5:%.*]] = add i64 [[TMP1]], 8 -; CHECK-NO-DYNAMIC-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NO-DYNAMIC-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 32 +; CHECK-NO-DYNAMIC-NEXT: store i64 1102416563, ptr [[MYALLOCA]], align 8 +; CHECK-NO-DYNAMIC-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 8 ; CHECK-NO-DYNAMIC-NEXT: store i64 ptrtoint (ptr @___asan_gen_stack to i64), ptr [[TMP6]], align 8 -; CHECK-NO-DYNAMIC-NEXT: [[TMP7:%.*]] = add i64 [[TMP1]], 16 -; CHECK-NO-DYNAMIC-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr +; CHECK-NO-DYNAMIC-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[MYALLOCA]], i64 16 ; CHECK-NO-DYNAMIC-NEXT: store i64 ptrtoint (ptr @lifetime to i64), ptr [[TMP8]], align 8 +; CHECK-NO-DYNAMIC-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[MYALLOCA]] to i64 ; CHECK-NO-DYNAMIC-NEXT: [[TMP9:%.*]] = lshr i64 [[TMP1]], 3 ; CHECK-NO-DYNAMIC-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 2147450880 ; CHECK-NO-DYNAMIC-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 0 @@ -146,16 +138,16 @@ define void @lifetime() sanitize_address { ; CHECK-NO-DYNAMIC-NEXT: [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr ; CHECK-NO-DYNAMIC-NEXT: [[TMP19:%.*]] = load i8, ptr [[TMP18]], align 1 ; CHECK-NO-DYNAMIC-NEXT: [[TMP20:%.*]] = icmp ne i8 [[TMP19]], 0 -; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP20]], label %[[BB21:.*]], label %[[BB26:.*]], !prof [[PROF1:![0-9]+]] -; CHECK-NO-DYNAMIC: [[BB21]]: +; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP20]], label %[[BB17:.*]], label %[[BB22:.*]], !prof [[PROF1:![0-9]+]] +; CHECK-NO-DYNAMIC: [[BB17]]: ; CHECK-NO-DYNAMIC-NEXT: [[TMP22:%.*]] = and i64 [[TMP15]], 7 ; CHECK-NO-DYNAMIC-NEXT: [[TMP23:%.*]] = trunc i64 [[TMP22]] to i8 ; CHECK-NO-DYNAMIC-NEXT: [[TMP24:%.*]] = icmp sge i8 [[TMP23]], [[TMP19]] -; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP24]], label %[[BB25:.*]], label %[[BB26]] -; CHECK-NO-DYNAMIC: [[BB25]]: +; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP24]], label %[[BB21:.*]], label %[[BB22]] +; CHECK-NO-DYNAMIC: [[BB21]]: ; CHECK-NO-DYNAMIC-NEXT: call void @__asan_report_store1(i64 [[TMP15]]) #[[ATTR4:[0-9]+]] ; CHECK-NO-DYNAMIC-NEXT: unreachable -; CHECK-NO-DYNAMIC: [[BB26]]: +; CHECK-NO-DYNAMIC: [[BB22]]: ; CHECK-NO-DYNAMIC-NEXT: store volatile i8 0, ptr [[TMP3]], align 1 ; CHECK-NO-DYNAMIC-NEXT: [[TMP27:%.*]] = add i64 [[TMP10]], 4 ; CHECK-NO-DYNAMIC-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP27]] to ptr @@ -171,16 +163,16 @@ define void @lifetime() sanitize_address { ; CHECK-NO-DYNAMIC-NEXT: [[TMP34:%.*]] = inttoptr i64 [[TMP33]] to ptr ; CHECK-NO-DYNAMIC-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP34]], align 1 ; CHECK-NO-DYNAMIC-NEXT: [[TMP36:%.*]] = icmp ne i8 [[TMP35]], 0 -; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP36]], label %[[BB37:.*]], label %[[BB42:.*]], !prof [[PROF1]] -; CHECK-NO-DYNAMIC: [[BB37]]: +; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP36]], label %[[BB33:.*]], label %[[BB38:.*]], !prof [[PROF1]] +; CHECK-NO-DYNAMIC: [[BB33]]: ; CHECK-NO-DYNAMIC-NEXT: 
[[TMP38:%.*]] = and i64 [[TMP31]], 7 ; CHECK-NO-DYNAMIC-NEXT: [[TMP39:%.*]] = trunc i64 [[TMP38]] to i8 ; CHECK-NO-DYNAMIC-NEXT: [[TMP40:%.*]] = icmp sge i8 [[TMP39]], [[TMP35]] -; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP40]], label %[[BB41:.*]], label %[[BB42]] -; CHECK-NO-DYNAMIC: [[BB41]]: +; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP40]], label %[[BB37:.*]], label %[[BB38]] +; CHECK-NO-DYNAMIC: [[BB37]]: ; CHECK-NO-DYNAMIC-NEXT: call void @__asan_report_store1(i64 [[TMP31]]) #[[ATTR4]] ; CHECK-NO-DYNAMIC-NEXT: unreachable -; CHECK-NO-DYNAMIC: [[BB42]]: +; CHECK-NO-DYNAMIC: [[BB38]]: ; CHECK-NO-DYNAMIC-NEXT: store volatile i8 0, ptr [[ARR]], align 1 ; CHECK-NO-DYNAMIC-NEXT: call void @llvm.lifetime.end.p0(ptr [[ARR]]) ; CHECK-NO-DYNAMIC-NEXT: [[TMP43:%.*]] = add i64 [[TMP10]], 4 @@ -192,21 +184,21 @@ define void @lifetime() sanitize_address { ; CHECK-NO-DYNAMIC-NEXT: [[TMP48:%.*]] = inttoptr i64 [[TMP47]] to ptr ; CHECK-NO-DYNAMIC-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP48]], align 1 ; CHECK-NO-DYNAMIC-NEXT: [[TMP50:%.*]] = icmp ne i8 [[TMP49]], 0 -; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP50]], label %[[BB51:.*]], label %[[BB56:.*]], !prof [[PROF1]] -; CHECK-NO-DYNAMIC: [[BB51]]: +; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP50]], label %[[BB47:.*]], label %[[BB52:.*]], !prof [[PROF1]] +; CHECK-NO-DYNAMIC: [[BB47]]: ; CHECK-NO-DYNAMIC-NEXT: [[TMP52:%.*]] = and i64 [[TMP45]], 7 ; CHECK-NO-DYNAMIC-NEXT: [[TMP53:%.*]] = trunc i64 [[TMP52]] to i8 ; CHECK-NO-DYNAMIC-NEXT: [[TMP54:%.*]] = icmp sge i8 [[TMP53]], [[TMP49]] -; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP54]], label %[[BB55:.*]], label %[[BB56]] -; CHECK-NO-DYNAMIC: [[BB55]]: +; CHECK-NO-DYNAMIC-NEXT: br i1 [[TMP54]], label %[[BB51:.*]], label %[[BB52]] +; CHECK-NO-DYNAMIC: [[BB51]]: ; CHECK-NO-DYNAMIC-NEXT: call void @__asan_report_store1(i64 [[TMP45]]) #[[ATTR4]] ; CHECK-NO-DYNAMIC-NEXT: unreachable -; CHECK-NO-DYNAMIC: [[BB56]]: +; CHECK-NO-DYNAMIC: [[BB52]]: ; CHECK-NO-DYNAMIC-NEXT: store volatile i8 0, ptr [[TMP3]], align 1 ; CHECK-NO-DYNAMIC-NEXT: [[TMP57:%.*]] = add i64 [[TMP10]], 4 ; CHECK-NO-DYNAMIC-NEXT: [[TMP58:%.*]] = inttoptr i64 [[TMP57]] to ptr ; CHECK-NO-DYNAMIC-NEXT: store i8 -8, ptr [[TMP58]], align 1 -; CHECK-NO-DYNAMIC-NEXT: store i64 1172321806, ptr [[TMP4]], align 8 +; CHECK-NO-DYNAMIC-NEXT: store i64 1172321806, ptr [[MYALLOCA]], align 8 ; CHECK-NO-DYNAMIC-NEXT: [[TMP59:%.*]] = add i64 [[TMP10]], 0 ; CHECK-NO-DYNAMIC-NEXT: [[TMP60:%.*]] = inttoptr i64 [[TMP59]] to ptr ; CHECK-NO-DYNAMIC-NEXT: store i64 0, ptr [[TMP60]], align 1 diff --git a/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll b/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll index afa46e44..8b0940d 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll @@ -16,9 +16,8 @@ entry: call void @llvm.dbg.declare(metadata ptr %i.addr, metadata !12, metadata !DIExpression()), !dbg !13 ; CHECK: %asan_local_stack_base = alloca i64 - ; CHECK: %[[ALLOCA:.*]] = ptrtoint ptr %MyAlloca to i64 - ; CHECK: %[[PHI:.*]] = phi i64 {{.*}} %[[ALLOCA]], - ; CHECK: store i64 %[[PHI]], ptr %asan_local_stack_base + ; CHECK: %[[PHI:.*]] = phi ptr {{.*}} %MyAlloca + ; CHECK: store ptr %[[PHI]], ptr %asan_local_stack_base ; CHECK: #dbg_declare(ptr %asan_local_stack_base, [[VAR_I:![0-9]+]], !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 32), [[LOC_I:![0-9]+]] %0 = load i32, ptr %i.addr, align 4, !dbg !14 %add = add nsw i32 %0, 2, !dbg !15 diff --git 
a/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-byval-args.ll b/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-byval-args.ll index d85f217..7e14987 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-byval-args.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-byval-args.ll @@ -19,8 +19,7 @@ entry: ; CHECK-LABEL: foo ; CHECK: call i64 @__asan_stack_malloc ; CHECK: alloca i8, i64 {{.*}} align 64 -; CHECK: [[copyPtr:%[^ \t]+]] = inttoptr i64 %{{[^ \t]+}} to ptr -; CHECK: call void @llvm.memcpy{{[^%]+}}[[copyPtr]]{{[^%]+}} align 64 %a,{{[^,]+}}, +; CHECK: call void @llvm.memcpy{{[^%]+}}[[copyPtr:%[0-9]+]]{{[^%]+}} align 64 %a,{{[^,]+}}, ; CHECK: call i32 @bar(ptr [[copyPtr]]) ; CHECK: ret void @@ -38,8 +37,7 @@ entry: ; CHECK-LABEL: baz ; CHECK: call i64 @__asan_stack_malloc ; CHECK: alloca i8, i64 {{.*}} align 32 -; CHECK: [[copyPtr:%[^ \t]+]] = inttoptr i64 %{{[^ \t]+}} to ptr -; CHECK: call void @llvm.memcpy{{[^%]+}}[[copyPtr]]{{[^%]+}} align 4 %0,{{[^,]+}} +; CHECK: call void @llvm.memcpy{{[^%]+}}[[copyPtr:%[0-9]+]]{{[^%]+}} align 4 %0,{{[^,]+}} ; CHECK: call i32 @bar(ptr [[copyPtr]]) ; CHECK: ret void diff --git a/llvm/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll b/llvm/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll index d56cd34..c8478c8 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll @@ -19,15 +19,16 @@ entry: ; CHECK-RUNTIME: [[FAKE_STACK_BB:^[0-9]+]]: ; CHECK-RUNTIME: [[FAKE_STACK:%[0-9]+]] = phi i64 [ 0, %entry ], [ [[FAKE_STACK_RT]], %[[UAR_ENABLED_BB]] ] +; CHECK-RUNTIME: [[FAKE_STACK_PTR:%[0-9]+]] = inttoptr i64 [[FAKE_STACK]] to ptr +; CHECK-ALWAYS: [[FAKE_STACK_PTR:%[0-9]+]] = inttoptr i64 [[FAKE_STACK_RT]] to ptr ; CHECK-RUNTIME: icmp eq i64 [[FAKE_STACK]], 0 ; CHECK-ALWAYS: icmp eq i64 [[FAKE_STACK_RT]], 0 ; CHECK: [[NO_FAKE_STACK_BB:^[0-9]+]]: ; CHECK: %MyAlloca = alloca i8, i64 -; CHECK: [[ALLOCA:%[0-9]+]] = ptrtoint ptr %MyAlloca -; CHECK-RUNTIME: phi i64 [ [[FAKE_STACK]], %[[FAKE_STACK_BB]] ], [ [[ALLOCA]], %[[NO_FAKE_STACK_BB]] ] -; CHECK-ALWAYS: phi i64 [ [[FAKE_STACK_RT]], %entry ], [ [[ALLOCA]], %[[NO_FAKE_STACK_BB]] ] +; CHECK-RUNTIME: phi ptr [ [[FAKE_STACK_PTR]], %[[FAKE_STACK_BB]] ], [ %MyAlloca, %[[NO_FAKE_STACK_BB]] ] +; CHECK-ALWAYS: phi ptr [ [[FAKE_STACK_PTR]], %entry ], [ %MyAlloca, %[[NO_FAKE_STACK_BB]] ] ; CHECK: ret void diff --git a/llvm/test/MC/AArch64/data-directive-specifier.s b/llvm/test/MC/AArch64/data-directive-specifier.s index 2cb7eb3..2d1ec4f 100644 --- a/llvm/test/MC/AArch64/data-directive-specifier.s +++ b/llvm/test/MC/AArch64/data-directive-specifier.s @@ -12,6 +12,7 @@ l: # CHECK-NEXT: 0x8 R_AARCH64_PLT32 extern 0x4 # CHECK-NEXT: 0xC R_AARCH64_PLT32 g 0x8 # CHECK-NEXT: 0x10 R_AARCH64_PLT32 g 0x18 +# CHECK-NEXT: 0x14 R_AARCH64_FUNCINIT64 .text 0x0 # CHECK-NEXT: } .data .word l@plt - . @@ -21,6 +22,8 @@ l: .word g@plt - . 
+ 8 .word g@plt - .data + 8 +.quad l@funcinit + # CHECK: Section ({{.*}}) .rela.data1 { # CHECK-NEXT: 0x0 R_AARCH64_GOTPCREL32 data1 0x0 # CHECK-NEXT: 0x4 R_AARCH64_GOTPCREL32 extern 0x4 diff --git a/llvm/test/TableGen/directive1.td b/llvm/test/TableGen/directive1.td index 3eda077..475faf9 100644 --- a/llvm/test/TableGen/directive1.td +++ b/llvm/test/TableGen/directive1.td @@ -177,6 +177,7 @@ def TDL_DirA : Directive<[Spelling<"dira">]> { // CHECK-NEXT: static constexpr bool is_iterable = true; // CHECK-NEXT: }; // CHECK-NEXT: } // namespace llvm +// CHECK-EMPTY: // CHECK-NEXT: #endif // LLVM_Tdl_INC diff --git a/llvm/test/TableGen/directive2.td b/llvm/test/TableGen/directive2.td index a25197c..ccc0944 100644 --- a/llvm/test/TableGen/directive2.td +++ b/llvm/test/TableGen/directive2.td @@ -150,6 +150,7 @@ def TDL_DirA : Directive<[Spelling<"dira">]> { // CHECK-NEXT: static constexpr bool is_iterable = true; // CHECK-NEXT: }; // CHECK-NEXT: } // namespace llvm +// CHECK-EMPTY: // CHECK-NEXT: #endif // LLVM_Tdl_INC // IMPL: #ifdef GEN_FLANG_DIRECTIVE_CLAUSE_SETS diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-constant-propagation.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-constant-propagation.ll index fdab67a..afc98ce 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-constant-propagation.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-constant-propagation.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=dfa-jump-threading,sccp,simplifycfg %s | FileCheck %s +; RUN: opt -S -passes=dfa-jump-threading,sccp,simplifycfg -verify-dom-info=1 %s | FileCheck %s ; This test checks that a constant propagation is applied for a basic loop. ; Related to bug 44679. diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll index f45798b..5076517 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-analysis.ll @@ -1,6 +1,6 @@ ; REQUIRES: asserts -; RUN: opt -S -passes=dfa-jump-threading -debug-only=dfa-jump-threading -disable-output %s 2>&1 | FileCheck %s -; RUN: opt -S -passes=dfa-jump-threading -print-prof-data %s -o - | FileCheck %s --check-prefix=PROFILE +; RUN: opt -S -passes=dfa-jump-threading -verify-dom-info=1 -debug-only=dfa-jump-threading -disable-output %s 2>&1 | FileCheck %s +; RUN: opt -S -passes=dfa-jump-threading -verify-dom-info=1 -print-prof-data %s -o - | FileCheck %s --check-prefix=PROFILE ; This test checks that the analysis identifies all threadable paths in a ; simple CFG. A threadable path includes a list of basic blocks, the exit diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll index 092c854..426b51e 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-jump-threading-transform.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals -; RUN: opt -S -passes=dfa-jump-threading %s | FileCheck %s +; RUN: opt -S -passes=dfa-jump-threading -verify-dom-info=1 %s | FileCheck %s ; These tests check that the DFA jump threading transformation is applied ; properly to two CFGs. 
It checks that blocks are cloned, branches are updated, @@ -445,9 +445,67 @@ bb2: ; preds = %select.unfold unreachable } + +define i16 @DTU_update_crash() { +; CHECK-LABEL: @DTU_update_crash( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY_SELECTBLOCK:%.*]] +; CHECK: for.body.selectblock: +; CHECK-NEXT: br i1 false, label [[SWITCHBLOCK_JT0:%.*]], label [[SEL_SI_UNFOLD_FALSE_JT0:%.*]] +; CHECK: sel.si.unfold.false: +; CHECK-NEXT: br label [[SWITCHBLOCK:%.*]] +; CHECK: sel.si.unfold.false.jt0: +; CHECK-NEXT: [[DOTSI_UNFOLD_PHI_JT0:%.*]] = phi i32 [ 0, [[FOR_BODY_SELECTBLOCK]] ] +; CHECK-NEXT: br label [[SWITCHBLOCK_JT0]] +; CHECK: switchblock: +; CHECK-NEXT: [[SWITCHBLOCK_PHI:%.*]] = phi i32 [ poison, [[SEL_SI_UNFOLD_FALSE:%.*]] ] +; CHECK-NEXT: [[P_24_ADDR_3:%.*]] = phi i32 [ 0, [[SEL_SI_UNFOLD_FALSE]] ] +; CHECK-NEXT: switch i32 [[SWITCHBLOCK_PHI]], label [[CLEANUP:%.*]] [ +; CHECK-NEXT: i32 0, label [[FOR_INC:%.*]] +; CHECK-NEXT: i32 1, label [[CLEANUP]] +; CHECK-NEXT: i32 5, label [[FOR_BODY_SELECTBLOCK]] +; CHECK-NEXT: ] +; CHECK: switchblock.jt0: +; CHECK-NEXT: [[SWITCHBLOCK_PHI_JT0:%.*]] = phi i32 [ 0, [[FOR_BODY_SELECTBLOCK]] ], [ [[DOTSI_UNFOLD_PHI_JT0]], [[SEL_SI_UNFOLD_FALSE_JT0]] ] +; CHECK-NEXT: [[P_24_ADDR_3_JT0:%.*]] = phi i32 [ 0, [[FOR_BODY_SELECTBLOCK]] ], [ 0, [[SEL_SI_UNFOLD_FALSE_JT0]] ] +; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: br i1 false, label [[FOR_BODY_SELECTBLOCK]], label [[CLEANUP]] +; CHECK: cleanup: +; CHECK-NEXT: call void (...) @llvm.fake.use(i32 [[P_24_ADDR_3_JT0]]) +; CHECK-NEXT: ret i16 0 +; +entry: + br label %for.body.selectblock + +for.body.selectblock: ; preds = %for.inc, %switchblock, %entry + %sel = select i1 false, i32 0, i32 0 + br label %switchblock + +switchblock: ; preds = %for.body.selectblock + %switchblock.phi = phi i32 [ %sel, %for.body.selectblock ] + %p_24.addr.3 = phi i32 [ 0, %for.body.selectblock ] + switch i32 %switchblock.phi, label %cleanup [ + i32 0, label %for.inc + i32 1, label %cleanup + i32 5, label %for.body.selectblock + ] + +for.inc: ; preds = %switchblock + br i1 false, label %for.body.selectblock, label %cleanup + +cleanup: ; preds = %for.inc, %switchblock, %switchblock + call void (...) @llvm.fake.use(i32 %p_24.addr.3) + ret i16 0 +} + +declare void @llvm.fake.use(...) + !0 = !{!"function_entry_count", i32 10} !1 = !{!"branch_weights", i32 3, i32 5} ;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +;. ; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10} ; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 5} ;. diff --git a/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll b/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll index de38752..95d3ffa 100644 --- a/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll +++ b/llvm/test/Transforms/DFAJumpThreading/dfa-unfold-select.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes=dfa-jump-threading -dfa-early-exit-heuristic=false %s | FileCheck %s +; RUN: opt -S -passes=dfa-jump-threading -dfa-early-exit-heuristic=false -verify-dom-info=1 %s | FileCheck %s ; These tests check if selects are unfolded properly for jump threading ; opportunities. 
There are three different patterns to consider: diff --git a/llvm/test/Transforms/DFAJumpThreading/equivalent-states.ll b/llvm/test/Transforms/DFAJumpThreading/equivalent-states.ll index 4555dfb..71a469d 100644 --- a/llvm/test/Transforms/DFAJumpThreading/equivalent-states.ll +++ b/llvm/test/Transforms/DFAJumpThreading/equivalent-states.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: opt -S -passes=dfa-jump-threading %s | FileCheck %s +; RUN: opt -S -passes=dfa-jump-threading -verify-dom-info=1 %s | FileCheck %s declare void @do_something() declare void @user(i32) diff --git a/llvm/test/Transforms/DFAJumpThreading/single_succ_switch.ll b/llvm/test/Transforms/DFAJumpThreading/single_succ_switch.ll index 00500a7..cc117e7 100644 --- a/llvm/test/Transforms/DFAJumpThreading/single_succ_switch.ll +++ b/llvm/test/Transforms/DFAJumpThreading/single_succ_switch.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 -; RUN: opt -S -passes=dfa-jump-threading %s | FileCheck %s +; RUN: opt -S -passes=dfa-jump-threading -verify-dom-info=1 %s | FileCheck %s define void @pr60254() { ; CHECK-LABEL: define void @pr60254() { diff --git a/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll b/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll index 89b132e..9371fe2 100644 --- a/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll +++ b/llvm/test/Transforms/IndVarSimplify/pointer-loop-guards.ll @@ -18,7 +18,7 @@ define i64 @test_ptr_compare_guard(ptr %start, ptr %end) { ; CHECK-NEXT: br i1 [[C_1]], label %[[LOOP_LATCH]], label %[[EXIT_LOOPEXIT:.*]] ; CHECK: [[LOOP_LATCH]]: ; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 1 -; CHECK-NEXT: [[I64_IV_NEXT]] = add i64 [[I64_IV]], 1 +; CHECK-NEXT: [[I64_IV_NEXT]] = add nuw i64 [[I64_IV]], 1 ; CHECK-NEXT: [[C_2:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]] ; CHECK-NEXT: br i1 [[C_2]], label %[[EXIT_LOOPEXIT]], label %[[LOOP_HEADER]] ; CHECK: [[EXIT_LOOPEXIT]]: diff --git a/llvm/test/Transforms/InstCombine/select-safe-impliedcond-transforms.ll b/llvm/test/Transforms/InstCombine/select-safe-impliedcond-transforms.ll index ba34930..bc988a9 100644 --- a/llvm/test/Transforms/InstCombine/select-safe-impliedcond-transforms.ll +++ b/llvm/test/Transforms/InstCombine/select-safe-impliedcond-transforms.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt < %s -passes=instcombine -S | FileCheck %s define i1 @a_true_implies_b_true(i8 %z, i1 %X, i1 %Y) { @@ -34,15 +34,15 @@ define <2 x i1> @a_true_implies_b_true_vec(i8 %z0, <2 x i1> %X, <2 x i1> %Y) { ret <2 x i1> %res } -define i1 @a_true_implies_b_true2(i8 %z, i1 %X, i1 %Y) { +define i1 @a_true_implies_b_true2(i8 %z, i1 %X, i1 %Y) !prof !0 { ; CHECK-LABEL: @a_true_implies_b_true2( ; CHECK-NEXT: [[A:%.*]] = icmp ugt i8 [[Z:%.*]], 20 -; CHECK-NEXT: [[RES:%.*]] = select i1 [[A]], i1 [[X:%.*]], i1 false +; CHECK-NEXT: [[RES:%.*]] = select i1 [[A]], i1 [[X:%.*]], i1 false, !prof [[PROF1:![0-9]+]] ; CHECK-NEXT: ret i1 [[RES]] ; %a = icmp ugt i8 %z, 20 %b = icmp ugt i8 %z, 10 - %sel = select i1 %b, i1 %X, i1 %Y + %sel = select i1 %b, i1 %X, i1 %Y, !prof !1 %res = and i1 %a, %sel ret i1 %res } @@ -258,3 +258,10 @@ define i1 @neg_icmp_eq_implies_trunc(i8 %x, i1 %c) { %sel2 = select i1 %cmp, i1 true, i1 %sel1 ret i1 
%sel2 } + +!0 = !{!"function_entry_count", i64 1000} +!1 = !{!"branch_weights", i32 2, i32 3} +;. +; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i64 1000} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 2, i32 3} +;. diff --git a/llvm/test/Transforms/InstSimplify/ptrmask.ll b/llvm/test/Transforms/InstSimplify/ptrmask.ll index 5e7c636..a3483af 100644 --- a/llvm/test/Transforms/InstSimplify/ptrmask.ll +++ b/llvm/test/Transforms/InstSimplify/ptrmask.ll @@ -158,6 +158,26 @@ define ptr addrspace(1) @ptrmask_simplify_ptrmask_i32(ptr addrspace(1) %p) { ret ptr addrspace(1) %r } +define ptr @ptrmask_simplify_ptrtoaddr(ptr %p) { +; CHECK-LABEL: define ptr @ptrmask_simplify_ptrtoaddr +; CHECK-SAME: (ptr [[P:%.*]]) { +; CHECK-NEXT: ret ptr [[P]] +; + %m = ptrtoaddr ptr %p to i64 + %r = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 %m) + ret ptr %r +} + +define ptr addrspace(1) @ptrmask_simplify_ptrtoaddr_i32(ptr addrspace(1) %p) { +; CHECK-LABEL: define ptr addrspace(1) @ptrmask_simplify_ptrtoaddr_i32 +; CHECK-SAME: (ptr addrspace(1) [[P:%.*]]) { +; CHECK-NEXT: ret ptr addrspace(1) [[P]] +; + %m = ptrtoaddr ptr addrspace(1) %p to i32 + %r = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 %m) + ret ptr addrspace(1) %r +} + define ptr @ptrmask_simplify_aligned_unused(ptr align 64 %p) { ; CHECK-LABEL: define ptr @ptrmask_simplify_aligned_unused ; CHECK-SAME: (ptr align 64 [[P:%.*]]) { diff --git a/llvm/test/Transforms/LoopUnroll/scevunroll.ll b/llvm/test/Transforms/LoopUnroll/scevunroll.ll index fa55eab..bc63f79 100644 --- a/llvm/test/Transforms/LoopUnroll/scevunroll.ll +++ b/llvm/test/Transforms/LoopUnroll/scevunroll.ll @@ -465,8 +465,7 @@ define void @peel_int_eq_condition(i32 %start) { ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT_PEEL]], [[ENTRY_PEEL_NEWPH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] -; CHECK-NEXT: [[C_0:%.*]] = icmp eq i32 [[IV]], [[START]] -; CHECK-NEXT: br i1 [[C_0]], label [[IF_THEN:%.*]], label [[LOOP_LATCH]] +; CHECK-NEXT: br i1 false, label [[IF_THEN:%.*]], label [[LOOP_LATCH]] ; CHECK: if.then: ; CHECK-NEXT: call void @fn(i32 [[IV]]) ; CHECK-NEXT: br label [[LOOP_LATCH]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll index 3a88273..56a5663 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll @@ -2697,4 +2697,4 @@ for.body: ; preds = %for.body.lr.ph, %fo !9 = !{!"llvm.loop.vectorize.predicate.enable", i1 true} !10 = !{!"llvm.loop.vectorize.enable", i1 true} attributes #0 = { vscale_range(1,16) "target-features"="+sve" } -attributes #1 = { vscale_range(1,16) "target-features"="+neon,+dotprod,+sve" "cpu"="neoverse-v2" } +attributes #1 = { vscale_range(1,16) "target-features"="+neon,+dotprod,+sve" "target-cpu"="neoverse-v2" } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll index 3c2ae1c7..1e6bcb1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce.ll @@ -410,20 +410,32 @@ define i32 @zext_add_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 { ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ 
zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE3:%.*]], [[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE8:%.*]], [[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE9:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 32 +; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 +; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 +; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i8>, ptr [[TMP8]], align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> +; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = zext <16 x i8> [[WIDE_LOAD5]] to <16 x i32> +; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = zext <16 x i8> [[WIDE_LOAD6]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP4]]) ; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE3]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI1]], <16 x i32> [[TMP5]]) -; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 +; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE8]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI2]], <16 x i32> [[TMP10]]) +; CHECK-INTERLEAVED-NEXT: [[PARTIAL_REDUCE9]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI3]], <16 x i32> [[TMP7]]) +; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64 ; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[PARTIAL_REDUCE3]], [[PARTIAL_REDUCE]] -; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) +; CHECK-INTERLEAVED-NEXT: [[BIN_RDX10:%.*]] = add <4 x i32> [[PARTIAL_REDUCE8]], [[BIN_RDX]] +; CHECK-INTERLEAVED-NEXT: [[BIN_RDX11:%.*]] = add <4 x i32> [[PARTIAL_REDUCE9]], [[BIN_RDX10]] +; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX11]]) ; CHECK-INTERLEAVED-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK-INTERLEAVED: scalar.ph: ; @@ -432,25 +444,20 @@ define i32 @zext_add_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 { ; CHECK-MAXBW-NEXT: entry: ; CHECK-MAXBW-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK-MAXBW: vector.ph: -; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() -; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 16 -; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 1025, [[TMP3]] -; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 1025, [[N_MOD_VF]] ; CHECK-MAXBW-NEXT: br label 
[[VECTOR_BODY:%.*]] ; CHECK-MAXBW: vector.body: ; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] +; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[PARTIAL_REDUCE:%.*]], [[VECTOR_BODY]] ] ; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] -; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 16 x i8>, ptr [[TMP7]], align 1 -; CHECK-MAXBW-NEXT: [[TMP9:%.*]] = zext <vscale x 16 x i8> [[WIDE_LOAD]] to <vscale x 16 x i32> -; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <vscale x 4 x i32> @llvm.vector.partial.reduce.add.nxv4i32.nxv16i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 16 x i32> [[TMP9]]) -; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] -; CHECK-MAXBW-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-MAXBW-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 1 +; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> +; CHECK-MAXBW-NEXT: [[PARTIAL_REDUCE]] = call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> [[VEC_PHI]], <16 x i32> [[TMP1]]) +; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-MAXBW-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 +; CHECK-MAXBW-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-MAXBW: middle.block: -; CHECK-MAXBW-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[PARTIAL_REDUCE]]) -; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 1025, [[N_VEC]] -; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH:%.*]] +; CHECK-MAXBW-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE]]) +; CHECK-MAXBW-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK-MAXBW: scalar.ph: ; entry: @@ -693,20 +700,32 @@ define i32 @zext_sub_reduc_i8_i32_has_neon_dotprod(ptr %a) #1 { ; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi <16 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] ; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]] ; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i32 16 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i32 32 +; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP1]], i32 48 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1 ; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1 +; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1 +; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i8>, ptr [[TMP9]], 
align 1 ; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = zext <16 x i8> [[WIDE_LOAD]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[WIDE_LOAD2]] to <16 x i32> +; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = zext <16 x i8> [[WIDE_LOAD5]] to <16 x i32> +; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = zext <16 x i8> [[WIDE_LOAD6]] to <16 x i32> ; CHECK-INTERLEAVED-NEXT: [[TMP6]] = sub <16 x i32> [[VEC_PHI]], [[TMP4]] ; CHECK-INTERLEAVED-NEXT: [[TMP7]] = sub <16 x i32> [[VEC_PHI1]], [[TMP5]] -; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 +; CHECK-INTERLEAVED-NEXT: [[TMP10]] = sub <16 x i32> [[VEC_PHI2]], [[TMP12]] +; CHECK-INTERLEAVED-NEXT: [[TMP11]] = sub <16 x i32> [[VEC_PHI3]], [[TMP14]] +; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64 ; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-INTERLEAVED-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK-INTERLEAVED: middle.block: ; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <16 x i32> [[TMP7]], [[TMP6]] -; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX]]) +; CHECK-INTERLEAVED-NEXT: [[BIN_RDX7:%.*]] = add <16 x i32> [[TMP10]], [[BIN_RDX]] +; CHECK-INTERLEAVED-NEXT: [[BIN_RDX8:%.*]] = add <16 x i32> [[TMP11]], [[BIN_RDX7]] +; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[BIN_RDX8]]) ; CHECK-INTERLEAVED-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK-INTERLEAVED: scalar.ph: ; @@ -1093,9 +1112,124 @@ exit: ret i32 %add.lcssa } +define i64 @sext_reduction_i32_to_i64(ptr %arr, i64 %n) #1 { +; CHECK-INTERLEAVE1-LABEL: define i64 @sext_reduction_i32_to_i64( +; CHECK-INTERLEAVE1-SAME: ptr [[ARR:%.*]], i64 [[N:%.*]]) #[[ATTR2]] { +; CHECK-INTERLEAVE1-NEXT: entry: +; CHECK-INTERLEAVE1-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1) +; CHECK-INTERLEAVE1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], 2 +; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-INTERLEAVE1: vector.ph: +; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], 2 +; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]] +; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-INTERLEAVE1: vector.body: +; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[INDEX]] +; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 +; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = sext <2 x i32> [[WIDE_LOAD]] to <2 x i64> +; CHECK-INTERLEAVE1-NEXT: [[TMP2]] = add <2 x i64> [[VEC_PHI]], [[TMP1]] +; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-INTERLEAVE1: middle.block: +; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP2]]) +; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX]], [[N_VEC]] +; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; 
CHECK-INTERLEAVE1: scalar.ph: +; +; CHECK-INTERLEAVED-LABEL: define i64 @sext_reduction_i32_to_i64( +; CHECK-INTERLEAVED-SAME: ptr [[ARR:%.*]], i64 [[N:%.*]]) #[[ATTR2]] { +; CHECK-INTERLEAVED-NEXT: entry: +; CHECK-INTERLEAVED-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1) +; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], 8 +; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-INTERLEAVED: vector.ph: +; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], 8 +; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]] +; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-INTERLEAVED: vector.body: +; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[INDEX]] +; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 2 +; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 4 +; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 6 +; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 +; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD4:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 +; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 +; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4 +; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = sext <2 x i32> [[WIDE_LOAD]] to <2 x i64> +; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[WIDE_LOAD4]] to <2 x i64> +; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = sext <2 x i32> [[WIDE_LOAD5]] to <2 x i64> +; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = sext <2 x i32> [[WIDE_LOAD6]] to <2 x i64> +; CHECK-INTERLEAVED-NEXT: [[TMP8]] = add <2 x i64> [[VEC_PHI]], [[TMP14]] +; CHECK-INTERLEAVED-NEXT: [[TMP9]] = add <2 x i64> [[VEC_PHI1]], [[TMP5]] +; CHECK-INTERLEAVED-NEXT: [[TMP10]] = add <2 x i64> [[VEC_PHI2]], [[TMP6]] +; CHECK-INTERLEAVED-NEXT: [[TMP11]] = add <2 x i64> [[VEC_PHI3]], [[TMP7]] +; CHECK-INTERLEAVED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-INTERLEAVED-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-INTERLEAVED: middle.block: +; CHECK-INTERLEAVED-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP9]], [[TMP8]] +; CHECK-INTERLEAVED-NEXT: [[BIN_RDX7:%.*]] = add <2 x i64> [[TMP10]], [[BIN_RDX]] +; CHECK-INTERLEAVED-NEXT: [[BIN_RDX8:%.*]] = add <2 x i64> [[TMP11]], [[BIN_RDX7]] +; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX8]]) +; CHECK-INTERLEAVED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX]], [[N_VEC]] +; CHECK-INTERLEAVED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; 
CHECK-INTERLEAVED: scalar.ph: +; +; CHECK-MAXBW-LABEL: define i64 @sext_reduction_i32_to_i64( +; CHECK-MAXBW-SAME: ptr [[ARR:%.*]], i64 [[N:%.*]]) #[[ATTR2]] { +; CHECK-MAXBW-NEXT: entry: +; CHECK-MAXBW-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1) +; CHECK-MAXBW-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], 2 +; CHECK-MAXBW-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-MAXBW: vector.ph: +; CHECK-MAXBW-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], 2 +; CHECK-MAXBW-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]] +; CHECK-MAXBW-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK-MAXBW: vector.body: +; CHECK-MAXBW-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-MAXBW-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-MAXBW-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[INDEX]] +; CHECK-MAXBW-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP4]], align 4 +; CHECK-MAXBW-NEXT: [[TMP1:%.*]] = sext <2 x i32> [[WIDE_LOAD]] to <2 x i64> +; CHECK-MAXBW-NEXT: [[TMP2]] = add <2 x i64> [[VEC_PHI]], [[TMP1]] +; CHECK-MAXBW-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-MAXBW-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-MAXBW-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-MAXBW: middle.block: +; CHECK-MAXBW-NEXT: [[TMP5:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP2]]) +; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UMAX]], [[N_VEC]] +; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] +; CHECK-MAXBW: scalar.ph: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %acc = phi i64 [ 0, %entry ], [ %add, %loop ] + %gep = getelementptr inbounds i32, ptr %arr, i64 %iv + %load = load i32, ptr %gep + %sext = sext i32 %load to i64 + %add = add i64 %acc, %sext + %iv.next = add i64 %iv, 1 + %cmp = icmp ult i64 %iv.next, %n + br i1 %cmp, label %loop, label %exit + +exit: + ret i64 %add +} + !0 = distinct !{!0, !1} !1 = !{!"llvm.loop.vectorize.predicate.enable", i1 true} attributes #0 = { vscale_range(1,16) "target-features"="+sve" } -attributes #1 = { vscale_range(1,16) "target-features"="+neon,+dotprod,+sve" "cpu"="neoverse-v2" } +attributes #1 = { vscale_range(1,16) "target-features"="+neon,+dotprod,+sve" "target-cpu"="neoverse-v2" } attributes #2 = { "target-features"="+neon,+dotprod" } diff --git a/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll b/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll new file mode 100644 index 0000000..e4322cf --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/reduction-minmax-users-and-predicated.ll @@ -0,0 +1,588 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 +; RUN: opt -p loop-vectorize -force-vector-width=4 -S %s | FileCheck %s + +define i32 @umax_phi_used_outside(ptr %src, i32 %n) { +; CHECK-LABEL: define i32 @umax_phi_used_outside( +; CHECK-SAME: ptr [[SRC:%.*]], i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SPEC_SELECT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: 
[[GEP_SRC:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i32 [[IV]] +; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC]], align 1 +; CHECK-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i32 +; CHECK-NEXT: [[SPEC_SELECT]] = tail call i32 @llvm.umax.i32(i32 [[MAX]], i32 [[L_EXT]]) +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[MAX_LCSSA:%.*]] = phi i32 [ [[MAX]], %[[LOOP]] ] +; CHECK-NEXT: ret i32 [[MAX_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %max = phi i32 [ 0, %entry ], [ %spec.select, %loop ] + %gep.src = getelementptr inbounds i8, ptr %src, i32 %iv + %l = load i8, ptr %gep.src + %l.ext = zext i8 %l to i32 + %spec.select = tail call i32 @llvm.umax.i32(i32 %max, i32 %l.ext) + %iv.next = add i32 %iv, 1 + %ec = icmp eq i32 %iv, %n + br i1 %ec, label %exit, label %loop + +exit: + ret i32 %max +} + +define i32 @chained_smax(i32 %x, ptr %src) { +; CHECK-LABEL: define i32 @chained_smax( +; CHECK-SAME: i32 [[X:%.*]], ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 1, i8 2, i8 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[PRED_LOAD_CONTINUE6]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i8> [[VEC_IND]], splat (i8 1) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[BROADCAST_SPLAT]], <4 x i32> [[VEC_PHI]]) +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]] +; CHECK: [[PRED_LOAD_IF]]: +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]] +; CHECK: [[PRED_LOAD_CONTINUE]]: +; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_LOAD_IF]] ] +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1 +; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2:.*]] +; CHECK: [[PRED_LOAD_IF1]]: +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP11]], i32 1 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]] +; CHECK: [[PRED_LOAD_CONTINUE2]]: +; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ] +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2 +; CHECK-NEXT: br i1 
[[TMP14]], label %[[PRED_LOAD_IF3:.*]], label %[[PRED_LOAD_CONTINUE4:.*]] +; CHECK: [[PRED_LOAD_IF3]]: +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4 +; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP17]], i32 2 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE4]] +; CHECK: [[PRED_LOAD_CONTINUE4]]: +; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP13]], %[[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], %[[PRED_LOAD_IF3]] ] +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3 +; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_LOAD_IF5:.*]], label %[[PRED_LOAD_CONTINUE6]] +; CHECK: [[PRED_LOAD_IF5]]: +; CHECK-NEXT: [[TMP21:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP22]], align 4 +; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP23]], i32 3 +; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]] +; CHECK: [[PRED_LOAD_CONTINUE6]]: +; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x i32> [ [[TMP19]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], %[[PRED_LOAD_IF5]] ] +; CHECK-NEXT: [[TMP26]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP25]], <4 x i32> [[TMP1]]) +; CHECK-NEXT: [[TMP27:%.*]] = select <4 x i1> [[TMP0]], <4 x i32> [[TMP26]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) +; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP28:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP27]]) +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i32 [[TMP28]] +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %max = phi i32 [ 0, %entry ], [ %max.next, %loop ] + %gep.src = getelementptr [3 x i32], ptr %src, i64 %iv + %max.1 = tail call i32 @llvm.smax.i32(i32 %x, i32 %max) + %l = load i32, ptr %gep.src, align 4 + %max.next = tail call i32 @llvm.smax.i32(i32 %l, i32 %max.1) + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, 1 + br i1 %ec, label %exit, label %loop + +exit: + ret i32 %max.next +} + +define void @smax_with_invariant_store_user(ptr noalias %src, ptr %dst, i64 %n) { +; CHECK-LABEL: define void @smax_with_invariant_store_user( +; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> 
[[VEC_PHI]], <4 x i32> [[WIDE_LOAD]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]])
+; CHECK-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]])
+; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %max = phi i32 [ 0, %entry ], [ %max.next, %loop ]
+  %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
+  %l = load i32, ptr %gep.src, align 4
+  %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l)
+  store i32 %max.next, ptr %dst, align 4
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv, %n
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @smax_with_multiple_invariant_store_user_same_addr(ptr noalias %src, ptr %dst, i64 %n) {
+; CHECK-LABEL: define void @smax_with_multiple_invariant_store_user_same_addr(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]])
+; CHECK-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]])
+; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %max = phi i32 [ 0, %entry ], [ %max.next, %loop ]
+  %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
+  %l = load i32, ptr %gep.src, align 4
+  %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l)
+  store i32 %max.next, ptr %dst, align 4
+  %iv.next = add i64 %iv, 1
+  store i32 %max.next, ptr %dst, align 4
+  %ec = icmp eq i64 %iv, %n
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @smax_with_multiple_invariant_store_user_same_addr2(ptr noalias %src, ptr %dst, i64 %n) {
+; CHECK-LABEL: define void @smax_with_multiple_invariant_store_user_same_addr2(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]])
+; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: store i32 0, ptr [[DST]], align 4
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %max = phi i32 [ 0, %entry ], [ %max.next, %loop ]
+  %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
+  %l = load i32, ptr %gep.src, align 4
+  %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l)
+  store i32 %max.next, ptr %dst, align 4
+  %iv.next = add i64 %iv, 1
+  store i32 0, ptr %dst, align 4
+  %ec = icmp eq i64 %iv, %n
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @smax_with_multiple_invariant_store_user_same_addr3(ptr noalias %src, ptr %dst, i64 %n) {
+; CHECK-LABEL: define void @smax_with_multiple_invariant_store_user_same_addr3(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP2]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP2]])
+; CHECK-NEXT: store i32 [[TMP4]], ptr [[DST]], align 4
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]])
+; CHECK-NEXT: store i32 0, ptr [[DST]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %max = phi i32 [ 0, %entry ], [ %max.next, %loop ]
+  %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
+  %l = load i32, ptr %gep.src, align 4
+  %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l)
+  store i32 0, ptr %dst, align 4
+  %iv.next = add i64 %iv, 1
+  store i32 %max.next, ptr %dst, align 4
+  %ec = icmp eq i64 %iv, %n
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define void @smax_with_multiple_invariant_store_user_different_addr(ptr noalias %src, ptr noalias %dst, ptr noalias %dst.2, i64 %n) {
+; CHECK-LABEL: define void @smax_with_multiple_invariant_store_user_different_addr(
+; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], ptr noalias [[DST_2:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]])
+; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: store i32 [[MAX_NEXT]], ptr [[DST_2]], align 4
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %max = phi i32 [ 0, %entry ], [ %max.next, %loop ]
+  %gep.src = getelementptr inbounds i32, ptr %src, i64 %iv
+  %l = load i32, ptr %gep.src, align 4
+  %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l)
+  store i32 %max.next, ptr %dst, align 4
+  %iv.next = add i64 %iv, 1
+  store i32 %max.next, ptr %dst.2, align 4
+  %ec = icmp eq i64 %iv, %n
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret void
+}
+
+define i32 @chained_instructions_feeding_max1(i32 %x, ptr %src) {
+; CHECK-LABEL: define i32 @chained_instructions_feeding_max1(
+; CHECK-SAME: i32 [[X:%.*]], ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[MAX]], [[L]]
+; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[ADD]], i32 [[L]])
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi i32 [ [[MAX_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: ret i32 [[MAX_NEXT_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop: ; preds = %loop, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %max = phi i32 [ 0, %entry ], [ %max.next, %loop ]
+  %gep.src = getelementptr [3 x i32], ptr %src, i64 %iv
+  %l = load i32, ptr %gep.src, align 4
+  %add = add i32 %max, %l
+  %max.next = tail call i32 @llvm.smax.i32(i32 %add, i32 %l)
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv, 1
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret i32 %max.next
+}
+
+define i32 @chained_instructions_feeding_max2(i32 %x, ptr %src) {
+; CHECK-LABEL: define i32 @chained_instructions_feeding_max2(
+; CHECK-SAME: i32 [[X:%.*]], ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[MAX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr [3 x i32], ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[MAX_1:%.*]] = tail call i32 @llvm.smax.i32(i32 [[X]], i32 [[MAX]])
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[L]], [[MAX_1]]
+; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[ADD]], i32 100)
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi i32 [ [[MAX_NEXT]], %[[LOOP]] ]
+; CHECK-NEXT: ret i32 [[MAX_NEXT_LCSSA]]
+;
+entry:
+  br label %loop
+
+loop: ; preds = %loop, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %max = phi i32 [ 0, %entry ], [ %max.next, %loop ]
+  %gep.src = getelementptr [3 x i32], ptr %src, i64 %iv
+  %max.1 = tail call i32 @llvm.smax.i32(i32 %x, i32 %max)
+  %l = load i32, ptr %gep.src, align 4
+  %add = add i32 %l, %max.1
+  %max.next = tail call i32 @llvm.smax.i32(i32 %add, i32 100)
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv, 1
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret i32 %max.next
+}
+
+
+define i32 @test_predicated_smin(ptr %src) {
+; CHECK-LABEL: define i32 @test_predicated_smin(
+; CHECK-SAME: ptr [[SRC:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[PREDPHI:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr float, ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = fdiv <4 x float> [[WIDE_LOAD]], splat (float 3.000000e+00)
+; CHECK-NEXT: [[TMP3:%.*]] = fptosi <4 x float> [[TMP2]] to <4 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP3]])
+; CHECK-NEXT: [[PREDPHI]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP4]], <4 x i32> [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[PREDPHI]])
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret i32 [[TMP6]]
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %min = phi i32 [ 0, %entry ], [ %min.merge, %loop.latch ]
+  %gep.src = getelementptr float, ptr %src, i64 %iv
+  %l = load float, ptr %gep.src, align 4
+  %c = fcmp une float %l, 0.0
+  br i1 %c, label %then, label %loop.latch
+
+then:
+  %div = fdiv float %l, 3.0
+  %div.i32 = fptosi float %div to i32
+  %min.next = tail call i32 @llvm.smin.i32(i32 %min, i32 %div.i32)
+  br label %loop.latch
+
+loop.latch:
+  %min.merge = phi i32 [ %min.next, %then ], [ %min, %loop.header ]
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv, 111
+  br i1 %ec, label %exit, label %loop.header
+
+exit:
+  ret i32 %min.merge
+}
+
+define i32 @smax_reduction_multiple_incoming(ptr %src, i32 %n, i1 %cond) {
+; CHECK-LABEL: define i32 @smax_reduction_multiple_incoming(
+; CHECK-SAME: ptr [[SRC:%.*]], i32 [[N:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 [[COND]], label %[[LOOP_HEADER_PREHEADER:.*]], label %[[ELSE:.*]]
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: br label %[[LOOP_HEADER_PREHEADER]]
+; CHECK: [[LOOP_HEADER_PREHEADER]]:
+; CHECK-NEXT: [[IV_PH:%.*]] = phi i32 [ 10, %[[ELSE]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[MAX_PH:%.*]] = phi i32 [ 5, %[[ELSE]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[IV_PH]]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK: [[VECTOR_SCEVCHECK]]:
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[N]], [[IV_PH]]
+; CHECK-NEXT: br i1 [[TMP2]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[IV_PH]], [[N_VEC]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[MAX_PH]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i32 [[IV_PH]], [[INDEX]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
+; CHECK-NEXT: [[TMP5]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> [[TMP5]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[TMP3]], %[[MIDDLE_BLOCK]] ], [ [[IV_PH]], %[[LOOP_HEADER_PREHEADER]] ], [ [[IV_PH]], %[[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ [[MAX_PH]], %[[LOOP_HEADER_PREHEADER]] ], [ [[MAX_PH]], %[[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
+; CHECK: [[LOOP_HEADER]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP_HEADER]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
+; CHECK-NEXT: [[MAX:%.*]] = phi i32 [ [[MAX_NEXT:%.*]], %[[LOOP_HEADER]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ]
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP_SRC]], align 4
+; CHECK-NEXT: [[MAX_NEXT]] = tail call i32 @llvm.smax.i32(i32 [[MAX]], i32 [[L]])
+; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[MAX_NEXT_LCSSA:%.*]] = phi i32 [ [[MAX_NEXT]], %[[LOOP_HEADER]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: ret i32 [[MAX_NEXT_LCSSA]]
+;
+entry:
+  br i1 %cond, label %loop.header, label %else
+
+else:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ 10, %else ], [ %iv.next, %loop.header ]
+  %max = phi i32 [ 0, %entry ], [ 5, %else ], [ %max.next, %loop.header ]
+  %gep.src = getelementptr inbounds i32, ptr %src, i32 %iv
+  %l = load i32, ptr %gep.src, align 4
+  %max.next = tail call i32 @llvm.smax.i32(i32 %max, i32 %l)
+  %iv.next = add i32 %iv, 1
+  %ec = icmp eq i32 %iv, %n
+  br i1 %ec, label %exit, label %loop.header
+
+exit:
+  ret i32 %max.next
+}
diff --git a/llvm/test/Transforms/SROA/phi-and-select.ll b/llvm/test/Transforms/SROA/phi-and-select.ll
index 616617b..5d5a610 100644
--- a/llvm/test/Transforms/SROA/phi-and-select.ll
+++ b/llvm/test/Transforms/SROA/phi-and-select.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG
 ; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
@@ -36,11 +36,11 @@ exit:
   ret i32 %result
 }
-define i32 @test2() {
+define i32 @test2() !prof !0 {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[COND:%.*]] = icmp sle i32 0, 1
-; CHECK-NEXT: [[RESULT_SROA_SPECULATED:%.*]] = select i1 [[COND]], i32 1, i32 0
+; CHECK-NEXT: [[RESULT_SROA_SPECULATED:%.*]] = select i1 [[COND]], i32 1, i32 0, !prof [[PROF1:![0-9]+]]
 ; CHECK-NEXT: ret i32 [[RESULT_SROA_SPECULATED]]
 ;
 entry:
@@ -53,7 +53,7 @@ entry:
   %v1 = load i32, ptr %a1
   %cond = icmp sle i32 %v0, %v1
-  %select = select i1 %cond, ptr %a1, ptr %a
+  %select = select i1 %cond, ptr %a1, ptr %a, !prof !1
   %result = load i32, ptr %select
   ret i32 %result
@@ -870,3 +870,17 @@ define i8 @volatile_select(ptr %p, i1 %b) {
   %v2 = load i8, ptr %px
   ret i8 %v2
 }
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 3, i32 5}
+;.
+; CHECK-PRESERVE-CFG: attributes #[[ATTR0:[0-9]+]] = { sanitize_address }
+;.
+; CHECK-MODIFY-CFG: attributes #[[ATTR0:[0-9]+]] = { sanitize_address }
+;.
+; CHECK-PRESERVE-CFG: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10}
+; CHECK-PRESERVE-CFG: [[PROF1]] = !{!"branch_weights", i32 3, i32 5}
+;.
+; CHECK-MODIFY-CFG: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10}
+; CHECK-MODIFY-CFG: [[PROF1]] = !{!"branch_weights", i32 3, i32 5}
+;.
diff --git a/llvm/test/Transforms/SROA/phi-gep.ll b/llvm/test/Transforms/SROA/phi-gep.ll
index 776624c..45c3bbd 100644
--- a/llvm/test/Transforms/SROA/phi-gep.ll
+++ b/llvm/test/Transforms/SROA/phi-gep.ll
@@ -1,9 +1,12 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt -S -passes='sroa<preserve-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG
 ; RUN: opt -S -passes='sroa<modify-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG
 %pair = type { i32, i32 }
+;.
+; CHECK: @g = global %pair zeroinitializer, align 4
+;.
 define i32 @test_sroa_phi_gep(i1 %cond) {
 ; CHECK-LABEL: @test_sroa_phi_gep(
 ; CHECK-NEXT: entry:
@@ -334,18 +337,18 @@ exit:
   unreachable
 }
-define void @test_sroa_gep_phi_select_same_block(i1 %c1, i1 %c2, ptr %ptr) {
+define void @test_sroa_gep_phi_select_same_block(i1 %c1, i1 %c2, ptr %ptr) !prof !0 {
 ; CHECK-LABEL: @test_sroa_gep_phi_select_same_block(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [[PAIR:%.*]], align 8
 ; CHECK-NEXT: br label [[WHILE_BODY:%.*]]
 ; CHECK: while.body:
 ; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[ALLOCA]], [[ENTRY:%.*]] ], [ [[SELECT:%.*]], [[WHILE_BODY]] ]
-; CHECK-NEXT: [[SELECT]] = select i1 [[C1:%.*]], ptr [[PHI]], ptr [[PTR:%.*]]
+; CHECK-NEXT: [[SELECT]] = select i1 [[C1:%.*]], ptr [[PHI]], ptr [[PTR:%.*]], !prof [[PROF1:![0-9]+]]
 ; CHECK-NEXT: [[PHI_SROA_GEP:%.*]] = getelementptr inbounds [[PAIR]], ptr [[PHI]], i64 1
 ; CHECK-NEXT: [[PTR_SROA_GEP:%.*]] = getelementptr inbounds [[PAIR]], ptr [[PTR]], i64 1
-; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[C1]], ptr [[PHI_SROA_GEP]], ptr [[PTR_SROA_GEP]]
-; CHECK-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[WHILE_BODY]]
+; CHECK-NEXT: [[SELECT_SROA_SEL:%.*]] = select i1 [[C1]], ptr [[PHI_SROA_GEP]], ptr [[PTR_SROA_GEP]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[C2:%.*]], label [[EXIT:%.*]], label [[WHILE_BODY]], !prof [[PROF2:![0-9]+]]
 ; CHECK: exit:
 ; CHECK-NEXT: ret void
 ;
@@ -355,9 +358,9 @@ entry:
 while.body:
   %phi = phi ptr [ %alloca, %entry ], [ %select, %while.body ]
-  %select = select i1 %c1, ptr %phi, ptr %ptr
+  %select = select i1 %c1, ptr %phi, ptr %ptr, !prof !1
   %gep = getelementptr inbounds %pair, ptr %select, i64 1
-  br i1 %c2, label %exit, label %while.body
+  br i1 %c2, label %exit, label %while.body, !prof !2
 exit:
   ret void
@@ -747,6 +750,18 @@ declare ptr @foo()
 declare i32 @__gxx_personality_v0(...)
 declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
+
+!0 = !{!"function_entry_count", i32 10}
+!1 = !{!"branch_weights", i32 3, i32 5}
+!2 = !{!"branch_weights", i32 7, i32 11}
+
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 3, i32 5}
+; CHECK: [[PROF2]] = !{!"branch_weights", i32 7, i32 11}
+;.
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; CHECK-MODIFY-CFG: {{.*}}
 ; CHECK-PRESERVE-CFG: {{.*}}
diff --git a/llvm/test/Transforms/SROA/select-gep.ll b/llvm/test/Transforms/SROA/select-gep.ll
index b48b0f7..a701d78 100644
--- a/llvm/test/Transforms/SROA/select-gep.ll
+++ b/llvm/test/Transforms/SROA/select-gep.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt -S -passes='sroa<preserve-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG
 ; RUN: opt -S -passes='sroa<modify-cfg>' < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG
@@ -203,10 +203,10 @@ define i32 @test_select_idx_mem2reg(i1 %c) {
 ; Test gep with a select-like zext index unfolding on an alloca that is
 ; splittable and promotable.
-define i64 @test_select_like_zext_idx_mem2reg(i1 %c) {
+define i64 @test_select_like_zext_idx_mem2reg(i1 %c) !prof !0 {
 ; CHECK-LABEL: @test_select_like_zext_idx_mem2reg(
 ; CHECK-NEXT: [[IDX:%.*]] = zext i1 [[C:%.*]] to i64
-; CHECK-NEXT: [[RES:%.*]] = select i1 [[C]], i64 2, i64 1
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[C]], i64 2, i64 1, !prof [[PROF1:![0-9]+]]
 ; CHECK-NEXT: ret i64 [[RES]]
 ;
   %alloca = alloca [2 x i64], align 8
@@ -352,3 +352,16 @@ define i32 @test_select_idx_not_constant3(i1 %c, ptr %p, i64 %arg) {
   %res = load i32, ptr %gep, align 4
   ret i32 %res
 }
+
+!0 = !{!"function_entry_count", i32 10}
+;.
+; CHECK-PRESERVE-CFG: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+;.
+; CHECK-MODIFY-CFG: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+;.
+; CHECK-PRESERVE-CFG: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10}
+; CHECK-PRESERVE-CFG: [[PROF1]] = !{!"unknown", !"sroa"}
+;.
+; CHECK-MODIFY-CFG: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10}
+; CHECK-MODIFY-CFG: [[PROF1]] = !{!"unknown", !"sroa"}
+;.
diff --git a/llvm/test/Transforms/SROA/slice-width.ll b/llvm/test/Transforms/SROA/slice-width.ll
index eabb697..3b77e49 100644
--- a/llvm/test/Transforms/SROA/slice-width.ll
+++ b/llvm/test/Transforms/SROA/slice-width.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
 ; RUN: opt < %s -passes='sroa<preserve-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG
 ; RUN: opt < %s -passes='sroa<modify-cfg>' -S | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG
 target datalayout = "e-p:64:64:64-p1:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-f80:128-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
@@ -8,6 +8,10 @@ declare void @llvm.memset.p0.i32(ptr nocapture, i8, i32, i1) nounwind
 declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
 ; This tests that allocas are not split into slices that are not byte width multiple
+;.
+; CHECK: @foo_copy_source = external constant %union.Foo
+; CHECK: @i64_sink = global i64 0
+;.
 define void @no_split_on_non_byte_width(i32) {
 ; CHECK-LABEL: @no_split_on_non_byte_width(
 ; CHECK-NEXT: [[ARG_SROA_0:%.*]] = alloca i8, align 8
@@ -92,12 +96,12 @@ declare i32 @memcpy_vec3float_helper(ptr)
 ; PR18726: Check that SROA does not rewrite a 12-byte memcpy into a 16-byte
 ; vector store, hence accidentally putting gibberish onto the stack.
-define i32 @memcpy_vec3float_widening(ptr %x) {
+define i32 @memcpy_vec3float_widening(ptr %x) !prof !0 {
 ; CHECK-LABEL: @memcpy_vec3float_widening(
 ; CHECK-NEXT: entry:
 ; CHECK-NEXT: [[TMP1_SROA_0_0_COPYLOAD:%.*]] = load <3 x float>, ptr [[X:%.*]], align 4
 ; CHECK-NEXT: [[TMP1_SROA_0_0_VEC_EXPAND:%.*]] = shufflevector <3 x float> [[TMP1_SROA_0_0_COPYLOAD]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
-; CHECK-NEXT: [[TMP1_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x float> [[TMP1_SROA_0_0_VEC_EXPAND]], <4 x float> undef
+; CHECK-NEXT: [[TMP1_SROA_0_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x float> [[TMP1_SROA_0_0_VEC_EXPAND]], <4 x float> undef, !prof [[PROF1:![0-9]+]]
 ; CHECK-NEXT: [[TMP2:%.*]] = alloca [[S_VEC3FLOAT:%.*]], align 4
 ; CHECK-NEXT: [[TMP1_SROA_0_0_VEC_EXTRACT:%.*]] = shufflevector <4 x float> [[TMP1_SROA_0_0_VECBLEND]], <4 x float> poison, <3 x i32> <i32 0, i32 1, i32 2>
 ; CHECK-NEXT: store <3 x float> [[TMP1_SROA_0_0_VEC_EXTRACT]], ptr [[TMP2]], align 4
@@ -158,6 +162,15 @@ define i1 @presplit_overlarge_load() {
   %L2 = load i1, ptr %A
   ret i1 %L2
 }
+!0 = !{!"function_entry_count", i32 10}
+
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
+;.
+; CHECK: [[META0:![0-9]+]] = !{!"function_entry_count", i32 10}
+; CHECK: [[PROF1]] = !{!"unknown", !"sroa"}
+;.
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; CHECK-MODIFY-CFG: {{.*}}
 ; CHECK-PRESERVE-CFG: {{.*}}
diff --git a/llvm/test/tools/llvm-objcopy/DXContainer/dump-section-errors.yaml b/llvm/test/tools/llvm-objcopy/DXContainer/dump-section-errors.yaml
new file mode 100644
index 0000000..e748eecf
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/DXContainer/dump-section-errors.yaml
@@ -0,0 +1,27 @@
+# RUN: yaml2obj %s -o %t.dxbc
+# RUN: not llvm-objcopy --dump-section=FKE0=%t.fek0 %t.dxbc 2>&1 | FileCheck %s --check-prefix=CHECK-ZEROSIZE -DFILE=%t.fek0
+# RUN: not llvm-objcopy --dump-section=FKE3=%t.fek1 %t.dxbc 2>&1 | FileCheck %s --check-prefix=CHECK-MISSING -DFILE=%t.fek1
+# RUN: not llvm-objcopy --dump-section=FKE2=%t/does_not_exist/.fek2 %t.dxbc 2>&1 | FileCheck %s --check-prefix=CHECK-BAD-PATH -DFILE=%t/does_not_exist/.fek2 -DMSG=%errc_ENOENT
+
+# CHECK-ZEROSIZE: error: '[[FILE]]': part 'FKE0' is empty
+# CHECK-MISSING: error: '[[FILE]]': part 'FKE3' not found
+# CHECK-BAD-PATH: error: '[[FILE]]': [[MSG]]
+
+--- !dxcontainer
+Header:
+  Hash: [ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+          0x0, 0x0, 0x0, 0x0, 0x0, 0x0 ]
+  Version:
+    Major: 1
+    Minor: 0
+  FileSize: 108
+  PartCount: 3
+  PartOffsets: [ 60, 68, 76 ]
+Parts:
+  - Name: FKE0
+    Size: 0
+  - Name: FKE1
+    Size: 0
+  - Name: FKE2
+    Size: 8
+...
diff --git a/llvm/test/tools/llvm-objcopy/DXContainer/dump-section.yaml b/llvm/test/tools/llvm-objcopy/DXContainer/dump-section.yaml
new file mode 100644
index 0000000..7d80a2c
--- /dev/null
+++ b/llvm/test/tools/llvm-objcopy/DXContainer/dump-section.yaml
@@ -0,0 +1,278 @@
+# RUN: yaml2obj %s -o %t.dxbc
+# RUN: llvm-objcopy --dump-section=DXIL=%t.bc %t.dxbc
+# RUN: llvm-dis %t.bc -o - | FileCheck %s --check-prefix=BITCODE
+# RUN: wc -c %t.bc | FileCheck %s --check-prefix=DXIL-SIZE
+
+## Verify that when dumping the DXIL part we get a valid bitcode file.
+# BITCODE: define void @main()
+## Verify the size of the bitcode data.
+# DXIL-SIZE: 1708
+
+## Dump the PSV0 part and verify its size.
+# RUN: llvm-objcopy --dump-section=PSV0=%t.psv0 %t.dxbc
+# RUN: wc -c %t.psv0 | FileCheck %s --check-prefix=PSV0-SIZE
+# RUN: od -v -Ax -t x1 %t.psv0 | FileCheck %s --check-prefix=PSV0-CONTENTS
+# PSV0-SIZE: 76
+
+# For a compute shader the structure size is encoded followed by a bunch of 00'd
+# bytes until you get to the unused wave size min and max (0xffff), followed by
+# the shader stage (5 for compute).
+# TODO: Update this test to use objdump or obj2yaml once we support
+# --add-section in objcopy. See issue:
+# https://github.com/llvm/llvm-project/issues/162159.
+# PSV0-CONTENTS: 0000 34 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+# PSV0-CONTENTS: 0010 00 00 00 00 00 00 00 00 ff ff ff ff 05 00 00 00
+
+--- !dxcontainer
+Header:
+  Hash: [ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+          0x0, 0x0, 0x0, 0x0, 0x0, 0x0 ]
+  Version:
+    Major: 1
+    Minor: 0
+  FileSize: 1872
+  PartCount: 2
+  PartOffsets: [ 40, 1780 ]
+Parts:
+  - Name: DXIL
+    Size: 1732
+    Program:
+      MajorVersion: 6
+      MinorVersion: 0
+      ShaderKind: 5
+      Size: 433
+      DXILMajorVersion: 1
+      DXILMinorVersion: 0
+      DXILSize: 1708
+      DXIL: [ 0x42, 0x43, 0xC0, 0xDE, 0x21, 0xC, 0x0, 0x0, 0xA8,
+        0x1, 0x0, 0x0, 0xB, 0x82, 0x20, 0x0, 0x2, 0x0,
+        0x0, 0x0, 0x13, 0x0, 0x0, 0x0, 0x7, 0x81, 0x23,
+        0x91, 0x41, 0xC8, 0x4, 0x49, 0x6, 0x10, 0x32,
+        0x39, 0x92, 0x1, 0x84, 0xC, 0x25, 0x5, 0x8, 0x19,
+        0x1E, 0x4, 0x8B, 0x62, 0x80, 0x10, 0x45, 0x2,
+        0x42, 0x92, 0xB, 0x42, 0x84, 0x10, 0x32, 0x14,
+        0x38, 0x8, 0x18, 0x4B, 0xA, 0x32, 0x42, 0x88,
+        0x48, 0x90, 0x14, 0x20, 0x43, 0x46, 0x88, 0xA5,
+        0x0, 0x19, 0x32, 0x42, 0xE4, 0x48, 0xE, 0x90,
+        0x11, 0x22, 0xC4, 0x50, 0x41, 0x51, 0x81, 0x8C,
+        0xE1, 0x83, 0xE5, 0x8A, 0x4, 0x21, 0x46, 0x6,
+        0x51, 0x18, 0x0, 0x0, 0x4, 0x0, 0x0, 0x0, 0x1B,
+        0x90, 0xE0, 0xFF, 0xFF, 0xFF, 0xFF, 0x7, 0xC0,
+        0x1, 0x24, 0x80, 0x2, 0x0, 0x0, 0x0, 0x49, 0x18,
+        0x0, 0x0, 0x1, 0x0, 0x0, 0x0, 0x13, 0x82, 0x0,
+        0x0, 0x89, 0x20, 0x0, 0x0, 0x11, 0x0, 0x0, 0x0,
+        0x32, 0x22, 0x8, 0x9, 0x20, 0x64, 0x85, 0x4, 0x13,
+        0x22, 0xA4, 0x84, 0x4, 0x13, 0x22, 0xE3, 0x84,
+        0xA1, 0x90, 0x14, 0x12, 0x4C, 0x88, 0x8C, 0xB,
+        0x84, 0x84, 0x4C, 0x10, 0x20, 0x73, 0x4, 0x8,
+        0xC1, 0x65, 0xC3, 0x85, 0x2C, 0xE8, 0x3, 0x40,
+        0x14, 0x91, 0x4E, 0xD1, 0x4A, 0x48, 0x44, 0x54,
+        0x11, 0xC3, 0x9, 0x30, 0xC4, 0x18, 0x1, 0x30,
+        0x2, 0x50, 0x82, 0x21, 0x1A, 0x8, 0x98, 0x23,
+        0x0, 0x3, 0x0, 0x13, 0x14, 0x72, 0xC0, 0x87, 0x74,
+        0x60, 0x87, 0x36, 0x68, 0x87, 0x79, 0x68, 0x3,
+        0x72, 0xC0, 0x87, 0xD, 0xAE, 0x50, 0xE, 0x6D,
+        0xD0, 0xE, 0x7A, 0x50, 0xE, 0x6D, 0x0, 0xF, 0x7A,
+        0x30, 0x7, 0x72, 0xA0, 0x7, 0x73, 0x20, 0x7, 0x6D,
+        0x90, 0xE, 0x71, 0xA0, 0x7, 0x73, 0x20, 0x7, 0x6D,
+        0x90, 0xE, 0x78, 0xA0, 0x7, 0x78, 0xD0, 0x6, 0xE9,
+        0x10, 0x7, 0x76, 0xA0, 0x7, 0x71, 0x60, 0x7, 0x6D,
+        0x90, 0xE, 0x73, 0x20, 0x7, 0x7A, 0x30, 0x7, 0x72,
+        0xD0, 0x6, 0xE9, 0x60, 0x7, 0x74, 0xA0, 0x7, 0x76,
+        0x40, 0x7, 0x6D, 0x60, 0xE, 0x71, 0x60, 0x7, 0x7A,
+        0x10, 0x7, 0x76, 0xD0, 0x6, 0xE6, 0x30, 0x7, 0x72,
+        0xA0, 0x7, 0x73, 0x20, 0x7, 0x6D, 0x60, 0xE, 0x76,
+        0x40, 0x7, 0x7A, 0x60, 0x7, 0x74, 0xD0, 0x6, 0xEE,
+        0x80, 0x7, 0x7A, 0x10, 0x7, 0x76, 0xA0, 0x7, 0x73,
+        0x20, 0x7, 0x7A, 0x60, 0x7, 0x74, 0x30, 0xE4,
+        0x21, 0x0, 0x0, 0x8, 0x0, 0x0, 0x0, 0x2, 0x0,
+        0x0, 0x0, 0x20, 0xB, 0x4, 0x7, 0x0, 0x0, 0x0,
+        0x32, 0x1E, 0x98, 0xC, 0x19, 0x11, 0x4C, 0x90,
+        0x8C, 0x9, 0x26, 0x47, 0xC6, 0x4, 0x43, 0xBA,
+        0x12, 0x28, 0x88, 0x62, 0x18, 0x1, 0x28, 0x84,
+        0x22, 0x0, 0x0, 0x0, 0x79, 0x18, 0x0, 0x0, 0xE5,
+        0x0, 0x0, 0x0, 0x33, 0x8, 0x80, 0x1C, 0xC4, 0xE1,
+        0x1C, 0x66, 0x14, 0x1, 0x3D, 0x88, 0x43, 0x38,
+        0x84, 0xC3, 0x8C, 0x42, 0x80, 0x7, 0x79, 0x78,
+        0x7, 0x73, 0x98, 0x71, 0xC, 0xE6, 0x0, 0xF, 0xED,
+        0x10, 0xE, 0xF4, 0x80, 0xE, 0x33, 0xC, 0x42, 0x1E,
+        0xC2, 0xC1, 0x1D, 0xCE, 0xA1, 0x1C, 0x66, 0x30,
+        0x5, 0x3D, 0x88, 0x43, 0x38, 0x84, 0x83, 0x1B,
+        0xCC, 0x3, 0x3D, 0xC8, 0x43, 0x3D, 0x8C, 0x3,
+        0x3D, 0xCC, 0x78, 0x8C, 0x74, 0x70, 0x7, 0x7B,
+        0x8, 0x7, 0x79, 0x48, 0x87, 0x70, 0x70, 0x7, 0x7A,
+        0x70, 0x3, 0x76, 0x78, 0x87, 0x70, 0x20, 0x87,
+        0x19, 0xCC, 0x11, 0xE, 0xEC, 0x90, 0xE, 0xE1,
+        0x30, 0xF, 0x6E, 0x30, 0xF, 0xE3, 0xF0, 0xE, 0xF0,
+        0x50, 0xE, 0x33, 0x10, 0xC4, 0x1D, 0xDE, 0x21,
+        0x1C, 0xD8, 0x21, 0x1D, 0xC2, 0x61, 0x1E, 0x66,
+        0x30, 0x89, 0x3B, 0xBC, 0x83, 0x3B, 0xD0, 0x43,
+        0x39, 0xB4, 0x3, 0x3C, 0xBC, 0x83, 0x3C, 0x84,
+        0x3, 0x3B, 0xCC, 0xF0, 0x14, 0x76, 0x60, 0x7,
+        0x7B, 0x68, 0x7, 0x37, 0x68, 0x87, 0x72, 0x68,
+        0x7, 0x37, 0x80, 0x87, 0x70, 0x90, 0x87, 0x70,
+        0x60, 0x7, 0x76, 0x28, 0x7, 0x76, 0xF8, 0x5, 0x76,
+        0x78, 0x87, 0x77, 0x80, 0x87, 0x5F, 0x8, 0x87,
+        0x71, 0x18, 0x87, 0x72, 0x98, 0x87, 0x79, 0x98,
+        0x81, 0x2C, 0xEE, 0xF0, 0xE, 0xEE, 0xE0, 0xE,
+        0xF5, 0xC0, 0xE, 0xEC, 0x30, 0x3, 0x62, 0xC8,
+        0xA1, 0x1C, 0xE4, 0xA1, 0x1C, 0xCC, 0xA1, 0x1C,
+        0xE4, 0xA1, 0x1C, 0xDC, 0x61, 0x1C, 0xCA, 0x21,
+        0x1C, 0xC4, 0x81, 0x1D, 0xCA, 0x61, 0x6, 0xD6,
+        0x90, 0x43, 0x39, 0xC8, 0x43, 0x39, 0x98, 0x43,
+        0x39, 0xC8, 0x43, 0x39, 0xB8, 0xC3, 0x38, 0x94,
+        0x43, 0x38, 0x88, 0x3, 0x3B, 0x94, 0xC3, 0x2F,
+        0xBC, 0x83, 0x3C, 0xFC, 0x82, 0x3B, 0xD4, 0x3,
+        0x3B, 0xB0, 0xC3, 0xC, 0xC7, 0x69, 0x87, 0x70,
+        0x58, 0x87, 0x72, 0x70, 0x83, 0x74, 0x68, 0x7,
+        0x78, 0x60, 0x87, 0x74, 0x18, 0x87, 0x74, 0xA0,
+        0x87, 0x19, 0xCE, 0x53, 0xF, 0xEE, 0x0, 0xF, 0xF2,
+        0x50, 0xE, 0xE4, 0x90, 0xE, 0xE3, 0x40, 0xF, 0xE1,
+        0x20, 0xE, 0xEC, 0x50, 0xE, 0x33, 0x20, 0x28,
+        0x1D, 0xDC, 0xC1, 0x1E, 0xC2, 0x41, 0x1E, 0xD2,
+        0x21, 0x1C, 0xDC, 0x81, 0x1E, 0xDC, 0xE0, 0x1C,
+        0xE4, 0xE1, 0x1D, 0xEA, 0x1, 0x1E, 0x66, 0x18,
+        0x51, 0x38, 0xB0, 0x43, 0x3A, 0x9C, 0x83, 0x3B,
+        0xCC, 0x50, 0x24, 0x76, 0x60, 0x7, 0x7B, 0x68,
+        0x7, 0x37, 0x60, 0x87, 0x77, 0x78, 0x7, 0x78,
+        0x98, 0x51, 0x4C, 0xF4, 0x90, 0xF, 0xF0, 0x50,
+        0xE, 0x33, 0x1E, 0x6A, 0x1E, 0xCA, 0x61, 0x1C,
+        0xE8, 0x21, 0x1D, 0xDE, 0xC1, 0x1D, 0x7E, 0x1,
+        0x1E, 0xE4, 0xA1, 0x1C, 0xCC, 0x21, 0x1D, 0xF0,
+        0x61, 0x6, 0x54, 0x85, 0x83, 0x38, 0xCC, 0xC3,
+        0x3B, 0xB0, 0x43, 0x3D, 0xD0, 0x43, 0x39, 0xFC,
+        0xC2, 0x3C, 0xE4, 0x43, 0x3B, 0x88, 0xC3, 0x3B,
+        0xB0, 0xC3, 0x8C, 0xC5, 0xA, 0x87, 0x79, 0x98,
+        0x87, 0x77, 0x18, 0x87, 0x74, 0x8, 0x7, 0x7A,
+        0x28, 0x7, 0x72, 0x98, 0x81, 0x5C, 0xE3, 0x10,
+        0xE, 0xEC, 0xC0, 0xE, 0xE5, 0x50, 0xE, 0xF3, 0x30,
+        0x23, 0xC1, 0xD2, 0x41, 0x1E, 0xE4, 0xE1, 0x17,
+        0xD8, 0xE1, 0x1D, 0xDE, 0x1, 0x1E, 0x66, 0x48,
+        0x19, 0x3B, 0xB0, 0x83, 0x3D, 0xB4, 0x83, 0x1B,
+        0x84, 0xC3, 0x38, 0x8C, 0x43, 0x39, 0xCC, 0xC3,
+        0x3C, 0xB8, 0xC1, 0x39, 0xC8, 0xC3, 0x3B, 0xD4,
+        0x3, 0x3C, 0xCC, 0x48, 0xB4, 0x71, 0x8, 0x7, 0x76,
+        0x60, 0x7, 0x71, 0x8, 0x87, 0x71, 0x58, 0x87,
+        0x19, 0xDB, 0xC6, 0xE, 0xEC, 0x60, 0xF, 0xED,
+        0xE0, 0x6, 0xF0, 0x20, 0xF, 0xE5, 0x30, 0xF, 0xE5,
+        0x20, 0xF, 0xF6, 0x50, 0xE, 0x6E, 0x10, 0xE, 0xE3,
+        0x30, 0xE, 0xE5, 0x30, 0xF, 0xF3, 0xE0, 0x6, 0xE9,
+        0xE0, 0xE, 0xE4, 0x50, 0xE, 0xF8, 0x30, 0x23,
+        0xE2, 0xEC, 0x61, 0x1C, 0xC2, 0x81, 0x1D, 0xD8,
+        0xE1, 0x17, 0xEC, 0x21, 0x1D, 0xE6, 0x21, 0x1D,
+        0xC4, 0x21, 0x1D, 0xD8, 0x21, 0x1D, 0xE8, 0x21,
+        0x1F, 0x66, 0x20, 0x9D, 0x3B, 0xBC, 0x43, 0x3D,
+        0xB8, 0x3, 0x39, 0x94, 0x83, 0x39, 0xCC, 0x58,
+        0xBC, 0x70, 0x70, 0x7, 0x77, 0x78, 0x7, 0x7A,
+        0x8, 0x7, 0x7A, 0x48, 0x87, 0x77, 0x70, 0x87,
+        0x19, 0xCB, 0xE7, 0xE, 0xEF, 0x30, 0xF, 0xE1,
+        0xE0, 0xE, 0xE9, 0x40, 0xF, 0xE9, 0xA0, 0xF, 0xE5,
+        0x30, 0xC3, 0x1, 0x3, 0x73, 0xA8, 0x7, 0x77, 0x18,
+        0x87, 0x5F, 0x98, 0x87, 0x70, 0x70, 0x87, 0x74,
+        0xA0, 0x87, 0x74, 0xD0, 0x87, 0x72, 0x98, 0x81,
+        0x84, 0x41, 0x39, 0xE0, 0xC3, 0x38, 0xB0, 0x43,
+        0x3D, 0x90, 0x43, 0x39, 0xCC, 0x40, 0xC4, 0xA0,
+        0x1D, 0xCA, 0xA1, 0x1D, 0xE0, 0x41, 0x1E, 0xDE,
+        0xC1, 0x1C, 0x66, 0x24, 0x63, 0x30, 0xE, 0xE1,
+        0xC0, 0xE, 0xEC, 0x30, 0xF, 0xE9, 0x40, 0xF, 0xE5,
+        0x30, 0x43, 0x21, 0x83, 0x75, 0x18, 0x7, 0x73,
+        0x48, 0x87, 0x5F, 0xA0, 0x87, 0x7C, 0x80, 0x87,
+        0x72, 0x98, 0xB1, 0x94, 0x1, 0x3C, 0x8C, 0xC3,
+        0x3C, 0x94, 0xC3, 0x38, 0xD0, 0x43, 0x3A, 0xBC,
+        0x83, 0x3B, 0xCC, 0xC3, 0x8C, 0xC5, 0xC, 0x48,
+        0x21, 0x15, 0x42, 0x61, 0x1E, 0xE6, 0x21, 0x1D,
+        0xCE, 0xC1, 0x1D, 0x52, 0x81, 0x14, 0x66, 0x4C,
+        0x67, 0x30, 0xE, 0xEF, 0x20, 0xF, 0xEF, 0xE0,
+        0x6, 0xEF, 0x50, 0xF, 0xF4, 0x30, 0xF, 0xE9, 0x40,
+        0xE, 0xE5, 0xE0, 0x6, 0xE6, 0x20, 0xF, 0xE1, 0xD0,
+        0xE, 0xE5, 0x30, 0xA3, 0x40, 0x83, 0x76, 0x68,
+        0x7, 0x79, 0x8, 0x87, 0x19, 0x52, 0x1A, 0xB8,
+        0xC3, 0x3B, 0x84, 0x3, 0x3B, 0xA4, 0x43, 0x38,
+        0xCC, 0x83, 0x1B, 0x84, 0x3, 0x39, 0x90, 0x83,
+        0x3C, 0xCC, 0x3, 0x3C, 0x84, 0xC3, 0x38, 0x94,
+        0xC3, 0xC, 0x46, 0xD, 0xC6, 0x21, 0x1C, 0xD8,
+        0x81, 0x1D, 0xCA, 0xA1, 0x1C, 0x7E, 0x81, 0x1E,
+        0xF2, 0x1, 0x1E, 0xCA, 0x61, 0xC6, 0xB1, 0x6,
+        0xEE, 0xF0, 0xE, 0xE6, 0x20, 0xF, 0xE5, 0x50,
+        0xE, 0x33, 0x1C, 0x36, 0x20, 0x7, 0x7C, 0x70,
+        0x3, 0x77, 0x78, 0x7, 0x77, 0xA8, 0x7, 0x77, 0x48,
+        0x7, 0x73, 0x78, 0x7, 0x79, 0x68, 0x87, 0x19,
+        0x55, 0x1B, 0x90, 0x3, 0x3E, 0xB8, 0xC1, 0x38,
+        0xBC, 0x83, 0x3B, 0xD0, 0x83, 0x3C, 0xBC, 0x3,
+        0x3B, 0x98, 0x3, 0x3B, 0xBC, 0xC3, 0x3D, 0xB8,
+        0x1, 0x3A, 0xA4, 0x83, 0x3B, 0xD0, 0xC3, 0x3C,
+        0xCC, 0x58, 0xDC, 0x80, 0x1C, 0xF0, 0xC1, 0xD,
+        0xE0, 0x41, 0x1E, 0xCA, 0x61, 0x1C, 0xD2, 0x61,
+        0x1E, 0xCA, 0x1, 0x0, 0x79, 0x28, 0x0, 0x0, 0x52,
+        0x0, 0x0, 0x0, 0xC2, 0x3C, 0x90, 0x40, 0x86, 0x10,
+        0x19, 0x32, 0xE2, 0x64, 0x90, 0x40, 0x46, 0x2,
+        0x19, 0x23, 0x23, 0x46, 0x2, 0x13, 0x24, 0xC6,
+        0x0, 0x13, 0x74, 0xCE, 0x61, 0x8C, 0x2D, 0xCC,
+        0xED, 0xC, 0xC4, 0xAE, 0x4C, 0x6E, 0x2E, 0xED,
+        0xCD, 0xD, 0x44, 0x46, 0xC6, 0x5, 0xC6, 0x5, 0xE6,
+        0x2C, 0x8D, 0xE, 0x4, 0xE5, 0x2C, 0x8D, 0xE, 0xE8,
+        0x2C, 0x8D, 0xE, 0xAD, 0x4E, 0xCC, 0x65, 0xEC,
+        0xAD, 0x4D, 0x87, 0x8D, 0xCD, 0xAE, 0xED, 0x85,
+        0x8D, 0xCD, 0xAE, 0xAD, 0x5, 0x4E, 0xEE, 0x4D,
+        0xAD, 0x6C, 0x8C, 0xCE, 0xE5, 0x2C, 0x8D, 0xE,
+        0xA4, 0xEC, 0xC6, 0x86, 0xA6, 0x2C, 0x26, 0x7,
+        0xA6, 0xAC, 0xC, 0x26, 0x26, 0xE7, 0x46, 0x6C,
+        0x2C, 0xA6, 0xC, 0x66, 0xA6, 0x6C, 0xC6, 0x4C,
+        0x6, 0x86, 0x6C, 0x4C, 0x6, 0x46, 0xCC, 0x66,
+        0x6C, 0x2C, 0xC, 0x27, 0x46, 0x6C, 0x86, 0x6C,
+        0x2C, 0xE5, 0x8, 0x63, 0x73, 0x87, 0x68, 0xB,
+        0x4B, 0x73, 0x3B, 0xCA, 0xDD, 0x18, 0x5A, 0x98,
+        0xDC, 0xD7, 0x5C, 0x9A, 0x5E, 0xD9, 0x69, 0xCC,
+        0xE4, 0xC2, 0xDA, 0xCA, 0x5A, 0xE0, 0xDE, 0xD2,
+        0xDC, 0xE8, 0xCA, 0xE4, 0x86, 0x20, 0x1C, 0xC1,
+        0x10, 0x84, 0x43, 0x18, 0x82, 0x70, 0xC, 0x43,
+        0x10, 0xE, 0x62, 0x8, 0x42, 0x1, 0xC, 0x41, 0x38,
+        0x8A, 0x21, 0x8, 0x87, 0x31, 0x6, 0xC1, 0x38,
+        0xC6, 0x10, 0x4, 0x63, 0x18, 0x4, 0x24, 0x19,
+        0x83, 0x60, 0x24, 0x63, 0x18, 0xC, 0xC3, 0x18,
+        0x83, 0xB0, 0x44, 0x63, 0x28, 0x94, 0x1, 0x0,
+        0xA4, 0x31, 0xC, 0x6, 0xB1, 0x8C, 0x61, 0x60,
+        0xA, 0xC6, 0x24, 0x64, 0x78, 0x2E, 0x76, 0x61,
+        0x6C, 0x76, 0x65, 0x72, 0x43, 0x9, 0x18, 0xA3,
+        0xB0, 0xB1, 0xD9, 0xB5, 0xB9, 0xA4, 0x91, 0x95,
+        0xB9, 0xD1, 0xD, 0x25, 0x68, 0x8C, 0x43, 0x86,
+        0xE7, 0x32, 0x87, 0x16, 0x46, 0x56, 0x26, 0xD7,
+        0xF4, 0x46, 0x56, 0xC6, 0x36, 0x94, 0xC0, 0x31,
+        0xA, 0x19, 0x9E, 0x8B, 0x5D, 0x99, 0xDC, 0x5C,
+        0xDA, 0x9B, 0xDB, 0x50, 0x82, 0xC7, 0x38, 0x64,
+        0x78, 0x2E, 0x65, 0x6E, 0x74, 0x72, 0x79, 0x50,
+        0x6F, 0x69, 0x6E, 0x74, 0x73, 0x43, 0x9, 0x24,
+        0x13, 0xB1, 0xB1, 0xD9, 0xB5, 0xB9, 0xB4, 0xBD,
+        0x91, 0xD5, 0xB1, 0x95, 0xB9, 0x98, 0xB1, 0x85,
+        0x9D, 0xCD, 0xD, 0x45, 0x98, 0x28, 0x0, 0x71,
+        0x20, 0x0, 0x0, 0x2, 0x0, 0x0, 0x0, 0x6, 0x40,
+        0x30, 0x0, 0xD2, 0x0, 0x0, 0x0, 0x61, 0x20, 0x0,
+        0x0, 0x6, 0x0, 0x0, 0x0, 0x13, 0x4, 0x1, 0x86,
+        0x3, 0x1, 0x0, 0x0, 0x2, 0x0, 0x0, 0x0, 0x7, 0x50,
+        0x10, 0xCD, 0x14, 0x61, 0x0, 0x0, 0x0, 0x0, 0x0,
+        0x0, 0x0, 0x0, 0x0, 0x0 ]
+  - Name: PSV0
+    Size: 76
+    PSVInfo:
+      Version: 3
+      ShaderStage: 5
+      MinimumWaveLaneCount: 0
+      MaximumWaveLaneCount: 4294967295
+      UsesViewID: 0
+      SigInputVectors: 0
+      SigOutputVectors: [ 0, 0, 0, 0 ]
+      NumThreadsX: 1
+      NumThreadsY: 1
+      NumThreadsZ: 1
+      EntryName: main
+      ResourceStride: 24
+      Resources: []
+      SigInputElements: []
+      SigOutputElements: []
+      SigPatchOrPrimElements: []
+      InputOutputMap:
+        - [ ]
+        - [ ]
+        - [ ]
+        - [ ]
+...
diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test
index fd1492f..bcffd40 100644
--- a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test
+++ b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map.test
@@ -34,6 +34,7 @@
 # CHECK-NEXT: {
 # CHECK-NEXT: ID: 0
 # CHECK-NEXT: Offset: 0x0
+# CHECK-NEXT: Hash: 0x0
 # CHECK-NEXT: Size: 0x1
 # CHECK-NEXT: HasReturn: No
 # CHECK-NEXT: HasTailCall: Yes
@@ -50,6 +51,7 @@
 # CHECK-NEXT: ID: 2
 # CHECK-NEXT: Offset: 0x3
 # CHECK-NEXT: Callsite End Offsets: [1, 3]
+# CHECK-NEXT: Hash: 0x123
 # CHECK-NEXT: Size: 0x7
 # CHECK-NEXT: HasReturn: Yes
 # CHECK-NEXT: HasTailCall: No
@@ -144,8 +146,8 @@ Sections:
     ShSize: [[SIZE=<none>]]
     Link: .text
    Entries:
-      - Version: 3
-        Feature: 0x28
+      - Version: 4
+        Feature: 0x68
         BBRanges:
           - BaseAddress: [[ADDR=0x11111]]
             BBEntries:
@@ -160,6 +162,7 @@ Sections:
                Size: 0x4
                Metadata: 0x15
                CallsiteEndOffsets: [ 0x1 , 0x2 ]
+               Hash: 0x123
      - Version: 2
        BBRanges:
          - BaseAddress: 0x22222
diff --git a/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml b/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml
index dc14025..7a22efe 100644
--- a/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml
+++ b/llvm/test/tools/obj2yaml/ELF/bb-addr-map.yaml
@@ -162,6 +162,92 @@ Sections:
     BBRanges:
       - BaseAddress: 0x20
+## Check that obj2yaml can dump basic block hash in the .llvm_bb_addr_map section.
+
+# RUN: yaml2obj --docnum=4 %s -o %t4
+# RUN: obj2yaml %t4 | FileCheck %s --check-prefix=BBHASH
+
+# BBHASH: --- !ELF
+# BBHASH-NEXT: FileHeader:
+# BBHASH-NEXT: Class: ELFCLASS64
+# BBHASH-NEXT: Data: ELFDATA2LSB
+# BBHASH-NEXT: Type: ET_EXEC
+# BBHASH-NEXT: Sections:
+# BBHASH-NEXT: - Name: .llvm_bb_addr_map
+# BBHASH-NEXT: Type: SHT_LLVM_BB_ADDR_MAP
+# BBHASH-NEXT: Entries:
+# BBHASH-NEXT: - Version: 4
+# BBHASH-NEXT: Feature: 0x40
+# BBHASH-NEXT: BBRanges:
+# BBHASH-NEXT: - BBEntries:
+# BBHASH-NEXT: - ID: 0
+# BBHASH-NEXT: AddressOffset: 0x1
+# BBHASH-NEXT: Size: 0x2
+# BBHASH-NEXT: Metadata: 0x3
+# BBHASH-NEXT: Hash: 0x1
+# BBHASH-NEXT: - ID: 2
+# BBHASH-NEXT: AddressOffset: 0x4
+# BBHASH-NEXT: Size: 0x5
+# BBHASH-NEXT: Metadata: 0x6
+# BBHASH-NEXT: Hash: 0x2
+# BBHASH-NEXT: - ID: 4
+# BBHASH-NEXT: AddressOffset: 0xFFFFFFFFFFFFFFF7
+# BBHASH-NEXT: Size: 0xFFFFFFFFFFFFFFF8
+# BBHASH-NEXT: Metadata: 0xFFFFFFFFFFFFFFF9
+# BBHASH-NEXT: Hash: 0x3
+# BBHASH-NEXT: - Version: 4
+# BBHASH-NEXT: Feature: 0x68
+# BBHASH-NEXT: BBRanges:
+# BBHASH-NEXT: - BaseAddress: 0xFFFFFFFFFFFFFF20
+# BBHASH-NEXT: BBEntries:
+# BBHASH-NEXT: - ID: 6
+# BBHASH-NEXT: AddressOffset: 0xA
+# BBHASH-NEXT: Size: 0xB
+# BBHASH-NEXT: Metadata: 0xC
+# BBHASH-NEXT: CallsiteEndOffsets: [ 0x1, 0x2 ]
+# BBHASH-NEXT: Hash: 0x123
+
+--- !ELF
+FileHeader:
+  Class: ELFCLASS64
+  Data: ELFDATA2LSB
+  Type: ET_EXEC
+Sections:
+  - Name: .llvm_bb_addr_map
+    Type: SHT_LLVM_BB_ADDR_MAP
+    Entries:
+      - Version: 4
+        Feature: 0x40
+        BBRanges:
+          - BaseAddress: 0x0
+            BBEntries:
+              - ID: 0
+                AddressOffset: 0x1
+                Size: 0x2
+                Metadata: 0x3
+                Hash: 0x1
+              - ID: 2
+                AddressOffset: 0x4
+                Size: 0x5
+                Metadata: 0x6
+                Hash: 0x2
+              - ID: 4
+                AddressOffset: 0xFFFFFFFFFFFFFFF7
+                Size: 0xFFFFFFFFFFFFFFF8
+                Metadata: 0xFFFFFFFFFFFFFFF9
+                Hash: 0x3
+      - Version: 4
+        Feature: 0x68
+        BBRanges:
+          - BaseAddress: 0xFFFFFFFFFFFFFF20
+            BBEntries:
+              - ID: 6
+                AddressOffset: 0xA
+                Size: 0xB
+                Metadata: 0xC
+                CallsiteEndOffsets: [ 0x1, 0x2 ]
+                Hash: 0x123
+
 ## Check that obj2yaml uses the "Content" tag to describe an .llvm_bb_addr_map section
 ## when it can't extract the entries, for example, when the section is truncated, or
 ## when an invalid 'NumBlocks' or 'NumBBRanges` field is specified.
diff --git a/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml b/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml
index 418f90f..339e419 100644
--- a/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml
+++ b/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml
@@ -72,6 +72,13 @@
 # CHECK-NEXT: 0000: 03202000 00000000 0000010E 01000203
 # CHECK-NEXT: )
+# Case 10: Specify basic block hash.
+# CHECK: Name: .llvm_bb_addr_map (1)
+# CHECK: SectionData (
+# CHECK-NEXT: 0000: 04602000 00000000 0000010E 01000203
+# CHECK-NEXT: 0010: 23010000 00000000
+# CHECK-NEXT: )
+
 --- !ELF
 FileHeader:
@@ -176,6 +183,22 @@ Sections:
         Metadata: 0x00000003
         CallsiteEndOffsets: []
+## 10) We can produce a SHT_LLVM_BB_ADDR_MAP section with basic block hash.
+  - Name: '.llvm_bb_addr_map (10)'
+    Type: SHT_LLVM_BB_ADDR_MAP
+    Entries:
+      - Version: 4
+        Feature: 0x60
+        BBRanges:
+          - BaseAddress: 0x0000000000000020
+            BBEntries:
+              - ID: 14
+                AddressOffset: 0x00000001
+                Size: 0x00000002
+                Metadata: 0x00000003
+                CallsiteEndOffsets: []
+                Hash: 0x123
+
 ## Check we can't use Entries at the same time as either Content or Size.
 # RUN: not yaml2obj --docnum=2 -DCONTENT="00" %s 2>&1 | FileCheck %s --check-prefix=INVALID
 # RUN: not yaml2obj --docnum=2 -DSIZE="0" %s 2>&1 | FileCheck %s --check-prefix=INVALID
@@ -197,7 +220,7 @@ Sections:
 ## Check that yaml2obj generates a warning when we use unsupported versions.
 # RUN: yaml2obj --docnum=3 %s 2>&1 | FileCheck %s --check-prefix=INVALID-VERSION
-# INVALID-VERSION: warning: unsupported SHT_LLVM_BB_ADDR_MAP version: 4; encoding using the most recent version
+# INVALID-VERSION: warning: unsupported SHT_LLVM_BB_ADDR_MAP version: 5; encoding using the most recent version
 --- !ELF
 FileHeader:
@@ -209,4 +232,4 @@ Sections:
     Type: SHT_LLVM_BB_ADDR_MAP
     Entries:
 ## Specify unsupported version
-      - Version: 4
+      - Version: 5