diff options
author | Evgeniy Brevnov <ybrevnov@azul.com> | 2022-07-19 18:46:43 +0700 |
---|---|---|
committer | Evgeniy Brevnov <ybrevnov@azul.com> | 2022-07-19 19:03:53 +0700 |
commit | 8f90edeb55b4b5b15a8c303bcc98e95df933062d (patch) | |
tree | 5f60fbdd09411a9e68b5eea34e549ed96caa92aa | |
parent | 18b92c66fe59a44f50bc211a418eaf48fe1cf7c1 (diff) | |
download | llvm-8f90edeb55b4b5b15a8c303bcc98e95df933062d.zip llvm-8f90edeb55b4b5b15a8c303bcc98e95df933062d.tar.gz llvm-8f90edeb55b4b5b15a8c303bcc98e95df933062d.tar.bz2 |
Additional regression test for a crash during reordering of masked gather nodes
-rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll | 99 |
1 file changed, 99 insertions, 0 deletions
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll new file mode 100644 index 0000000..6539718 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/reorder-reused-masked-gather2.ll @@ -0,0 +1,99 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -slp-vectorizer -S < %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128-ni:1-p2:32:8:8:32-ni:2" +target triple = "x86_64-unknown-linux-gnu" + +define void @"foo"(i8 addrspace(1)* %0, i8 addrspace(1)* %1) #0 { +; CHECK-LABEL: @foo( +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i8 addrspace(1)*> poison, i8 addrspace(1)* [[TMP0:%.*]], i32 0 +; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i8 addrspace(1)*> [[TMP3]], <4 x i8 addrspace(1)*> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, <4 x i8 addrspace(1)*> [[SHUFFLE]], <4 x i64> <i64 8, i64 12, i64 28, i64 24> +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* [[TMP1:%.*]], i64 8 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 addrspace(1)* [[TMP5]] to float addrspace(1)* +; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i8 addrspace(1)*> [[TMP4]] to <4 x float addrspace(1)*> +; CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p1f32(<4 x float addrspace(1)*> [[TMP7]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef) +; CHECK-NEXT: [[SHUFFLE1:%.*]] = shufflevector <4 x float> [[TMP8]], <4 x float> poison, <8 x i32> <i32 0, i32 3, i32 0, i32 3, i32 2, i32 1, i32 2, i32 1> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast float addrspace(1)* [[TMP6]] to <8 x float> addrspace(1)* +; CHECK-NEXT: [[TMP10:%.*]] = load <8 x float>, <8 x float> addrspace(1)* [[TMP9]], align 4 +; CHECK-NEXT: [[TMP11:%.*]] = fmul <8 x float> [[SHUFFLE1]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = fadd <8 x 
float> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> poison, <8 x i32> <i32 0, i32 5, i32 2, i32 7, i32 4, i32 1, i32 6, i32 3> +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x float addrspace(1)*> [[TMP7]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast float addrspace(1)* [[TMP13]] to <8 x float> addrspace(1)* +; CHECK-NEXT: store <8 x float> [[SHUFFLE2]], <8 x float> addrspace(1)* [[TMP14]], align 4 +; CHECK-NEXT: ret void +; + %3 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 8 + %4 = bitcast i8 addrspace(1)* %3 to float addrspace(1)* + %5 = load float, float addrspace(1)* %4 , align 4 + %6 = getelementptr inbounds i8, i8 addrspace(1)* %1, i64 8 + %7 = bitcast i8 addrspace(1)* %6 to float addrspace(1)* + %8 = load float, float addrspace(1)* %7 , align 4 + %9 = fmul float %5, %8 + %10 = fadd float %9, 0.000000e+00 + %11 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 12 + %12 = bitcast i8 addrspace(1)* %11 to float addrspace(1)* + %13 = load float, float addrspace(1)* %12 , align 4 + %14 = getelementptr inbounds i8, i8 addrspace(1)* %1, i64 28 + %15 = bitcast i8 addrspace(1)* %14 to float addrspace(1)* + %16 = load float, float addrspace(1)* %15 , align 4 + %17 = fmul float %13, %16 + %18 = fadd float %17, 0.000000e+00 + %19 = getelementptr inbounds i8, i8 addrspace(1)* %1, i64 16 + %20 = bitcast i8 addrspace(1)* %19 to float addrspace(1)* + %21 = load float, float addrspace(1)* %20 , align 4 + %22 = fmul float %5, %21 + %23 = fadd float %22, 0.000000e+00 + %24 = getelementptr inbounds i8, i8 addrspace(1)* %1, i64 36 + %25 = bitcast i8 addrspace(1)* %24 to float addrspace(1)* + %26 = load float, float addrspace(1)* %25 , align 4 + %27 = fmul float %13, %26 + %28 = fadd float %27, 0.000000e+00 + %29 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 28 + %30 = bitcast i8 addrspace(1)* %29 to float addrspace(1)* + %31 = load float, float addrspace(1)* %30 , align 4 + %32 = 
getelementptr inbounds i8, i8 addrspace(1)* %1, i64 24 + %33 = bitcast i8 addrspace(1)* %32 to float addrspace(1)* + %34 = load float, float addrspace(1)* %33 , align 4 + %35 = fmul float %31, %34 + %36 = fadd float %35, 0.000000e+00 + %37 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 24 + %38 = bitcast i8 addrspace(1)* %37 to float addrspace(1)* + %39 = load float, float addrspace(1)* %38 , align 4 + %40 = getelementptr inbounds i8, i8 addrspace(1)* %1, i64 12 + %41 = bitcast i8 addrspace(1)* %40 to float addrspace(1)* + %42 = load float, float addrspace(1)* %41 , align 4 + %43 = fmul float %39, %42 + %44 = fadd float %43, 0.000000e+00 + %45 = getelementptr inbounds i8, i8 addrspace(1)* %1, i64 32 + %46 = bitcast i8 addrspace(1)* %45 to float addrspace(1)* + %47 = load float, float addrspace(1)* %46 , align 4 + %48 = fmul float %31, %47 + %49 = fadd float %48, 0.000000e+00 + %50 = getelementptr inbounds i8, i8 addrspace(1)* %1, i64 20 + %51 = bitcast i8 addrspace(1)* %50 to float addrspace(1)* + %52 = load float, float addrspace(1)* %51 , align 4 + %53 = fmul float %39, %52 + %54 = fadd float %53, 0.000000e+00 + store float %10, float addrspace(1)* %4 , align 4 + store float %18, float addrspace(1)* %12 , align 4 + %55 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 16 + %56 = bitcast i8 addrspace(1)* %55 to float addrspace(1)* + store float %23, float addrspace(1)* %56 , align 4 + %57 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 20 + %58 = bitcast i8 addrspace(1)* %57 to float addrspace(1)* + store float %28, float addrspace(1)* %58 , align 4 + store float %36, float addrspace(1)* %38 , align 4 + store float %44, float addrspace(1)* %30 , align 4 + %59 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 32 + %60 = bitcast i8 addrspace(1)* %59 to float addrspace(1)* + store float %49, float addrspace(1)* %60 , align 4 + %61 = getelementptr inbounds i8, i8 addrspace(1)* %0, i64 36 + %62 = bitcast i8 addrspace(1)* %61 to float addrspace(1)* 
+ store float %54, float addrspace(1)* %62 , align 4 + ret void +} + +attributes #0 = { "target-cpu"="skylake" } + |