aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alexey Bataev <a.bataev@outlook.com> 2024-04-01 04:20:08 -0700
committer Alexey Bataev <a.bataev@outlook.com> 2024-04-01 06:07:18 -0700
commit 41afef9066eec8daf517ac357a628cdf30c95e39 (patch)
tree d19decae42ee9aedc2de8aa9467b1ca7204a4986
parent da9f06c9b1179423302e3e7ccb27431ced44e548 (diff)
download llvm-41afef9066eec8daf517ac357a628cdf30c95e39.zip
llvm-41afef9066eec8daf517ac357a628cdf30c95e39.tar.gz
llvm-41afef9066eec8daf517ac357a628cdf30c95e39.tar.bz2
[SLP] Fix PR87011: Missing sign extension of demoted type before zero extension
The first zext/sext nodes must no longer be skipped: skipping them leads to incorrect and less profitable code.
-rw-r--r-- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp 6
-rw-r--r-- llvm/test/Transforms/SLPVectorizer/RISCV/init-ext-node-not-truncable.ll 2
-rw-r--r-- llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll 22
-rw-r--r-- llvm/test/Transforms/SLPVectorizer/X86/sext.ll 22
-rw-r--r-- llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll 9
-rw-r--r-- llvm/test/Transforms/SLPVectorizer/X86/zext.ll 9
6 files changed, 19 insertions, 51 deletions
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 2bc0c5d..1ffc39a 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -14263,11 +14263,9 @@ void BoUpSLP::computeMinimumValueSizes() {
SmallVector<unsigned> RootDemotes;
if (NodeIdx != 0 &&
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
- (VectorizableTree[NodeIdx]->getOpcode() == Instruction::ZExt ||
- VectorizableTree[NodeIdx]->getOpcode() == Instruction::SExt ||
- VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc)) {
+ VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {
assert(IsStoreOrInsertElt && "Expected store/insertelement seeded graph.");
- IsTruncRoot = VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc;
+ IsTruncRoot = true;
RootDemotes.push_back(NodeIdx);
IsProfitableToDemoteRoot = true;
++NodeIdx;
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/init-ext-node-not-truncable.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/init-ext-node-not-truncable.ll
index 436fba3..1166b1f 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/init-ext-node-not-truncable.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/init-ext-node-not-truncable.ll
@@ -7,7 +7,7 @@ define void @test() {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: store <2 x i64> <i64 -1, i64 0>, ptr @h, align 8
+; CHECK-NEXT: store <2 x i64> <i64 4294967295, i64 0>, ptr @h, align 8
; CHECK-NEXT: ret void
;
entry:
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll
index 5ae0ad9..b64743a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SLM
+; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
@@ -11,20 +11,10 @@
;
define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) {
-; SSE2-LABEL: @loadext_2i8_to_2i64(
-; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P0:%.*]], i64 1
-; SSE2-NEXT: [[I0:%.*]] = load i8, ptr [[P0]], align 1
-; SSE2-NEXT: [[I1:%.*]] = load i8, ptr [[P1]], align 1
-; SSE2-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i64
-; SSE2-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i64
-; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> poison, i64 [[X0]], i32 0
-; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
-; SSE2-NEXT: ret <2 x i64> [[V1]]
-;
-; SLM-LABEL: @loadext_2i8_to_2i64(
-; SLM-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
-; SLM-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
-; SLM-NEXT: ret <2 x i64> [[TMP3]]
+; SSE-LABEL: @loadext_2i8_to_2i64(
+; SSE-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
+; SSE-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i64>
+; SSE-NEXT: ret <2 x i64> [[TMP2]]
;
; AVX-LABEL: @loadext_2i8_to_2i64(
; AVX-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sext.ll b/llvm/test/Transforms/SLPVectorizer/X86/sext.ll
index 7d38aeb..744a509 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/sext.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/sext.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SSE2
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SLM
+; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
@@ -11,20 +11,10 @@
;
define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) {
-; SSE2-LABEL: @loadext_2i8_to_2i64(
-; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P0:%.*]], i64 1
-; SSE2-NEXT: [[I0:%.*]] = load i8, ptr [[P0]], align 1
-; SSE2-NEXT: [[I1:%.*]] = load i8, ptr [[P1]], align 1
-; SSE2-NEXT: [[X0:%.*]] = sext i8 [[I0]] to i64
-; SSE2-NEXT: [[X1:%.*]] = sext i8 [[I1]] to i64
-; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
-; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
-; SSE2-NEXT: ret <2 x i64> [[V1]]
-;
-; SLM-LABEL: @loadext_2i8_to_2i64(
-; SLM-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
-; SLM-NEXT: [[TMP3:%.*]] = sext <2 x i8> [[TMP2]] to <2 x i64>
-; SLM-NEXT: ret <2 x i64> [[TMP3]]
+; SSE-LABEL: @loadext_2i8_to_2i64(
+; SSE-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
+; SSE-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i64>
+; SSE-NEXT: ret <2 x i64> [[TMP2]]
;
; AVX-LABEL: @loadext_2i8_to_2i64(
; AVX-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll
index d1f6c41..27996a7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll
@@ -12,13 +12,8 @@
define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) {
; SSE2-LABEL: @loadext_2i8_to_2i64(
-; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P0:%.*]], i64 1
-; SSE2-NEXT: [[I0:%.*]] = load i8, ptr [[P0]], align 1
-; SSE2-NEXT: [[I1:%.*]] = load i8, ptr [[P1]], align 1
-; SSE2-NEXT: [[X0:%.*]] = zext i8 [[I0]] to i64
-; SSE2-NEXT: [[X1:%.*]] = zext i8 [[I1]] to i64
-; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> poison, i64 [[X0]], i32 0
-; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
+; SSE2-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
+; SSE2-NEXT: [[V1:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i64>
; SSE2-NEXT: ret <2 x i64> [[V1]]
;
; SLM-LABEL: @loadext_2i8_to_2i64(
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/zext.ll b/llvm/test/Transforms/SLPVectorizer/X86/zext.ll
index 829e4ba..9487042 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/zext.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/zext.ll
@@ -12,13 +12,8 @@
define <2 x i64> @loadext_2i8_to_2i64(ptr %p0) {
; SSE2-LABEL: @loadext_2i8_to_2i64(
-; SSE2-NEXT: [[P1:%.*]] = getelementptr inbounds i8, ptr [[P0:%.*]], i64 1
-; SSE2-NEXT: [[I0:%.*]] = load i8, ptr [[P0]], align 1
-; SSE2-NEXT: [[I1:%.*]] = load i8, ptr [[P1]], align 1
-; SSE2-NEXT: [[X0:%.*]] = zext i8 [[I0]] to i64
-; SSE2-NEXT: [[X1:%.*]] = zext i8 [[I1]] to i64
-; SSE2-NEXT: [[V0:%.*]] = insertelement <2 x i64> undef, i64 [[X0]], i32 0
-; SSE2-NEXT: [[V1:%.*]] = insertelement <2 x i64> [[V0]], i64 [[X1]], i32 1
+; SSE2-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[P0:%.*]], align 1
+; SSE2-NEXT: [[V1:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i64>
; SSE2-NEXT: ret <2 x i64> [[V1]]
;
; SLM-LABEL: @loadext_2i8_to_2i64(