From 31001be371e8f2c74470e727e54503fb2aabec8b Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Wed, 17 Mar 2021 16:59:55 +0100 Subject: [LoopVectorize] Refine hasIrregularType predicate The `hasIrregularType` predicate checks whether an array of N values of type Ty is "bitcast-compatible" with a vector. The previous check returned invalid results in some cases where there's some padding between the array elements: eg. a 4-element array of u7 values is considered as compatible with <4 x u7>, even though the vector is only loading/storing 28 bits instead of 32. The problem causes LLVM to generate incorrect code for some targets: for AArch64 the vector loads/stores are lowered in terms of ubfx/bfi, effectively losing the top (N * padding bits). Reviewed By: lebedev.ri Differential Revision: https://reviews.llvm.org/D97465 (cherry picked from commit 4f024938e4c932feba4d28573ec4522106f8d879) --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 22 ++++++------------ .../Transforms/LoopVectorize/irregular_type.ll | 27 ++++++++++++++++++++++ 2 files changed, 34 insertions(+), 15 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/irregular_type.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index d36e078..b456a97 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -372,19 +372,11 @@ static Type *getMemInstValueType(Value *I) { /// A helper function that returns true if the given type is irregular. The /// type is irregular if its allocated size doesn't equal the store size of an -/// element of the corresponding vector type at the given vectorization factor. -static bool hasIrregularType(Type *Ty, const DataLayout &DL, ElementCount VF) { - // Determine if an array of VF elements of type Ty is "bitcast compatible" - // with a vector. - if (VF.isVector()) { - auto *VectorTy = VectorType::get(Ty, VF); - return TypeSize::get(VF.getKnownMinValue() * - DL.getTypeAllocSize(Ty).getFixedValue(), - VF.isScalable()) != DL.getTypeStoreSize(VectorTy); - } - - // If the vectorization factor is one, we just check if an array of type Ty - // requires padding between elements. +/// element of the corresponding vector type. +static bool hasIrregularType(Type *Ty, const DataLayout &DL) { + // Determine if an array of N elements of type Ty is "bitcast compatible" + // with a vector. + // This is only true if there is no padding between the array elements. return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty); } @@ -5212,7 +5204,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened( // requires padding and will be scalarized. auto &DL = I->getModule()->getDataLayout(); auto *ScalarTy = getMemInstValueType(I); - if (hasIrregularType(ScalarTy, DL, VF)) + if (hasIrregularType(ScalarTy, DL)) return false; // Check if masking is required. @@ -5259,7 +5251,7 @@ bool LoopVectorizationCostModel::memoryInstructionCanBeWidened( // requires padding and will be scalarized. auto &DL = I->getModule()->getDataLayout(); auto *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType(); - if (hasIrregularType(ScalarTy, DL, VF)) + if (hasIrregularType(ScalarTy, DL)) return false; return true; diff --git a/llvm/test/Transforms/LoopVectorize/irregular_type.ll b/llvm/test/Transforms/LoopVectorize/irregular_type.ll new file mode 100644 index 0000000..167a1a1 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/irregular_type.ll @@ -0,0 +1,27 @@ +; RUN: opt %s -loop-vectorize -force-vector-width=4 -S | FileCheck %s + +; Ensure the array loads/stores are not optimized into vector operations when +; the element type has padding bits. + +; CHECK: foo +; CHECK: vector.body +; CHECK-NOT: load <4 x i7> +; CHECK-NOT: store <4 x i7> +; CHECK: for.body +define void @foo(i7* %a, i64 %n) { +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i7, i7* %a, i64 %indvars.iv + %0 = load i7, i7* %arrayidx, align 1 + %sub = add nuw nsw i7 %0, 0 + store i7 %sub, i7* %arrayidx, align 1 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %cmp = icmp eq i64 %indvars.iv.next, %n + br i1 %cmp, label %for.exit, label %for.body + +for.exit: + ret void +} -- cgit v1.1