From cdfb51295d814a875925974364931ef4337641e1 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 16 Nov 2023 12:50:46 -0800 Subject: [MLGO] Remove -tfutils-use-simplelogger flag (#72492) This flag was redundant and the value was not used anywhere, so it should be removed. --- llvm/lib/Analysis/TrainingLogger.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/llvm/lib/Analysis/TrainingLogger.cpp b/llvm/lib/Analysis/TrainingLogger.cpp index e236890..344ca92 100644 --- a/llvm/lib/Analysis/TrainingLogger.cpp +++ b/llvm/lib/Analysis/TrainingLogger.cpp @@ -27,11 +27,6 @@ using namespace llvm; -// FIXME(mtrofin): remove the flag altogether -static cl::opt - UseSimpleLogger("tfutils-use-simplelogger", cl::init(true), cl::Hidden, - cl::desc("Output simple (non-protobuf) log.")); - void Logger::writeHeader(std::optional AdviceSpec) { json::OStream JOS(*OS); JOS.object([&]() { -- cgit v1.1 From b2d62c9a58433e2a2ca8d2c9cd6b0b612dca2e76 Mon Sep 17 00:00:00 2001 From: PiJoules <6019989+PiJoules@users.noreply.github.com> Date: Thu, 16 Nov 2023 13:11:15 -0800 Subject: [clang] Ensure fixed point conversions work in C++ (#68344) --- clang/include/clang/Sema/Overload.h | 3 +++ clang/lib/Sema/SemaDecl.cpp | 3 +++ clang/lib/Sema/SemaExprCXX.cpp | 30 +++++++++++++++++++++++++++ clang/lib/Sema/SemaOverload.cpp | 10 +++++++-- clang/test/Frontend/fixed_point_conversions.c | 14 +++++++++++-- 5 files changed, 56 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h index a97968d..333309a 100644 --- a/clang/include/clang/Sema/Overload.h +++ b/clang/include/clang/Sema/Overload.h @@ -192,6 +192,9 @@ class Sema; /// C-only conversion between pointers with incompatible types ICK_Incompatible_Pointer_Conversion, + /// Fixed point type conversions according to N1169. 
+ ICK_Fixed_Point_Conversion, + /// The number of conversion kinds ICK_Num_Conversion_Kinds, }; diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index a6cd0bb..da8e2db 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -148,6 +148,9 @@ bool Sema::isSimpleTypeSpecifier(tok::TokenKind Kind) const { case tok::kw___ibm128: case tok::kw_wchar_t: case tok::kw_bool: + case tok::kw__Accum: + case tok::kw__Fract: + case tok::kw__Sat: #define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) case tok::kw___##Trait: #include "clang/Basic/TransformTypeTraits.def" case tok::kw___auto_type: diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 023411c..081b568 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -4506,6 +4506,36 @@ Sema::PerformImplicitConversion(Expr *From, QualType ToType, .get(); break; + case ICK_Fixed_Point_Conversion: + assert((FromType->isFixedPointType() || ToType->isFixedPointType()) && + "Attempting implicit fixed point conversion without a fixed " + "point operand"); + if (FromType->isFloatingType()) + From = ImpCastExprToType(From, ToType, CK_FloatingToFixedPoint, + VK_PRValue, + /*BasePath=*/nullptr, CCK).get(); + else if (ToType->isFloatingType()) + From = ImpCastExprToType(From, ToType, CK_FixedPointToFloating, + VK_PRValue, + /*BasePath=*/nullptr, CCK).get(); + else if (FromType->isIntegralType(Context)) + From = ImpCastExprToType(From, ToType, CK_IntegralToFixedPoint, + VK_PRValue, + /*BasePath=*/nullptr, CCK).get(); + else if (ToType->isIntegralType(Context)) + From = ImpCastExprToType(From, ToType, CK_FixedPointToIntegral, + VK_PRValue, + /*BasePath=*/nullptr, CCK).get(); + else if (ToType->isBooleanType()) + From = ImpCastExprToType(From, ToType, CK_FixedPointToBoolean, + VK_PRValue, + /*BasePath=*/nullptr, CCK).get(); + else + From = ImpCastExprToType(From, ToType, CK_FixedPointCast, + VK_PRValue, + /*BasePath=*/nullptr, CCK).get(); + break; + case ICK_Compatible_Conversion: From = ImpCastExprToType(From, ToType, CK_NoOp, From->getValueKind(), /*BasePath=*/nullptr, CCK).get(); diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 858654e..9800d7f 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -158,7 +158,8 @@ ImplicitConversionRank clang::GetConversionRank(ImplicitConversionKind Kind) { // it was omitted by the patch that added // ICK_Zero_Queue_Conversion ICR_C_Conversion, - ICR_C_Conversion_Extension + ICR_C_Conversion_Extension, + ICR_Conversion, }; static_assert(std::size(Rank) == (int)ICK_Num_Conversion_Kinds); return Rank[(int)Kind]; @@ -197,7 +198,8 @@ static const char* GetImplicitConversionName(ImplicitConversionKind Kind) { "OpenCL Zero Event Conversion", "OpenCL Zero Queue Conversion", "C specific type conversion", - "Incompatible pointer conversion" + "Incompatible pointer conversion", + "Fixed point conversion", }; static_assert(std::size(Name) == (int)ICK_Num_Conversion_Kinds); return Name[Kind]; @@ -2189,6 +2191,9 @@ static bool IsStandardConversion(Sema &S, Expr* From, QualType ToType, From->isIntegerConstantExpr(S.getASTContext())) { SCS.Second = ICK_Compatible_Conversion; FromType = ToType; + } else if (ToType->isFixedPointType() || FromType->isFixedPointType()) { + SCS.Second = ICK_Fixed_Point_Conversion; + FromType = ToType; } else { // No second conversion required. 
SCS.Second = ICK_Identity; @@ -5947,6 +5952,7 @@ static bool CheckConvertedConstantConversions(Sema &S, case ICK_Zero_Event_Conversion: case ICK_C_Only_Conversion: case ICK_Incompatible_Pointer_Conversion: + case ICK_Fixed_Point_Conversion: return false; case ICK_Lvalue_To_Rvalue: diff --git a/clang/test/Frontend/fixed_point_conversions.c b/clang/test/Frontend/fixed_point_conversions.c index ebd1d7e..efa3f1b 100644 --- a/clang/test/Frontend/fixed_point_conversions.c +++ b/clang/test/Frontend/fixed_point_conversions.c @@ -1,6 +1,12 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -ffixed-point -triple x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,SIGNED -// RUN: %clang_cc1 -ffixed-point -triple x86_64-unknown-linux-gnu -S -emit-llvm %s -o - -fpadding-on-unsigned-fixed-point | FileCheck %s --check-prefixes=CHECK,UNSIGNED +// RUN: %clang_cc1 -x c -ffixed-point -triple x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,SIGNED +// RUN: %clang_cc1 -x c -ffixed-point -triple x86_64-unknown-linux-gnu -S -emit-llvm %s -o - -fpadding-on-unsigned-fixed-point | FileCheck %s --check-prefixes=CHECK,UNSIGNED +// RUN: %clang_cc1 -x c++ -ffixed-point -triple x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,SIGNED +// RUN: %clang_cc1 -x c++ -ffixed-point -triple x86_64-unknown-linux-gnu -S -emit-llvm %s -o - -fpadding-on-unsigned-fixed-point | FileCheck %s --check-prefixes=CHECK,UNSIGNED + +#ifdef __cplusplus +extern "C" { +#endif short _Accum sa; _Accum a, a2; @@ -994,3 +1000,7 @@ void float_sat5(void) { void float_sat6(void) { sat_uf = fl; } + +#ifdef __cplusplus +} +#endif -- cgit v1.1 From 6f3b88baa2ac9ec892ed3ad7dd64d0d537010bc5 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 16 Nov 2023 21:17:06 +0000 Subject: [VPlan] Move trunc ([s|z]ext A) simplifications to simplifyRecipe. Split off simplification from D149903 as suggested. This should be effectively NFC until D149903 lands. 
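
For illustration, the recipe-level rewrite mirrors the usual IR-level trunc-of-extend folds (hypothetical IR, not taken from this patch; %a stands for an arbitrary value of the stated width):

  trunc (zext i8  %a to i32) to i8   -->  %a                    ; source has the same width as the result
  trunc (zext i8  %a to i32) to i16  -->  zext  i8  %a to i16   ; source narrower than the result
  trunc (zext i32 %a to i64) to i16  -->  trunc i32 %a to i16   ; source wider than the result

The same holds with sext in place of zext; in the widening case the original extension kind is preserved.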
--- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 25 +++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index c55864d..0eaaa03 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -816,15 +816,28 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { break; } case Instruction::Trunc: { - VPRecipeBase *Zext = R.getOperand(0)->getDefiningRecipe(); - if (!Zext || getOpcodeForRecipe(*Zext) != Instruction::ZExt) + VPRecipeBase *Ext = R.getOperand(0)->getDefiningRecipe(); + if (!Ext) break; - VPValue *A = Zext->getOperand(0); + unsigned ExtOpcode = getOpcodeForRecipe(*Ext); + if (ExtOpcode != Instruction::ZExt && ExtOpcode != Instruction::SExt) + break; + VPValue *A = Ext->getOperand(0); VPValue *Trunc = R.getVPSingleValue(); - Type *TruncToTy = TypeInfo.inferScalarType(Trunc); - if (TruncToTy && TruncToTy == TypeInfo.inferScalarType(A)) + Type *TruncTy = TypeInfo.inferScalarType(Trunc); + Type *ATy = TypeInfo.inferScalarType(A); + if (TruncTy == ATy) { Trunc->replaceAllUsesWith(A); - + } else if (ATy->getScalarSizeInBits() < TruncTy->getScalarSizeInBits()) { + auto *VPC = + new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy); + VPC->insertBefore(&R); + Trunc->replaceAllUsesWith(VPC); + } else if (ATy->getScalarSizeInBits() > TruncTy->getScalarSizeInBits()) { + auto *VPC = new VPWidenCastRecipe(Instruction::Trunc, A, TruncTy); + VPC->insertBefore(&R); + Trunc->replaceAllUsesWith(VPC); + } #ifndef NDEBUG // Verify that the cached type info is for both A and its users is still // accurate by comparing it to freshly computed types. -- cgit v1.1 From ff8815e597597a319ffde9d18e708040d226bbae Mon Sep 17 00:00:00 2001 From: Peiming Liu <36770114+PeimingLiu@users.noreply.github.com> Date: Thu, 16 Nov 2023 13:21:49 -0800 Subject: [mlir][sparse] code cleanup (remove topSort in CodegenEnv). (#72550) --- .../Dialect/SparseTensor/Transforms/CodegenEnv.cpp | 55 +++-------- .../Dialect/SparseTensor/Transforms/CodegenEnv.h | 27 +---- .../SparseTensor/Transforms/LoopEmitter.cpp | 18 +--- .../Dialect/SparseTensor/Transforms/LoopEmitter.h | 10 +- .../SparseTensor/Transforms/Sparsification.cpp | 110 ++++++--------------- 5 files changed, 50 insertions(+), 170 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp index 3a02d56..ad2f649 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp @@ -28,21 +28,13 @@ static bool isMaterializing(Value val) { val.getDefiningOp(); } -/// Makes target array's elements sorted according to the `order` array. -static void sortArrayBasedOnOrder(std::vector &target, - ArrayRef order) { +/// Sorts the dependent loops such that it is ordered in the same sequence in +/// which loops will be generated. 
+static void sortDependentLoops(std::vector &target) { std::sort(target.begin(), target.end(), - [&order](const LoopCoeffPair &l, const LoopCoeffPair &r) { + [](const LoopCoeffPair &l, const LoopCoeffPair &r) { assert(std::addressof(l) == std::addressof(r) || l != r); - int idxL = -1, idxR = -1; - for (int i = 0, e = order.size(); i < e; i++) { - if (order[i] == l.first) - idxL = i; - if (order[i] == r.first) - idxR = i; - } - assert(idxL >= 0 && idxR >= 0); - return idxL < idxR; + return l.first < r.first; }); } //===----------------------------------------------------------------------===// @@ -54,14 +46,10 @@ CodegenEnv::CodegenEnv(linalg::GenericOp linop, SparsificationOptions opts, unsigned numFilterLoops, unsigned maxRank) : linalgOp(linop), sparseOptions(opts), latticeMerger(numTensors, numLoops, numFilterLoops, maxRank), - loopEmitter(), topSort(), sparseOut(nullptr), outerParNest(-1u), - insChain(), expValues(), expFilled(), expAdded(), expCount(), redVal(), + loopEmitter(), sparseOut(nullptr), outerParNest(-1u), insChain(), + expValues(), expFilled(), expAdded(), expCount(), redVal(), redExp(detail::kInvalidId), redCustom(detail::kInvalidId), - redValidLexInsert() { - // TODO: remove topSort, loops should be already sorted by previous pass. - for (unsigned l = 0; l < latticeMerger.getNumLoops(); l++) - topSort.push_back(l); -} + redValidLexInsert() {} LogicalResult CodegenEnv::initTensorExp() { // Builds the tensor expression for the Linalg operation in SSA form. @@ -97,7 +85,7 @@ void CodegenEnv::startEmit() { (void)enc; assert(!enc || lvlRank == enc.getLvlRank()); for (Level lvl = 0; lvl < lvlRank; lvl++) - sortArrayBasedOnOrder(latticeMerger.getDependentLoops(tid, lvl), topSort); + sortDependentLoops(latticeMerger.getDependentLoops(tid, lvl)); } loopEmitter.initialize( @@ -105,7 +93,7 @@ void CodegenEnv::startEmit() { StringAttr::get(linalgOp.getContext(), linalg::GenericOp::getOperationName()), /*hasOutput=*/true, - /*isSparseOut=*/sparseOut != nullptr, topSort, + /*isSparseOut=*/sparseOut != nullptr, /*numLoops=*/getLoopNum(), // TODO: compute the map and pass it to loop emitter directly instead of // passing in a callback. /*dependentLvlGetter=*/ @@ -190,8 +178,7 @@ bool CodegenEnv::isAdmissibleTensorExp(ExprId exp) { // needed. outerParNest = 0; const auto iteratorTypes = linalgOp.getIteratorTypesArray(); - assert(topSortSize() == latticeMerger.getNumLoops()); - for (const LoopId i : topSort) { + for (unsigned i = 0, e = getLoopNum(); i < e; i++) { if (linalg::isReductionIterator(iteratorTypes[i])) break; // terminate at first reduction outerParNest++; @@ -208,26 +195,8 @@ bool CodegenEnv::isAdmissibleTensorExp(ExprId exp) { // Code generation environment topological sort methods //===----------------------------------------------------------------------===// -ArrayRef CodegenEnv::getTopSortSlice(LoopOrd n, LoopOrd m) const { - return ArrayRef(topSort).slice(n, m); -} - -ArrayRef CodegenEnv::getLoopStackUpTo(LoopOrd n) const { - return ArrayRef(topSort).take_front(n); -} - -ArrayRef CodegenEnv::getCurrentLoopStack() const { - return getLoopStackUpTo(loopEmitter.getCurrentDepth()); -} - Value CodegenEnv::getLoopVar(LoopId i) const { - // TODO: this class should store the inverse of `topSort` so that - // it can do this conversion directly, instead of searching through - // `topSort` every time. (Or else, `LoopEmitter` should handle this.) 
- for (LoopOrd n = 0, numLoops = topSortSize(); n < numLoops; n++) - if (topSort[n] == i) - return loopEmitter.getLoopIV(n); - llvm_unreachable("invalid loop identifier"); + return loopEmitter.getLoopIV(i); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h index c0fc505..af783b9 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h @@ -83,6 +83,8 @@ public: } DimLevelType dlt(TensorLoopId b) const { return latticeMerger.getLvlType(b); } + unsigned getLoopNum() const { return latticeMerger.getNumLoops(); } + // // LoopEmitter delegates. // @@ -107,6 +109,8 @@ public: return loopEmitter.unpackTensorLevelRange(std::forward(c)); } + unsigned getLoopDepth() const { return loopEmitter.getCurrentDepth(); } + // // Code generation environment verify functions. // @@ -115,25 +119,6 @@ public: /// It also sets the sparseOut if the output tensor is sparse. bool isAdmissibleTensorExp(ExprId e); - /// Whether the iteration graph is sorted in admissible topoOrder. - /// Sets outerParNest on success with sparse output - bool isAdmissibleTopoOrder(); - - // - // Topological delegate and sort methods. - // - - LoopOrd topSortSize() const { return topSort.size(); } - LoopId topSortAt(LoopOrd n) const { return topSort.at(n); } - void topSortPushBack(LoopId i) { topSort.push_back(i); } - void topSortClear(size_t capacity = 0) { - topSort.clear(); - topSort.reserve(capacity); - } - - ArrayRef getTopSortSlice(LoopOrd n, LoopOrd m) const; - ArrayRef getLoopStackUpTo(LoopOrd n) const; - ArrayRef getCurrentLoopStack() const; /// Returns the induction-variable for the loop identified by the given /// `LoopId`. This method handles application of the topological sort /// in order to convert the `LoopId` into the corresponding `LoopOrd`. @@ -191,10 +176,6 @@ private: // Loop emitter helper class. LoopEmitter loopEmitter; - // Topological sort. This serves as a mapping from `LoopOrd` to `LoopId` - // (cf., `getLoopVar` and `topSortAt`). - std::vector topSort; - // Sparse tensor as output. Implemented either through direct injective // insertion in lexicographic index order or through access pattern // expansion in the innermost loop nest (`expValues` through `expCount`). diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp index 81ce525..ba798f0 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.cpp @@ -276,13 +276,13 @@ Value LoopEmitter::genSparseCrd(OpBuilder &builder, Location loc, TensorId tid, } LoopEmitter::LoopEmitter(ValueRange tensors, StringAttr loopTag, bool hasOutput, - bool isSparseOut, ArrayRef topSort, + bool isSparseOut, unsigned numLoops, DependentLvlGetter dimGetter) { - initialize(tensors, loopTag, hasOutput, isSparseOut, topSort, dimGetter); + initialize(tensors, loopTag, hasOutput, isSparseOut, numLoops, dimGetter); } void LoopEmitter::initialize(ValueRange ts, StringAttr loopTag, bool hasOutput, - bool isSparseOut, ArrayRef topSort, + bool isSparseOut, unsigned numLoops, DependentLvlGetter dimGetter) { // First initialize the top-level type of the fields. 
this->loopTag = loopTag; @@ -308,10 +308,8 @@ void LoopEmitter::initialize(ValueRange ts, StringAttr loopTag, bool hasOutput, this->sliceOffsets.assign(numTensors, std::vector()); this->sliceStrides.assign(numTensors, std::vector()); - const LoopOrd numLoops = topSort.size(); // These zeros will be overwritten below, but we need to initialize // them to something since we'll need random-access assignment. - this->loopIdToOrd.assign(numLoops, 0); this->loopStack.reserve(numLoops); this->loopSeqStack.reserve(numLoops); @@ -387,13 +385,6 @@ void LoopEmitter::initialize(ValueRange ts, StringAttr loopTag, bool hasOutput, } } } - - // Construct the inverse of the `topSort` from the sparsifier. - // This is needed to map `AffineDimExpr`s back to the `LoopOrd` - // used in loop emitter. - // FIXME: This map should be maintained outside loop emitter. - for (LoopOrd n = 0; n < numLoops; n++) - loopIdToOrd[topSort[n]] = n; } void LoopEmitter::initializeLoopEmit( @@ -611,8 +602,7 @@ Value LoopEmitter::genAffine(OpBuilder &builder, Location loc, AffineExpr a) { // However, elsewhere we have been lead to expect that `loopIdToOrd` // should be indexed by `LoopId`... const auto loopId = cast(a).getPosition(); - assert(loopId < loopIdToOrd.size()); - return loopStack[loopIdToOrd[loopId]].iv; + return loopStack[loopId].iv; } case AffineExprKind::Add: { auto binOp = cast(a); diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h index c6518de..320b397 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/LoopEmitter.h @@ -113,12 +113,11 @@ public: /// to `LoopId`. void initialize(ValueRange tensors, StringAttr loopTag = nullptr, bool hasOutput = false, bool isSparseOut = false, - ArrayRef topSort = {}, - DependentLvlGetter getter = nullptr); + unsigned numLoops = 0, DependentLvlGetter getter = nullptr); explicit LoopEmitter(ValueRange tensors, StringAttr loopTag = nullptr, bool hasOutput = false, bool isSparseOut = false, - ArrayRef topSort = {}, + unsigned numLoops = 0, DependentLvlGetter getter = nullptr); /// Starts a loop emitting session by generating all the buffers needed @@ -751,11 +750,6 @@ private: // TODO: maybe we should have a LoopSeqInfo std::vector>>> loopSeqStack; - - /// Maps `LoopId` (used by `AffineDimExpr`) to `LoopOrd` (in the `loopStack`). - /// TODO: We should probably use a callback function here to make it more - /// general. - std::vector loopIdToOrd; }; } // namespace sparse_tensor diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp index cd6f689a..ec96ce2 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -49,8 +49,8 @@ using namespace mlir::sparse_tensor; // (assuming that's the actual meaning behind the "idx"-vs-"ldx" convention). /// Determines if affine expression is invariant. -static bool isInvariantAffine(AffineExpr a, ArrayRef loopStack, - LoopId ldx, bool &isAtLoop) { +static bool isInvariantAffine(AffineExpr a, unsigned loopDepth, LoopId ldx, + bool &isAtLoop) { switch (a.getKind()) { case AffineExprKind::DimId: { const LoopId i = cast(a).getPosition(); @@ -59,19 +59,14 @@ static bool isInvariantAffine(AffineExpr a, ArrayRef loopStack, // Must be invariant if we are at the given loop. 
return true; } - bool isInvariant = false; - for (LoopId l : loopStack) { - isInvariant = (l == i); - if (isInvariant) - break; - } - return isInvariant; + // The DimExpr is invariant the loop has already been generated. + return i < loopDepth; } case AffineExprKind::Add: case AffineExprKind::Mul: { auto binOp = cast(a); - return isInvariantAffine(binOp.getLHS(), loopStack, ldx, isAtLoop) && - isInvariantAffine(binOp.getRHS(), loopStack, ldx, isAtLoop); + return isInvariantAffine(binOp.getLHS(), loopDepth, ldx, isAtLoop) && + isInvariantAffine(binOp.getRHS(), loopDepth, ldx, isAtLoop); } default: { assert(isa(a)); @@ -80,12 +75,6 @@ static bool isInvariantAffine(AffineExpr a, ArrayRef loopStack, } } -/// Determines if affine expression is invariant. -static bool isInvariantAffine(CodegenEnv &env, AffineExpr a, LoopId ldx, - bool &isAtLoop) { - return isInvariantAffine(a, env.getCurrentLoopStack(), ldx, isAtLoop); -} - /// Helper method to inspect affine expressions. Rejects cases where the /// same index is used more than once. Also rejects compound affine /// expressions in sparse dimensions. @@ -351,17 +340,6 @@ static void genBuffers(CodegenEnv &env, OpBuilder &builder) { llvm::cast(op.getOperation()) .createLoopRanges(builder, loc); - assert(loopRange.size() == env.merger().getStartingFilterLoopId()); - SmallVector sortedRange; - for (unsigned i = 0, e = env.topSortSize(); i < e; i++) { - LoopId ldx = env.topSortAt(i); - // FIXME: Gets rid of filter loops since we have a better algorithm to deal - // with affine index expression. - if (ldx < env.merger().getStartingFilterLoopId()) { - sortedRange.push_back(loopRange[ldx]); - } - } - env.emitter().initializeLoopEmit( builder, loc, /// Generates buffer for the output tensor. @@ -396,15 +374,14 @@ static void genBuffers(CodegenEnv &env, OpBuilder &builder) { } return init; }, - [&sortedRange, &env](OpBuilder &b, Location loc, Level l) { - assert(l < env.topSortSize()); + [&loopRange, &env](OpBuilder &b, Location loc, Level l) { + assert(l < env.getLoopNum()); // FIXME: Remove filter loop since we have a better algorithm to // deal with affine index expression. if (l >= env.merger().getStartingFilterLoopId()) return Value(); - return mlir::getValueOrCreateConstantIndexOp(b, loc, - sortedRange[l].size); + return mlir::getValueOrCreateConstantIndexOp(b, loc, loopRange[l].size); }); } @@ -762,7 +739,7 @@ static void genInvariants(CodegenEnv &env, OpBuilder &builder, ExprId exp, return; if (*sldx == ldx) isAtLoop = true; - } else if (!isInvariantAffine(env, a, ldx, isAtLoop)) + } else if (!isInvariantAffine(a, env.getLoopDepth(), ldx, isAtLoop)) return; // still in play } // All exhausted at this level (isAtLoop denotes exactly at this LoopId). @@ -890,29 +867,6 @@ static bool shouldTryParallize(CodegenEnv &env, LoopId ldx, bool isOuter, return isParallelFor(env, isOuter, isSparse); } -/// Generates a "filter loop" on the given tid level to locate a coordinate that -/// is of the same value as evaluated by the affine expression in its matching -/// indexing map. -static Operation *genFilterLoop(CodegenEnv &env, OpBuilder &builder, LoopId ldx, - TensorLevel tidLvl) { - linalg::GenericOp op = env.op(); - Location loc = op.getLoc(); - Operation *loop = *env.genLoopBoundary([&](MutableArrayRef reduc) { - assert(env.merger().isFilterLoop(ldx)); - const auto [tid, lvl] = env.unpackTensorLevel(tidLvl); - // tids/lvls must only have one value because filter loops only - // corresponding to the one and only sparse tensor level. 
- OpOperand *t = &op->getOpOperand(tid); - auto enc = getSparseTensorEncoding(t->get().getType()); - // Retrieves the affine expression for the filter loop. - // FIXME: `toOrigDim` is deprecated. - AffineExpr a = op.getMatchingIndexingMap(t).getResult(toOrigDim(enc, lvl)); - return env.emitter().enterFilterLoopOverTensorAtLvl(builder, loc, tid, lvl, - a, reduc); - }); - return loop; -} - /// Emit a loop to coiterate over the list of tensor levels. The generated loop /// can either be a for loop or while loop depending on whether there is at most /// one sparse level in the list. @@ -934,14 +888,8 @@ static Operation *genCoIteration(CodegenEnv &env, OpBuilder &builder, /// singleton iteration or co-iteration over the given conjunction. static Operation *genLoop(CodegenEnv &env, OpBuilder &builder, LoopOrd at, bool needsUniv, ArrayRef tidLvls) { - const LoopId ldx = env.topSortAt(at); - if (env.merger().isFilterLoop(ldx)) { - assert(tidLvls.size() == 1); - return genFilterLoop(env, builder, ldx, tidLvls.front()); - } - - bool tryParallel = shouldTryParallize(env, ldx, at == 0, tidLvls); - return genCoIteration(env, builder, ldx, tidLvls, tryParallel, needsUniv); + bool tryParallel = shouldTryParallize(env, at, at == 0, tidLvls); + return genCoIteration(env, builder, at, tidLvls, tryParallel, needsUniv); } /// Generates the induction structure for a while-loop. @@ -1066,12 +1014,12 @@ static void endIf(CodegenEnv &env, OpBuilder &builder, scf::IfOp ifOp, /// Starts a loop sequence at given level. Returns true if /// the universal loop index must be maintained at this level. static bool startLoopSeq(CodegenEnv &env, OpBuilder &builder, ExprId exp, - LoopOrd at, LoopId idx, LoopId ldx, LatSetId lts) { + LoopOrd idx, LoopId ldx, LatSetId lts) { assert(!env.getLoopVar(idx)); // Emit invariants at this loop sequence level. genInvariants(env, builder, exp, ldx, /*atStart=*/true); // Emit access pattern expansion for sparse tensor output. - genExpand(env, builder, at, /*atStart=*/true); + genExpand(env, builder, idx, /*atStart=*/true); // Emit further intitialization at this loop sequence level. const LatPointId l0 = env.set(lts)[0]; bool needsUniv = false; @@ -1226,7 +1174,8 @@ static bool translateBitsToTidLvlPairs( // Constant affine expression are handled in genLoop if (!isa(exp)) { bool isAtLoop = false; - if (isInvariantAffine(env, exp, ldx, isAtLoop) && isAtLoop) { + if (isInvariantAffine(exp, env.getLoopDepth(), ldx, isAtLoop) && + isAtLoop) { // If the compound affine is invariant and we are right at the // level. We need to generate the address according to the // affine expression. This is also the best place we can do it @@ -1273,8 +1222,8 @@ static std::pair startLoop(CodegenEnv &env, // becomes invariant and the address shall now be generated at the current // level. SmallVector> affineTidLvls; - bool isSingleCond = translateBitsToTidLvlPairs(env, li, env.topSortAt(at), - tidLvls, affineTidLvls); + bool isSingleCond = + translateBitsToTidLvlPairs(env, li, at, tidLvls, affineTidLvls); // Emit the for/while-loop control. Operation *loop = genLoop(env, builder, at, needsUniv, tidLvls); @@ -1324,13 +1273,13 @@ static bool endLoop(CodegenEnv &env, RewriterBase &rewriter, Operation *loop, /// Ends a loop sequence at given level. 
static void endLoopSeq(CodegenEnv &env, OpBuilder &builder, unsigned exp, - unsigned at, unsigned idx, unsigned ldx) { + unsigned idx, unsigned ldx) { assert(!env.getLoopVar(idx)); env.emitter().exitCurrentLoopSeq(builder, env.op().getLoc()); // Unmark bookkeeping of invariants and loop index. genInvariants(env, builder, exp, ldx, /*atStart=*/false); // Finalize access pattern expansion for sparse tensor output. - genExpand(env, builder, at, /*atStart=*/false); + genExpand(env, builder, idx, /*atStart=*/false); } /// Recursively generates code while computing iteration lattices in order @@ -1339,22 +1288,19 @@ static void endLoopSeq(CodegenEnv &env, OpBuilder &builder, unsigned exp, static void genStmt(CodegenEnv &env, RewriterBase &rewriter, ExprId exp, LoopOrd at) { // At each leaf, assign remaining tensor (sub)expression to output tensor. - if (at == env.topSortSize()) { - const LoopId ldx = env.topSortAt(at - 1); - Value rhs = genExp(env, rewriter, exp, ldx); + if (at == env.getLoopNum()) { + Value rhs = genExp(env, rewriter, exp, at - 1); genTensorStore(env, rewriter, exp, rhs); return; } // Construct iteration lattices for current loop index, with L0 at top. - const LoopId idx = env.topSortAt(at); - const LoopId ldx = at == 0 ? ::mlir::sparse_tensor::detail::kInvalidId - : env.topSortAt(at - 1); + const LoopId ldx = at == 0 ? sparse_tensor::detail::kInvalidId : at - 1; const LatSetId lts = - env.merger().optimizeSet(env.merger().buildLattices(exp, idx)); + env.merger().optimizeSet(env.merger().buildLattices(exp, at)); // Start a loop sequence. - bool needsUniv = startLoopSeq(env, rewriter, exp, at, idx, ldx, lts); + bool needsUniv = startLoopSeq(env, rewriter, exp, at, ldx, lts); // Emit a loop for every lattice point L0 >= Li in this loop sequence. // @@ -1382,7 +1328,7 @@ static void genStmt(CodegenEnv &env, RewriterBase &rewriter, ExprId exp, if (li == lj || env.merger().latGT(li, lj)) { // Recurse into body of each branch. if (!isSingleCond) { - scf::IfOp ifOp = genIf(env, rewriter, idx, lj); + scf::IfOp ifOp = genIf(env, rewriter, at, lj); genStmt(env, rewriter, ej, at + 1); endIf(env, rewriter, ifOp, redInput, cntInput, insInput, validIns); } else { @@ -1392,11 +1338,11 @@ static void genStmt(CodegenEnv &env, RewriterBase &rewriter, ExprId exp, } // End a loop. - needsUniv = endLoop(env, rewriter, loop, idx, li, needsUniv, isSingleCond); + needsUniv = endLoop(env, rewriter, loop, at, li, needsUniv, isSingleCond); } // End a loop sequence. - endLoopSeq(env, rewriter, exp, at, idx, ldx); + endLoopSeq(env, rewriter, exp, at, ldx); } /// Converts the result computed by the sparse kernel into the required form. 
-- cgit v1.1 From 73e963379e4d06ca75625f63a5604c286fe37040 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 16 Nov 2023 13:28:42 -0800 Subject: [RISCV] Add test coverage for partial buildvecs idioms Test coverage for an upcoming set of changes --- .../RISCV/rvv/fixed-vectors-buildvec-of-binop.ll | 112 +++++++++++++++++++++ .../RISCV/rvv/fixed-vectors-int-buildvec.ll | 44 ++++++++ 2 files changed, 156 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll index 717dfb1..8055944 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll @@ -446,6 +446,25 @@ define <4 x i32> @add_general_splat(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) { ; This test previously failed with an assertion failure because constant shift ; amounts are type legalized early. define void @buggy(i32 %0) #0 { +; RV32-LABEL: buggy: +; RV32: # %bb.0: # %entry +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v8, a0 +; RV32-NEXT: vadd.vv v8, v8, v8 +; RV32-NEXT: vor.vi v8, v8, 1 +; RV32-NEXT: vrgather.vi v9, v8, 0 +; RV32-NEXT: vse32.v v9, (zero) +; RV32-NEXT: ret +; +; RV64-LABEL: buggy: +; RV64: # %bb.0: # %entry +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vmv.v.x v8, a0 +; RV64-NEXT: vor.vi v8, v8, 1 +; RV64-NEXT: vrgather.vi v9, v8, 0 +; RV64-NEXT: vse32.v v9, (zero) +; RV64-NEXT: ret entry: %mul.us.us.i.3 = shl i32 %0, 1 %1 = insertelement <4 x i32> zeroinitializer, i32 %mul.us.us.i.3, i64 0 @@ -454,3 +473,96 @@ entry: store <4 x i32> %3, ptr null, align 16 ret void } + + +define <8 x i32> @add_constant_rhs_8xi32_vector_in(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-LABEL: add_constant_rhs_8xi32_vector_in: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, a0, 23 +; CHECK-NEXT: addi a1, a1, 25 +; CHECK-NEXT: addi a2, a2, 1 +; CHECK-NEXT: addi a3, a3, 2047 +; CHECK-NEXT: addi a3, a3, 308 +; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v10, a1 +; CHECK-NEXT: vslideup.vi v8, v10, 1 +; CHECK-NEXT: vmv.s.x v10, a2 +; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 2 +; CHECK-NEXT: vmv.s.x v10, a3 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 3 +; CHECK-NEXT: ret + %e0 = add i32 %a, 23 + %e1 = add i32 %b, 25 + %e2 = add i32 %c, 1 + %e3 = add i32 %d, 2355 + %v0 = insertelement <8 x i32> %vin, i32 %e0, i32 0 + %v1 = insertelement <8 x i32> %v0, i32 %e1, i32 1 + %v2 = insertelement <8 x i32> %v1, i32 %e2, i32 2 + %v3 = insertelement <8 x i32> %v2, i32 %e3, i32 3 + ret <8 x i32> %v3 +} + +define <8 x i32> @add_constant_rhs_8xi32_vector_in2(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-LABEL: add_constant_rhs_8xi32_vector_in2: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, a0, 23 +; CHECK-NEXT: addi a1, a1, 25 +; CHECK-NEXT: addi a2, a2, 1 +; CHECK-NEXT: addi a3, a3, 2047 +; CHECK-NEXT: addi a3, a3, 308 +; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vmv.s.x v10, a1 +; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 5 +; CHECK-NEXT: vmv.s.x v10, a2 +; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 6 +; CHECK-NEXT: vmv.s.x v10, a3 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; 
CHECK-NEXT: vslideup.vi v8, v10, 7 +; CHECK-NEXT: ret + %e0 = add i32 %a, 23 + %e1 = add i32 %b, 25 + %e2 = add i32 %c, 1 + %e3 = add i32 %d, 2355 + %v0 = insertelement <8 x i32> %vin, i32 %e0, i32 4 + %v1 = insertelement <8 x i32> %v0, i32 %e1, i32 5 + %v2 = insertelement <8 x i32> %v1, i32 %e2, i32 6 + %v3 = insertelement <8 x i32> %v2, i32 %e3, i32 7 + ret <8 x i32> %v3 +} + +define <8 x i32> @add_constant_rhs_8xi32_vector_in3(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-LABEL: add_constant_rhs_8xi32_vector_in3: +; CHECK: # %bb.0: +; CHECK-NEXT: addi a0, a0, 23 +; CHECK-NEXT: addi a1, a1, 25 +; CHECK-NEXT: addi a2, a2, 1 +; CHECK-NEXT: addi a3, a3, 2047 +; CHECK-NEXT: addi a3, a3, 308 +; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v10, a1 +; CHECK-NEXT: vslideup.vi v8, v10, 2 +; CHECK-NEXT: vmv.s.x v10, a2 +; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vmv.s.x v10, a3 +; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 6 +; CHECK-NEXT: ret + %e0 = add i32 %a, 23 + %e1 = add i32 %b, 25 + %e2 = add i32 %c, 1 + %e3 = add i32 %d, 2355 + %v0 = insertelement <8 x i32> %vin, i32 %e0, i32 0 + %v1 = insertelement <8 x i32> %v0, i32 %e1, i32 2 + %v2 = insertelement <8 x i32> %v1, i32 %e2, i32 4 + %v3 = insertelement <8 x i32> %v2, i32 %e3, i32 6 + ret <8 x i32> %v3 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index 8593937..d843750 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -804,3 +804,47 @@ define <16 x i8> @buildvec_not_vid_v16i8() { ; CHECK-NEXT: ret ret <16 x i8> } + +define <8 x i32> @prefix_overwrite(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-LABEL: prefix_overwrite: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v10, a1 +; CHECK-NEXT: vslideup.vi v8, v10, 1 +; CHECK-NEXT: vmv.s.x v10, a2 +; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 2 +; CHECK-NEXT: vmv.s.x v10, a3 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 3 +; CHECK-NEXT: ret + %v0 = insertelement <8 x i32> %vin, i32 %a, i32 0 + %v1 = insertelement <8 x i32> %v0, i32 %b, i32 1 + %v2 = insertelement <8 x i32> %v1, i32 %c, i32 2 + %v3 = insertelement <8 x i32> %v2, i32 %d, i32 3 + ret <8 x i32> %v3 +} + +define <8 x i32> @suffix_overwrite(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-LABEL: suffix_overwrite: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vmv.s.x v10, a1 +; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 5 +; CHECK-NEXT: vmv.s.x v10, a2 +; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 6 +; CHECK-NEXT: vmv.s.x v10, a3 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v10, 7 +; CHECK-NEXT: ret + %v0 = insertelement <8 x i32> %vin, i32 %a, i32 4 + %v1 = insertelement <8 x i32> %v0, i32 %b, i32 5 + %v2 = insertelement <8 x i32> %v1, i32 %c, i32 6 + %v3 = insertelement <8 x i32> %v2, i32 %d, i32 7 + ret <8 x i32> %v3 +} -- cgit v1.1 From ac3779e92ef9405fd2c602a08e8031f7b8aeedd8 Mon Sep 17 00:00:00 2001 From: 
David Li <57157229+david-xl@users.noreply.github.com> Date: Thu, 16 Nov 2023 13:47:31 -0800 Subject: Enable Custom Lowering for fabs.v8f16 on AVX (#71730) [X86]: Enable custom lowering for fabs.v8f16 on AVX Currently, custom lowering of fabs.v8f16 requires AVX512FP16, which is too restrictive. For v8f16 fabs lowering, no instructions in AVX512FP16 are needed. Without the fix, horribly inefficient code is generated without AVX512FP16. Note instcombiner generates calls to intrinsics @llvm.fabs.v8f16 when simplifyping AND <8 x half> operations. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 + llvm/test/CodeGen/X86/vec_fabs.ll | 82 +++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c938121..7f9d971 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1396,6 +1396,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FMINIMUM, VT, Custom); } + setOperationAction(ISD::FABS, MVT::v8f16, Custom); + // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted // even though v8i16 is a legal type. setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32); diff --git a/llvm/test/CodeGen/X86/vec_fabs.ll b/llvm/test/CodeGen/X86/vec_fabs.ll index 8876d2f..8af067d 100644 --- a/llvm/test/CodeGen/X86/vec_fabs.ll +++ b/llvm/test/CodeGen/X86/vec_fabs.ll @@ -2,10 +2,12 @@ ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX1 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX512VL +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X86,X86-AVX512FP16 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX512VLDQ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX512VL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X64,X64-AVX512FP16 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX512VLDQ define <2 x double> @fabs_v2f64(<2 x double> %p) { @@ -137,6 +139,86 @@ define <4 x double> @fabs_v4f64(<4 x double> %p) { } declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p) +define <8 x half> @fabs_v8f16(ptr %p) { +; X86-AVX1-LABEL: fabs_v8f16: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl 4(%esp), [[ADDRREG:%.*]] +; X86-AVX1-NEXT: vmovaps ([[ADDRREG]]), %xmm0 +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: retl + +; X86-AVX2-LABEL: fabs_v8f16: +; X86-AVX2: # %bb.0: +; X86-AVX2-NEXT: movl 4(%esp), [[REG:%.*]] +; X86-AVX2-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-AVX2-NEXT: vpand ([[REG]]), %xmm0, %xmm0 +; X86-AVX2-NEXT: retl + +; X64-AVX512VL-LABEL: fabs_v8f16: +; X64-AVX512VL: # %bb.0: +; X64-AVX512VL-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-AVX512VL-NEXT: vpand (%rdi), %xmm0, %xmm0 +; 
X64-AVX512VL-NEXT: retq + +; X64-AVX1-LABEL: fabs_v8f16: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: retq + +; X64-AVX2-LABEL: fabs_v8f16: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-AVX2-NEXT: vpand (%rdi), %xmm0, %xmm0 +; X64-AVX2-NEXT: retq + + %v = load <8 x half>, ptr %p, align 16 + %nnv = call <8 x half> @llvm.fabs.v8f16(<8 x half> %v) + ret <8 x half> %nnv +} +declare <8 x half> @llvm.fabs.v8f16(<8 x half> %p) + +define <16 x half> @fabs_v16f16(ptr %p) { +; X86-AVX512FP16-LABEL: fabs_v16f16: +; X86-AVX512FP16: # %bb.0: +; X86-AVX512FP16-NEXT: movl 4(%esp), [[REG:%.*]] +; X86-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, [[YMM:%ymm[0-9]+]] +; X86-AVX512FP16-NEXT: vpand ([[REG]]), [[YMM]], [[YMM]] +; X86-AVX512FP16-NEXT: retl + +; X64-AVX512FP16-LABEL: fabs_v16f16: +; X64-AVX512FP16: # %bb.0: +; X64-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), [[YMM:%ymm[0-9]+]] +; X64-AVX512FP16-NEXT: vpand (%rdi), [[YMM]], [[YMM]] +; X64-AVX512FP16-NEXT: retq +; + %v = load <16 x half>, ptr %p, align 32 + %nnv = call <16 x half> @llvm.fabs.v16f16(<16 x half> %v) + ret <16 x half> %nnv +} +declare <16 x half> @llvm.fabs.v16f16(<16 x half> %p) + +define <32 x half> @fabs_v32f16(ptr %p) { +; X86-AVX512FP16-LABEL: fabs_v32f16: +; X86-AVX512FP16: # %bb.0: +; X86-AVX512FP16-NEXT: movl 4(%esp), [[REG:%.*]] +; X86-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, [[ZMM:%zmm[0-9]+]] +; X86-AVX512FP16-NEXT: vpandq ([[REG]]), [[ZMM]], [[ZMM]] +; X86-AVX512FP16-NEXT: retl + +; X64-AVX512FP16-LABEL: fabs_v32f16: +; X64-AVX512FP16: # %bb.0: +; X64-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), [[ZMM:%zmm[0-9]+]] +; X64-AVX512FP16-NEXT: vpandq (%rdi), [[ZMM]], [[ZMM]] +; X64-AVX512FP16-NEXT: retq + + %v = load <32 x half>, ptr %p, align 64 + %nnv = call <32 x half> @llvm.fabs.v32f16(<32 x half> %v) + ret <32 x half> %nnv +} +declare <32 x half> @llvm.fabs.v32f16(<32 x half> %p) + + define <8 x float> @fabs_v8f32(<8 x float> %p) { ; X86-AVX1-LABEL: fabs_v8f32: ; X86-AVX1: # %bb.0: -- cgit v1.1 From ae623d16d50c9f12de7ae7ac1aa11c9d6857e081 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 16 Nov 2023 13:48:04 -0800 Subject: [Driver,Gnu] Simplify -static -static-pie -shared -pie handling and suppress -shared -rdynamic warning These options select different link modes (note: -shared -static can be used together for musl and mingw). It makes sense to place them together, which enables some simplification. The relevant ld options are now consistently placed after -m, similar to GCC. While here, suppress -Wunused-command-line-argument warning when -shared -rdynamic are used together (introduced by commit 291f4a00232b5742940d67e2ecf9168631251317). It can be argued either way whether the warning is justified (in ELF linkers --export-dynamic functionality is subsumed by -shared), but it is not useful (users can do -Wl,--export-dynamic, bypassing the driver diagnostic). 
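
For example, with this change an invocation along the lines of the following (file names are placeholders, not taken from the test suite) links without a -Wunused-command-line-argument diagnostic:

  clang --target=armv7-unknown-linux-gnueabi -shared -rdynamic foo.c -o libfoo.so

the driver now forwards -export-dynamic to the linker alongside -shared instead of leaving -rdynamic unclaimed, as exercised by the updated dynamic-linker.c test run with -Werror.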
--- clang/lib/Driver/ToolChains/Gnu.cpp | 55 +++++++++++++++---------------------- clang/test/Driver/dynamic-linker.c | 2 +- clang/test/Driver/linux-ld.c | 18 ++---------- clang/test/Driver/ohos.c | 2 +- 4 files changed, 27 insertions(+), 50 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 7698648..ba95ce9 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -292,18 +292,6 @@ static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) { } } -static bool getPIE(const ArgList &Args, const ToolChain &TC) { - if (Args.hasArg(options::OPT_shared) || Args.hasArg(options::OPT_static) || - Args.hasArg(options::OPT_r) || Args.hasArg(options::OPT_static_pie)) - return false; - - Arg *A = Args.getLastArg(options::OPT_pie, options::OPT_no_pie, - options::OPT_nopie); - if (!A) - return TC.isPIEDefault(Args); - return A->getOption().matches(options::OPT_pie); -} - static bool getStaticPIE(const ArgList &Args, const ToolChain &TC) { bool HasStaticPIE = Args.hasArg(options::OPT_static_pie); // -no-pie is an alias for -nopie. So, handling -nopie takes care of @@ -386,7 +374,6 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, const bool isAndroid = ToolChain.getTriple().isAndroid(); const bool IsIAMCU = ToolChain.getTriple().isOSIAMCU(); const bool IsVE = ToolChain.getTriple().isVE(); - const bool IsPIE = getPIE(Args, ToolChain); const bool IsStaticPIE = getStaticPIE(Args, ToolChain); const bool IsStatic = getStatic(Args); const bool HasCRTBeginEndFiles = @@ -406,17 +393,6 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (!D.SysRoot.empty()) CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); - if (IsPIE) - CmdArgs.push_back("-pie"); - - if (IsStaticPIE) { - CmdArgs.push_back("-static"); - CmdArgs.push_back("-pie"); - CmdArgs.push_back("--no-dynamic-linker"); - CmdArgs.push_back("-z"); - CmdArgs.push_back("text"); - } - if (Args.hasArg(options::OPT_s)) CmdArgs.push_back("-s"); @@ -451,19 +427,32 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (Triple.isRISCV()) CmdArgs.push_back("-X"); - if (Args.hasArg(options::OPT_shared)) + const bool IsShared = Args.hasArg(options::OPT_shared); + if (IsShared) CmdArgs.push_back("-shared"); - - if (IsStatic) { + bool IsPIE = false; + if (IsStaticPIE) { + CmdArgs.push_back("-static"); + CmdArgs.push_back("-pie"); + CmdArgs.push_back("--no-dynamic-linker"); + CmdArgs.push_back("-z"); + CmdArgs.push_back("text"); + } else if (IsStatic) { CmdArgs.push_back("-static"); - } else if (!Args.hasArg(options::OPT_r) && - !Args.hasArg(options::OPT_shared) && !IsStaticPIE) { + } else if (!Args.hasArg(options::OPT_r)) { if (Args.hasArg(options::OPT_rdynamic)) CmdArgs.push_back("-export-dynamic"); - - CmdArgs.push_back("-dynamic-linker"); - CmdArgs.push_back(Args.MakeArgString(Twine(D.DyldPrefix) + - ToolChain.getDynamicLinker(Args))); + if (!IsShared) { + Arg *A = Args.getLastArg(options::OPT_pie, options::OPT_no_pie, + options::OPT_nopie); + IsPIE = A ? 
A->getOption().matches(options::OPT_pie) + : ToolChain.isPIEDefault(Args); + if (IsPIE) + CmdArgs.push_back("-pie"); + CmdArgs.push_back("-dynamic-linker"); + CmdArgs.push_back(Args.MakeArgString(Twine(D.DyldPrefix) + + ToolChain.getDynamicLinker(Args))); + } } CmdArgs.push_back("-o"); diff --git a/clang/test/Driver/dynamic-linker.c b/clang/test/Driver/dynamic-linker.c index 555e46ab..978907e 100644 --- a/clang/test/Driver/dynamic-linker.c +++ b/clang/test/Driver/dynamic-linker.c @@ -11,7 +11,7 @@ // RUN: %clang -target x86_64-unknown-linux-gnu -### -shared /dev/null -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHARED %s -// RUN: %clang -target armv7-unknown-linux-gnueabi -### -shared -rdynamic /dev/null -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHARED %s +// RUN: %clang --target=armv7-unknown-linux-gnueabi -### -Werror -shared -rdynamic /dev/null -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHARED %s // RUN: %clang -target i386-unknown-linux-gnu -### -shared -rdynamic /dev/null -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHARED %s // RUN: %clang -target mips64-unknown-linux-gnu -### -shared -rdynamic /dev/null -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHARED %s // RUN: %clang -target powerpc64-unknown-linux-gnu -### -shared -rdynamic /dev/null -o /dev/null 2>&1 | FileCheck -check-prefix CHECK-SHARED %s diff --git a/clang/test/Driver/linux-ld.c b/clang/test/Driver/linux-ld.c index 1e36665..7adb078 100644 --- a/clang/test/Driver/linux-ld.c +++ b/clang/test/Driver/linux-ld.c @@ -182,12 +182,8 @@ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-CLANG-LD-STATIC-PIE %s // CHECK-CLANG-LD-STATIC-PIE: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]" -// CHECK-CLANG-LD-STATIC-PIE: "-static" -// CHECK-CLANG-LD-STATIC-PIE: "-pie" -// CHECK-CLANG-LD-STATIC-PIE: "--no-dynamic-linker" -// CHECK-CLANG-LD-STATIC-PIE: "-z" -// CHECK-CLANG-LD-STATIC-PIE: "text" // CHECK-CLANG-LD-STATIC-PIE: "-m" "elf_x86_64" +// CHECK-CLANG-LD-STATIC-PIE-SAME: "-static" "-pie" "--no-dynamic-linker" "-z" "text" // CHECK-CLANG-LD-STATIC-PIE: "{{.*}}rcrt1.o" // CHECK-CLANG-LD-STATIC-PIE: "--start-group" "-lgcc" "-lgcc_eh" "-lc" "--end-group" // @@ -197,12 +193,8 @@ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-CLANG-LD-STATIC-PIE-PIE %s // CHECK-CLANG-LD-STATIC-PIE-PIE: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]" -// CHECK-CLANG-LD-STATIC-PIE-PIE: "-static" -// CHECK-CLANG-LD-STATIC-PIE-PIE: "-pie" -// CHECK-CLANG-LD-STATIC-PIE-PIE: "--no-dynamic-linker" -// CHECK-CLANG-LD-STATIC-PIE-PIE: "-z" -// CHECK-CLANG-LD-STATIC-PIE-PIE: "text" // CHECK-CLANG-LD-STATIC-PIE-PIE: "-m" "elf_x86_64" +// CHECK-CLANG-LD-STATIC-PIE-PIE-SAME: "-static" "-pie" "--no-dynamic-linker" "-z" "text" // CHECK-CLANG-LD-STATIC-PIE-PIE: "{{.*}}rcrt1.o" // CHECK-CLANG-LD-STATIC-PIE-PIE: "--start-group" "-lgcc" "-lgcc_eh" "-lc" "--end-group" // @@ -212,12 +204,8 @@ // RUN: --sysroot=%S/Inputs/basic_linux_tree \ // RUN: | FileCheck --check-prefix=CHECK-CLANG-LD-STATIC-PIE-STATIC %s // CHECK-CLANG-LD-STATIC-PIE-STATIC: "{{.*}}ld{{(.exe)?}}" "--sysroot=[[SYSROOT:[^"]+]]" -// CHECK-CLANG-LD-STATIC-PIE-STATIC: "-static" -// CHECK-CLANG-LD-STATIC-PIE-STATIC: "-pie" -// CHECK-CLANG-LD-STATIC-PIE-STATIC: "--no-dynamic-linker" -// CHECK-CLANG-LD-STATIC-PIE-STATIC: "-z" -// CHECK-CLANG-LD-STATIC-PIE-STATIC: "text" // CHECK-CLANG-LD-STATIC-PIE-STATIC: "-m" "elf_x86_64" +// CHECK-CLANG-LD-STATIC-PIE-STATIC-SAME: "-static" "-pie" 
"--no-dynamic-linker" "-z" "text" // CHECK-CLANG-LD-STATIC-PIE-STATIC: "{{.*}}rcrt1.o" // CHECK-CLANG-LD-STATIC-PIE-STATIC: "--start-group" "-lgcc" "-lgcc_eh" "-lc" "--end-group" // diff --git a/clang/test/Driver/ohos.c b/clang/test/Driver/ohos.c index 9b98c70..dfb7981 100644 --- a/clang/test/Driver/ohos.c +++ b/clang/test/Driver/ohos.c @@ -17,9 +17,9 @@ // CHECK-NOT: "-fno-common" // CHECK: {{.*}}ld.lld{{.*}}" "--sysroot=[[SYSROOT]]" // CHECK-NOT: "--sysroot=[[SYSROOT]]" -// CHECK: "-pie" // CHECK-NOT: "--build-id" // CHECK: "--hash-style=both" +// CHECK: "-pie" // CHECK: "-dynamic-linker" "/lib/ld-musl-arm.so.1" // CHECK: Scrt1.o // CHECK: crti.o -- cgit v1.1 From 102f7fce8d8251655be5bf6955af33e55bea9c9e Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Thu, 16 Nov 2023 21:55:43 +0000 Subject: [llvm] Reduce memory footprint of Debug metadata nodes (#71227) Using a combination of reordering fields and using empty SubclassData32 / SubclassData1, it's possible to improve the size of data structures used to store debug info in the IR: Before: DILexicalBlock: 24 DILexicalBlockFile: 24 DIModule: 24 DITemplateParameter: 24 DICommonBlock: 24 DIMacro: 24 DICompileUnit: 56 DIType: 48 DINamespace: 24 DIVariable: 24 DIGlobalVariable: 32 DILocalVariable: 32 DILabel: 24 After: DILexicalBlock: 24 DILexicalBlockFile: 16 DIModule: 16 DITemplateParameter: 16 DICommonBlock: 16 DIMacro: 16 DICompileUnit: 48 DIType: 40 DINamespace: 16 DIVariable: 24 DIGlobalVariable: 24 DILocalVariable: 32 DILabel: 16 --- llvm/include/llvm/IR/DebugInfoMetadata.h | 115 +++++++++++++++++++------------ llvm/lib/IR/DebugInfoMetadata.cpp | 33 +++++---- 2 files changed, 90 insertions(+), 58 deletions(-) diff --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h index 1fe0543..3d9ee1c 100644 --- a/llvm/include/llvm/IR/DebugInfoMetadata.h +++ b/llvm/include/llvm/IR/DebugInfoMetadata.h @@ -128,6 +128,8 @@ public: /// A metadata node with a DWARF tag (i.e., a constant named \c DW_TAG_*, /// defined in llvm/BinaryFormat/Dwarf.h). Called \a DINode because it's /// potentially used for non-DWARF output. +/// +/// Uses the SubclassData16 Metadata slot. class DINode : public MDNode { friend class LLVMContextImpl; friend class MDNode; @@ -227,6 +229,8 @@ public: /// (possibly empty) null-separated \a MDString header that contains arbitrary /// fields. The remaining operands are \a dwarf_operands(), and are pointers /// to other metadata. +/// +/// Uses the SubclassData32 Metadata slot. class GenericDINode : public DINode { friend class LLVMContextImpl; friend class MDNode; @@ -695,12 +699,13 @@ std::optional DIScope::getSource() const { /// TODO: Remove the hardcoded name and context, since many types don't use /// them. /// TODO: Split up flags. +/// +/// Uses the SubclassData32 Metadata slot. 
class DIType : public DIScope { unsigned Line; DIFlags Flags; uint64_t SizeInBits; uint64_t OffsetInBits; - uint32_t AlignInBits; protected: DIType(LLVMContext &C, unsigned ID, StorageType Storage, unsigned Tag, @@ -716,7 +721,7 @@ protected: this->Line = Line; this->Flags = Flags; this->SizeInBits = SizeInBits; - this->AlignInBits = AlignInBits; + this->SubclassData32 = AlignInBits; this->OffsetInBits = OffsetInBits; } @@ -735,7 +740,7 @@ public: unsigned getLine() const { return Line; } uint64_t getSizeInBits() const { return SizeInBits; } - uint32_t getAlignInBits() const { return AlignInBits; } + uint32_t getAlignInBits() const { return SubclassData32; } uint32_t getAlignInBytes() const { return getAlignInBits() / CHAR_BIT; } uint64_t getOffsetInBits() const { return OffsetInBits; } DIFlags getFlags() const { return Flags; } @@ -1389,13 +1394,13 @@ public: private: unsigned SourceLanguage; - bool IsOptimized; unsigned RuntimeVersion; - unsigned EmissionKind; uint64_t DWOId; + unsigned EmissionKind; + unsigned NameTableKind; + bool IsOptimized; bool SplitDebugInlining; bool DebugInfoForProfiling; - unsigned NameTableKind; bool RangesBaseAddress; DICompileUnit(LLVMContext &C, StorageType Storage, unsigned SourceLanguage, @@ -1876,6 +1881,10 @@ public: /// Debug location. /// /// A debug location in source code, used for debug info and otherwise. +/// +/// Uses the SubclassData1, SubclassData16 and SubclassData32 +/// Metadata slots. + class DILocation : public MDNode { friend class LLVMContextImpl; friend class MDNode; @@ -2161,17 +2170,20 @@ public: } }; +/// Debug lexical block. +/// +/// Uses the SubclassData32 Metadata slot. class DILexicalBlock : public DILexicalBlockBase { friend class LLVMContextImpl; friend class MDNode; - unsigned Line; uint16_t Column; DILexicalBlock(LLVMContext &C, StorageType Storage, unsigned Line, unsigned Column, ArrayRef Ops) - : DILexicalBlockBase(C, DILexicalBlockKind, Storage, Ops), Line(Line), + : DILexicalBlockBase(C, DILexicalBlockKind, Storage, Ops), Column(Column) { + SubclassData32 = Line; assert(Column < (1u << 16) && "Expected 16-bit column"); } ~DILexicalBlock() = default; @@ -2206,7 +2218,7 @@ public: TempDILexicalBlock clone() const { return cloneImpl(); } - unsigned getLine() const { return Line; } + unsigned getLine() const { return SubclassData32; } unsigned getColumn() const { return Column; } static bool classof(const Metadata *MD) { @@ -2218,12 +2230,11 @@ class DILexicalBlockFile : public DILexicalBlockBase { friend class LLVMContextImpl; friend class MDNode; - unsigned Discriminator; - DILexicalBlockFile(LLVMContext &C, StorageType Storage, unsigned Discriminator, ArrayRef Ops) - : DILexicalBlockBase(C, DILexicalBlockFileKind, Storage, Ops), - Discriminator(Discriminator) {} + : DILexicalBlockBase(C, DILexicalBlockFileKind, Storage, Ops) { + SubclassData32 = Discriminator; + } ~DILexicalBlockFile() = default; static DILexicalBlockFile *getImpl(LLVMContext &Context, DILocalScope *Scope, @@ -2255,7 +2266,7 @@ public: (Scope, File, Discriminator)) TempDILexicalBlockFile clone() const { return cloneImpl(); } - unsigned getDiscriminator() const { return Discriminator; } + unsigned getDiscriminator() const { return SubclassData32; } static bool classof(const Metadata *MD) { return MD->getMetadataID() == DILexicalBlockFileKind; @@ -2338,12 +2349,13 @@ DILocation::cloneByMultiplyingDuplicationFactor(unsigned DF) const { return std::nullopt; } +/// Debug lexical block. +/// +/// Uses the SubclassData1 Metadata slot. 
class DINamespace : public DIScope { friend class LLVMContextImpl; friend class MDNode; - unsigned ExportSymbols : 1; - DINamespace(LLVMContext &Context, StorageType Storage, bool ExportSymbols, ArrayRef Ops); ~DINamespace() = default; @@ -2373,7 +2385,7 @@ public: TempDINamespace clone() const { return cloneImpl(); } - bool getExportSymbols() const { return ExportSymbols; } + bool getExportSymbols() const { return SubclassData1; } DIScope *getScope() const { return cast_or_null(getRawScope()); } StringRef getName() const { return getStringOperand(2); } @@ -2387,11 +2399,11 @@ public: /// Represents a module in the programming language, for example, a Clang /// module, or a Fortran module. +/// +/// Uses the SubclassData1 and SubclassData32 Metadata slots. class DIModule : public DIScope { friend class LLVMContextImpl; friend class MDNode; - unsigned LineNo; - bool IsDecl; DIModule(LLVMContext &Context, StorageType Storage, unsigned LineNo, bool IsDecl, ArrayRef Ops); @@ -2443,8 +2455,8 @@ public: StringRef getConfigurationMacros() const { return getStringOperand(3); } StringRef getIncludePath() const { return getStringOperand(4); } StringRef getAPINotesFile() const { return getStringOperand(5); } - unsigned getLineNo() const { return LineNo; } - bool getIsDecl() const { return IsDecl; } + unsigned getLineNo() const { return SubclassData32; } + bool getIsDecl() const { return SubclassData1; } Metadata *getRawScope() const { return getOperand(1); } MDString *getRawName() const { return getOperandAs(2); } @@ -2460,13 +2472,15 @@ public: }; /// Base class for template parameters. +/// +/// Uses the SubclassData1 Metadata slot. class DITemplateParameter : public DINode { protected: - bool IsDefault; - DITemplateParameter(LLVMContext &Context, unsigned ID, StorageType Storage, unsigned Tag, bool IsDefault, ArrayRef Ops) - : DINode(Context, ID, Storage, Tag, Ops), IsDefault(IsDefault) {} + : DINode(Context, ID, Storage, Tag, Ops) { + SubclassData1 = IsDefault; + } ~DITemplateParameter() = default; public: @@ -2475,7 +2489,7 @@ public: MDString *getRawName() const { return getOperandAs(0); } Metadata *getRawType() const { return getOperand(1); } - bool isDefault() const { return IsDefault; } + bool isDefault() const { return SubclassData1; } static bool classof(const Metadata *MD) { return MD->getMetadataID() == DITemplateTypeParameterKind || @@ -2572,9 +2586,10 @@ public: }; /// Base class for variables. +/// +/// Uses the SubclassData32 Metadata slot. class DIVariable : public DINode { unsigned Line; - uint32_t AlignInBits; protected: DIVariable(LLVMContext &C, unsigned ID, StorageType Storage, signed Line, @@ -2587,7 +2602,7 @@ public: StringRef getName() const { return getStringOperand(1); } DIFile *getFile() const { return cast_or_null(getRawFile()); } DIType *getType() const { return cast_or_null(getRawType()); } - uint32_t getAlignInBits() const { return AlignInBits; } + uint32_t getAlignInBits() const { return SubclassData32; } uint32_t getAlignInBytes() const { return getAlignInBits() / CHAR_BIT; } /// Determines the size of the variable's type. std::optional getSizeInBits() const; @@ -3161,9 +3176,10 @@ public: } }; +/// Debug common block. +/// +/// Uses the SubclassData32 Metadata slot. 
class DICommonBlock : public DIScope { - unsigned LineNo; - friend class LLVMContextImpl; friend class MDNode; @@ -3205,7 +3221,7 @@ public: } StringRef getName() const { return getStringOperand(2); } DIFile *getFile() const { return cast_or_null(getRawFile()); } - unsigned getLineNo() const { return LineNo; } + unsigned getLineNo() const { return SubclassData32; } Metadata *getRawScope() const { return getOperand(0); } Metadata *getRawDecl() const { return getOperand(1); } @@ -3310,12 +3326,11 @@ public: /// Label. /// +/// Uses the SubclassData32 Metadata slot. class DILabel : public DINode { friend class LLVMContextImpl; friend class MDNode; - unsigned Line; - DILabel(LLVMContext &C, StorageType Storage, unsigned Line, ArrayRef Ops); ~DILabel() = default; @@ -3353,7 +3368,7 @@ public: DILocalScope *getScope() const { return cast_or_null(getRawScope()); } - unsigned getLine() const { return Line; } + unsigned getLine() const { return SubclassData32; } StringRef getName() const { return getStringOperand(1); } DIFile *getFile() const { return cast_or_null(getRawFile()); } @@ -3455,15 +3470,17 @@ public: }; /// An imported module (C++ using directive or similar). +/// +/// Uses the SubclassData32 Metadata slot. class DIImportedEntity : public DINode { friend class LLVMContextImpl; friend class MDNode; - unsigned Line; - DIImportedEntity(LLVMContext &C, StorageType Storage, unsigned Tag, unsigned Line, ArrayRef Ops) - : DINode(C, DIImportedEntityKind, Storage, Tag, Ops), Line(Line) {} + : DINode(C, DIImportedEntityKind, Storage, Tag, Ops) { + SubclassData32 = Line; + } ~DIImportedEntity() = default; static DIImportedEntity *getImpl(LLVMContext &Context, unsigned Tag, @@ -3499,7 +3516,7 @@ public: TempDIImportedEntity clone() const { return cloneImpl(); } - unsigned getLine() const { return Line; } + unsigned getLine() const { return SubclassData32; } DIScope *getScope() const { return cast_or_null(getRawScope()); } DINode *getEntity() const { return cast_or_null(getRawEntity()); } StringRef getName() const { return getStringOperand(2); } @@ -3567,6 +3584,8 @@ public: /// \c DW_MACINFO_*, defined in llvm/BinaryFormat/Dwarf.h). Called \a /// DIMacroNode /// because it's potentially used for non-DWARF output. +/// +/// Uses the SubclassData16 Metadata slot. class DIMacroNode : public MDNode { friend class LLVMContextImpl; friend class MDNode; @@ -3611,15 +3630,18 @@ public: } }; +/// Macro +/// +/// Uses the SubclassData32 Metadata slot. class DIMacro : public DIMacroNode { friend class LLVMContextImpl; friend class MDNode; - unsigned Line; - DIMacro(LLVMContext &C, StorageType Storage, unsigned MIType, unsigned Line, ArrayRef Ops) - : DIMacroNode(C, DIMacroKind, Storage, MIType, Ops), Line(Line) {} + : DIMacroNode(C, DIMacroKind, Storage, MIType, Ops) { + SubclassData32 = Line; + } ~DIMacro() = default; static DIMacro *getImpl(LLVMContext &Context, unsigned MIType, unsigned Line, @@ -3649,7 +3671,7 @@ public: TempDIMacro clone() const { return cloneImpl(); } - unsigned getLine() const { return Line; } + unsigned getLine() const { return SubclassData32; } StringRef getName() const { return getStringOperand(0); } StringRef getValue() const { return getStringOperand(1); } @@ -3662,15 +3684,18 @@ public: } }; +/// Macro file +/// +/// Uses the SubclassData32 Metadata slot. 
class DIMacroFile : public DIMacroNode { friend class LLVMContextImpl; friend class MDNode; - unsigned Line; - DIMacroFile(LLVMContext &C, StorageType Storage, unsigned MIType, unsigned Line, ArrayRef Ops) - : DIMacroNode(C, DIMacroFileKind, Storage, MIType, Ops), Line(Line) {} + : DIMacroNode(C, DIMacroFileKind, Storage, MIType, Ops) { + SubclassData32 = Line; + } ~DIMacroFile() = default; static DIMacroFile *getImpl(LLVMContext &Context, unsigned MIType, @@ -3711,7 +3736,7 @@ public: replaceOperandWith(1, Elements.get()); } - unsigned getLine() const { return Line; } + unsigned getLine() const { return SubclassData32; } DIFile *getFile() const { return cast_or_null(getRawFile()); } DIMacroNodeArray getElements() const { diff --git a/llvm/lib/IR/DebugInfoMetadata.cpp b/llvm/lib/IR/DebugInfoMetadata.cpp index 943826c..927aefb 100644 --- a/llvm/lib/IR/DebugInfoMetadata.cpp +++ b/llvm/lib/IR/DebugInfoMetadata.cpp @@ -915,11 +915,11 @@ DICompileUnit::DICompileUnit(LLVMContext &C, StorageType Storage, bool DebugInfoForProfiling, unsigned NameTableKind, bool RangesBaseAddress, ArrayRef Ops) : DIScope(C, DICompileUnitKind, Storage, dwarf::DW_TAG_compile_unit, Ops), - SourceLanguage(SourceLanguage), IsOptimized(IsOptimized), - RuntimeVersion(RuntimeVersion), EmissionKind(EmissionKind), DWOId(DWOId), - SplitDebugInlining(SplitDebugInlining), + SourceLanguage(SourceLanguage), RuntimeVersion(RuntimeVersion), + DWOId(DWOId), EmissionKind(EmissionKind), NameTableKind(NameTableKind), + IsOptimized(IsOptimized), SplitDebugInlining(SplitDebugInlining), DebugInfoForProfiling(DebugInfoForProfiling), - NameTableKind(NameTableKind), RangesBaseAddress(RangesBaseAddress) { + RangesBaseAddress(RangesBaseAddress) { assert(Storage != Uniqued); } @@ -1181,8 +1181,9 @@ DILexicalBlockFile *DILexicalBlockFile::getImpl(LLVMContext &Context, DINamespace::DINamespace(LLVMContext &Context, StorageType Storage, bool ExportSymbols, ArrayRef Ops) - : DIScope(Context, DINamespaceKind, Storage, dwarf::DW_TAG_namespace, Ops), - ExportSymbols(ExportSymbols) {} + : DIScope(Context, DINamespaceKind, Storage, dwarf::DW_TAG_namespace, Ops) { + SubclassData1 = ExportSymbols; +} DINamespace *DINamespace::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name, bool ExportSymbols, StorageType Storage, bool ShouldCreate) { @@ -1196,8 +1197,9 @@ DINamespace *DINamespace::getImpl(LLVMContext &Context, Metadata *Scope, DICommonBlock::DICommonBlock(LLVMContext &Context, StorageType Storage, unsigned LineNo, ArrayRef Ops) : DIScope(Context, DICommonBlockKind, Storage, dwarf::DW_TAG_common_block, - Ops), - LineNo(LineNo) {} + Ops) { + SubclassData32 = LineNo; +} DICommonBlock *DICommonBlock::getImpl(LLVMContext &Context, Metadata *Scope, Metadata *Decl, MDString *Name, Metadata *File, unsigned LineNo, @@ -1211,8 +1213,10 @@ DICommonBlock *DICommonBlock::getImpl(LLVMContext &Context, Metadata *Scope, DIModule::DIModule(LLVMContext &Context, StorageType Storage, unsigned LineNo, bool IsDecl, ArrayRef Ops) - : DIScope(Context, DIModuleKind, Storage, dwarf::DW_TAG_module, Ops), - LineNo(LineNo), IsDecl(IsDecl) {} + : DIScope(Context, DIModuleKind, Storage, dwarf::DW_TAG_module, Ops) { + SubclassData1 = IsDecl; + SubclassData32 = LineNo; +} DIModule *DIModule::getImpl(LLVMContext &Context, Metadata *File, Metadata *Scope, MDString *Name, MDString *ConfigurationMacros, @@ -1301,8 +1305,9 @@ DILocalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name, DIVariable::DIVariable(LLVMContext &C, unsigned ID, StorageType Storage, 
signed Line, ArrayRef Ops, uint32_t AlignInBits) - : DINode(C, ID, Storage, dwarf::DW_TAG_variable, Ops), Line(Line), - AlignInBits(AlignInBits) {} + : DINode(C, ID, Storage, dwarf::DW_TAG_variable, Ops), Line(Line) { + SubclassData32 = AlignInBits; +} std::optional DIVariable::getSizeInBits() const { // This is used by the Verifier so be mindful of broken types. const Metadata *RawType = getRawType(); @@ -1328,7 +1333,9 @@ std::optional DIVariable::getSizeInBits() const { DILabel::DILabel(LLVMContext &C, StorageType Storage, unsigned Line, ArrayRef Ops) - : DINode(C, DILabelKind, Storage, dwarf::DW_TAG_label, Ops), Line(Line) {} + : DINode(C, DILabelKind, Storage, dwarf::DW_TAG_label, Ops) { + SubclassData32 = Line; +} DILabel *DILabel::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name, Metadata *File, unsigned Line, StorageType Storage, bool ShouldCreate) { -- cgit v1.1 From 5f64b940761002efcff04c40d6e882167d05197c Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Thu, 16 Nov 2023 13:58:07 -0800 Subject: Clarify error messages on corefiles that no plugin handles (#72559) These error messages are written in a way that makes sense to an lldb developer, but not to an end user who asks lldb to run on a compressed corefile or whatever. Simplfy the messages. --- lldb/source/Commands/CommandObjectTarget.cpp | 8 +++----- lldb/source/Core/IOHandlerCursesGUI.cpp | 4 ++-- lldb/test/API/commands/target/basic/TestTargetCommand.py | 2 +- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/lldb/source/Commands/CommandObjectTarget.cpp b/lldb/source/Commands/CommandObjectTarget.cpp index 8f052d0..58785cd 100644 --- a/lldb/source/Commands/CommandObjectTarget.cpp +++ b/lldb/source/Commands/CommandObjectTarget.cpp @@ -436,8 +436,7 @@ protected: error = process_sp->LoadCore(); if (error.Fail()) { - result.AppendError( - error.AsCString("can't find plug-in for core file")); + result.AppendError(error.AsCString("unknown core file format")); return; } else { result.AppendMessageWithFormatv( @@ -447,9 +446,8 @@ protected: on_error.release(); } } else { - result.AppendErrorWithFormatv( - "Unable to find process plug-in for core file '{0}'\n", - core_file.GetPath()); + result.AppendErrorWithFormatv("Unknown core file format '{0}'\n", + core_file.GetPath()); } } else { result.AppendMessageWithFormat( diff --git a/lldb/source/Core/IOHandlerCursesGUI.cpp b/lldb/source/Core/IOHandlerCursesGUI.cpp index 22b8cc3..abf0b6b 100644 --- a/lldb/source/Core/IOHandlerCursesGUI.cpp +++ b/lldb/source/Core/IOHandlerCursesGUI.cpp @@ -3178,13 +3178,13 @@ public: m_debugger.GetListener(), llvm::StringRef(), &core_file_spec, false)); if (!process_sp) { - SetError("Unable to find process plug-in for core file!"); + SetError("Unknown core file format!"); return; } Status status = process_sp->LoadCore(); if (status.Fail()) { - SetError("Can't find plug-in for core file!"); + SetError("Unknown core file format!"); return; } } diff --git a/lldb/test/API/commands/target/basic/TestTargetCommand.py b/lldb/test/API/commands/target/basic/TestTargetCommand.py index 96e7fe8..cb7a5f3 100644 --- a/lldb/test/API/commands/target/basic/TestTargetCommand.py +++ b/lldb/test/API/commands/target/basic/TestTargetCommand.py @@ -447,7 +447,7 @@ class targetCommandTestCase(TestBase): self.expect( "target create -c '" + invalid_core_path + "'", error=True, - substrs=["Unable to find process plug-in for core file '"], + substrs=["Unknown core file format '"], ) # Write only files don't seem to be supported on Windows. 
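The size reduction in the "[llvm] Reduce memory footprint of Debug metadata nodes" change above comes from reusing the SubclassData1/SubclassData16/SubclassData32 slots that the Metadata base class already carries, rather than giving each DI* subclass its own fields. A rough, self-contained sketch of that storage pattern follows; the class and member names other than the SubclassData slots are invented for illustration and are not LLVM's:

    #include <cstdint>

    // Stand-in for the base class: it already reserves a few small fields
    // that would otherwise sit in padding.
    struct NodeBase {
    protected:
      bool     SubclassData1  = false;
      uint16_t SubclassData16 = 0;
      uint32_t SubclassData32 = 0;
    };

    // Before: each subclass pays for its own field plus alignment padding.
    struct LineNodeBefore : NodeBase {
      uint32_t Line = 0;
      uint32_t getLine() const { return Line; }
    };

    // After: the value lives in the slot the base class already has, so the
    // subclass typically adds no storage of its own.
    struct LineNodeAfter : NodeBase {
      explicit LineNodeAfter(uint32_t L) { SubclassData32 = L; }
      uint32_t getLine() const { return SubclassData32; }
    };
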
-- cgit v1.1 From add20537cc764b8afb97df85acba7c21d6537ae3 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 16 Nov 2023 13:36:10 -0800 Subject: Reland "[llvm-exegesis] Fix preservation of RDI in subprocess mode (#72458)" This reverts commit 186db1bcb0096a6af348d7e17866c68fa2004068. This relands commit 0718c1a8405ac130d72cd3525befed2911618cc7. The REQUIRES flag in the test that was added only specified that the machine needed to have the capability to execute the snippet rather than actually run it with performance counters. This would work with --dummy-perf-counters, but that is not currently supported in the subprocess execution mode. So for now, we require the ability to actually perform measurements to prevent test failures in configurations that don't have libpfm or access to performance counters. --- .../X86/latency/subprocess-preserved-registers.s | 30 ++++++++++++++++++++++ llvm/tools/llvm-exegesis/lib/X86/Target.cpp | 2 +- 2 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 llvm/test/tools/llvm-exegesis/X86/latency/subprocess-preserved-registers.s diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/subprocess-preserved-registers.s b/llvm/test/tools/llvm-exegesis/X86/latency/subprocess-preserved-registers.s new file mode 100644 index 0000000..320d1d7 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/X86/latency/subprocess-preserved-registers.s @@ -0,0 +1,30 @@ +# REQUIRES: exegesis-can-measure-latency, x86_64-linux + +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mode=latency -snippets-file=%s -execution-mode=subprocess | FileCheck %s + +# See comment in ./subprocess-abnormal-exit-code.s on the transient +# PTRACE_ATTACH failure. +# ALLOW_RETRIES: 2 + +# Check that the value of the registers preserved in subprocess mode while +# making the ioctl system call are actually preserved correctly. + +# LLVM-EXEGESIS-DEFREG RAX 11 +# LLVM-EXEGESIS-DEFREG RDI 13 +# LLVM-EXEGESIS-DEFREG RSI 17 +# LLVM-EXEGESIS-DEFREG R13 0 +# LLVM-EXEGESIS-DEFREG R12 127 + +cmpq $0x11, %rax +cmovneq %r12, %r13 +cmpq $0x13, %rdi +cmovneq %r12, %r13 +cmpq $0x17, %rsi +cmovneq %r12, %r13 + +movq $60, %rax +movq %r13, %rdi +syscall + +# CHECK-NOT: error: 'Child benchmarking process exited with non-zero exit code: Child process returned with unknown exit code' + diff --git a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp index ac99e98..bf8fd9ec 100644 --- a/llvm/tools/llvm-exegesis/lib/X86/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/X86/Target.cpp @@ -1205,7 +1205,7 @@ ExegesisX86Target::configurePerfCounter(long Request, bool SaveRegisters) const if(SaveRegisters) { // Restore RAX, RDI, and RSI, in reverse order. generateRegisterStackPop(X86::RSI, ConfigurePerfCounterCode); - generateRegisterStackPop(X86::RIP, ConfigurePerfCounterCode); + generateRegisterStackPop(X86::RDI, ConfigurePerfCounterCode); generateRegisterStackPop(X86::RAX, ConfigurePerfCounterCode); } return ConfigurePerfCounterCode; -- cgit v1.1 From 066c4524bc1d91b49048e7f05dc6e045bb3c9eef Mon Sep 17 00:00:00 2001 From: Jonathan Thackray Date: Thu, 16 Nov 2023 22:08:58 +0000 Subject: [AArch64] Add support for Cortex-A520, Cortex-A720 and Cortex-X4 CPUs (#72395) Cortex-A520, Cortex-A720 and Cortex-X4 are Armv9.2 AArch64 CPUs. 
Technical Reference Manual for Cortex-A520: https://developer.arm.com/documentation/102517/latest/ Technical Reference Manual for Cortex-A720: https://developer.arm.com/documentation/102530/latest/ Technical Reference Manual for Cortex-X4: https://developer.arm.com/documentation/102484/latest/ Patch co-authored by: Sivan Shani --- clang/docs/ReleaseNotes.rst | 6 +++ clang/test/Driver/aarch64-mcpu.c | 6 +++ clang/test/Misc/target-invalid-cpu-note.c | 4 +- llvm/docs/ReleaseNotes.rst | 2 + .../llvm/TargetParser/AArch64TargetParser.h | 17 +++++++++ llvm/lib/Target/AArch64/AArch64.td | 43 ++++++++++++++++++++++ llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 3 ++ llvm/lib/Target/AArch64/AArch64Subtarget.h | 3 ++ llvm/lib/TargetParser/Host.cpp | 3 ++ llvm/unittests/TargetParser/TargetParserTest.cpp | 40 +++++++++++++++++++- 10 files changed, 124 insertions(+), 3 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index ed1a978..31ebe89 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -760,6 +760,12 @@ Arm and AArch64 Support - New AArch64 asm constraints have been added for r8-r11(Uci) and r12-r15(Ucj). + Support has been added for the following processors (-mcpu identifiers in parenthesis): + + * Arm Cortex-A520 (cortex-a520). + * Arm Cortex-A720 (cortex-a720). + * Arm Cortex-X4 (cortex-x4). + Android Support ^^^^^^^^^^^^^^^ diff --git a/clang/test/Driver/aarch64-mcpu.c b/clang/test/Driver/aarch64-mcpu.c index 321d3a73..511482a 100644 --- a/clang/test/Driver/aarch64-mcpu.c +++ b/clang/test/Driver/aarch64-mcpu.c @@ -44,12 +44,16 @@ // CORTEXX1C: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-x1c" // RUN: %clang --target=aarch64 -mcpu=cortex-x3 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEXX3 %s // CORTEXX3: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-x3" +// RUN: %clang --target=aarch64 -mcpu=cortex-x4 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-X4 %s +// CORTEX-X4: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-x4" // RUN: %clang --target=aarch64 -mcpu=cortex-a78 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEXA78 %s // CORTEXA78: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a78" // RUN: %clang --target=aarch64 -mcpu=cortex-a78c -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A78C %s // CORTEX-A78C: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a78c" // RUN: %clang --target=aarch64 -mcpu=cortex-a715 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A715 %s // CORTEX-A715: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a715" +// RUN: %clang --target=aarch64 -mcpu=cortex-a720 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A720 %s +// CORTEX-A720: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a720" // RUN: %clang --target=aarch64 -mcpu=neoverse-e1 -### -c %s 2>&1 | FileCheck -check-prefix=NEOVERSE-E1 %s // NEOVERSE-E1: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "neoverse-e1" // RUN: %clang --target=aarch64 -mcpu=neoverse-v1 -### -c %s 2>&1 | FileCheck -check-prefix=NEOVERSE-V1 %s @@ -62,6 +66,8 @@ // NEOVERSE-N2: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "neoverse-n2" // RUN: %clang --target=aarch64 -mcpu=neoverse-512tvb -### -c %s 2>&1 | FileCheck -check-prefix=NEOVERSE-512TVB %s // NEOVERSE-512TVB: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "neoverse-512tvb" +// RUN: %clang --target=aarch64 -mcpu=cortex-a520 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEX-A520 %s +// CORTEX-A520: "-cc1"{{.*}} 
"-triple" "aarch64{{.*}}" "-target-cpu" "cortex-a520" // RUN: %clang --target=aarch64 -mcpu=cortex-r82 -### -c %s 2>&1 | FileCheck -check-prefix=CORTEXR82 %s // CORTEXR82: "-cc1"{{.*}} "-triple" "aarch64{{.*}}" "-target-cpu" "cortex-r82" diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c index 8e91eb4..25ff51e 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -5,11 +5,11 @@ // RUN: not %clang_cc1 -triple arm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AARCH64 // AARCH64: error: unknown target CPU 'not-a-cpu' -// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-a715, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, grace{{$}} +// AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, grace{{$}} // RUN: not %clang_cc1 -triple arm64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_AARCH64 // TUNE_AARCH64: error: unknown target CPU 'not-a-cpu' -// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-a715, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, neoverse-e1, neoverse-n1, neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, grace{{$}} +// TUNE_AARCH64-NEXT: note: valid target CPU values are: cortex-a34, cortex-a35, cortex-a53, cortex-a55, cortex-a510, cortex-a520, cortex-a57, cortex-a65, cortex-a65ae, cortex-a72, cortex-a73, cortex-a75, cortex-a76, cortex-a76ae, cortex-a77, cortex-a78, cortex-a78c, cortex-a710, cortex-a715, cortex-a720, cortex-r82, cortex-x1, cortex-x1c, cortex-x2, cortex-x3, cortex-x4, neoverse-e1, neoverse-n1, 
neoverse-n2, neoverse-512tvb, neoverse-v1, neoverse-v2, cyclone, apple-a7, apple-a8, apple-a9, apple-a10, apple-a11, apple-a12, apple-a13, apple-a14, apple-a15, apple-a16, apple-m1, apple-m2, apple-s4, apple-s5, exynos-m3, exynos-m4, exynos-m5, falkor, saphira, kryo, thunderx2t99, thunderx3t110, thunderx, thunderxt88, thunderxt81, thunderxt83, tsv110, a64fx, carmel, ampere1, ampere1a, grace{{$}} // RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86 // X86: error: unknown target CPU 'not-a-cpu' diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index ff767d9..eccac66 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -102,6 +102,8 @@ Changes to the AMDGPU Backend * Implemented :ref:`llvm.get.rounding ` +* Added support for Cortex-A520, Cortex-A720 and Cortex-X4 CPUs. + Changes to the ARM Backend -------------------------- diff --git a/llvm/include/llvm/TargetParser/AArch64TargetParser.h b/llvm/include/llvm/TargetParser/AArch64TargetParser.h index 4505f6e..c74b7b8 100644 --- a/llvm/include/llvm/TargetParser/AArch64TargetParser.h +++ b/llvm/include/llvm/TargetParser/AArch64TargetParser.h @@ -426,6 +426,11 @@ inline constexpr CpuInfo CpuInfos[] = { AArch64::AEK_PAUTH, AArch64::AEK_MTE, AArch64::AEK_SSBS, AArch64::AEK_SVE, AArch64::AEK_SVE2, AArch64::AEK_SVE2BITPERM, AArch64::AEK_FP16FML}))}, + {"cortex-a520", ARMV9_2A, + (AArch64::ExtensionBitset( + {AArch64::AEK_SB, AArch64::AEK_SSBS, AArch64::AEK_MTE, + AArch64::AEK_FP16FML, AArch64::AEK_PAUTH, AArch64::AEK_SVE2BITPERM, + AArch64::AEK_FLAGM, AArch64::AEK_PERFMON, AArch64::AEK_PREDRES}))}, {"cortex-a57", ARMV8A, (AArch64::ExtensionBitset( {AArch64::AEK_AES, AArch64::AEK_SHA2, AArch64::AEK_CRC}))}, @@ -483,6 +488,12 @@ inline constexpr CpuInfo CpuInfos[] = { AArch64::AEK_I8MM, AArch64::AEK_PREDRES, AArch64::AEK_PERFMON, AArch64::AEK_PROFILE, AArch64::AEK_SVE, AArch64::AEK_SVE2BITPERM, AArch64::AEK_BF16, AArch64::AEK_FLAGM}))}, + {"cortex-a720", ARMV9_2A, + (AArch64::ExtensionBitset( + {AArch64::AEK_SB, AArch64::AEK_SSBS, AArch64::AEK_MTE, + AArch64::AEK_FP16FML, AArch64::AEK_PAUTH, AArch64::AEK_SVE2BITPERM, + AArch64::AEK_FLAGM, AArch64::AEK_PERFMON, AArch64::AEK_PREDRES, + AArch64::AEK_PROFILE}))}, {"cortex-r82", ARMV8R, (AArch64::ExtensionBitset({AArch64::AEK_LSE}))}, {"cortex-x1", ARMV8_2A, @@ -508,6 +519,12 @@ inline constexpr CpuInfo CpuInfos[] = { AArch64::AEK_SVE2BITPERM, AArch64::AEK_SB, AArch64::AEK_PAUTH, AArch64::AEK_FP16, AArch64::AEK_FP16FML, AArch64::AEK_PREDRES, AArch64::AEK_FLAGM, AArch64::AEK_SSBS}))}, + {"cortex-x4", ARMV9_2A, + (AArch64::ExtensionBitset( + {AArch64::AEK_SB, AArch64::AEK_SSBS, AArch64::AEK_MTE, + AArch64::AEK_FP16FML, AArch64::AEK_PAUTH, AArch64::AEK_SVE2BITPERM, + AArch64::AEK_FLAGM, AArch64::AEK_PERFMON, AArch64::AEK_PREDRES, + AArch64::AEK_PROFILE}))}, {"neoverse-e1", ARMV8_2A, (AArch64::ExtensionBitset( {AArch64::AEK_AES, AArch64::AEK_SHA2, AArch64::AEK_DOTPROD, diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index dd2ae3e..f1f3d56 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -852,6 +852,12 @@ def TuneA510 : SubtargetFeature<"a510", "ARMProcFamily", "CortexA510", FeaturePostRAScheduler ]>; +def TuneA520 : SubtargetFeature<"a520", "ARMProcFamily", "CortexA520", + "Cortex-A520 ARM processors", [ + FeatureFuseAES, + FeatureFuseAdrpAdd, + FeaturePostRAScheduler]>; + def TuneA57 : SubtargetFeature<"a57", "ARMProcFamily", 
"CortexA57", "Cortex-A57 ARM processors", [ FeatureFuseAES, @@ -957,6 +963,17 @@ def TuneA715 : SubtargetFeature<"a715", "ARMProcFamily", "CortexA715", FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; +def TuneA720 : SubtargetFeature<"a720", "ARMProcFamily", "CortexA720", + "Cortex-A720 ARM processors", [ + FeatureFuseAES, + FeaturePostRAScheduler, + FeatureCmpBccFusion, + FeatureAddrLSLFast, + FeatureALULSLFast, + FeatureFuseAdrpAdd, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + def TuneR82 : SubtargetFeature<"cortex-r82", "ARMProcFamily", "CortexR82", "Cortex-R82 ARM processors", [ @@ -994,6 +1011,16 @@ def TuneX3 : SubtargetFeature<"cortex-x3", "ARMProcFamily", "CortexX3", FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; +def TuneX4 : SubtargetFeature<"cortex-x4", "ARMProcFamily", "CortexX4", + "Cortex-X4 ARM processors", [ + FeatureAddrLSLFast, + FeatureALULSLFast, + FeatureFuseAdrpAdd, + FeatureFuseAES, + FeaturePostRAScheduler, + FeatureEnableSelectOptimize, + FeaturePredictableSelectIsExpensive]>; + def TuneA64FX : SubtargetFeature<"a64fx", "ARMProcFamily", "A64FX", "Fujitsu A64FX processors", [ FeaturePostRAScheduler, @@ -1329,6 +1356,9 @@ def ProcessorFeatures { FeatureMatMulInt8, FeatureBF16, FeatureAM, FeatureMTE, FeatureETE, FeatureSVE2BitPerm, FeatureFP16FML]; + list A520 = [HasV9_2aOps, FeaturePerfMon, FeatureAM, + FeatureMTE, FeatureETE, FeatureSVE2BitPerm, + FeatureFP16FML]; list A65 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, FeatureNEON, FeatureFullFP16, FeatureDotProd, FeatureRCPC, FeatureSSBS, FeatureRAS, @@ -1355,6 +1385,9 @@ def ProcessorFeatures { FeatureFP16FML, FeatureSVE, FeatureTRBE, FeatureSVE2BitPerm, FeatureBF16, FeatureETE, FeaturePerfMon, FeatureMatMulInt8, FeatureSPE]; + list A720 = [HasV9_2aOps, FeatureMTE, FeatureFP16FML, + FeatureTRBE, FeatureSVE2BitPerm, FeatureETE, + FeaturePerfMon, FeatureSPE, FeatureSPE_EEF]; list R82 = [HasV8_0rOps, FeaturePerfMon, FeatureFullFP16, FeatureFP16FML, FeatureSSBS, FeaturePredRes, FeatureSB]; @@ -1376,6 +1409,10 @@ def ProcessorFeatures { FeatureSPE, FeatureBF16, FeatureMatMulInt8, FeatureMTE, FeatureSVE2BitPerm, FeatureFullFP16, FeatureFP16FML]; + list X4 = [HasV9_2aOps, + FeaturePerfMon, FeatureETE, FeatureTRBE, + FeatureSPE, FeatureMTE, FeatureSVE2BitPerm, + FeatureFP16FML, FeatureSPE_EEF]; list A64FX = [HasV8_2aOps, FeatureFPARMv8, FeatureNEON, FeatureSHA2, FeaturePerfMon, FeatureFullFP16, FeatureSVE, FeatureComplxNum]; @@ -1480,6 +1517,8 @@ def : ProcessorModel<"cortex-a55", CortexA55Model, ProcessorFeatures.A55, [TuneA55]>; def : ProcessorModel<"cortex-a510", CortexA510Model, ProcessorFeatures.A510, [TuneA510]>; +def : ProcessorModel<"cortex-a520", CortexA510Model, ProcessorFeatures.A520, + [TuneA520]>; def : ProcessorModel<"cortex-a57", CortexA57Model, ProcessorFeatures.A53, [TuneA57]>; def : ProcessorModel<"cortex-a65", CortexA53Model, ProcessorFeatures.A65, @@ -1506,6 +1545,8 @@ def : ProcessorModel<"cortex-a710", NeoverseN2Model, ProcessorFeatures.A710, [TuneA710]>; def : ProcessorModel<"cortex-a715", NeoverseN2Model, ProcessorFeatures.A715, [TuneA715]>; +def : ProcessorModel<"cortex-a720", NeoverseN2Model, ProcessorFeatures.A720, + [TuneA720]>; def : ProcessorModel<"cortex-r82", CortexA55Model, ProcessorFeatures.R82, [TuneR82]>; def : ProcessorModel<"cortex-x1", CortexA57Model, ProcessorFeatures.X1, @@ -1516,6 +1557,8 @@ def : ProcessorModel<"cortex-x2", NeoverseN2Model, ProcessorFeatures.X2, [TuneX2]>; def : ProcessorModel<"cortex-x3", 
NeoverseN2Model, ProcessorFeatures.X3, [TuneX3]>; +def : ProcessorModel<"cortex-x4", NeoverseN2Model, ProcessorFeatures.X4, + [TuneX4]>; def : ProcessorModel<"neoverse-e1", CortexA53Model, ProcessorFeatures.NeoverseE1, [TuneNeoverseE1]>; def : ProcessorModel<"neoverse-n1", NeoverseN1Model, diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index ff14fcf..beae9b5 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -149,6 +149,7 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) { MaxBytesForLoopAlignment = 16; break; case CortexA510: + case CortexA520: PrefFunctionAlignment = Align(16); VScaleForTuning = 1; PrefLoopAlignment = Align(16); @@ -156,8 +157,10 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) { break; case CortexA710: case CortexA715: + case CortexA720: case CortexX2: case CortexX3: + case CortexX4: PrefFunctionAlignment = Align(16); VScaleForTuning = 1; PrefLoopAlignment = Align(32); diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index b2ee2e7..ae2bea6 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -56,6 +56,7 @@ public: CortexA53, CortexA55, CortexA510, + CortexA520, CortexA57, CortexA65, CortexA72, @@ -67,11 +68,13 @@ public: CortexA78C, CortexA710, CortexA715, + CortexA720, CortexR82, CortexX1, CortexX1C, CortexX2, CortexX3, + CortexX4, ExynosM3, Falkor, Kryo, diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp index aba2ebf..ae47929 100644 --- a/llvm/lib/TargetParser/Host.cpp +++ b/llvm/lib/TargetParser/Host.cpp @@ -208,6 +208,7 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { .Case("0xd03", "cortex-a53") .Case("0xd05", "cortex-a55") .Case("0xd46", "cortex-a510") + .Case("0xd80", "cortex-a520") .Case("0xd07", "cortex-a57") .Case("0xd08", "cortex-a72") .Case("0xd09", "cortex-a73") @@ -217,10 +218,12 @@ StringRef sys::detail::getHostCPUNameForARM(StringRef ProcCpuinfoContent) { .Case("0xd41", "cortex-a78") .Case("0xd47", "cortex-a710") .Case("0xd4d", "cortex-a715") + .Case("0xd81", "cortex-a720") .Case("0xd44", "cortex-x1") .Case("0xd4c", "cortex-x1c") .Case("0xd48", "cortex-x2") .Case("0xd4e", "cortex-x3") + .Case("0xd82", "cortex-x4") .Case("0xd0c", "neoverse-n1") .Case("0xd49", "neoverse-n2") .Case("0xd40", "neoverse-v1") diff --git a/llvm/unittests/TargetParser/TargetParserTest.cpp b/llvm/unittests/TargetParser/TargetParserTest.cpp index 68cb482..d41e254 100644 --- a/llvm/unittests/TargetParser/TargetParserTest.cpp +++ b/llvm/unittests/TargetParser/TargetParserTest.cpp @@ -1097,6 +1097,18 @@ INSTANTIATE_TEST_SUITE_P( AArch64::AEK_SB})), "9-A"), ARMCPUTestParams( + "cortex-a520", "armv9.2-a", "crypto-neon-fp-armv8", + (AArch64::ExtensionBitset( + {AArch64::AEK_BF16, AArch64::AEK_I8MM, AArch64::AEK_SVE, + AArch64::AEK_SVE2, AArch64::AEK_FP16, AArch64::AEK_DOTPROD, + AArch64::AEK_LSE, AArch64::AEK_RDM, AArch64::AEK_SIMD, + AArch64::AEK_RCPC, AArch64::AEK_RAS, AArch64::AEK_CRC, + AArch64::AEK_FP, AArch64::AEK_SB, AArch64::AEK_SSBS, + AArch64::AEK_MTE, AArch64::AEK_FP16FML, AArch64::AEK_PAUTH, + AArch64::AEK_SVE2BITPERM, AArch64::AEK_FLAGM, + AArch64::AEK_PERFMON, AArch64::AEK_PREDRES})), + "9.2-A"), + ARMCPUTestParams( "cortex-a57", "armv8-a", "crypto-neon-fp-armv8", (AArch64::ExtensionBitset( {AArch64::AEK_CRC, AArch64::AEK_AES, AArch64::AEK_SHA2, @@ -1209,6 +1221,19 @@ 
INSTANTIATE_TEST_SUITE_P( AArch64::AEK_FLAGM})), "9-A"), ARMCPUTestParams( + "cortex-a720", "armv9.2-a", "crypto-neon-fp-armv8", + (AArch64::ExtensionBitset( + {AArch64::AEK_BF16, AArch64::AEK_I8MM, AArch64::AEK_SVE, + AArch64::AEK_SVE2, AArch64::AEK_FP16, AArch64::AEK_DOTPROD, + AArch64::AEK_LSE, AArch64::AEK_RDM, AArch64::AEK_SIMD, + AArch64::AEK_RCPC, AArch64::AEK_RAS, AArch64::AEK_CRC, + AArch64::AEK_FP, AArch64::AEK_SB, AArch64::AEK_SSBS, + AArch64::AEK_MTE, AArch64::AEK_FP16FML, AArch64::AEK_PAUTH, + AArch64::AEK_SVE2BITPERM, AArch64::AEK_FLAGM, + AArch64::AEK_PERFMON, AArch64::AEK_PREDRES, + AArch64::AEK_PROFILE})), + "9.2-A"), + ARMCPUTestParams( "neoverse-v1", "armv8.4-a", "crypto-neon-fp-armv8", (AArch64::ExtensionBitset( {AArch64::AEK_RAS, AArch64::AEK_SVE, AArch64::AEK_SSBS, @@ -1288,6 +1313,19 @@ INSTANTIATE_TEST_SUITE_P( AArch64::AEK_SSBS})), "9-A"), ARMCPUTestParams( + "cortex-x4", "armv9.2-a", "crypto-neon-fp-armv8", + (AArch64::ExtensionBitset( + {AArch64::AEK_BF16, AArch64::AEK_I8MM, AArch64::AEK_SVE, + AArch64::AEK_SVE2, AArch64::AEK_FP16, AArch64::AEK_DOTPROD, + AArch64::AEK_LSE, AArch64::AEK_RDM, AArch64::AEK_SIMD, + AArch64::AEK_RCPC, AArch64::AEK_RAS, AArch64::AEK_CRC, + AArch64::AEK_FP, AArch64::AEK_SB, AArch64::AEK_SSBS, + AArch64::AEK_MTE, AArch64::AEK_FP16FML, AArch64::AEK_PAUTH, + AArch64::AEK_SVE2BITPERM, AArch64::AEK_FLAGM, + AArch64::AEK_PERFMON, AArch64::AEK_PREDRES, + AArch64::AEK_PROFILE})), + "9.2-A"), + ARMCPUTestParams( "cyclone", "armv8-a", "crypto-neon-fp-armv8", (AArch64::ExtensionBitset( {AArch64::AEK_NONE, AArch64::AEK_AES, AArch64::AEK_SHA2, @@ -1568,7 +1606,7 @@ INSTANTIATE_TEST_SUITE_P( "8.2-A"))); // Note: number of CPUs includes aliases. -static constexpr unsigned NumAArch64CPUArchs = 62; +static constexpr unsigned NumAArch64CPUArchs = 65; TEST(TargetParserTest, testAArch64CPUArchList) { SmallVector List; -- cgit v1.1 From 1aa493f0645395908fe77bc69bce93fd4e80b1e8 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 16 Nov 2023 14:04:34 -0800 Subject: [RISCV] Further expand coverage for insert_vector_elt patterns --- .../CodeGen/RISCV/rvv/concat-vector-insert-elt.ll | 241 +++++++++++++++++++++ .../RISCV/rvv/fixed-vectors-buildvec-of-binop.ll | 38 ++++ 2 files changed, 279 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/rvv/concat-vector-insert-elt.ll diff --git a/llvm/test/CodeGen/RISCV/rvv/concat-vector-insert-elt.ll b/llvm/test/CodeGen/RISCV/rvv/concat-vector-insert-elt.ll new file mode 100644 index 0000000..9193f7a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/concat-vector-insert-elt.ll @@ -0,0 +1,241 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+v -target-abi=ilp32 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64 \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 + +define void @v4xi8_concat_vector_insert_idx0(ptr %a, ptr %b, i8 %x) { +; CHECK-LABEL: v4xi8_concat_vector_insert_idx0: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: vmv.s.x v9, a2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, tu, ma +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret + %v1 = load <2 x i8>, ptr %a + %v2 
= load <2 x i8>, ptr %b + %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> + %ins = insertelement <4 x i8> %concat, i8 %x, i32 1 + store <4 x i8> %ins, ptr %a + ret void +} + +define void @v4xi8_concat_vector_insert_idx1(ptr %a, ptr %b, i8 %x) { +; CHECK-LABEL: v4xi8_concat_vector_insert_idx1: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: vmv.s.x v9, a2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, tu, ma +; CHECK-NEXT: vslideup.vi v8, v9, 1 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret + %v1 = load <2 x i8>, ptr %a + %v2 = load <2 x i8>, ptr %b + %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> + %ins = insertelement <4 x i8> %concat, i8 %x, i32 1 + store <4 x i8> %ins, ptr %a + ret void +} + +define void @v4xi8_concat_vector_insert_idx2(ptr %a, ptr %b, i8 %x) { +; CHECK-LABEL: v4xi8_concat_vector_insert_idx2: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: vmv.s.x v9, a2 +; CHECK-NEXT: vsetivli zero, 3, e8, mf4, tu, ma +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret + %v1 = load <2 x i8>, ptr %a + %v2 = load <2 x i8>, ptr %b + %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> + %ins = insertelement <4 x i8> %concat, i8 %x, i32 2 + store <4 x i8> %ins, ptr %a + ret void +} + +define void @v4xi8_concat_vector_insert_idx3(ptr %a, ptr %b, i8 %x) { +; CHECK-LABEL: v4xi8_concat_vector_insert_idx3: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: vmv.s.x v9, a2 +; CHECK-NEXT: vslideup.vi v8, v9, 3 +; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: ret + %v1 = load <2 x i8>, ptr %a + %v2 = load <2 x i8>, ptr %b + %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> + %ins = insertelement <4 x i8> %concat, i8 %x, i32 3 + store <4 x i8> %ins, ptr %a + ret void +} + +define void @v4xi64_concat_vector_insert_idx0(ptr %a, ptr %b, i64 %x) { +; RV32-LABEL: v4xi64_concat_vector_insert_idx0: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vle64.v v10, (a1) +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vslideup.vi v8, v10, 2 +; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV32-NEXT: vslide1down.vx v10, v8, a2 +; RV32-NEXT: vslide1down.vx v10, v10, a3 +; RV32-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV32-NEXT: vslideup.vi v8, v10, 1 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vse64.v v8, (a0) +; RV32-NEXT: ret +; +; RV64-LABEL: v4xi64_concat_vector_insert_idx0: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vle64.v v10, (a1) +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vslideup.vi v8, v10, 2 +; RV64-NEXT: vmv.s.x v10, a2 +; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV64-NEXT: vslideup.vi v8, v10, 1 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: ret + %v1 = load 
<2 x i64>, ptr %a + %v2 = load <2 x i64>, ptr %b + %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> + %ins = insertelement <4 x i64> %concat, i64 %x, i32 1 + store <4 x i64> %ins, ptr %a + ret void +} + +define void @v4xi64_concat_vector_insert_idx1(ptr %a, ptr %b, i64 %x) { +; RV32-LABEL: v4xi64_concat_vector_insert_idx1: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vle64.v v10, (a1) +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vslideup.vi v8, v10, 2 +; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV32-NEXT: vslide1down.vx v10, v8, a2 +; RV32-NEXT: vslide1down.vx v10, v10, a3 +; RV32-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV32-NEXT: vslideup.vi v8, v10, 1 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vse64.v v8, (a0) +; RV32-NEXT: ret +; +; RV64-LABEL: v4xi64_concat_vector_insert_idx1: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vle64.v v10, (a1) +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vslideup.vi v8, v10, 2 +; RV64-NEXT: vmv.s.x v10, a2 +; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV64-NEXT: vslideup.vi v8, v10, 1 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: ret + %v1 = load <2 x i64>, ptr %a + %v2 = load <2 x i64>, ptr %b + %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> + %ins = insertelement <4 x i64> %concat, i64 %x, i32 1 + store <4 x i64> %ins, ptr %a + ret void +} + +define void @v4xi64_concat_vector_insert_idx2(ptr %a, ptr %b, i64 %x) { +; RV32-LABEL: v4xi64_concat_vector_insert_idx2: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vle64.v v10, (a1) +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vslideup.vi v8, v10, 2 +; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma +; RV32-NEXT: vslide1down.vx v10, v8, a2 +; RV32-NEXT: vslide1down.vx v10, v10, a3 +; RV32-NEXT: vsetivli zero, 3, e64, m2, tu, ma +; RV32-NEXT: vslideup.vi v8, v10, 2 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vse64.v v8, (a0) +; RV32-NEXT: ret +; +; RV64-LABEL: v4xi64_concat_vector_insert_idx2: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vle64.v v10, (a1) +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vslideup.vi v8, v10, 2 +; RV64-NEXT: vmv.s.x v10, a2 +; RV64-NEXT: vsetivli zero, 3, e64, m2, tu, ma +; RV64-NEXT: vslideup.vi v8, v10, 2 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: ret + %v1 = load <2 x i64>, ptr %a + %v2 = load <2 x i64>, ptr %b + %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> + %ins = insertelement <4 x i64> %concat, i64 %x, i32 2 + store <4 x i64> %ins, ptr %a + ret void +} + +define void @v4xi64_concat_vector_insert_idx3(ptr %a, ptr %b, i64 %x) { +; RV32-LABEL: v4xi64_concat_vector_insert_idx3: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vle64.v v10, (a1) +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vslideup.vi v8, v10, 2 +; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma +; RV32-NEXT: vslide1down.vx v10, v8, a2 +; RV32-NEXT: vslide1down.vx v10, v10, a3 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV32-NEXT: vslideup.vi v8, v10, 3 +; RV32-NEXT: vse64.v v8, (a0) +; RV32-NEXT: ret +; +; RV64-LABEL: 
v4xi64_concat_vector_insert_idx3: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vle64.v v10, (a1) +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vslideup.vi v8, v10, 2 +; RV64-NEXT: vmv.s.x v10, a2 +; RV64-NEXT: vslideup.vi v8, v10, 3 +; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: ret + %v1 = load <2 x i64>, ptr %a + %v2 = load <2 x i64>, ptr %b + %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> + %ins = insertelement <4 x i64> %concat, i64 %x, i32 3 + store <4 x i64> %ins, ptr %a + ret void +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll index 8055944..2ffca98 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll @@ -566,3 +566,41 @@ define <8 x i32> @add_constant_rhs_8xi32_vector_in3(<8 x i32> %vin, i32 %a, i32 %v3 = insertelement <8 x i32> %v2, i32 %e3, i32 6 ret <8 x i32> %v3 } + +define <8 x i32> @add_constant_rhs_8xi32_partial(<8 x i32> %vin, i32 %a, i32 %b, i32 %c, i32 %d) { +; CHECK-LABEL: add_constant_rhs_8xi32_partial: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a4, %hi(.LCPI19_0) +; CHECK-NEXT: addi a4, a4, %lo(.LCPI19_0) +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vle32.v v10, (a4) +; CHECK-NEXT: vadd.vv v8, v8, v10 +; CHECK-NEXT: addi a0, a0, 23 +; CHECK-NEXT: addi a1, a1, 25 +; CHECK-NEXT: addi a2, a2, 1 +; CHECK-NEXT: addi a3, a3, 2047 +; CHECK-NEXT: addi a3, a3, 308 +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vmv.s.x v10, a1 +; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 5 +; CHECK-NEXT: vmv.s.x v10, a2 +; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma +; CHECK-NEXT: vslideup.vi v8, v10, 6 +; CHECK-NEXT: vmv.s.x v10, a3 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vslideup.vi v8, v10, 7 +; CHECK-NEXT: ret + %vadd = add <8 x i32> %vin, + %e0 = add i32 %a, 23 + %e1 = add i32 %b, 25 + %e2 = add i32 %c, 1 + %e3 = add i32 %d, 2355 + %v0 = insertelement <8 x i32> %vadd, i32 %e0, i32 4 + %v1 = insertelement <8 x i32> %v0, i32 %e1, i32 5 + %v2 = insertelement <8 x i32> %v1, i32 %e2, i32 6 + %v3 = insertelement <8 x i32> %v2, i32 %e3, i32 7 + ret <8 x i32> %v3 +} -- cgit v1.1 From 46396108deb24564159c441c6e6ebfac26714d7b Mon Sep 17 00:00:00 2001 From: Augusto Noronha Date: Thu, 16 Nov 2023 14:20:14 -0800 Subject: [lldb] Add interface to check if UserExpression::Parse() is cacheable (#66826) When setting conditional breakpoints, we currently assume that a call to UserExpression::Parse() can be cached and resued multiple times. This may not be true for every user expression. Add a new method so subclasses of UserExpression can customize if they are parseable or not. --- lldb/include/lldb/Expression/UserExpression.h | 8 ++++++++ lldb/source/Breakpoint/BreakpointLocation.cpp | 1 + 2 files changed, 9 insertions(+) diff --git a/lldb/include/lldb/Expression/UserExpression.h b/lldb/include/lldb/Expression/UserExpression.h index df7a766..b6cfeec7 100644 --- a/lldb/include/lldb/Expression/UserExpression.h +++ b/lldb/include/lldb/Expression/UserExpression.h @@ -192,6 +192,14 @@ public: /// expression. Text() should contain the definition of this function. 
const char *FunctionName() override { return "$__lldb_expr"; } + /// Returns whether the call to Parse on this user expression is cacheable. + /// This function exists to provide an escape hatch for supporting languages + /// where parsing an expression in the exact same context is unsafe. For + /// example, languages where generic functions aren't monomorphized, but + /// implement some other mechanism to represent generic values, may be unsafe + /// to cache, as the concrete type substitution may be different in every + /// expression evaluation. + virtual bool IsParseCacheable() { return true; } /// Return the language that should be used when parsing. To use the /// default, return eLanguageTypeUnknown. lldb::LanguageType Language() const override { return m_language; } diff --git a/lldb/source/Breakpoint/BreakpointLocation.cpp b/lldb/source/Breakpoint/BreakpointLocation.cpp index 27dc745..931e1ad 100644 --- a/lldb/source/Breakpoint/BreakpointLocation.cpp +++ b/lldb/source/Breakpoint/BreakpointLocation.cpp @@ -250,6 +250,7 @@ bool BreakpointLocation::ConditionSaysStop(ExecutionContext &exe_ctx, DiagnosticManager diagnostics; if (condition_hash != m_condition_hash || !m_user_expression_sp || + !m_user_expression_sp->IsParseCacheable() || !m_user_expression_sp->MatchesContext(exe_ctx)) { LanguageType language = eLanguageTypeUnknown; // See if we can figure out the language from the frame, otherwise use the -- cgit v1.1 From ccd923e3cbbb62be565fbe7c401fa9e3eba566f8 Mon Sep 17 00:00:00 2001 From: Peiming Liu <36770114+PeimingLiu@users.noreply.github.com> Date: Thu, 16 Nov 2023 14:26:09 -0800 Subject: [mlir][sparse] code cleanup (remove dead code related to filter loop). (#72573) --- .../mlir/Dialect/SparseTensor/Utils/Merger.h | 46 ++------- .../Dialect/SparseTensor/Transforms/CodegenEnv.cpp | 12 +-- .../Dialect/SparseTensor/Transforms/CodegenEnv.h | 3 +- .../SparseTensor/Transforms/Sparsification.cpp | 105 ++++----------------- mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp | 8 +- mlir/unittests/Dialect/SparseTensor/MergerTest.cpp | 3 +- 6 files changed, 38 insertions(+), 139 deletions(-) diff --git a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h index cde6b2d..0e995d1 100644 --- a/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h +++ b/mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h @@ -223,29 +223,16 @@ struct LatPoint final { /// independently from the basic algorithm if bottlenecks are identified. class Merger { public: - /// Constructs a merger for the given number of tensors, native loops, and - /// filter loops. The user supplies the number of tensors involved in the - /// kernel, with the last tensor in this set denoting the output tensor. - /// The merger adds an additional synthetic tensor at the end of this set - /// to represent all invariant expressions in the kernel. - /// - /// In addition to natives loops (which are specified by the GenericOp), - /// extra filter loops are needed in order to handle affine expressions on - /// sparse levels. E.g., (d0, d1, d2) => (d0 + d1, d2), a naive - /// implementation of the filter loop could be generated as - /// - /// for (const auto c0 : coordinates[0]) { - /// if (c0 == d0 + d1) { - /// generated_code; - /// } - /// } - /// - /// to filter out coordinates that are not equal to the affine expression. + /// Constructs a merger for the given number of tensors and loops. 
The user + /// supplies the number of tensors involved in the kernel, with the last + /// tensor in this set denoting the output tensor. The merger adds an + /// additional synthetic tensor at the end of this set to represent all + /// invariant expressions in the kernel. /// /// The maxLvlRank specifies the max level rank of all inputs/output tensors. /// It is used to pre-allocate sufficient memory for internal storage. - Merger(unsigned numInputOutputTensors, unsigned numNativeLoops, - unsigned numFilterLoops, unsigned maxLvlRank); + Merger(unsigned numInputOutputTensors, unsigned numLoops, + unsigned maxLvlRank); // // Constructing valid tensor and loop identifiers. @@ -366,19 +353,6 @@ public: /// Gets the total number of loops (native loops + filter loops). constexpr unsigned getNumLoops() const { return numLoops; } - /// Gets the number of native loops. - constexpr unsigned getNumNativeLoops() const { return numNativeLoops; } - - /// Gets the number of filter loops. - constexpr unsigned getNumFilterLoops() const { - return numLoops - numNativeLoops; - } - - /// Gets the identifier of the first filter-loop. - constexpr LoopId getStartingFilterLoopId() const { - return getNumNativeLoops(); - } - /// Returns true if `b` is the `i`th loop of the output tensor. constexpr bool isOutTensor(TensorLoopId b, LoopId i) const { return b == makeTensorLoopId(outTensor, i); @@ -391,11 +365,6 @@ public: /// tensor expressions). constexpr TensorId getSynTensorID() const { return syntheticTensor; } - constexpr bool isFilterLoop(LoopId i) const { - assert(isValidLoopId(i)); - return i >= numNativeLoops; - } - /// Returns true if the expression is `(kTensor t)`. bool expIsTensor(ExprId e, TensorId t) const { const auto &expr = exp(e); @@ -657,7 +626,6 @@ private: const TensorId outTensor; const TensorId syntheticTensor; const unsigned numTensors; - const unsigned numNativeLoops; const unsigned numLoops; bool hasSparseOut; diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp index ad2f649..cc05f1d 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.cpp @@ -42,14 +42,12 @@ static void sortDependentLoops(std::vector &target) { //===----------------------------------------------------------------------===// CodegenEnv::CodegenEnv(linalg::GenericOp linop, SparsificationOptions opts, - unsigned numTensors, unsigned numLoops, - unsigned numFilterLoops, unsigned maxRank) + unsigned numTensors, unsigned numLoops, unsigned maxRank) : linalgOp(linop), sparseOptions(opts), - latticeMerger(numTensors, numLoops, numFilterLoops, maxRank), - loopEmitter(), sparseOut(nullptr), outerParNest(-1u), insChain(), - expValues(), expFilled(), expAdded(), expCount(), redVal(), - redExp(detail::kInvalidId), redCustom(detail::kInvalidId), - redValidLexInsert() {} + latticeMerger(numTensors, numLoops, maxRank), loopEmitter(), + sparseOut(nullptr), outerParNest(-1u), insChain(), expValues(), + expFilled(), expAdded(), expCount(), redVal(), redExp(detail::kInvalidId), + redCustom(detail::kInvalidId), redValidLexInsert() {} LogicalResult CodegenEnv::initTensorExp() { // Builds the tensor expression for the Linalg operation in SSA form. 
diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h index af783b9..963cdd1 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h +++ b/mlir/lib/Dialect/SparseTensor/Transforms/CodegenEnv.h @@ -38,8 +38,7 @@ public: /// passed around during sparsification for bookkeeping /// together with some consistency asserts. CodegenEnv(linalg::GenericOp linop, SparsificationOptions opts, - unsigned numTensors, unsigned numLoops, unsigned numFilterLoops, - unsigned maxRank); + unsigned numTensors, unsigned numLoops, unsigned maxRank); // // General methods. diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp index ec96ce2..07a6e6d 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -78,12 +78,8 @@ static bool isInvariantAffine(AffineExpr a, unsigned loopDepth, LoopId ldx, /// Helper method to inspect affine expressions. Rejects cases where the /// same index is used more than once. Also rejects compound affine /// expressions in sparse dimensions. -/// filterIdx stores the current filter loop idx should be used for the next -/// compound affine sparse level, and it will be incremented by one when -/// used. static bool findAffine(Merger &merger, TensorId tid, Level lvl, AffineExpr a, - DimLevelType dlt, LoopId &filterLdx, - bool setLvlFormat = true) { + DimLevelType dlt, bool setLvlFormat = true) { switch (a.getKind()) { case AffineExprKind::DimId: { const LoopId idx = merger.makeLoopId(cast(a).getPosition()); @@ -97,22 +93,14 @@ static bool findAffine(Merger &merger, TensorId tid, Level lvl, AffineExpr a, case AffineExprKind::Add: case AffineExprKind::Mul: case AffineExprKind::Constant: { - if (!isDenseDLT(dlt) && setLvlFormat) { - assert(isUndefDLT(merger.getLvlType(tid, filterLdx))); - // Use a filter loop for sparse affine expression. - merger.setLevelAndType(tid, filterLdx, lvl, dlt); - ++filterLdx; - } - + assert(isDenseDLT(dlt)); if (auto binOp = dyn_cast(a)) { // We do not set dim level format for affine expression like d0 + d1 on // either loop index at d0 or d1. // We continue the recursion merely to check whether current affine is // admissible or not. - return findAffine(merger, tid, lvl, binOp.getLHS(), dlt, filterLdx, - false) && - findAffine(merger, tid, lvl, binOp.getRHS(), dlt, filterLdx, - false); + return findAffine(merger, tid, lvl, binOp.getLHS(), dlt, false) && + findAffine(merger, tid, lvl, binOp.getRHS(), dlt, false); } // Falls through when it is a constant Affine return true; @@ -225,32 +213,13 @@ static unsigned getNumNonTrivialIdxExpOnSparseLvls(AffineMap map, return 0; const SparseTensorType stt(rtp); - // FIXME: There's some dim/lvl confusion here. The previous version of - // the code asserted that there are `lvlRank`-many expressions, but then - // the `exprs[d]` expression assumes there are in fact `dimRank`-many - // expressions. Even though `ArrayRef::operator[]` will check for OOB, - // the mismatch between the assertion and the usage belies that this code - // cannot support non-permutations. 
- // - // Elsewhere in this file the maps returned by - // `linalg::GenericOp::getMatchingIndexingMap` are inconsistent about - // whether they're expected to have `lvlRank`-many or `dimRank`-many - // expressions (cf., `genSubscript` vs `findSparseAnnotations`); - // so those are no help in determining which is actually intended. - // - // For now we work around this problem by asserting the two ranks agree. - const Dimension dimRank = stt.getDimRank(); const Level lvlRank = stt.getLvlRank(); - assert(dimRank == lvlRank && "Non-permutations not currently supported"); const auto exprs = map.getResults(); - assert(static_cast(exprs.size()) == dimRank && + assert(static_cast(exprs.size()) == lvlRank && "AffineMap does not have dimension-rank many results"); - (void)dimRank; unsigned num = 0; for (Level l = 0; l < lvlRank; l++) { - // FIXME: `toOrigDim` is deprecated. - const Dimension d = toOrigDim(stt.getEncoding(), l); - if (!isa(exprs[d]) && !stt.isDenseLvl(l)) + if (!isa(exprs[l]) && !stt.isDenseLvl(l)) num++; } return num; @@ -281,15 +250,10 @@ static bool hasNonTrivialAffineOnSparseOut(linalg::GenericOp op) { /// no annotations are found or inadmissible constructs occur. /// We currently support two different ways to handle non-trivial index /// expression on sparse tensors, and they accept different affine expressions. -/// When using filter-loop-based approach, it accept (almost) arbitrary affine -/// index expression on sparse tensor but it is much less efficient, and will be -/// gradually removed from the codebase. /// When using dependent index reducton-based approach, it currently only /// supports affine addition index expression. static bool findSparseAnnotations(CodegenEnv &env, bool idxReducBased) { bool annotated = false; - // `filterLdx` may be mutated by `findAffine`. - LoopId filterLdx = env.merger().getStartingFilterLoopId(); for (OpOperand &t : env.op()->getOpOperands()) { const TensorId tid = env.makeTensorId(t.getOperandNumber()); const auto map = env.op().getMatchingIndexingMap(&t); @@ -310,19 +274,17 @@ static bool findSparseAnnotations(CodegenEnv &env, bool idxReducBased) { // If then current tensor being inspected requires affine index, it need // to be sliced. for (Level l = 0; l < lvlRank; l++) { - // FIXME: `toOrigDim` is deprecated. - const AffineExpr a = map.getResult(toOrigDim(enc, l)); + const AffineExpr a = map.getResult(l); const DimLevelType dlt = enc.getLvlType(l); if (idxReducBased && needIdxReduc) { if (!findDepIdxSet(env.merger(), tid, l, a, dlt)) return false; // inadmissible affine expression } else { - if (!findAffine(env.merger(), tid, l, a, dlt, filterLdx)) + if (!findAffine(env.merger(), tid, l, a, dlt)) return false; // inadmissible affine expression } } } - assert(filterLdx == env.merger().getNumLoops()); return annotated; } @@ -374,13 +336,8 @@ static void genBuffers(CodegenEnv &env, OpBuilder &builder) { } return init; }, - [&loopRange, &env](OpBuilder &b, Location loc, Level l) { - assert(l < env.getLoopNum()); - // FIXME: Remove filter loop since we have a better algorithm to - // deal with affine index expression. 
- if (l >= env.merger().getStartingFilterLoopId()) - return Value(); - + [&loopRange](OpBuilder &b, Location loc, Level l) { + assert(l < loopRange.size()); return mlir::getValueOrCreateConstantIndexOp(b, loc, loopRange[l].size); }); } @@ -394,10 +351,7 @@ static Value genIndex(CodegenEnv &env, OpOperand *t) { const auto stt = getSparseTensorType(t->get()); const Level lvlRank = stt.getLvlRank(); assert(static_cast(map.getNumResults()) == lvlRank); - // FIXME: `toOrigDim` is deprecated. - // FIXME: above we asserted that there are `lvlRank` many results, - // but this is assuming there are in fact `dimRank` many results instead. - const AffineExpr a = map.getResult(toOrigDim(stt.getEncoding(), lvlRank - 1)); + const AffineExpr a = map.getResult(lvlRank - 1); assert(a.getKind() == AffineExprKind::DimId); const LoopId idx = env.makeLoopId(cast(a).getPosition()); return env.getLoopVar(idx); @@ -727,19 +681,8 @@ static void genInvariants(CodegenEnv &env, OpBuilder &builder, ExprId exp, const Level lvlRank = stt.getLvlRank(); assert(static_cast(map.getNumResults()) == lvlRank); for (Level l = 0; l < lvlRank; l++) { - // FIXME: `toOrigDim` is deprecated. - // FIXME: above we asserted that there are `lvlRank` many results, - // but this is assuming there are in fact `dimRank` many results instead. - const AffineExpr a = map.getResult(toOrigDim(stt.getEncoding(), l)); - const auto sldx = - env.merger().getLoopId(env.makeTensorId(t.getOperandNumber()), l); - if (sldx && env.merger().isFilterLoop(*sldx)) { - if (!env.getLoopVar(*sldx)) - // The filter loops has not been constructed. - return; - if (*sldx == ldx) - isAtLoop = true; - } else if (!isInvariantAffine(a, env.getLoopDepth(), ldx, isAtLoop)) + const AffineExpr a = map.getResult(l); + if (!isInvariantAffine(a, env.getLoopDepth(), ldx, isAtLoop)) return; // still in play } // All exhausted at this level (isAtLoop denotes exactly at this LoopId). @@ -1073,10 +1016,8 @@ static void genConstantDenseAddressFromLevel(CodegenEnv &env, const TensorId tid = env.makeTensorId(input->getOperandNumber()); const Level lvlRank = enc.getLvlRank(); assert(lvlExprs.size() == static_cast(lvlRank)); - // FIXME: there is dim/lvl confusion here for (Level l = startLvl; l < lvlRank; l++) { - // FIXME: `toOrigDim` is deprecated. - AffineExpr lvlExpr = lvlExprs[toOrigDim(enc, l)]; + AffineExpr lvlExpr = lvlExprs[l]; if (enc.isDenseLvl(l) && isa(lvlExpr)) env.emitter().genDenseAffineAddress( builder, loc, env.makeTensorLevel(tid, l), lvlExpr); @@ -1164,8 +1105,7 @@ static bool translateBitsToTidLvlPairs( const Level lvlRank = stt.getLvlRank(); assert(affines.size() == static_cast(lvlRank)); for (Level l = 0; l < lvlRank; l++) { - // FIXME: `toOrigDim` is deprecated. - AffineExpr exp = affines[toOrigDim(stt.getEncoding(), l)]; + AffineExpr exp = affines[l]; // Skip simple affine expression and non-dense levels (which // have their own filter loop). if (isa(exp) || !stt.isDenseLvl(l)) @@ -1396,14 +1336,13 @@ public: op, "Loops not yet scheduled, try run --sparse-reinterpret-map " "before sparsification."); } + // Must have been demapped as well if the generic op is sorted. + assert(!hasAnyNonIdentityOperandsOrResults(op)); // Sets up a code generation environment. 
const unsigned numTensors = op->getNumOperands(); const unsigned numLoops = op.getNumLoops(); - const unsigned numFilterLoops = getNumNonTrivialIdxExpOnSparseLvls(op); - // TODO: we should probably always use slice-based codegen whenever - // possible, we can even intermix slice-based and filter-loop based codegen. - bool idxReducBased = numFilterLoops != 0; + bool needIdxRed = getNumNonTrivialIdxExpOnSparseLvls(op) != 0; // If we have indexing map like (d0) -> (0, d0), there might be more // levels then loops because of the constant index, that means we can not // use numLoops as the upper bound for ranks of all tensors. @@ -1417,14 +1356,10 @@ public: } } - // A slice based algorithm for affine indices does not need filter loops. - CodegenEnv env(op, options, numTensors, numLoops, - /*numFilterLoops=*/idxReducBased ? 0 : numFilterLoops, - maxLvlRank); - + CodegenEnv env(op, options, numTensors, numLoops, maxLvlRank); // Detects sparse annotations and translates the per-level sparsity // information for all tensors to loop indices in the kernel. - if (!findSparseAnnotations(env, idxReducBased)) + if (!findSparseAnnotations(env, needIdxRed)) return failure(); // Only standard reduction operations (add, sub, or, xor) that can be diff --git a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp index 033b61f..12fc51b 100644 --- a/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp +++ b/mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp @@ -220,12 +220,12 @@ TensorExp::TensorExp(TensorExp::Kind k, unsigned x, ExprId y, Value v, llvm_unreachable("unexpected kind"); } -Merger::Merger(unsigned numInputOutputTensors, unsigned numNativeLoops, - unsigned numFilterLoops, unsigned maxLvlRank) +Merger::Merger(unsigned numInputOutputTensors, unsigned numLoops, + unsigned maxLvlRank) : outTensor(numInputOutputTensors - 1), syntheticTensor(numInputOutputTensors), - numTensors(numInputOutputTensors + 1), numNativeLoops(numNativeLoops), - numLoops(numNativeLoops + numFilterLoops), hasSparseOut(false), + numTensors(numInputOutputTensors + 1), numLoops(numLoops), + hasSparseOut(false), lvlTypes(numTensors, std::vector(numLoops, DimLevelType::Undef)), loopToLvl(numTensors, diff --git a/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp b/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp index e28d88e..2a20327 100644 --- a/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp +++ b/mlir/unittests/Dialect/SparseTensor/MergerTest.cpp @@ -123,8 +123,7 @@ FOREVERY_BINOP(IMPL_BINOP_PATTERN) class MergerTestBase : public ::testing::Test { protected: MergerTestBase(unsigned numTensors, unsigned numLoops) - : merger(numTensors, numLoops, /*numFilterLoops=*/0, - /*maxRank=*/numLoops) { + : merger(numTensors, numLoops, /*maxRank=*/numLoops) { tensors.reserve(numTensors); for (unsigned t = 0; t < numTensors; t++) tensors.push_back(merger.addTensorExp(tid(t))); -- cgit v1.1 From 233971b475a48d9ad8c61632660a1b45186897cc Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 16 Nov 2023 14:24:17 -0800 Subject: [RISCV] Fix typo in a test and regen another to reduce test diff --- .../CodeGen/RISCV/rvv/concat-vector-insert-elt.ll | 24 ++++++++-------------- .../CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll | 7 +++++++ 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/concat-vector-insert-elt.ll b/llvm/test/CodeGen/RISCV/rvv/concat-vector-insert-elt.ll index 9193f7a..3fc2281 100644 --- a/llvm/test/CodeGen/RISCV/rvv/concat-vector-insert-elt.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/concat-vector-insert-elt.ll @@ -12,16 +12,14 @@ define void @v4xi8_concat_vector_insert_idx0(ptr %a, ptr %b, i8 %x) { ; CHECK-NEXT: vle8.v v9, (a1) ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vslideup.vi v8, v9, 2 -; CHECK-NEXT: vmv.s.x v9, a2 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, tu, ma -; CHECK-NEXT: vslideup.vi v8, v9, 1 -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, tu, ma +; CHECK-NEXT: vmv.s.x v8, a2 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %v1 = load <2 x i8>, ptr %a %v2 = load <2 x i8>, ptr %b %concat = shufflevector <2 x i8> %v1, <2 x i8> %v2, <4 x i32> - %ins = insertelement <4 x i8> %concat, i8 %x, i32 1 + %ins = insertelement <4 x i8> %concat, i8 %x, i32 0 store <4 x i8> %ins, ptr %a ret void } @@ -98,11 +96,9 @@ define void @v4xi64_concat_vector_insert_idx0(ptr %a, ptr %b, i64 %x) { ; RV32-NEXT: vle64.v v10, (a1) ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vslideup.vi v8, v10, 2 -; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma -; RV32-NEXT: vslide1down.vx v10, v8, a2 -; RV32-NEXT: vslide1down.vx v10, v10, a3 -; RV32-NEXT: vsetivli zero, 2, e64, m1, tu, ma -; RV32-NEXT: vslideup.vi v8, v10, 1 +; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: vslide1down.vx v8, v8, a3 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vse64.v v8, (a0) ; RV32-NEXT: ret @@ -114,16 +110,14 @@ define void @v4xi64_concat_vector_insert_idx0(ptr %a, ptr %b, i64 %x) { ; RV64-NEXT: vle64.v v10, (a1) ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vslideup.vi v8, v10, 2 -; RV64-NEXT: vmv.s.x v10, a2 -; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma -; RV64-NEXT: vslideup.vi v8, v10, 1 -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m2, tu, ma +; RV64-NEXT: vmv.s.x v8, a2 ; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: ret %v1 = load <2 x i64>, ptr %a %v2 = load <2 x i64>, ptr %b %concat = shufflevector <2 x i64> %v1, <2 x i64> %v2, <4 x i32> - %ins = insertelement <4 x i64> %concat, i64 %x, i32 1 + %ins = insertelement <4 x i64> %concat, i64 %x, i32 0 store <4 x i64> %ins, ptr %a ret void } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index d1ea56a..2d8bae7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -1080,6 +1080,13 @@ define <32 x double> @buildvec_v32f64(double %e0, double %e1, double %e2, double ; FIXME: These constants have enough sign bits that we could use vmv.v.x/i and ; vsext, but we don't support this for FP yet. define <2 x float> @signbits() { +; CHECK-LABEL: signbits: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: lui a0, %hi(.LCPI24_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI24_0) +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: ret entry: ret <2 x float> } -- cgit v1.1 From 003a3b04b0889bfb3bd0719f80323f8e879c05fe Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Thu, 16 Nov 2023 23:56:34 +0100 Subject: [libc++] Fixes lit portability issues. (#72435) @StephanTLavavej mentioned the libc++ tests no longer works for MSVC STL. The regex changes have been provided by Stephan. 
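[Editorial note, not part of the commit message: the two hunks below change ordinary string literals to raw strings. In Python, an unrecognized escape such as "\+" still yields the intended pattern, but recent interpreters flag it (DeprecationWarning, and SyntaxWarning from Python 3.12), which is presumably what made the MSVC STL runs fail on stricter configurations; the r prefix avoids the warning. A minimal sketch under that assumption, with invented sample inputs:]

```
import re

std = "c++17"

# Ordinary string: "\+" is an invalid escape sequence. It still matches a
# literal '+', but recent Python versions warn about the literal itself.
print(re.sub("\+", "x", std))   # -> "cxx17" (may emit a warning)

# Raw string: the backslash is kept verbatim and no warning is emitted.
print(re.sub(r"\+", "x", std))  # -> "cxx17"

# Same idea for the stdlib flavor check further down in params.py.
print(bool(re.match(r".+-libc\+\+", "llvm-libc++")))  # -> True
```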
--- libcxx/utils/libcxx/test/params.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libcxx/utils/libcxx/test/params.py b/libcxx/utils/libcxx/test/params.py index e311305..3fedbf9 100644 --- a/libcxx/utils/libcxx/test/params.py +++ b/libcxx/utils/libcxx/test/params.py @@ -114,7 +114,7 @@ DEFAULT_PARAMETERS = [ ), actions=lambda std: [ AddFeature(std), - AddSubstitution("%{cxx_std}", re.sub("\+", "x", std)), + AddSubstitution("%{cxx_std}", re.sub(r"\+", "x", std)), AddCompileFlag(lambda cfg: getStdFlag(cfg, std)), ], ), @@ -187,7 +187,7 @@ DEFAULT_PARAMETERS = [ AddFeature("stdlib={}".format(stdlib)), # Also add an umbrella feature 'stdlib=libc++' for all flavors of libc++, to simplify # the test suite. - AddFeature("stdlib=libc++") if re.match(".+-libc\+\+", stdlib) else None, + AddFeature("stdlib=libc++") if re.match(r".+-libc\+\+", stdlib) else None, ], ), ), -- cgit v1.1 From f0ad9ea36ad65cec8c5e5d1d59c00192b87f287d Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Thu, 16 Nov 2023 15:00:09 -0800 Subject: [clang-format] Handle lambdas in QualifierAlignment (#72456) Fixed #62780. --- clang/lib/Format/QualifierAlignmentFixer.cpp | 12 +++++++++--- clang/lib/Format/QualifierAlignmentFixer.h | 4 ++++ clang/unittests/Format/QualifierFixerTest.cpp | 6 ++++++ 3 files changed, 19 insertions(+), 3 deletions(-) diff --git a/clang/lib/Format/QualifierAlignmentFixer.cpp b/clang/lib/Format/QualifierAlignmentFixer.cpp index e2fab1c..8494174 100644 --- a/clang/lib/Format/QualifierAlignmentFixer.cpp +++ b/clang/lib/Format/QualifierAlignmentFixer.cpp @@ -535,14 +535,21 @@ LeftRightQualifierAlignmentFixer::analyze( SmallVectorImpl &AnnotatedLines, FormatTokenLexer &Tokens) { tooling::Replacements Fixes; - const AdditionalKeywords &Keywords = Tokens.getKeywords(); - const SourceManager &SourceMgr = Env.getSourceManager(); AffectedRangeMgr.computeAffectedLines(AnnotatedLines); + fixQualifierAlignment(AnnotatedLines, Tokens, Fixes); + return {Fixes, 0}; +} +void LeftRightQualifierAlignmentFixer::fixQualifierAlignment( + SmallVectorImpl &AnnotatedLines, FormatTokenLexer &Tokens, + tooling::Replacements &Fixes) { + const AdditionalKeywords &Keywords = Tokens.getKeywords(); + const SourceManager &SourceMgr = Env.getSourceManager(); tok::TokenKind QualifierToken = getTokenFromQualifier(Qualifier); assert(QualifierToken != tok::identifier && "Unrecognised Qualifier"); for (AnnotatedLine *Line : AnnotatedLines) { + fixQualifierAlignment(Line->Children, Tokens, Fixes); if (!Line->Affected || Line->InPPDirective) continue; FormatToken *First = Line->First; @@ -565,7 +572,6 @@ LeftRightQualifierAlignmentFixer::analyze( } } } - return {Fixes, 0}; } void prepareLeftRightOrderingForQualifierAlignmentFixer( diff --git a/clang/lib/Format/QualifierAlignmentFixer.h b/clang/lib/Format/QualifierAlignmentFixer.h index a72d135..e922d80 100644 --- a/clang/lib/Format/QualifierAlignmentFixer.h +++ b/clang/lib/Format/QualifierAlignmentFixer.h @@ -52,6 +52,10 @@ public: static tok::TokenKind getTokenFromQualifier(const std::string &Qualifier); + void fixQualifierAlignment(SmallVectorImpl &AnnotatedLines, + FormatTokenLexer &Tokens, + tooling::Replacements &Fixes); + const FormatToken *analyzeRight(const SourceManager &SourceMgr, const AdditionalKeywords &Keywords, tooling::Replacements &Fixes, diff --git a/clang/unittests/Format/QualifierFixerTest.cpp b/clang/unittests/Format/QualifierFixerTest.cpp index a56323a..324366c 100644 --- a/clang/unittests/Format/QualifierFixerTest.cpp +++ 
b/clang/unittests/Format/QualifierFixerTest.cpp @@ -357,6 +357,9 @@ TEST_F(QualifierFixerTest, RightQualifier) { verifyFormat("void f(std::integral auto const &x);", "void f(const std::integral auto &x);", Style); + verifyFormat("auto lambda = [] { int const i = 0; };", + "auto lambda = [] { const int i = 0; };", Style); + verifyFormat("Foo const> P;\n#if 0\n#else\n#endif", "Foo> P;\n#if 0\n#else\n#endif", Style); @@ -663,6 +666,9 @@ TEST_F(QualifierFixerTest, LeftQualifier) { verifyFormat("void f(const std::integral auto &x);", "void f(std::integral auto const &x);", Style); + verifyFormat("auto lambda = [] { const int i = 0; };", + "auto lambda = [] { int const i = 0; };", Style); + verifyFormat("Foo> P;\n#if 0\n#else\n#endif", "Foo const> P;\n#if 0\n#else\n#endif", Style); -- cgit v1.1 From 615ebfc3e5e338cb40fa84405da0f70f0961c6b6 Mon Sep 17 00:00:00 2001 From: Davide Italiano Date: Thu, 16 Nov 2023 15:57:51 -0800 Subject: [SampleProfileProbe] Downgrade probes too large from error to warning. (#72574) --- llvm/lib/Transforms/IPO/SampleProfileProbe.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp index b786685..8f0b12d 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp @@ -237,7 +237,8 @@ void SampleProfileProber::computeProbeIdForCallsites() { if (LastProbeId >= 0xFFFF) { std::string Msg = "Pseudo instrumentation incomplete for " + std::string(F->getName()) + " because it's too large"; - Ctx.diagnose(DiagnosticInfoSampleProfile(M->getName().data(), Msg)); + Ctx.diagnose( + DiagnosticInfoSampleProfile(M->getName().data(), Msg, DS_Warning)); return; } -- cgit v1.1 From 7ff8094a397127c8204c34079f8893fc8acbf1dd Mon Sep 17 00:00:00 2001 From: Shraiysh Date: Thu, 16 Nov 2023 18:03:32 -0600 Subject: [flang][OpenMP] Add semantic check for declare target (#71861) This patch adds the following check from OpenMP 5.2. ``` If the directive has a clause, it must contain at least one enter clause or at least one link clause. ``` Also added a warning for the deprication of `TO` clause on `DECLARE TARGET` construct. ``` The clause-name to may be used as a synonym for the clause-name enter. This use has been deprecated. ``` Based on the tests for to clause, the tests for enter clause are added. This patch does not add tests where both to and enter clause are used together. 
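[Editorial note, not part of the commit message: a minimal, hypothetical Fortran sketch of what the new semantic check accepts, warns about, and rejects; the subroutine and variable names are invented and do not come from the patch's test files.]

```
subroutine sketch
  integer, save :: x, y

  ! Accepted: the clause list contains an ENTER clause.
  !$omp declare target enter(x) device_type(any)

  ! Accepted, but now warned about: TO is a deprecated synonym for ENTER.
  !$omp declare target to(y)

  ! Rejected by the new check: a clause is present, but none of ENTER,
  ! LINK (or the deprecated TO) appears.
  !$omp declare target device_type(nohost)
end subroutine sketch
```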
--- flang/lib/Lower/OpenMP.cpp | 15 ++ flang/lib/Parser/openmp-parsers.cpp | 2 + flang/lib/Semantics/check-omp-structure.cpp | 51 ++++-- flang/lib/Semantics/check-omp-structure.h | 2 + flang/lib/Semantics/resolve-directives.cpp | 3 + .../test/Lower/OpenMP/FIR/declare-target-data.f90 | 16 ++ .../OpenMP/FIR/declare-target-func-and-subr.f90 | 68 ++++++++ ...are-target-implicit-func-and-subr-cap-enter.f90 | 192 +++++++++++++++++++++ flang/test/Lower/OpenMP/declare-target-data.f90 | 16 ++ .../Lower/OpenMP/declare-target-func-and-subr.f90 | 68 ++++++++ ...are-target-implicit-func-and-subr-cap-enter.f90 | 192 +++++++++++++++++++++ .../OpenMP/declare-target-implicit-tarop-cap.f90 | 14 ++ flang/test/Lower/OpenMP/function-filtering-2.f90 | 8 + flang/test/Lower/OpenMP/function-filtering.f90 | 18 ++ .../Parser/OpenMP/declare_target-device_type.f90 | 27 ++- .../Semantics/OpenMP/declarative-directive.f90 | 12 +- flang/test/Semantics/OpenMP/declare-target01.f90 | 48 ++++++ flang/test/Semantics/OpenMP/declare-target02.f90 | 54 ++++++ flang/test/Semantics/OpenMP/declare-target06.f90 | 5 + flang/test/Semantics/OpenMP/requires04.f90 | 5 +- flang/test/Semantics/OpenMP/requires05.f90 | 2 + llvm/include/llvm/Frontend/OpenMP/OMP.td | 6 +- 22 files changed, 803 insertions(+), 21 deletions(-) create mode 100644 flang/test/Lower/OpenMP/FIR/declare-target-implicit-func-and-subr-cap-enter.f90 create mode 100644 flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index 00f1602..6657f46 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -590,6 +590,8 @@ public: bool processSectionsReduction(mlir::Location currentLocation) const; bool processTo(llvm::SmallVectorImpl &result) const; bool + processEnter(llvm::SmallVectorImpl &result) const; + bool processUseDeviceAddr(llvm::SmallVectorImpl &operands, llvm::SmallVectorImpl &useDeviceTypes, llvm::SmallVectorImpl &useDeviceLocs, @@ -1851,6 +1853,18 @@ bool ClauseProcessor::processTo( }); } +bool ClauseProcessor::processEnter( + llvm::SmallVectorImpl &result) const { + return findRepeatableClause( + [&](const ClauseTy::Enter *enterClause, + const Fortran::parser::CharBlock &) { + // Case: declare target to(func, var1, var2)... 
+ gatherFuncAndVarSyms(enterClause->v, + mlir::omp::DeclareTargetCaptureClause::enter, + result); + }); +} + bool ClauseProcessor::processUseDeviceAddr( llvm::SmallVectorImpl &operands, llvm::SmallVectorImpl &useDeviceTypes, @@ -2792,6 +2806,7 @@ static mlir::omp::DeclareTargetDeviceType getDeclareTargetInfo( ClauseProcessor cp(converter, *clauseList); cp.processTo(symbolAndClause); + cp.processEnter(symbolAndClause); cp.processLink(symbolAndClause); cp.processDeviceType(deviceType); cp.processTODO( diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 0271f69..bba1be2 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -259,6 +259,8 @@ TYPE_PARSER( parenthesized("STATIC" >> maybe("," >> scalarIntExpr)))) || "DYNAMIC_ALLOCATORS" >> construct(construct()) || + "ENTER" >> construct(construct( + parenthesized(Parser{}))) || "FINAL" >> construct(construct( parenthesized(scalarLogicalExpr))) || "FULL" >> construct(construct()) || diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 0b1a581..4dbbf12 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -1165,13 +1165,31 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Allocate &x) { } } +void OmpStructureChecker::Enter(const parser::OmpDeclareTargetWithClause &x) { + SetClauseSets(llvm::omp::Directive::OMPD_declare_target); +} + +void OmpStructureChecker::Leave(const parser::OmpDeclareTargetWithClause &x) { + if (x.v.v.size() > 0) { + const parser::OmpClause *enterClause = + FindClause(llvm::omp::Clause::OMPC_enter); + const parser::OmpClause *toClause = FindClause(llvm::omp::Clause::OMPC_to); + const parser::OmpClause *linkClause = + FindClause(llvm::omp::Clause::OMPC_link); + if (!enterClause && !toClause && !linkClause) { + context_.Say(x.source, + "If the DECLARE TARGET directive has a clause, it must contain at lease one ENTER clause or LINK clause"_err_en_US); + } + if (toClause) { + context_.Say(toClause->source, + "The usage of TO clause on DECLARE TARGET directive has been deprecated. 
Use ENTER clause instead."_warn_en_US); + } + } +} + void OmpStructureChecker::Enter(const parser::OpenMPDeclareTargetConstruct &x) { const auto &dir{std::get(x.t)}; PushContext(dir.source, llvm::omp::Directive::OMPD_declare_target); - const auto &spec{std::get(x.t)}; - if (std::holds_alternative(spec.u)) { - SetClauseSets(llvm::omp::Directive::OMPD_declare_target); - } } void OmpStructureChecker::Enter(const parser::OmpDeclareTargetWithList &x) { @@ -1245,7 +1263,8 @@ void OmpStructureChecker::Leave(const parser::OpenMPDeclareTargetConstruct &x) { CheckThreadprivateOrDeclareTargetVar(*objectList); } else if (const auto *clauseList{ parser::Unwrap(spec.u)}) { - bool toClauseFound{false}, deviceTypeClauseFound{false}; + bool toClauseFound{false}, deviceTypeClauseFound{false}, + enterClauseFound{false}; for (const auto &clause : clauseList->v) { common::visit( common::visitors{ @@ -1260,6 +1279,12 @@ void OmpStructureChecker::Leave(const parser::OpenMPDeclareTargetConstruct &x) { CheckIsVarPartOfAnotherVar(dir.source, linkClause.v); CheckThreadprivateOrDeclareTargetVar(linkClause.v); }, + [&](const parser::OmpClause::Enter &enterClause) { + enterClauseFound = true; + CheckSymbolNames(dir.source, enterClause.v); + CheckIsVarPartOfAnotherVar(dir.source, enterClause.v); + CheckThreadprivateOrDeclareTargetVar(enterClause.v); + }, [&](const parser::OmpClause::DeviceType &deviceTypeClause) { deviceTypeClauseFound = true; if (deviceTypeClause.v.v != @@ -1273,7 +1298,7 @@ void OmpStructureChecker::Leave(const parser::OpenMPDeclareTargetConstruct &x) { }, clause.u); - if (toClauseFound && !deviceTypeClauseFound) { + if ((toClauseFound || enterClauseFound) && !deviceTypeClauseFound) { deviceConstructFound_ = true; } } @@ -2228,6 +2253,7 @@ CHECK_SIMPLE_CLAUSE(CancellationConstructType, OMPC_cancellation_construct_type) CHECK_SIMPLE_CLAUSE(Doacross, OMPC_doacross) CHECK_SIMPLE_CLAUSE(OmpxAttribute, OMPC_ompx_attribute) CHECK_SIMPLE_CLAUSE(OmpxBare, OMPC_ompx_bare) +CHECK_SIMPLE_CLAUSE(Enter, OMPC_enter) CHECK_REQ_SCALAR_INT_CLAUSE(Grainsize, OMPC_grainsize) CHECK_REQ_SCALAR_INT_CLAUSE(NumTasks, OMPC_num_tasks) @@ -3229,12 +3255,13 @@ const parser::OmpObjectList *OmpStructureChecker::GetOmpObjectList( const parser::OmpClause &clause) { // Clauses with OmpObjectList as its data member - using MemberObjectListClauses = std::tuple; + using MemberObjectListClauses = + std::tuple; // Clauses with OmpObjectList in the tuple using TupleObjectListClauses = diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index d35602c..90e5c9f 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -80,6 +80,8 @@ public: void Enter(const parser::OpenMPDeclareTargetConstruct &); void Leave(const parser::OpenMPDeclareTargetConstruct &); void Enter(const parser::OmpDeclareTargetWithList &); + void Enter(const parser::OmpDeclareTargetWithClause &); + void Leave(const parser::OmpDeclareTargetWithClause &); void Enter(const parser::OpenMPExecutableAllocate &); void Leave(const parser::OpenMPExecutableAllocate &); void Enter(const parser::OpenMPAllocatorsConstruct &); diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index bbb105e..882ecad 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -1744,6 +1744,9 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPDeclareTargetConstruct &x) { } else if (const auto *linkClause{ 
std::get_if(&clause.u)}) { ResolveOmpObjectList(linkClause->v, Symbol::Flag::OmpDeclareTarget); + } else if (const auto *enterClause{ + std::get_if(&clause.u)}) { + ResolveOmpObjectList(enterClause->v, Symbol::Flag::OmpDeclareTarget); } } } diff --git a/flang/test/Lower/OpenMP/FIR/declare-target-data.f90 b/flang/test/Lower/OpenMP/FIR/declare-target-data.f90 index e57d928..bb3bbc8 100644 --- a/flang/test/Lower/OpenMP/FIR/declare-target-data.f90 +++ b/flang/test/Lower/OpenMP/FIR/declare-target-data.f90 @@ -32,6 +32,10 @@ INTEGER, POINTER :: pt2 => pt2_tar INTEGER :: data_int_to = 5 !$omp declare target to(data_int_to) +!CHECK-DAG: fir.global @_QMtest_0Edata_int_enter {omp.declare_target = #omp.declaretarget} : i32 +INTEGER :: data_int_enter = 5 +!$omp declare target enter(data_int_enter) + !CHECK-DAG: fir.global @_QMtest_0Edata_int_clauseless {omp.declare_target = #omp.declaretarget} : i32 INTEGER :: data_int_clauseless = 1 !$omp declare target(data_int_clauseless) @@ -42,6 +46,12 @@ REAL :: data_extended_to_1 = 2 REAL :: data_extended_to_2 = 3 !$omp declare target to(data_extended_to_1, data_extended_to_2) +!CHECK-DAG: fir.global @_QMtest_0Edata_extended_enter_1 {omp.declare_target = #omp.declaretarget} : f32 +!CHECK-DAG: fir.global @_QMtest_0Edata_extended_enter_2 {omp.declare_target = #omp.declaretarget} : f32 +REAL :: data_extended_enter_1 = 2 +REAL :: data_extended_enter_2 = 3 +!$omp declare target enter(data_extended_enter_1, data_extended_enter_2) + !CHECK-DAG: fir.global @_QMtest_0Edata_extended_link_1 {omp.declare_target = #omp.declaretarget} : f32 !CHECK-DAG: fir.global @_QMtest_0Edata_extended_link_2 {omp.declare_target = #omp.declaretarget} : f32 REAL :: data_extended_link_1 = 2 @@ -69,4 +79,10 @@ PROGRAM commons REAL :: two_to = 2 COMMON /numbers_to/ one_to, two_to !$omp declare target to(/numbers_to/) + + !CHECK-DAG: fir.global @numbers_enter_ {omp.declare_target = #omp.declaretarget} : tuple { + REAL :: one_enter = 1 + REAL :: two_enter = 2 + COMMON /numbers_enter/ one_enter, two_enter + !$omp declare target enter(/numbers_enter/) END diff --git a/flang/test/Lower/OpenMP/FIR/declare-target-func-and-subr.f90 b/flang/test/Lower/OpenMP/FIR/declare-target-func-and-subr.f90 index 26741c6..36d4d7d 100644 --- a/flang/test/Lower/OpenMP/FIR/declare-target-func-and-subr.f90 +++ b/flang/test/Lower/OpenMP/FIR/declare-target-func-and-subr.f90 @@ -13,6 +13,14 @@ FUNCTION FUNC_T_DEVICE() RESULT(I) I = 1 END FUNCTION FUNC_T_DEVICE +! DEVICE-LABEL: func.func @_QPfunc_enter_device() +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} +FUNCTION FUNC_ENTER_DEVICE() RESULT(I) +!$omp declare target enter(FUNC_ENTER_DEVICE) device_type(nohost) + INTEGER :: I + I = 1 +END FUNCTION FUNC_ENTER_DEVICE + ! HOST-LABEL: func.func @_QPfunc_t_host() ! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} FUNCTION FUNC_T_HOST() RESULT(I) @@ -21,6 +29,14 @@ FUNCTION FUNC_T_HOST() RESULT(I) I = 1 END FUNCTION FUNC_T_HOST +! HOST-LABEL: func.func @_QPfunc_enter_host() +! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} +FUNCTION FUNC_ENTER_HOST() RESULT(I) +!$omp declare target enter(FUNC_ENTER_HOST) device_type(host) + INTEGER :: I + I = 1 +END FUNCTION FUNC_ENTER_HOST + ! ALL-LABEL: func.func @_QPfunc_t_any() ! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} FUNCTION FUNC_T_ANY() RESULT(I) @@ -29,6 +45,14 @@ FUNCTION FUNC_T_ANY() RESULT(I) I = 1 END FUNCTION FUNC_T_ANY +! 
ALL-LABEL: func.func @_QPfunc_enter_any() +! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} +FUNCTION FUNC_ENTER_ANY() RESULT(I) +!$omp declare target enter(FUNC_ENTER_ANY) device_type(any) + INTEGER :: I + I = 1 +END FUNCTION FUNC_ENTER_ANY + ! ALL-LABEL: func.func @_QPfunc_default_t_any() ! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} FUNCTION FUNC_DEFAULT_T_ANY() RESULT(I) @@ -37,6 +61,14 @@ FUNCTION FUNC_DEFAULT_T_ANY() RESULT(I) I = 1 END FUNCTION FUNC_DEFAULT_T_ANY +! ALL-LABEL: func.func @_QPfunc_default_enter_any() +! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} +FUNCTION FUNC_DEFAULT_ENTER_ANY() RESULT(I) +!$omp declare target enter(FUNC_DEFAULT_ENTER_ANY) + INTEGER :: I + I = 1 +END FUNCTION FUNC_DEFAULT_ENTER_ANY + ! ALL-LABEL: func.func @_QPfunc_default_any() ! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} FUNCTION FUNC_DEFAULT_ANY() RESULT(I) @@ -65,24 +97,48 @@ SUBROUTINE SUBR_T_DEVICE() !$omp declare target to(SUBR_T_DEVICE) device_type(nohost) END +! DEVICE-LABEL: func.func @_QPsubr_enter_device() +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} +SUBROUTINE SUBR_ENTER_DEVICE() +!$omp declare target enter(SUBR_ENTER_DEVICE) device_type(nohost) +END + ! HOST-LABEL: func.func @_QPsubr_t_host() ! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} SUBROUTINE SUBR_T_HOST() !$omp declare target to(SUBR_T_HOST) device_type(host) END +! HOST-LABEL: func.func @_QPsubr_enter_host() +! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} +SUBROUTINE SUBR_ENTER_HOST() +!$omp declare target enter(SUBR_ENTER_HOST) device_type(host) +END + ! ALL-LABEL: func.func @_QPsubr_t_any() ! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} SUBROUTINE SUBR_T_ANY() !$omp declare target to(SUBR_T_ANY) device_type(any) END +! ALL-LABEL: func.func @_QPsubr_enter_any() +! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} +SUBROUTINE SUBR_ENTER_ANY() +!$omp declare target enter(SUBR_ENTER_ANY) device_type(any) +END + ! ALL-LABEL: func.func @_QPsubr_default_t_any() ! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} SUBROUTINE SUBR_DEFAULT_T_ANY() !$omp declare target to(SUBR_DEFAULT_T_ANY) END +! ALL-LABEL: func.func @_QPsubr_default_enter_any() +! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} +SUBROUTINE SUBR_DEFAULT_ENTER_ANY() +!$omp declare target enter(SUBR_DEFAULT_ENTER_ANY) +END + ! ALL-LABEL: func.func @_QPsubr_default_any() ! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} SUBROUTINE SUBR_DEFAULT_ANY() @@ -108,3 +164,15 @@ RECURSIVE FUNCTION RECURSIVE_DECLARE_TARGET(INCREMENT) RESULT(K) K = RECURSIVE_DECLARE_TARGET(INCREMENT + 1) END IF END FUNCTION RECURSIVE_DECLARE_TARGET + +! DEVICE-LABEL: func.func @_QPrecursive_declare_target_enter +! 
DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} +RECURSIVE FUNCTION RECURSIVE_DECLARE_TARGET_ENTER(INCREMENT) RESULT(K) +!$omp declare target enter(RECURSIVE_DECLARE_TARGET_ENTER) device_type(nohost) + INTEGER :: INCREMENT, K + IF (INCREMENT == 10) THEN + K = INCREMENT + ELSE + K = RECURSIVE_DECLARE_TARGET_ENTER(INCREMENT + 1) + END IF +END FUNCTION RECURSIVE_DECLARE_TARGET_ENTER diff --git a/flang/test/Lower/OpenMP/FIR/declare-target-implicit-func-and-subr-cap-enter.f90 b/flang/test/Lower/OpenMP/FIR/declare-target-implicit-func-and-subr-cap-enter.f90 new file mode 100644 index 0000000..8e88d1b0 --- /dev/null +++ b/flang/test/Lower/OpenMP/FIR/declare-target-implicit-func-and-subr-cap-enter.f90 @@ -0,0 +1,192 @@ +!RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s +!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE +!RUN: bbc -emit-fir -fopenmp %s -o - | FileCheck %s +!RUN: bbc -emit-fir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE + +! CHECK-LABEL: func.func @_QPimplicitly_captured_twice +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function implicitly_captured_twice() result(k) + integer :: i + i = 10 + k = i +end function implicitly_captured_twice + +! CHECK-LABEL: func.func @_QPtarget_function_twice_host +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function target_function_twice_host() result(i) +!$omp declare target enter(target_function_twice_host) device_type(host) + integer :: i + i = implicitly_captured_twice() +end function target_function_twice_host + +! DEVICE-LABEL: func.func @_QPtarget_function_twice_device +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function target_function_twice_device() result(i) +!$omp declare target enter(target_function_twice_device) device_type(nohost) + integer :: i + i = implicitly_captured_twice() +end function target_function_twice_device + +!! ----- + +! DEVICE-LABEL: func.func @_QPimplicitly_captured_nest +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function implicitly_captured_nest() result(k) + integer :: i + i = 10 + k = i +end function implicitly_captured_nest + +! DEVICE-LABEL: func.func @_QPimplicitly_captured_one +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function implicitly_captured_two() result(k) + integer :: i + i = 10 + k = i +end function implicitly_captured_two + +! DEVICE-LABEL: func.func @_QPtarget_function_test +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function target_function_test() result(j) +!$omp declare target enter(target_function_test) device_type(nohost) + integer :: i, j + i = implicitly_captured_one() + j = implicitly_captured_two() + i +end function target_function_test + +!! ----- + +! CHECK-LABEL: func.func @_QPimplicitly_captured_nest_twice +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function implicitly_captured_nest_twice() result(k) + integer :: i + i = 10 + k = i +end function implicitly_captured_nest_twice + +! CHECK-LABEL: func.func @_QPimplicitly_captured_one_twice +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function implicitly_captured_one_twice() result(k) + k = implicitly_captured_nest_twice() +end function implicitly_captured_one_twice + +! 
CHECK-LABEL: func.func @_QPimplicitly_captured_two_twice +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function implicitly_captured_two_twice() result(k) + integer :: i + i = 10 + k = i +end function implicitly_captured_two_twice + +! DEVICE-LABEL: func.func @_QPtarget_function_test_device +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function target_function_test_device() result(j) + !$omp declare target enter(target_function_test_device) device_type(nohost) + integer :: i, j + i = implicitly_captured_one_twice() + j = implicitly_captured_two_twice() + i +end function target_function_test_device + +! CHECK-LABEL: func.func @_QPtarget_function_test_host +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function target_function_test_host() result(j) + !$omp declare target enter(target_function_test_host) device_type(host) + integer :: i, j + i = implicitly_captured_one_twice() + j = implicitly_captured_two_twice() + i +end function target_function_test_host + +!! ----- + +! DEVICE-LABEL: func.func @_QPimplicitly_captured_with_dev_type_recursive +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +recursive function implicitly_captured_with_dev_type_recursive(increment) result(k) +!$omp declare target enter(implicitly_captured_with_dev_type_recursive) device_type(host) + integer :: increment, k + if (increment == 10) then + k = increment + else + k = implicitly_captured_with_dev_type_recursive(increment + 1) + end if +end function implicitly_captured_with_dev_type_recursive + +! DEVICE-LABEL: func.func @_QPtarget_function_with_dev_type_recurse +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function target_function_with_dev_type_recurse() result(i) +!$omp declare target enter(target_function_with_dev_type_recurse) device_type(nohost) + integer :: i + i = implicitly_captured_with_dev_type_recursive(0) +end function target_function_with_dev_type_recurse + +!! ----- + +module test_module +contains +! CHECK-LABEL: func.func @_QMtest_modulePimplicitly_captured_nest_twice +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} + function implicitly_captured_nest_twice() result(i) + integer :: i + i = 10 + end function implicitly_captured_nest_twice + +! CHECK-LABEL: func.func @_QMtest_modulePimplicitly_captured_one_twice +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} + function implicitly_captured_one_twice() result(k) + !$omp declare target enter(implicitly_captured_one_twice) device_type(host) + k = implicitly_captured_nest_twice() + end function implicitly_captured_one_twice + +! DEVICE-LABEL: func.func @_QMtest_modulePimplicitly_captured_two_twice +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} + function implicitly_captured_two_twice() result(y) + integer :: y + y = 5 + end function implicitly_captured_two_twice + +! DEVICE-LABEL: func.func @_QMtest_modulePtarget_function_test_device +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} + function target_function_test_device() result(j) + !$omp declare target enter(target_function_test_device) device_type(nohost) + integer :: i, j + i = implicitly_captured_one_twice() + j = implicitly_captured_two_twice() + i + end function target_function_test_device +end module test_module + +!! 
----- + +program mb + interface + subroutine caller_recursive + !$omp declare target enter(caller_recursive) device_type(nohost) + end subroutine + + recursive subroutine implicitly_captured_recursive(increment) + integer :: increment + end subroutine + end interface +end program + +! DEVICE-LABEL: func.func @_QPimplicitly_captured_recursive +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +recursive subroutine implicitly_captured_recursive(increment) + integer :: increment + if (increment == 10) then + return + else + call implicitly_captured_recursive(increment + 1) + end if +end subroutine + +! DEVICE-LABEL: func.func @_QPcaller_recursive +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +subroutine caller_recursive +!$omp declare target enter(caller_recursive) device_type(nohost) + call implicitly_captured_recursive(0) +end subroutine diff --git a/flang/test/Lower/OpenMP/declare-target-data.f90 b/flang/test/Lower/OpenMP/declare-target-data.f90 index e74f847..4568810 100644 --- a/flang/test/Lower/OpenMP/declare-target-data.f90 +++ b/flang/test/Lower/OpenMP/declare-target-data.f90 @@ -32,6 +32,10 @@ INTEGER, POINTER :: pt2 => pt2_tar INTEGER :: data_int_to = 5 !$omp declare target to(data_int_to) +!CHECK-DAG: fir.global @_QMtest_0Edata_int_enter {omp.declare_target = #omp.declaretarget} : i32 +INTEGER :: data_int_enter = 5 +!$omp declare target enter(data_int_enter) + !CHECK-DAG: fir.global @_QMtest_0Edata_int_clauseless {omp.declare_target = #omp.declaretarget} : i32 INTEGER :: data_int_clauseless = 1 !$omp declare target(data_int_clauseless) @@ -42,6 +46,12 @@ REAL :: data_extended_to_1 = 2 REAL :: data_extended_to_2 = 3 !$omp declare target to(data_extended_to_1, data_extended_to_2) +!CHECK-DAG: fir.global @_QMtest_0Edata_extended_enter_1 {omp.declare_target = #omp.declaretarget} : f32 +!CHECK-DAG: fir.global @_QMtest_0Edata_extended_enter_2 {omp.declare_target = #omp.declaretarget} : f32 +REAL :: data_extended_enter_1 = 2 +REAL :: data_extended_enter_2 = 3 +!$omp declare target enter(data_extended_enter_1, data_extended_enter_2) + !CHECK-DAG: fir.global @_QMtest_0Edata_extended_link_1 {omp.declare_target = #omp.declaretarget} : f32 !CHECK-DAG: fir.global @_QMtest_0Edata_extended_link_2 {omp.declare_target = #omp.declaretarget} : f32 REAL :: data_extended_link_1 = 2 @@ -69,4 +79,10 @@ PROGRAM commons REAL :: two_to = 2 COMMON /numbers_to/ one_to, two_to !$omp declare target to(/numbers_to/) + + !CHECK-DAG: fir.global @numbers_enter_ {omp.declare_target = #omp.declaretarget} : tuple { + REAL :: one_enter = 1 + REAL :: two_enter = 2 + COMMON /numbers_enter/ one_enter, two_enter + !$omp declare target enter(/numbers_enter/) END diff --git a/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 b/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 index a5cdfca8..3d2c406 100644 --- a/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 +++ b/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 @@ -13,6 +13,14 @@ FUNCTION FUNC_T_DEVICE() RESULT(I) I = 1 END FUNCTION FUNC_T_DEVICE +! DEVICE-LABEL: func.func @_QPfunc_enter_device() +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} +FUNCTION FUNC_ENTER_DEVICE() RESULT(I) +!$omp declare target enter(FUNC_ENTER_DEVICE) device_type(nohost) + INTEGER :: I + I = 1 +END FUNCTION FUNC_ENTER_DEVICE + ! HOST-LABEL: func.func @_QPfunc_t_host() ! 
HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} FUNCTION FUNC_T_HOST() RESULT(I) @@ -21,6 +29,14 @@ FUNCTION FUNC_T_HOST() RESULT(I) I = 1 END FUNCTION FUNC_T_HOST +! HOST-LABEL: func.func @_QPfunc_enter_host() +! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} +FUNCTION FUNC_ENTER_HOST() RESULT(I) +!$omp declare target enter(FUNC_ENTER_HOST) device_type(host) + INTEGER :: I + I = 1 +END FUNCTION FUNC_ENTER_HOST + ! ALL-LABEL: func.func @_QPfunc_t_any() ! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} FUNCTION FUNC_T_ANY() RESULT(I) @@ -29,6 +45,14 @@ FUNCTION FUNC_T_ANY() RESULT(I) I = 1 END FUNCTION FUNC_T_ANY +! ALL-LABEL: func.func @_QPfunc_enter_any() +! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} +FUNCTION FUNC_ENTER_ANY() RESULT(I) +!$omp declare target enter(FUNC_ENTER_ANY) device_type(any) + INTEGER :: I + I = 1 +END FUNCTION FUNC_ENTER_ANY + ! ALL-LABEL: func.func @_QPfunc_default_t_any() ! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} FUNCTION FUNC_DEFAULT_T_ANY() RESULT(I) @@ -37,6 +61,14 @@ FUNCTION FUNC_DEFAULT_T_ANY() RESULT(I) I = 1 END FUNCTION FUNC_DEFAULT_T_ANY +! ALL-LABEL: func.func @_QPfunc_default_enter_any() +! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} +FUNCTION FUNC_DEFAULT_ENTER_ANY() RESULT(I) +!$omp declare target enter(FUNC_DEFAULT_ENTER_ANY) + INTEGER :: I + I = 1 +END FUNCTION FUNC_DEFAULT_ENTER_ANY + ! ALL-LABEL: func.func @_QPfunc_default_any() ! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} FUNCTION FUNC_DEFAULT_ANY() RESULT(I) @@ -65,24 +97,48 @@ SUBROUTINE SUBR_T_DEVICE() !$omp declare target to(SUBR_T_DEVICE) device_type(nohost) END +! DEVICE-LABEL: func.func @_QPsubr_enter_device() +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} +SUBROUTINE SUBR_ENTER_DEVICE() +!$omp declare target enter(SUBR_ENTER_DEVICE) device_type(nohost) +END + ! HOST-LABEL: func.func @_QPsubr_t_host() ! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} SUBROUTINE SUBR_T_HOST() !$omp declare target to(SUBR_T_HOST) device_type(host) END +! HOST-LABEL: func.func @_QPsubr_enter_host() +! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} +SUBROUTINE SUBR_ENTER_HOST() +!$omp declare target enter(SUBR_ENTER_HOST) device_type(host) +END + ! ALL-LABEL: func.func @_QPsubr_t_any() ! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} SUBROUTINE SUBR_T_ANY() !$omp declare target to(SUBR_T_ANY) device_type(any) END +! ALL-LABEL: func.func @_QPsubr_enter_any() +! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} +SUBROUTINE SUBR_ENTER_ANY() +!$omp declare target enter(SUBR_ENTER_ANY) device_type(any) +END + ! ALL-LABEL: func.func @_QPsubr_default_t_any() ! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} SUBROUTINE SUBR_DEFAULT_T_ANY() !$omp declare target to(SUBR_DEFAULT_T_ANY) END +! ALL-LABEL: func.func @_QPsubr_default_enter_any() +! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} +SUBROUTINE SUBR_DEFAULT_ENTER_ANY() +!$omp declare target enter(SUBR_DEFAULT_ENTER_ANY) +END + ! ALL-LABEL: func.func @_QPsubr_default_any() ! 
ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} SUBROUTINE SUBR_DEFAULT_ANY() @@ -108,3 +164,15 @@ RECURSIVE FUNCTION RECURSIVE_DECLARE_TARGET(INCREMENT) RESULT(K) K = RECURSIVE_DECLARE_TARGET(INCREMENT + 1) END IF END FUNCTION RECURSIVE_DECLARE_TARGET + +! DEVICE-LABEL: func.func @_QPrecursive_declare_target_enter +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} +RECURSIVE FUNCTION RECURSIVE_DECLARE_TARGET_ENTER(INCREMENT) RESULT(K) +!$omp declare target enter(RECURSIVE_DECLARE_TARGET_ENTER) device_type(nohost) + INTEGER :: INCREMENT, K + IF (INCREMENT == 10) THEN + K = INCREMENT + ELSE + K = RECURSIVE_DECLARE_TARGET_ENTER(INCREMENT + 1) + END IF +END FUNCTION RECURSIVE_DECLARE_TARGET_ENTER diff --git a/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 b/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 new file mode 100644 index 0000000..ed718a4 --- /dev/null +++ b/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 @@ -0,0 +1,192 @@ +!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s +!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE +!RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s +!RUN: bbc -emit-hlfir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE + +! CHECK-LABEL: func.func @_QPimplicitly_captured_twice +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function implicitly_captured_twice() result(k) + integer :: i + i = 10 + k = i +end function implicitly_captured_twice + +! CHECK-LABEL: func.func @_QPtarget_function_twice_host +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function target_function_twice_host() result(i) +!$omp declare target enter(target_function_twice_host) device_type(host) + integer :: i + i = implicitly_captured_twice() +end function target_function_twice_host + +! DEVICE-LABEL: func.func @_QPtarget_function_twice_device +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function target_function_twice_device() result(i) +!$omp declare target enter(target_function_twice_device) device_type(nohost) + integer :: i + i = implicitly_captured_twice() +end function target_function_twice_device + +!! ----- + +! DEVICE-LABEL: func.func @_QPimplicitly_captured_nest +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function implicitly_captured_nest() result(k) + integer :: i + i = 10 + k = i +end function implicitly_captured_nest + +! DEVICE-LABEL: func.func @_QPimplicitly_captured_one +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function implicitly_captured_two() result(k) + integer :: i + i = 10 + k = i +end function implicitly_captured_two + +! DEVICE-LABEL: func.func @_QPtarget_function_test +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function target_function_test() result(j) +!$omp declare target enter(target_function_test) device_type(nohost) + integer :: i, j + i = implicitly_captured_one() + j = implicitly_captured_two() + i +end function target_function_test + +!! ----- + +! CHECK-LABEL: func.func @_QPimplicitly_captured_nest_twice +! 
CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function implicitly_captured_nest_twice() result(k) + integer :: i + i = 10 + k = i +end function implicitly_captured_nest_twice + +! CHECK-LABEL: func.func @_QPimplicitly_captured_one_twice +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function implicitly_captured_one_twice() result(k) + k = implicitly_captured_nest_twice() +end function implicitly_captured_one_twice + +! CHECK-LABEL: func.func @_QPimplicitly_captured_two_twice +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function implicitly_captured_two_twice() result(k) + integer :: i + i = 10 + k = i +end function implicitly_captured_two_twice + +! DEVICE-LABEL: func.func @_QPtarget_function_test_device +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function target_function_test_device() result(j) + !$omp declare target enter(target_function_test_device) device_type(nohost) + integer :: i, j + i = implicitly_captured_one_twice() + j = implicitly_captured_two_twice() + i +end function target_function_test_device + +! CHECK-LABEL: func.func @_QPtarget_function_test_host +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function target_function_test_host() result(j) + !$omp declare target enter(target_function_test_host) device_type(host) + integer :: i, j + i = implicitly_captured_one_twice() + j = implicitly_captured_two_twice() + i +end function target_function_test_host + +!! ----- + +! DEVICE-LABEL: func.func @_QPimplicitly_captured_with_dev_type_recursive +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +recursive function implicitly_captured_with_dev_type_recursive(increment) result(k) +!$omp declare target enter(implicitly_captured_with_dev_type_recursive) device_type(host) + integer :: increment, k + if (increment == 10) then + k = increment + else + k = implicitly_captured_with_dev_type_recursive(increment + 1) + end if +end function implicitly_captured_with_dev_type_recursive + +! DEVICE-LABEL: func.func @_QPtarget_function_with_dev_type_recurse +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function target_function_with_dev_type_recurse() result(i) +!$omp declare target enter(target_function_with_dev_type_recurse) device_type(nohost) + integer :: i + i = implicitly_captured_with_dev_type_recursive(0) +end function target_function_with_dev_type_recurse + +!! ----- + +module test_module +contains +! CHECK-LABEL: func.func @_QMtest_modulePimplicitly_captured_nest_twice +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} + function implicitly_captured_nest_twice() result(i) + integer :: i + i = 10 + end function implicitly_captured_nest_twice + +! CHECK-LABEL: func.func @_QMtest_modulePimplicitly_captured_one_twice +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} + function implicitly_captured_one_twice() result(k) + !$omp declare target enter(implicitly_captured_one_twice) device_type(host) + k = implicitly_captured_nest_twice() + end function implicitly_captured_one_twice + +! DEVICE-LABEL: func.func @_QMtest_modulePimplicitly_captured_two_twice +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} + function implicitly_captured_two_twice() result(y) + integer :: y + y = 5 + end function implicitly_captured_two_twice + +! 
DEVICE-LABEL: func.func @_QMtest_modulePtarget_function_test_device +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} + function target_function_test_device() result(j) + !$omp declare target enter(target_function_test_device) device_type(nohost) + integer :: i, j + i = implicitly_captured_one_twice() + j = implicitly_captured_two_twice() + i + end function target_function_test_device +end module test_module + +!! ----- + +program mb + interface + subroutine caller_recursive + !$omp declare target enter(caller_recursive) device_type(nohost) + end subroutine + + recursive subroutine implicitly_captured_recursive(increment) + integer :: increment + end subroutine + end interface +end program + +! DEVICE-LABEL: func.func @_QPimplicitly_captured_recursive +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +recursive subroutine implicitly_captured_recursive(increment) + integer :: increment + if (increment == 10) then + return + else + call implicitly_captured_recursive(increment + 1) + end if +end subroutine + +! DEVICE-LABEL: func.func @_QPcaller_recursive +! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +subroutine caller_recursive +!$omp declare target enter(caller_recursive) device_type(nohost) + call implicitly_captured_recursive(0) +end subroutine diff --git a/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 b/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 index 0f54860..7d1ae06 100644 --- a/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 +++ b/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 @@ -34,6 +34,20 @@ function implicitly_captured_one_twice() result(k) k = implicitly_captured_nest_twice() end function implicitly_captured_one_twice +! CHECK-LABEL: func.func @_QPimplicitly_captured_nest_twice_enter +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function implicitly_captured_nest_twice_enter() result(i) + integer :: i + i = 10 +end function implicitly_captured_nest_twice_enter + +! CHECK-LABEL: func.func @_QPimplicitly_captured_one_twice_enter +! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} +function implicitly_captured_one_twice_enter() result(k) +!$omp declare target enter(implicitly_captured_one_twice_enter) device_type(host) + k = implicitly_captured_nest_twice_enter() +end function implicitly_captured_one_twice_enter + ! DEVICE-LABEL: func.func @_QPimplicitly_captured_two_twice ! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} function implicitly_captured_two_twice() result(y) diff --git a/flang/test/Lower/OpenMP/function-filtering-2.f90 b/flang/test/Lower/OpenMP/function-filtering-2.f90 index 17cd0d4..e1d0f72 100644 --- a/flang/test/Lower/OpenMP/function-filtering-2.f90 +++ b/flang/test/Lower/OpenMP/function-filtering-2.f90 @@ -19,6 +19,14 @@ subroutine declaretarget() call implicit_invocation() end subroutine declaretarget +! MLIR: func.func @{{.*}}declaretarget_enter() attributes {omp.declare_target = #omp.declaretarget} +! MLIR: return +! LLVM: define {{.*}} @{{.*}}declaretarget_enter{{.*}}( +subroutine declaretarget_enter() +!$omp declare target enter(declaretarget_enter) device_type(nohost) + call implicit_invocation() +end subroutine declaretarget_enter + ! MLIR: func.func @{{.*}}no_declaretarget() attributes {omp.declare_target = #omp.declaretarget} ! MLIR: return ! 
LLVM: define {{.*}} @{{.*}}no_declaretarget{{.*}}( diff --git a/flang/test/Lower/OpenMP/function-filtering.f90 b/flang/test/Lower/OpenMP/function-filtering.f90 index 45d8c2e2..c473b99 100644 --- a/flang/test/Lower/OpenMP/function-filtering.f90 +++ b/flang/test/Lower/OpenMP/function-filtering.f90 @@ -19,6 +19,16 @@ function device_fn() result(x) x = 10 end function device_fn +! MLIR-ALL: func.func @{{.*}}device_fn_enter( +! MLIR-ALL: return + +! LLVM-ALL: define {{.*}} @{{.*}}device_fn_enter{{.*}}( +function device_fn_enter() result(x) + !$omp declare target enter(device_fn_enter) device_type(nohost) + integer :: x + x = 10 +end function device_fn_enter + ! MLIR-HOST: func.func @{{.*}}host_fn( ! MLIR-HOST: return ! MLIR-DEVICE-NOT: func.func {{.*}}host_fn( @@ -31,6 +41,14 @@ function host_fn() result(x) x = 10 end function host_fn +! LLVM-HOST: define {{.*}} @{{.*}}host_fn_enter{{.*}}( +! LLVM-DEVICE-NOT: {{.*}} @{{.*}}host_fn_enter{{.*}}( +function host_fn_enter() result(x) + !$omp declare target enter(host_fn_enter) device_type(host) + integer :: x + x = 10 +end function host_fn_enter + ! MLIR-ALL: func.func @{{.*}}target_subr( ! MLIR-ALL: return diff --git a/flang/test/Parser/OpenMP/declare_target-device_type.f90 b/flang/test/Parser/OpenMP/declare_target-device_type.f90 index 2f6b68a..0b4f75e 100644 --- a/flang/test/Parser/OpenMP/declare_target-device_type.f90 +++ b/flang/test/Parser/OpenMP/declare_target-device_type.f90 @@ -2,12 +2,27 @@ ! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp %s | FileCheck --check-prefix="PARSE-TREE" %s subroutine openmp_declare_target - !CHECK: !$omp declare target device_type(host) - !$omp declare target device_type(host) - !CHECK: !$omp declare target device_type(nohost) - !$omp declare target device_type(nohost) - !CHECK: !$omp declare target device_type(any) - !$omp declare target device_type(any) + integer, save :: x, y + !CHECK: !$omp declare target device_type(host) enter(x) + !$omp declare target device_type(host) enter(x) + !CHECK: !$omp declare target device_type(nohost) enter(x) + !$omp declare target device_type(nohost) enter(x) + !CHECK: !$omp declare target device_type(any) enter(x) + !$omp declare target device_type(any) enter(x) + + !CHECK: !$omp declare target device_type(host) to(x) + !$omp declare target device_type(host) to(x) + !CHECK: !$omp declare target device_type(nohost) to(x) + !$omp declare target device_type(nohost) to(x) + !CHECK: !$omp declare target device_type(any) to(x) + !$omp declare target device_type(any) to(x) + + !CHECK: !$omp declare target device_type(host) enter(y) to(x) + !$omp declare target device_type(host) enter(y) to(x) + !CHECK: !$omp declare target device_type(nohost) enter(y) to(x) + !$omp declare target device_type(nohost) enter(y) to(x) + !CHECK: !$omp declare target device_type(any) enter(y) to(x) + !$omp declare target device_type(any) enter(y) to(x) integer :: a(1024), i !CHECK: do do i = 1, 1024 diff --git a/flang/test/Semantics/OpenMP/declarative-directive.f90 b/flang/test/Semantics/OpenMP/declarative-directive.f90 index e48a682..4d10dc2 100644 --- a/flang/test/Semantics/OpenMP/declarative-directive.f90 +++ b/flang/test/Semantics/OpenMP/declarative-directive.f90 @@ -61,12 +61,22 @@ contains !WARNING: The entity with PARAMETER attribute is used in a DECLARE TARGET directive !WARNING: The entity with PARAMETER attribute is used in a DECLARE TARGET directive !$omp declare target (foo, N, M) + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. 
!ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly !$omp declare target to(Q, S) link(R) + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !$omp declare target enter(Q, S) link(R) + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !ERROR: MAP clause is not allowed on the DECLARE TARGET directive + !$omp declare target to(Q) map(from:Q) + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly !ERROR: MAP clause is not allowed on the DECLARE TARGET directive - !$omp declare target map(from:Q) + !$omp declare target enter(Q) map(from:Q) integer, parameter :: N=10000, M=1024 integer :: i real :: Q(N, N), R(N,M), S(M,M) diff --git a/flang/test/Semantics/OpenMP/declare-target01.f90 b/flang/test/Semantics/OpenMP/declare-target01.f90 index 862b4c5..3168781 100644 --- a/flang/test/Semantics/OpenMP/declare-target01.f90 +++ b/flang/test/Semantics/OpenMP/declare-target01.f90 @@ -49,41 +49,89 @@ module declare_target01 !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive !$omp declare target (y%KIND) + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (my_var) + !$omp declare target enter (my_var) + + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (my_var) device_type(host) + + !$omp declare target enter (my_var) device_type(host) !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (my_var%t_i) !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive + !$omp declare target enter (my_var%t_i) + + !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. 
!$omp declare target to (my_var%t_arr) + !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive + !$omp declare target enter (my_var%t_arr) + !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (my_var%kind_param) !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !$omp declare target enter (my_var%kind_param) + + !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (my_var%len_param) + !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !$omp declare target enter (my_var%len_param) + + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (arr) + !$omp declare target enter (arr) + + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (arr) device_type(nohost) + !$omp declare target enter (arr) device_type(nohost) + !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (arr(1)) !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive + !$omp declare target enter (arr(1)) + + !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (arr(1:2)) + !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive + !$omp declare target enter (arr(1:2)) + !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (x%KIND) !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !$omp declare target enter (x%KIND) + + !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (w%LEN) !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !$omp declare target enter (w%LEN) + + !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. 
!$omp declare target to (y%KIND) + !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive + !$omp declare target enter (y%KIND) + !$omp declare target link (my_var2) !$omp declare target link (my_var2) device_type(any) diff --git a/flang/test/Semantics/OpenMP/declare-target02.f90 b/flang/test/Semantics/OpenMP/declare-target02.f90 index 57a2776..8166e10 100644 --- a/flang/test/Semantics/OpenMP/declare-target02.f90 +++ b/flang/test/Semantics/OpenMP/declare-target02.f90 @@ -16,13 +16,23 @@ program declare_target02 !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target (a1) + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (arr1_to) + !$omp declare target enter (arr1_to) + + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (blk1_to) + !$omp declare target enter (blk1_to) + + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target to (a1_to) + !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block + !$omp declare target enter (a1_to) + !$omp declare target link (arr1_link) !$omp declare target link (blk1_link) @@ -34,19 +44,27 @@ program declare_target02 !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement !$omp declare target (eq_a) + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement !$omp declare target to (eq_a) !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement + !$omp declare target enter (eq_a) + + !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement !$omp declare target link (eq_b) !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement !$omp declare target (eq_c) + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement !$omp declare target to (eq_c) !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement + !$omp declare target enter (eq_c) + + !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement !$omp declare target link (eq_d) equivalence(eq_c, eq_d) @@ -69,17 +87,32 @@ contains !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target (a3) + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly !$omp declare target to (arr2_to) + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !$omp declare target enter (arr2_to) + + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. 
!$omp declare target to (arr3_to) + !$omp declare target enter (arr3_to) + + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target to (a2_to) !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block + !$omp declare target enter (a2_to) + + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. + !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target to (a3_to) + !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block + !$omp declare target enter (a3_to) + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly !$omp declare target link (arr2_link) @@ -104,13 +137,22 @@ module mod4 !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target (a4) + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (arr4_to) + !$omp declare target enter (arr4_to) + + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (blk4_to) + !$omp declare target enter (blk4_to) + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target to (a4_to) + !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block + !$omp declare target enter (a4_to) + !$omp declare target link (arr4_link) !$omp declare target link (blk4_link) @@ -132,15 +174,27 @@ subroutine func5() !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target (a5) + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly !$omp declare target to (arr5_to) + + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !$omp declare target enter (arr5_to) + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly !$omp declare target to (blk5_to) + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly + !$omp declare target enter (blk5_to) + + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. 
!ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target to (a5_to) + !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block + !$omp declare target enter (a5_to) + !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly !$omp declare target link (arr5_link) diff --git a/flang/test/Semantics/OpenMP/declare-target06.f90 b/flang/test/Semantics/OpenMP/declare-target06.f90 index a2d8263..a1c55d3 100644 --- a/flang/test/Semantics/OpenMP/declare-target06.f90 +++ b/flang/test/Semantics/OpenMP/declare-target06.f90 @@ -15,9 +15,14 @@ module test_0 !$omp declare target link(no_implicit_materialization_2) !ERROR: The given DECLARE TARGET directive clause has an invalid argument +!WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !ERROR: No explicit type declared for 'no_implicit_materialization_3' !$omp declare target to(no_implicit_materialization_3) +!ERROR: The given DECLARE TARGET directive clause has an invalid argument +!ERROR: No explicit type declared for 'no_implicit_materialization_3' +!$omp declare target enter(no_implicit_materialization_3) + INTEGER :: data_int = 10 !$omp declare target(data_int) end module test_0 diff --git a/flang/test/Semantics/OpenMP/requires04.f90 b/flang/test/Semantics/OpenMP/requires04.f90 index ae1c2c0..bb4101c 100644 --- a/flang/test/Semantics/OpenMP/requires04.f90 +++ b/flang/test/Semantics/OpenMP/requires04.f90 @@ -5,7 +5,10 @@ ! device constructs, such as declare target with device_type=nohost|any. subroutine f - !$omp declare target device_type(nohost) + integer, save :: x + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. + !$omp declare target to(x) device_type(nohost) + !$omp declare target enter(x) device_type(nohost) end subroutine f subroutine g diff --git a/flang/test/Semantics/OpenMP/requires05.f90 b/flang/test/Semantics/OpenMP/requires05.f90 index 9281f8b..dd27e38 100644 --- a/flang/test/Semantics/OpenMP/requires05.f90 +++ b/flang/test/Semantics/OpenMP/requires05.f90 @@ -5,7 +5,9 @@ ! device constructs, such as declare target with 'to' clause and no device_type. subroutine f + !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. 
!$omp declare target to(f) + !$omp declare target enter(f) end subroutine f subroutine g diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index f8b3b0c..8bfacd8 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -289,6 +289,9 @@ def OMPC_To : Clause<"to"> { let clangClass = "OMPToClause"; let flangClass = "OmpObjectList"; } +def OMPC_Enter : Clause<"enter"> { + let flangClass = "OmpObjectList"; +} def OMPC_From : Clause<"from"> { let clangClass = "OMPFromClause"; let flangClass = "OmpObjectList"; @@ -1124,7 +1127,8 @@ def OMP_DeclareTarget : Directive<"declare target"> { let allowedClauses = [ VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause ]; let allowedOnceClauses = [ VersionedClause -- cgit v1.1 From 49795d27761b9f398302354acd30980a319b1502 Mon Sep 17 00:00:00 2001 From: Brad Smith Date: Wed, 15 Nov 2023 19:32:40 -0500 Subject: [Driver][NFC] A bit more const for OpenBSD and DragonFly --- clang/lib/Driver/ToolChains/DragonFly.cpp | 8 ++++---- clang/lib/Driver/ToolChains/OpenBSD.cpp | 14 +++++++------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/clang/lib/Driver/ToolChains/DragonFly.cpp b/clang/lib/Driver/ToolChains/DragonFly.cpp index b13449b..9942fc6 100644 --- a/clang/lib/Driver/ToolChains/DragonFly.cpp +++ b/clang/lib/Driver/ToolChains/DragonFly.cpp @@ -57,11 +57,11 @@ void dragonfly::Linker::ConstructJob(Compilation &C, const JobAction &JA, const auto &ToolChain = static_cast(getToolChain()); const Driver &D = ToolChain.getDriver(); const llvm::Triple::ArchType Arch = ToolChain.getArch(); + const bool Static = Args.hasArg(options::OPT_static); + const bool Shared = Args.hasArg(options::OPT_shared); + const bool Profiling = Args.hasArg(options::OPT_pg); + const bool Pie = Args.hasArg(options::OPT_pie); ArgStringList CmdArgs; - bool Static = Args.hasArg(options::OPT_static); - bool Shared = Args.hasArg(options::OPT_shared); - bool Profiling = Args.hasArg(options::OPT_pg); - bool Pie = Args.hasArg(options::OPT_pie); if (!D.SysRoot.empty()) CmdArgs.push_back(Args.MakeArgString("--sysroot=" + D.SysRoot)); diff --git a/clang/lib/Driver/ToolChains/OpenBSD.cpp b/clang/lib/Driver/ToolChains/OpenBSD.cpp index 5d06cd8..c8f0216 100644 --- a/clang/lib/Driver/ToolChains/OpenBSD.cpp +++ b/clang/lib/Driver/ToolChains/OpenBSD.cpp @@ -33,9 +33,9 @@ void openbsd::Assembler::ConstructJob(Compilation &C, const JobAction &JA, const auto &ToolChain = static_cast(getToolChain()); const Driver &D = ToolChain.getDriver(); const llvm::Triple &Triple = ToolChain.getTriple(); + ArgStringList CmdArgs; claimNoWarnArgs(Args); - ArgStringList CmdArgs; switch (ToolChain.getArch()) { case llvm::Triple::x86: @@ -112,13 +112,13 @@ void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, const auto &ToolChain = static_cast(getToolChain()); const Driver &D = ToolChain.getDriver(); const llvm::Triple::ArchType Arch = ToolChain.getArch(); - ArgStringList CmdArgs; - bool Static = Args.hasArg(options::OPT_static); - bool Shared = Args.hasArg(options::OPT_shared); - bool Profiling = Args.hasArg(options::OPT_pg); - bool Pie = Args.hasArg(options::OPT_pie); - bool Nopie = Args.hasArg(options::OPT_nopie); + const bool Static = Args.hasArg(options::OPT_static); + const bool Shared = Args.hasArg(options::OPT_shared); + const bool Profiling = Args.hasArg(options::OPT_pg); + const bool Pie = Args.hasArg(options::OPT_pie); + const bool Nopie = 
Args.hasArg(options::OPT_nopie); const bool Relocatable = Args.hasArg(options::OPT_r); + ArgStringList CmdArgs; // Silence warning for "clang -g foo.o -o foo" Args.ClaimAllArgs(options::OPT_g_Group); -- cgit v1.1 From 484a27e412297b56027bb9fbb2f90462c668d20e Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 16 Nov 2023 16:00:41 -0800 Subject: [SLP][NFC]Make needToDelay constant, NFC. --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 5b9c206..3af683d 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10261,7 +10261,7 @@ public: /// Checks if the specified entry \p E needs to be delayed because of its /// dependency nodes. Value *needToDelay(const TreeEntry *E, - ArrayRef> Deps) { + ArrayRef> Deps) const { // No need to delay emission if all deps are ready. if (all_of(Deps, [](ArrayRef TEs) { return all_of( -- cgit v1.1 From b2e752731856dc0dcd8948cf5518b8ca62d91c83 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Mon, 13 Nov 2023 14:36:01 -0800 Subject: [ORC] Fix some typos in comments in MachOPlatform. --- llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp index 6d8e239..164adce 100644 --- a/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp +++ b/llvm/lib/ExecutionEngine/Orc/MachOPlatform.cpp @@ -469,11 +469,11 @@ MachOPlatform::MachOPlatform( // itself (to build the allocation actions that will call the registration // functions). Further complicating the situation (a) the graph containing // the registration functions is allowed to depend on other graphs (e.g. the - // graph containing the ORC runtime RTTI support) so we need to handle with - // an unknown set of dependencies during bootstrap, and (b) these graphs may + // graph containing the ORC runtime RTTI support) so we need to handle an + // unknown set of dependencies during bootstrap, and (b) these graphs may // be linked concurrently if the user has installed a concurrent dispatcher. // - // We satisfy these constraint by implementing a bootstrap phase during which + // We satisfy these constraints by implementing a bootstrap phase during which // allocation actions generated by MachOPlatform are appended to a list of // deferred allocation actions, rather than to the graphs themselves. At the // end of the bootstrap process the deferred actions are attached to a final -- cgit v1.1 From b2bbe8cc1c7653fb33f8aef69bbf77c1d3af4f26 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 15 Nov 2023 20:56:41 -0800 Subject: [ORC-RT] Add bitmask-enum and bit_ceil utilities to the ORC runtime. bitmask_enum.h is essentially a copy of llvm/ADT/BitmaskEnum.h, with some minor cleanup and renaming. The bit_ceil function is a placeholder for std::bit_ceil, which we can use once compiler-rt can use c++20. These utilities will be used to simplify bitfield enum usage in upcoming ORC-RT patches. 
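For review purposes, here is a minimal usage sketch. The SegFlags enum and the
defaultSegFlags helper are illustrative names only and are not part of this
patch; the macros and operators they use follow the doc comments in the new
header:

  #include "bitmask_enum.h"

  namespace __orc_rt {

  enum class SegFlags {
    None = 0,
    Read = 1,
    Write = 2,
    Exec = 4,
    // Opt in to the bitwise operators; Exec is the largest individual value.
    ORC_RT_MARK_AS_BITMASK_ENUM(Exec)
  };

  inline SegFlags defaultSegFlags() {
    SegFlags F = SegFlags::Read | SegFlags::Write; // no static_cast needed
    F &= ~SegFlags::Write;                         // ~ stays within the declared bits
    return F;                                      // == SegFlags::Read
  }

  } // namespace __orc_rt

The new unit test also exercises the non-intrusive
ORC_RT_DECLARE_ENUM_AS_BITMASK form, a scoped enum, and an enum nested in a
struct.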
--- compiler-rt/lib/orc/bitmask_enum.h | 151 +++++++++++++++++++++ compiler-rt/lib/orc/stl_extras.h | 11 ++ compiler-rt/lib/orc/tests/unit/CMakeLists.txt | 1 + .../lib/orc/tests/unit/bitmask_enum_test.cpp | 143 +++++++++++++++++++ 4 files changed, 306 insertions(+) create mode 100644 compiler-rt/lib/orc/bitmask_enum.h create mode 100644 compiler-rt/lib/orc/tests/unit/bitmask_enum_test.cpp diff --git a/compiler-rt/lib/orc/bitmask_enum.h b/compiler-rt/lib/orc/bitmask_enum.h new file mode 100644 index 0000000..b9fb776 --- /dev/null +++ b/compiler-rt/lib/orc/bitmask_enum.h @@ -0,0 +1,151 @@ +//===---- bitmask_enum.h - Enable bitmask operations on enums ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of the ORC runtime support library. +// +//===----------------------------------------------------------------------===// + +#ifndef ORC_RT_BITMASK_ENUM_H +#define ORC_RT_BITMASK_ENUM_H + +#include "stl_extras.h" + +#include +#include + +namespace __orc_rt { + +/// ORC_RT_MARK_AS_BITMASK_ENUM lets you opt in an individual enum type so you +/// can perform bitwise operations on it without putting static_cast everywhere. +/// +/// \code +/// enum MyEnum { +/// E1 = 1, E2 = 2, E3 = 4, E4 = 8, +/// ORC_RT_MARK_AS_BITMASK_ENUM(/* LargestValue = */ E4) +/// }; +/// +/// void Foo() { +/// MyEnum A = (E1 | E2) & E3 ^ ~E4; // Look, ma: No static_cast! +/// } +/// \endcode +/// +/// Normally when you do a bitwise operation on an enum value, you get back an +/// instance of the underlying type (e.g. int). But using this macro, bitwise +/// ops on your enum will return you back instances of the enum. This is +/// particularly useful for enums which represent a combination of flags. +/// +/// The parameter to ORC_RT_MARK_AS_BITMASK_ENUM should be the largest +/// individual value in your enum. +/// +/// All of the enum's values must be non-negative. +#define ORC_RT_MARK_AS_BITMASK_ENUM(LargestValue) \ + ORC_RT_BITMASK_LARGEST_ENUMERATOR = LargestValue + +/// ORC_RT_DECLARE_ENUM_AS_BITMASK can be used to declare an enum type as a bit +/// set, so that bitwise operation on such enum does not require static_cast. +/// +/// \code +/// enum MyEnum { E1 = 1, E2 = 2, E3 = 4, E4 = 8 }; +/// ORC_RT_DECLARE_ENUM_AS_BITMASK(MyEnum, E4); +/// +/// void Foo() { +/// MyEnum A = (E1 | E2) & E3 ^ ~E4; // No static_cast +/// } +/// \endcode +/// +/// The second parameter to ORC_RT_DECLARE_ENUM_AS_BITMASK specifies the largest +/// bit value of the enum type. +/// +/// ORC_RT_DECLARE_ENUM_AS_BITMASK should be used in __orc_rt namespace. +/// +/// This a non-intrusive alternative for ORC_RT_MARK_AS_BITMASK_ENUM. It allows +/// declaring more than one non-scoped enumerations as bitmask types in the same +/// scope. Otherwise it provides the same functionality as +/// ORC_RT_MARK_AS_BITMASK_ENUM. +#define ORC_RT_DECLARE_ENUM_AS_BITMASK(Enum, LargestValue) \ + template <> struct is_bitmask_enum : std::true_type {}; \ + template <> struct largest_bitmask_enum_bit { \ + static constexpr std::underlying_type_t value = LargestValue; \ + } + +/// Traits class to determine whether an enum has been declared as a bitwise +/// enum via ORC_RT_DECLARE_ENUM_AS_BITMASK. 
+template +struct is_bitmask_enum : std::false_type {}; + +template +struct is_bitmask_enum< + E, std::enable_if_t= 0>> + : std::true_type {}; + +template +inline constexpr bool is_bitmask_enum_v = is_bitmask_enum::value; + +/// Traits class to deermine bitmask enum largest bit. +template struct largest_bitmask_enum_bit; + +template +struct largest_bitmask_enum_bit< + E, std::enable_if_t= 0>> { + using UnderlyingTy = std::underlying_type_t; + static constexpr UnderlyingTy value = + static_cast(E::ORC_RT_BITMASK_LARGEST_ENUMERATOR); +}; + +template constexpr std::underlying_type_t Mask() { + return bit_ceil(largest_bitmask_enum_bit::value) - 1; +} + +template constexpr std::underlying_type_t Underlying(E Val) { + auto U = static_cast>(Val); + assert(U >= 0 && "Negative enum values are not allowed"); + assert(U <= Mask() && "Enum value too large (or langest val too small"); + return U; +} + +template >> +constexpr E operator~(E Val) { + return static_cast(~Underlying(Val) & Mask()); +} + +template >> +constexpr E operator|(E LHS, E RHS) { + return static_cast(Underlying(LHS) | Underlying(RHS)); +} + +template >> +constexpr E operator&(E LHS, E RHS) { + return static_cast(Underlying(LHS) & Underlying(RHS)); +} + +template >> +constexpr E operator^(E LHS, E RHS) { + return static_cast(Underlying(LHS) ^ Underlying(RHS)); +} + +template >> +E &operator|=(E &LHS, E RHS) { + LHS = LHS | RHS; + return LHS; +} + +template >> +E &operator&=(E &LHS, E RHS) { + LHS = LHS & RHS; + return LHS; +} + +template >> +E &operator^=(E &LHS, E RHS) { + LHS = LHS ^ RHS; + return LHS; +} + +} // end namespace __orc_rt + +#endif // ORC_RT_BITMASK_ENUM_H diff --git a/compiler-rt/lib/orc/stl_extras.h b/compiler-rt/lib/orc/stl_extras.h index 33c877b..1eef565 100644 --- a/compiler-rt/lib/orc/stl_extras.h +++ b/compiler-rt/lib/orc/stl_extras.h @@ -28,6 +28,17 @@ template struct identity { const Ty &operator()(const Ty &self) const { return self; } }; +/// Substitute for std::bit_ceil. +constexpr uint64_t bit_ceil(uint64_t Val) noexcept { + Val |= (Val >> 1); + Val |= (Val >> 2); + Val |= (Val >> 4); + Val |= (Val >> 8); + Val |= (Val >> 16); + Val |= (Val >> 32); + return Val + 1; +} + } // namespace __orc_rt #endif // ORC_RT_STL_EXTRAS diff --git a/compiler-rt/lib/orc/tests/unit/CMakeLists.txt b/compiler-rt/lib/orc/tests/unit/CMakeLists.txt index 7792d21..81df7e80 100644 --- a/compiler-rt/lib/orc/tests/unit/CMakeLists.txt +++ b/compiler-rt/lib/orc/tests/unit/CMakeLists.txt @@ -1,5 +1,6 @@ set(UNITTEST_SOURCES adt_test.cpp + bitmask_enum_test.cpp c_api_test.cpp endian_test.cpp error_test.cpp diff --git a/compiler-rt/lib/orc/tests/unit/bitmask_enum_test.cpp b/compiler-rt/lib/orc/tests/unit/bitmask_enum_test.cpp new file mode 100644 index 0000000..4c27d54 --- /dev/null +++ b/compiler-rt/lib/orc/tests/unit/bitmask_enum_test.cpp @@ -0,0 +1,143 @@ +//===-- adt_test.cpp ------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of the ORC runtime. 
+// +//===----------------------------------------------------------------------===// + +#include "bitmask_enum.h" +#include "gtest/gtest.h" + +#include +#include + +using namespace __orc_rt; + +namespace { + +enum Flags { F0 = 0, F1 = 1, F2 = 2, F3 = 4, F4 = 8 }; + +} // namespace + +namespace __orc_rt { +ORC_RT_DECLARE_ENUM_AS_BITMASK(Flags, F4); +} // namespace __orc_rt + +static_assert(is_bitmask_enum::value != 0); +static_assert(largest_bitmask_enum_bit::value == Flags::F4); + +namespace { + +static_assert(is_bitmask_enum::value != 0); +static_assert(largest_bitmask_enum_bit::value == Flags::F4); + +TEST(BitmaskEnumTest, BitwiseOr) { + Flags f = F1 | F2; + EXPECT_EQ(3, f); + + f = f | F3; + EXPECT_EQ(7, f); +} + +TEST(BitmaskEnumTest, BitwiseOrEquals) { + Flags f = F1; + f |= F3; + EXPECT_EQ(5, f); + + // |= should return a reference to the LHS. + f = F2; + (f |= F3) = F1; + EXPECT_EQ(F1, f); +} + +TEST(BitmaskEnumTest, BitwiseAnd) { + Flags f = static_cast(3) & F2; + EXPECT_EQ(F2, f); + + f = (f | F3) & (F1 | F2 | F3); + EXPECT_EQ(6, f); +} + +TEST(BitmaskEnumTest, BitwiseAndEquals) { + Flags f = F1 | F2 | F3; + f &= F1 | F2; + EXPECT_EQ(3, f); + + // &= should return a reference to the LHS. + (f &= F1) = F3; + EXPECT_EQ(F3, f); +} + +TEST(BitmaskEnumTest, BitwiseXor) { + Flags f = (F1 | F2) ^ (F2 | F3); + EXPECT_EQ(5, f); + + f = f ^ F1; + EXPECT_EQ(4, f); +} + +TEST(BitmaskEnumTest, BitwiseXorEquals) { + Flags f = (F1 | F2); + f ^= (F2 | F4); + EXPECT_EQ(9, f); + + // ^= should return a reference to the LHS. + (f ^= F4) = F3; + EXPECT_EQ(F3, f); +} + +TEST(BitmaskEnumTest, ConstantExpression) { + constexpr Flags f1 = ~F1; + constexpr Flags f2 = F1 | F2; + constexpr Flags f3 = F1 & F2; + constexpr Flags f4 = F1 ^ F2; + EXPECT_EQ(f1, ~F1); + EXPECT_EQ(f2, F1 | F2); + EXPECT_EQ(f3, F1 & F2); + EXPECT_EQ(f4, F1 ^ F2); +} + +TEST(BitmaskEnumTest, BitwiseNot) { + Flags f = ~F1; + EXPECT_EQ(14, f); // Largest value for f is 15. + EXPECT_EQ(15, ~F0); +} + +enum class FlagsClass { + F0 = 0, + F1 = 1, + F2 = 2, + F3 = 4, + ORC_RT_MARK_AS_BITMASK_ENUM(F3) +}; + +TEST(BitmaskEnumTest, ScopedEnum) { + FlagsClass f = (FlagsClass::F1 & ~FlagsClass::F0) | FlagsClass::F2; + f |= FlagsClass::F3; + EXPECT_EQ(7, static_cast(f)); +} + +struct Container { + enum Flags { + F0 = 0, + F1 = 1, + F2 = 2, + F3 = 4, + ORC_RT_MARK_AS_BITMASK_ENUM(F3) + }; + + static Flags getFlags() { + Flags f = F0 | F1; + f |= F2; + return f; + } +}; + +TEST(BitmaskEnumTest, EnumInStruct) { EXPECT_EQ(3, Container::getFlags()); } + +} // namespace -- cgit v1.1 From c11be311047c4ae4a79f0b95af0323c1d99a14dd Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 16 Nov 2023 15:34:12 -0800 Subject: [C-API] Fix typo in comment. --- llvm/include/llvm-c/Orc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm-c/Orc.h b/llvm/include/llvm-c/Orc.h index a40b17b..9a57c2e 100644 --- a/llvm/include/llvm-c/Orc.h +++ b/llvm/include/llvm-c/Orc.h @@ -508,7 +508,7 @@ void LLVMOrcSymbolStringPoolClearDeadEntries(LLVMOrcSymbolStringPoolRef SSP); * Intern a string in the ExecutionSession's SymbolStringPool and return a * reference to it. This increments the ref-count of the pool entry, and the * returned value should be released once the client is done with it by - * calling LLVMOrReleaseSymbolStringPoolEntry. + * calling LLVMOrcReleaseSymbolStringPoolEntry. 
* * Since strings are uniqued within the SymbolStringPool * LLVMOrcSymbolStringPoolEntryRefs can be compared by value to test string -- cgit v1.1 From 4fe29d0dc2f65e60ae7dde63e7f4595446f3baca Mon Sep 17 00:00:00 2001 From: Usama Hameed Date: Thu, 16 Nov 2023 16:29:05 -0800 Subject: [ASan] AddressSanitizerPass constructor should honor the AsanCtorKind argument (#72330) Currently, the ConstructorKind member variable in AddressSanitizerPass gets overriden by the ClConstructorKind whether the option is passed from the command line or not. This override should only happen if the ClConstructorKind argument is passed from the command line. Otherwise, the constructor should honor the argument passed to it. This patch makes this fix. rdar://118423755 --- llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 80c044b..c82b926 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -803,7 +803,9 @@ public: // do globals-gc. UseCtorComdat(UseGlobalsGC && ClWithComdat && !this->CompileKernel), DestructorKind(DestructorKind), - ConstructorKind(ConstructorKind) { + ConstructorKind(ClConstructorKind.getNumOccurrences() > 0 + ? ClConstructorKind + : ConstructorKind) { C = &(M.getContext()); int LongSize = M.getDataLayout().getPointerSizeInBits(); IntptrTy = Type::getIntNTy(*C, LongSize); @@ -1151,7 +1153,7 @@ AddressSanitizerPass::AddressSanitizerPass( AsanCtorKind ConstructorKind) : Options(Options), UseGlobalGC(UseGlobalGC), UseOdrIndicator(UseOdrIndicator), DestructorKind(DestructorKind), - ConstructorKind(ClConstructorKind) {} + ConstructorKind(ConstructorKind) {} PreservedAnalyses AddressSanitizerPass::run(Module &M, ModuleAnalysisManager &MAM) { -- cgit v1.1 From c5a67e16b6117d0c37d004dd5467b56be006ad8f Mon Sep 17 00:00:00 2001 From: Yinying Li <107574043+yinying-lisa-li@users.noreply.github.com> Date: Thu, 16 Nov 2023 19:30:21 -0500 Subject: [mlir][sparse] Use variable instead of inlining sparse encoding (#72561) Example: #CSR = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 : compressed), }> // CHECK: #[[$CSR.*]] = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 : compressed) }> // CHECK-LABEL: func private @sparse_csr( // CHECK-SAME: tensor) func.func private @sparse_csr(tensor) --- .../SparseTensor/IR/SparseTensorDialect.cpp | 15 + .../Dialect/SparseTensor/GPU/gpu_matmul_lib.mlir | 14 +- .../Dialect/SparseTensor/GPU/gpu_matvec_lib.mlir | 14 +- .../SparseTensor/GPU/gpu_sampled_matmul_lib.mlir | 16 +- .../Dialect/SparseTensor/GPU/gpu_sddmm_lib.mlir | 10 +- mlir/test/Dialect/SparseTensor/codegen.mlir | 4 +- mlir/test/Dialect/SparseTensor/dense.mlir | 20 +- mlir/test/Dialect/SparseTensor/fold.mlir | 4 +- mlir/test/Dialect/SparseTensor/one_trip.mlir | 8 +- mlir/test/Dialect/SparseTensor/pre_rewriting.mlir | 26 +- mlir/test/Dialect/SparseTensor/rejected.mlir | 2 +- .../SparseTensor/rewriting_for_codegen.mlir | 14 +- mlir/test/Dialect/SparseTensor/roundtrip.mlir | 16 +- .../Dialect/SparseTensor/roundtrip_encoding.mlir | 60 ++-- mlir/test/Dialect/SparseTensor/semi_ring.mlir | 10 +- mlir/test/Dialect/SparseTensor/sorted_coo.mlir | 40 +-- mlir/test/Dialect/SparseTensor/sparse_1d.mlir | 200 ++++++------- mlir/test/Dialect/SparseTensor/sparse_2d.mlir | 314 ++++++++++----------- 
mlir/test/Dialect/SparseTensor/sparse_3d.mlir | 190 ++++++------- mlir/test/Dialect/SparseTensor/sparse_affine.mlir | 70 ++--- mlir/test/Dialect/SparseTensor/sparse_concat.mlir | 128 ++++----- .../SparseTensor/sparse_conv_2d_slice_based.mlir | 44 +-- .../Dialect/SparseTensor/sparse_extract_slice.mlir | 2 +- mlir/test/Dialect/SparseTensor/sparse_foreach.mlir | 6 +- mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir | 96 +++---- mlir/test/Dialect/SparseTensor/sparse_index.mlir | 46 +-- mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir | 88 +++--- mlir/test/Dialect/SparseTensor/sparse_kernels.mlir | 106 +++---- mlir/test/Dialect/SparseTensor/sparse_lower.mlir | 8 +- .../Dialect/SparseTensor/sparse_lower_col.mlir | 2 +- .../Dialect/SparseTensor/sparse_lower_inplace.mlir | 8 +- mlir/test/Dialect/SparseTensor/sparse_nd.mlir | 12 +- mlir/test/Dialect/SparseTensor/sparse_out.mlir | 196 ++++++------- mlir/test/Dialect/SparseTensor/sparse_pack.mlir | 2 +- .../SparseTensor/sparse_parallel_reduce.mlir | 2 +- mlir/test/Dialect/SparseTensor/sparse_perm.mlir | 14 +- .../Dialect/SparseTensor/sparse_perm_lower.mlir | 10 +- .../SparseTensor/sparse_reinterpret_map.mlir | 22 +- mlir/test/Dialect/SparseTensor/sparse_reshape.mlir | 32 +-- mlir/test/Dialect/SparseTensor/sparse_scalars.mlir | 12 +- mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir | 40 +-- .../Dialect/SparseTensor/sparse_sddmm_org.mlir | 28 +- .../SparseTensor/sparse_tensor_reshape.mlir | 2 +- .../Dialect/SparseTensor/sparse_transpose.mlir | 32 +-- .../Dialect/SparseTensor/sparse_vector_chain.mlir | 16 +- .../Dialect/SparseTensor/sparse_vector_index.mlir | 16 +- mlir/test/Dialect/SparseTensor/spy_sddmm.mlir | 12 +- mlir/test/Dialect/SparseTensor/unused-tensor.mlir | 2 +- .../Dialect/SparseTensor/vectorize_reduction.mlir | 84 +++--- 49 files changed, 1075 insertions(+), 1040 deletions(-) diff --git a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp index 53a0c24..28445ce 100644 --- a/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +++ b/mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp @@ -1797,7 +1797,22 @@ Operation *SparseTensorDialect::materializeConstant(OpBuilder &builder, return nullptr; } +namespace { +struct SparseTensorAsmDialectInterface : public OpAsmDialectInterface { + using OpAsmDialectInterface::OpAsmDialectInterface; + + AliasResult getAlias(Attribute attr, raw_ostream &os) const override { + if (attr.isa()) { + os << "sparse"; + return AliasResult::OverridableAlias; + } + return AliasResult::NoAlias; + } +}; +} // namespace + void SparseTensorDialect::initialize() { + addInterface(); addAttributes< #define GET_ATTRDEF_LIST #include "mlir/Dialect/SparseTensor/IR/SparseTensorAttrDefs.cpp.inc" diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul_lib.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul_lib.mlir index 34189d3..699508a 100644 --- a/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul_lib.mlir +++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_matmul_lib.mlir @@ -6,18 +6,18 @@ // Compute matrix matrix C = AB // // CHECK-LABEL: func.func @matmul( -// CHECK-SAME: %[[VAL_0:.*]]: tensor>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor, // CHECK-SAME: %[[VAL_1:.*]]: tensor, // CHECK-SAME: %[[VAL_2:.*]]: tensor) -> tensor { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.number_of_entries %[[VAL_0]] : tensor> -// CHECK-DAG: %[[VAL_6:.*]] = tensor.dim 
%[[VAL_0]], %[[VAL_3]] : tensor> -// CHECK-DAG: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor> +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.number_of_entries %[[VAL_0]] : tensor +// CHECK-DAG: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor +// CHECK-DAG: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor // CHECK-DAG: %[[VAL_8:.*]] = tensor.dim %[[VAL_1]], %[[VAL_4]] : tensor -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor to memref // CHECK: %[[VAL_12:.*]] = gpu.wait async // CHECK: %[[VAL_13:.*]] = memref.dim %[[VAL_9]], %[[VAL_3]] : memref // CHECK: %[[VAL_14:.*]], %[[VAL_15:.*]] = gpu.alloc async {{\[}}%[[VAL_12]]] (%[[VAL_13]]) : memref diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec_lib.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec_lib.mlir index bd0bf69..14a0e9c 100644 --- a/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec_lib.mlir +++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_matvec_lib.mlir @@ -7,17 +7,17 @@ module { // CHECK-LABEL: func.func @matvec( -// CHECK-SAME: %[[VAL_0:.*]]: tensor>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor, // CHECK-SAME: %[[VAL_1:.*]]: tensor, // CHECK-SAME: %[[VAL_2:.*]]: tensor) -> tensor { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.number_of_entries %[[VAL_0]] : tensor> -// CHECK-DAG: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor> -// CHECK-DAG: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor> -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor> to memref> -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor> to memref> -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor> to memref +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.number_of_entries %[[VAL_0]] : tensor +// CHECK-DAG: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor +// CHECK-DAG: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor to memref> +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor to memref> +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor to memref // CHECK: %[[VAL_11:.*]] = gpu.wait async // CHECK: %[[VAL_12:.*]] = memref.dim %[[VAL_8]], %[[VAL_3]] : memref> // CHECK: %[[VAL_13:.*]], %[[VAL_14:.*]] = gpu.alloc async {{\[}}%[[VAL_11]]] (%[[VAL_12]]) : memref diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_sampled_matmul_lib.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_sampled_matmul_lib.mlir index ce7af53..97f36d4 100644 --- a/mlir/test/Dialect/SparseTensor/GPU/gpu_sampled_matmul_lib.mlir +++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_sampled_matmul_lib.mlir @@ -22,12 +22,12 @@ #CSR = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 : compressed) }> // CHECK-LABEL: func.func 
@sparse_sampled_dd( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xf64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<8x8xf64>, -// CHECK-SAME: %[[VAL_2:.*]]: tensor<8x8xf64>) -> tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> { +// CHECK-SAME: %[[VAL_2:.*]]: tensor<8x8xf64>) -> tensor<8x8xf64, #sparse{{[0-9]*}}> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 8 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index -// CHECK: %[[VAL_5:.*]] = sparse_tensor.number_of_entries %[[VAL_0]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_5:.*]] = sparse_tensor.number_of_entries %[[VAL_0]] : tensor<8x8xf64, #sparse{{[0-9]*}}> // CHECK: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_1]] : memref<8x8xf64> // CHECK: %[[VAL_7:.*]] = gpu.wait async // CHECK: %[[VAL_8:.*]], %[[VAL_9:.*]] = gpu.alloc async {{\[}}%[[VAL_7]]] () : memref<8x8xf64> @@ -36,9 +36,9 @@ // CHECK: %[[VAL_12:.*]] = gpu.wait async // CHECK: %[[VAL_13:.*]], %[[VAL_14:.*]] = gpu.alloc async {{\[}}%[[VAL_12]]] () : memref<8x8xf64> // CHECK: %[[VAL_15:.*]] = gpu.memcpy async {{\[}}%[[VAL_14]]] %[[VAL_13]], %[[VAL_11]] : memref<8x8xf64>, memref<8x8xf64> -// CHECK: %[[VAL_16:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_17:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_18:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_16:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_17:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_18:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_19:.*]] = gpu.wait async // CHECK: %[[VAL_20:.*]] = memref.dim %[[VAL_16]], %[[VAL_4]] : memref // CHECK: %[[VAL_21:.*]], %[[VAL_22:.*]] = gpu.alloc async {{\[}}%[[VAL_19]]] (%[[VAL_20]]) : memref @@ -70,8 +70,8 @@ // CHECK: %[[VAL_57:.*]] = gpu.memcpy async {{\[}}%[[VAL_56]]] %[[VAL_18]], %[[VAL_31]] : memref, memref // CHECK: %[[VAL_58:.*]] = gpu.dealloc async {{\[}}%[[VAL_57]]] %[[VAL_31]] : memref // CHECK: gpu.wait {{\[}}%[[VAL_58]]] -// CHECK: %[[VAL_59:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: return %[[VAL_59]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_59:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<8x8xf64, #sparse{{[0-9]*}}> +// CHECK: return %[[VAL_59]] : tensor<8x8xf64, #sparse{{[0-9]*}}> // CHECK: } // // A kernel that computes a direct sampled matrix matrix multiplication diff --git a/mlir/test/Dialect/SparseTensor/GPU/gpu_sddmm_lib.mlir b/mlir/test/Dialect/SparseTensor/GPU/gpu_sddmm_lib.mlir index dd79a90..93f4900 100644 --- a/mlir/test/Dialect/SparseTensor/GPU/gpu_sddmm_lib.mlir +++ b/mlir/test/Dialect/SparseTensor/GPU/gpu_sddmm_lib.mlir @@ -19,14 +19,14 @@ } // CHECK-LABEL: func.func @SDDMM_block( -// CHECK-SAME: %[[VAL_0:.*]]: tensor>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor, // CHECK-SAME: %[[VAL_1:.*]]: tensor, -// CHECK-SAME: %[[VAL_2:.*]]: tensor) -> tensor> { +// CHECK-SAME: %[[VAL_2:.*]]: tensor) -> tensor { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // 
CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 4 : index -// CHECK: %[[VAL_7:.*]] = sparse_tensor.number_of_entries %[[VAL_0]] : tensor> +// CHECK: %[[VAL_7:.*]] = sparse_tensor.number_of_entries %[[VAL_0]] : tensor // CHECK: %[[VAL_8:.*]] = tensor.dim %[[VAL_1]], %[[VAL_3]] : tensor // CHECK: %[[VAL_9:.*]] = tensor.dim %[[VAL_1]], %[[VAL_4]] : tensor // CHECK: %[[VAL_10:.*]] = tensor.dim %[[VAL_2]], %[[VAL_4]] : tensor @@ -79,8 +79,8 @@ // CHECK: %[[VAL_66:.*]] = gpu.memcpy async {{\[}}%[[VAL_65]]] %[[VAL_27]], %[[VAL_40]] : memref, memref // CHECK: %[[VAL_67:.*]] = gpu.dealloc async {{\[}}%[[VAL_66]]] %[[VAL_40]] : memref // CHECK: gpu.wait {{\[}}%[[VAL_67]]] -// CHECK: %[[VAL_68:.*]] = sparse_tensor.load %[[VAL_0]] : tensor> -// CHECK: return %[[VAL_68]] : tensor> +// CHECK: %[[VAL_68:.*]] = sparse_tensor.load %[[VAL_0]] : tensor +// CHECK: return %[[VAL_68]] : tensor // CHECK: } func.func @SDDMM_block(%args: tensor, %arga: tensor, diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 5876eef..12d77ec 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -664,7 +664,7 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> } // CHECK-LABEL: func.func @sparse_new_coo( -// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse{{[0-9]*}}>) { // CHECK-DAG: %[[VAL_1:.*]] = arith.constant false // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i32 // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index @@ -712,7 +712,7 @@ func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { } // CHECK-LABEL: func.func @sparse_new_coo_permute_no( -// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK-SAME: %[[A0:.*]]: !llvm.ptr) -> (memref, memref, memref, !sparse_tensor.storage_specifier<#sparse{{[0-9]*}}>) { // CHECK-DAG: %[[VAL_1:.*]] = arith.constant 2 : i32 // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index diff --git a/mlir/test/Dialect/SparseTensor/dense.mlir b/mlir/test/Dialect/SparseTensor/dense.mlir index 52db814..2d8dcfe 100644 --- a/mlir/test/Dialect/SparseTensor/dense.mlir +++ b/mlir/test/Dialect/SparseTensor/dense.mlir @@ -32,14 +32,14 @@ // a non-annotated dense matrix as output. 
// // CHECK-LABEL: func @dense1( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>) -> tensor<32x16xf32> { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1.000000e+00 : f32 // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref +// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] { @@ -73,14 +73,14 @@ func.func @dense1(%arga: tensor<32x16xf32, #DenseMatrix>, // // CHECK-LABEL: func @dense2( // CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32>, -// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>) -> tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> { +// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>) -> tensor<32x16xf32, #sparse{{[0-9]*}}> { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1.000000e+00 : f32 // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index // CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : memref<32x16xf32> -// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref +// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] { // CHECK: %[[VAL_11:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index @@ -90,8 +90,8 @@ func.func @dense1(%arga: tensor<32x16xf32, #DenseMatrix>, // CHECK: memref.store %[[VAL_14]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref // CHECK: } // CHECK: } -// CHECK: %[[VAL_15:.*]] = sparse_tensor.load %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> -// CHECK: return %[[VAL_15]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> +// CHECK: %[[VAL_15:.*]] = sparse_tensor.load %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}> +// CHECK: return %[[VAL_15]] : tensor<32x16xf32, #sparse{{[0-9]*}}> // CHECK: } func.func @dense2(%arga: tensor<32x16xf32>, %argx: tensor<32x16xf32, #DenseMatrix>) @@ -116,14 +116,14 @@ func.func @dense2(%arga: tensor<32x16xf32>, // // CHECK-LABEL: func @dense3( // CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32>, -// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>) -> tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> { +// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>) -> tensor<32x16xf32, #sparse{{[0-9]*}}> { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 8 : index // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index // 
CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : memref<32x16x8xf32> -// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}}>> to memref +// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_6]] { // CHECK: %[[VAL_11:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index @@ -137,8 +137,8 @@ func.func @dense2(%arga: tensor<32x16xf32>, // CHECK: memref.store %[[VAL_19:.*]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref // CHECK: } // CHECK: } -// CHECK: %[[VAL_20:.*]] = sparse_tensor.load %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> -// CHECK: return %[[VAL_20]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> +// CHECK: %[[VAL_20:.*]] = sparse_tensor.load %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}> +// CHECK: return %[[VAL_20]] : tensor<32x16xf32, #sparse{{[0-9]*}}> // CHECK: } func.func @dense3(%arga: tensor<32x16x8xf32>, %argx: tensor<32x16xf32, #DenseMatrix>) diff --git a/mlir/test/Dialect/SparseTensor/fold.mlir b/mlir/test/Dialect/SparseTensor/fold.mlir index 73851c08..0153d44 100644 --- a/mlir/test/Dialect/SparseTensor/fold.mlir +++ b/mlir/test/Dialect/SparseTensor/fold.mlir @@ -21,7 +21,7 @@ func.func @sparse_dce_convert(%arg0: tensor<64xf32>) { } // CHECK-LABEL: func @sparse_dce_getters( -// CHECK-SAME: %[[A:.*]]: tensor<64xf32, #sparse_tensor.encoding<{{{.*}}}>>) +// CHECK-SAME: %[[A:.*]]: tensor<64xf32, #sparse{{[0-9]*}}>) // CHECK-NOT: sparse_tensor.positions // CHECK-NOT: sparse_tensor.coordinates // CHECK-NOT: sparse_tensor.values @@ -68,7 +68,7 @@ func.func @sparse_get_specifier_dce_fold(%arg0: !sparse_tensor.storage_specifier #COO = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton)}> // CHECK-LABEL: func @sparse_reorder_coo( -// CHECK-SAME: %[[A:.*]]: tensor> +// CHECK-SAME: %[[A:.*]]: tensor // CHECK-NOT: %[[R:.*]] = sparse_tensor.reorder_coo // CHECK: return %[[A]] func.func @sparse_reorder_coo(%arg0 : tensor) -> tensor { diff --git a/mlir/test/Dialect/SparseTensor/one_trip.mlir b/mlir/test/Dialect/SparseTensor/one_trip.mlir index b8ab177..9d2a125 100644 --- a/mlir/test/Dialect/SparseTensor/one_trip.mlir +++ b/mlir/test/Dialect/SparseTensor/one_trip.mlir @@ -13,15 +13,15 @@ } // CHECK-LABEL: func.func @sparse_scale( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x1xf32, #sparse_tensor.encoding<{{{.*}}}>>) +// CHECK-SAME: %[[VAL_0:.*]]: tensor<1x1xf32, #sparse{{[0-9]*}}>) // CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2.000000e+00 : f32 -// CHECK: %[[VAL_3:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<1x1xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_3:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<1x1xf32, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_4:.*]] = memref.load %[[VAL_3]]{{\[}}%[[VAL_1]]] : memref // CHECK: %[[VAL_5:.*]] = arith.mulf %[[VAL_4]], %[[VAL_2]] : f32 // CHECK: memref.store %[[VAL_5]], %[[VAL_3]]{{\[}}%[[VAL_1]]] : memref -// CHECK: %[[VAL_6:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<1x1xf32, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: return %[[VAL_6]] : tensor<1x1xf32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_6:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<1x1xf32, #sparse{{[0-9]*}}> +// CHECK: return %[[VAL_6]] : tensor<1x1xf32, #sparse{{[0-9]*}}> 
func.func @sparse_scale(%argx: tensor<1x1xf32, #Dense>) -> tensor<1x1xf32, #Dense> { %c = arith.constant 2.0 : f32 %0 = linalg.generic #trait_scale diff --git a/mlir/test/Dialect/SparseTensor/pre_rewriting.mlir b/mlir/test/Dialect/SparseTensor/pre_rewriting.mlir index c2cab30..5fdbb46 100644 --- a/mlir/test/Dialect/SparseTensor/pre_rewriting.mlir +++ b/mlir/test/Dialect/SparseTensor/pre_rewriting.mlir @@ -27,8 +27,8 @@ } // CHECK-LABEL: func @sparse_nop_cast( -// CHECK-SAME: %[[A:.*]]: tensor>) -// CHECK: return %[[A]] : tensor> +// CHECK-SAME: %[[A:.*]]: tensor) +// CHECK: return %[[A]] : tensor func.func @sparse_nop_cast(%a : tensor) -> tensor { %0 = tensor.cast %a : tensor to tensor %1 = tensor.cast %0 : tensor to tensor @@ -38,18 +38,18 @@ func.func @sparse_nop_cast(%a : tensor) -> tensor) -// CHECK: %[[C:.*]] = sparse_tensor.convert %[[A]] : tensor to tensor -// CHECK: return %[[C]] : tensor> +// CHECK: %[[C:.*]] = sparse_tensor.convert %[[A]] : tensor to tensor +// CHECK: return %[[C]] : tensor func.func @sparse_repair_cast(%a : tensor) -> tensor { %0 = tensor.cast %a : tensor to tensor return %0 : tensor } // CHECK-LABEL: func @sparse_fuse_slice( -// CHECK-SAME: %[[A:.*]]: tensor<2x3xi64, #sparse_tensor.encoding<{{{.*}}}>>) -// CHECK: %[[E:.*]] = tensor.extract_slice %[[A]][1, 0] [1, 3] [1, 1] : tensor<2x3xi64, #sparse_tensor.encoding<{{{.*}}}>> to tensor<1x3xi64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[C:.*]] = sparse_tensor.convert %[[E]] : tensor<1x3xi64, #sparse_tensor.encoding<{{{.*}}}>> to tensor<1x3xi64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: return %[[C]] : tensor<1x3xi64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK-SAME: %[[A:.*]]: tensor<2x3xi64, #sparse{{[0-9]*}}>) +// CHECK: %[[E:.*]] = tensor.extract_slice %[[A]][1, 0] [1, 3] [1, 1] : tensor<2x3xi64, #sparse{{[0-9]*}}> to tensor<1x3xi64, #sparse{{[0-9]*}}> +// CHECK: %[[C:.*]] = sparse_tensor.convert %[[E]] : tensor<1x3xi64, #sparse{{[0-9]*}}> to tensor<1x3xi64, #sparse{{[0-9]*}}> +// CHECK: return %[[C]] : tensor<1x3xi64, #sparse{{[0-9]*}}> func.func @sparse_fuse_slice(%a : tensor<2x3xi64, #SortedCOO>) -> tensor<1x3xi64, #SortedCOO> { %extracted_slice = tensor.extract_slice %a[1, 0] [1, 3] [1, 1] : tensor<2x3xi64, #SortedCOO> to tensor<1x3xi64> %cast = tensor.cast %extracted_slice : tensor<1x3xi64> to tensor<1x3xi64, #Slice> @@ -59,10 +59,10 @@ func.func @sparse_fuse_slice(%a : tensor<2x3xi64, #SortedCOO>) -> tensor<1x3xi64 // CHECK-LABEL: func.func @sparse_select( // CHECK-SAME: %[[VAL_0:.*]]: tensor<4x4xi1>, -// CHECK-SAME: %[[VAL_1:.*]]: tensor<4x4xf64, #sparse_tensor.encoding<{{.*}}>>, -// CHECK-SAME: %[[VAL_2:.*]]: tensor<4x4xf64, #sparse_tensor.encoding<{{.*}}>>) -> tensor<4x4xf64, #sparse_tensor.encoding<{{.*}}>> { +// CHECK-SAME: %[[VAL_1:.*]]: tensor<4x4xf64, #sparse{{[0-9]*}}>, +// CHECK-SAME: %[[VAL_2:.*]]: tensor<4x4xf64, #sparse{{[0-9]*}}>) -> tensor<4x4xf64, #sparse{{[0-9]*}}> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK-DAG: %[[VAL_4:.*]] = tensor.empty() : tensor<4x4xf64, #sparse_tensor.encoding<{{.*}}>> +// CHECK-DAG: %[[VAL_4:.*]] = tensor.empty() : tensor<4x4xf64, #sparse{{[0-9]*}}> // CHECK-NEXT: %[[VAL_5:.*]] = linalg.generic {indexing_maps = [#map, #map, #map, #map], iterator_types = ["parallel", "parallel"]} // CHECK-SAME: ins(%[[VAL_0]], %[[VAL_1]], %[[VAL_2]] // CHECK-NEXT: ^bb0(%[[VAL_6:.*]]: i1, %[[VAL_7:.*]]: f64, %[[VAL_8:.*]]: f64, %[[VAL_9:.*]]: f64): @@ -83,8 +83,8 @@ func.func @sparse_fuse_slice(%a : tensor<2x3xi64, #SortedCOO>) -> 
tensor<1x3xi64 // CHECK-NEXT: sparse_tensor.yield %[[VAL_17]] : f64 // CHECK-NEXT: } // CHECK-NEXT: linalg.yield %[[VAL_10]] : f64 -// CHECK-NEXT: } -> tensor<4x4xf64, #sparse_tensor.encoding<{{.*}}>> -// CHECK-NEXT: return %[[VAL_18:.*]] : tensor<4x4xf64, #sparse_tensor.encoding<{{.*}}>> +// CHECK-NEXT: } -> tensor<4x4xf64, #sparse{{[0-9]*}}> +// CHECK-NEXT: return %[[VAL_18:.*]] : tensor<4x4xf64, #sparse{{[0-9]*}}> // CHECK-NEXT: } func.func @sparse_select(%cond: tensor<4x4xi1>, %arga: tensor<4x4xf64, #DCSR>, diff --git a/mlir/test/Dialect/SparseTensor/rejected.mlir b/mlir/test/Dialect/SparseTensor/rejected.mlir index c68c576..c3cbde9 100644 --- a/mlir/test/Dialect/SparseTensor/rejected.mlir +++ b/mlir/test/Dialect/SparseTensor/rejected.mlir @@ -15,7 +15,7 @@ // CHECK-LABEL: func.func @sparse_reduction_subi( // CHECK-SAME: %[[VAL_0:.*]]: tensor, -// CHECK-SAME: %[[VAL_1:.*]]: tensor>) -> tensor { +// CHECK-SAME: %[[VAL_1:.*]]: tensor) -> tensor { // CHECK: %[[VAL_2:.*]] = linalg.generic // CHECK: ^bb0(%[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: i32): // CHECK: %[[VAL_5:.*]] = arith.subi %[[VAL_3]], %[[VAL_4]] : i32 diff --git a/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir b/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir index 579b0f2..11fc502 100644 --- a/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/rewriting_for_codegen.mlir @@ -14,8 +14,8 @@ }> // CHECK-LABEL: func.func @sparse_new( -// CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> tensor> { -// CHECK: %[[COO:.*]] = sparse_tensor.new %[[A]] : !llvm.ptr to tensor> +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> tensor { +// CHECK: %[[COO:.*]] = sparse_tensor.new %[[A]] : !llvm.ptr to tensor // CHECK: %[[R:.*]] = sparse_tensor.convert %[[COO]] // CHECK: bufferization.dealloc_tensor %[[COO]] // CHECK: return %[[R]] @@ -25,8 +25,8 @@ func.func @sparse_new(%arg0: !llvm.ptr) -> tensor { } // CHECK-LABEL: func.func @sparse_new_csc( -// CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> tensor> { -// CHECK: %[[COO:.*]] = sparse_tensor.new %[[A]] : !llvm.ptr to tensor> +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> tensor { +// CHECK: %[[COO:.*]] = sparse_tensor.new %[[A]] : !llvm.ptr to tensor // CHECK: %[[R:.*]] = sparse_tensor.convert %[[COO]] // CHECK: bufferization.dealloc_tensor %[[COO]] // CHECK: return %[[R]] @@ -36,8 +36,8 @@ func.func @sparse_new_csc(%arg0: !llvm.ptr) -> tensor { } // CHECK-LABEL: func.func @sparse_new_coo( -// CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> tensor> { -// CHECK: %[[COO:.*]] = sparse_tensor.new %[[A]] : !llvm.ptr to tensor> +// CHECK-SAME: %[[A:.*]]: !llvm.ptr) -> tensor { +// CHECK: %[[COO:.*]] = sparse_tensor.new %[[A]] : !llvm.ptr to tensor // CHECK: return %[[COO]] func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { %0 = sparse_tensor.new %arg0 : !llvm.ptr to tensor @@ -45,7 +45,7 @@ func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { } // CHECK-LABEL: func.func @sparse_out( -// CHECK-SAME: %[[A:.*]]: tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[A:.*]]: tensor<10x20xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[B:.*]]: !llvm.ptr) { // CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index diff --git a/mlir/test/Dialect/SparseTensor/roundtrip.mlir b/mlir/test/Dialect/SparseTensor/roundtrip.mlir index 5b733ec..476fa1b 100644 --- a/mlir/test/Dialect/SparseTensor/roundtrip.mlir +++ b/mlir/test/Dialect/SparseTensor/roundtrip.mlir @@ -307,7 +307,7 @@ func.func @sparse_load_ins(%arg0: tensor<16x32xf64, 
#DenseMatrix>) -> tensor<16x #SparseVector = #sparse_tensor.encoding<{map = (d0) -> (d0 : compressed)}> // CHECK-LABEL: func @sparse_insert( -// CHECK-SAME: %[[A:.*]]: tensor<128xf64, #sparse_tensor.encoding<{{.*}}>>, +// CHECK-SAME: %[[A:.*]]: tensor<128xf64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[B:.*]]: index, // CHECK-SAME: %[[C:.*]]: f64) // CHECK: %[[T:.*]] = sparse_tensor.insert %[[C]] into %[[A]][%[[B]]] : tensor<128xf64, #{{.*}}> @@ -362,7 +362,7 @@ func.func @sparse_push_back_n(%arg0: index, %arg1: memref, %arg2: f64, %a #SparseMatrix = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : compressed, d1 : compressed)}> // CHECK-LABEL: func @sparse_expansion( -// CHECK-SAME: %[[A:.*]]: tensor<8x8xf64, #sparse_tensor.encoding<{{.*}}>>) +// CHECK-SAME: %[[A:.*]]: tensor<8x8xf64, #sparse{{[0-9]*}}>) // CHECK: %{{.*}}, %{{.*}}, %{{.*}}, %[[T:.*]] = sparse_tensor.expand %[[A]] // CHECK: return %[[T]] : index func.func @sparse_expansion(%tensor: tensor<8x8xf64, #SparseMatrix>) -> index { @@ -380,10 +380,10 @@ func.func @sparse_expansion(%tensor: tensor<8x8xf64, #SparseMatrix>) -> index { // CHECK-SAME: %[[A1:.*1]]: memref, // CHECK-SAME: %[[A2:.*2]]: memref, // CHECK-SAME: %[[A3:.*3]]: index -// CHECK-SAME: %[[A4:.*4]]: tensor<8x8xf64, #sparse_tensor.encoding<{{.*}}>>, +// CHECK-SAME: %[[A4:.*4]]: tensor<8x8xf64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[A5:.*5]]: index) // CHECK: %[[T:.*]] = sparse_tensor.compress %[[A0]], %[[A1]], %[[A2]], %[[A3]] into %[[A4]][%[[A5]] -// CHECK: return %[[T]] : tensor<8x8xf64, #sparse_tensor.encoding<{{.*}}>> +// CHECK: return %[[T]] : tensor<8x8xf64, #sparse{{[0-9]*}}> func.func @sparse_compression(%values: memref, %filled: memref, %added: memref, @@ -400,9 +400,9 @@ func.func @sparse_compression(%values: memref, #SparseMatrix = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : compressed, d1 : compressed)}> // CHECK-LABEL: func @sparse_out( -// CHECK-SAME: %[[A:.*]]: tensor>, +// CHECK-SAME: %[[A:.*]]: tensor, // CHECK-SAME: %[[B:.*]]: !llvm.ptr) -// CHECK: sparse_tensor.out %[[A]], %[[B]] : tensor>, !llvm.ptr +// CHECK: sparse_tensor.out %[[A]], %[[B]] : tensor, !llvm.ptr // CHECK: return func.func @sparse_out(%arg0: tensor, %arg1: !llvm.ptr) { sparse_tensor.out %arg0, %arg1 : tensor, !llvm.ptr @@ -590,7 +590,7 @@ func.func @sparse_tensor_foreach(%arg0: tensor<2x4xf64, #DCSR>) -> () { #DCSR = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : compressed, d1 : compressed)}> // CHECK-LABEL: func @sparse_tensor_foreach( -// CHECK-SAME: %[[A0:.*]]: tensor<2x4xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[A0:.*]]: tensor<2x4xf64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[A1:.*]]: f32 // CHECK-NEXT: %[[RET:.*]] = sparse_tensor.foreach in %[[A0]] init(%[[A1]]) // CHECK-NEXT: ^bb0(%[[TMP_1:.*]]: index, %[[TMP_2:.*]]: index, %[[TMP_v:.*]]: f64, %[[TMP_r:.*]]: f32) @@ -640,7 +640,7 @@ func.func @sparse_sort_coo_stable(%arg0: index, %arg1: memref, %arg2: mem #OrderedCOO = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton)}> // CHECK-LABEL: func @sparse_reorder_coo( -// CHECK-SAME: %[[A:.*]]: tensor> +// CHECK-SAME: %[[A:.*]]: tensor // CHECK: %[[R:.*]] = sparse_tensor.reorder_coo quick_sort %[[A]] // CHECK: return %[[R]] func.func @sparse_reorder_coo(%arg0 : tensor) -> tensor { diff --git a/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir index db84431..20702bb 100644 --- a/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir +++ 
b/mlir/test/Dialect/SparseTensor/roundtrip_encoding.mlir @@ -1,8 +1,11 @@ -// RUN: mlir-opt %s -split-input-file | mlir-opt | FileCheck %s +// RUN: mlir-opt %s -split-input-file | mlir-opt -split-input-file | FileCheck %s +#SV = #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }> + +// CHECK: #[[$SV:.*]] = #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }> // CHECK-LABEL: func private @sparse_1d_tensor( -// CHECK-SAME: tensor<32xf64, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>>) -func.func private @sparse_1d_tensor(tensor<32xf64, #sparse_tensor.encoding<{ map = (d0) -> (d0 : compressed) }>>) +// CHECK-SAME: tensor<32xf64, #[[$SV]]>) +func.func private @sparse_1d_tensor(tensor<32xf64, #SV>) // ----- @@ -12,8 +15,9 @@ func.func private @sparse_1d_tensor(tensor<32xf64, #sparse_tensor.encoding<{ map crdWidth = 64 }> +// CHECK: #[[$CSR:.*]] = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 : compressed), posWidth = 64, crdWidth = 64 }> // CHECK-LABEL: func private @sparse_csr( -// CHECK-SAME: tensor (d0 : dense, d1 : compressed), posWidth = 64, crdWidth = 64 }>>) +// CHECK-SAME: tensor) func.func private @sparse_csr(tensor) // ----- @@ -22,8 +26,9 @@ func.func private @sparse_csr(tensor) map = {l0, l1} (d0 = l0, d1 = l1) -> (l0 = d0 : dense, l1 = d1 : compressed) }> +// CHECK: #[[$CSR_EXPLICIT:.*]] = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 : compressed) }> // CHECK-LABEL: func private @CSR_explicit( -// CHECK-SAME: tensor (d0 : dense, d1 : compressed) }>> +// CHECK-SAME: tensor func.func private @CSR_explicit(%arg0: tensor) { return } @@ -36,8 +41,9 @@ func.func private @CSR_explicit(%arg0: tensor) { crdWidth = 0 }> +// CHECK-DAG: #[[$CSC:.*]] = #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : dense, d0 : compressed) }> // CHECK-LABEL: func private @sparse_csc( -// CHECK-SAME: tensor (d1 : dense, d0 : compressed) }>>) +// CHECK-SAME: tensor) func.func private @sparse_csc(tensor) // ----- @@ -48,8 +54,9 @@ func.func private @sparse_csc(tensor) crdWidth = 64 }> +// CHECK-DAG: #[[$DCSC:.*]] = #sparse_tensor.encoding<{ map = (d0, d1) -> (d1 : compressed, d0 : compressed), crdWidth = 64 }> // CHECK-LABEL: func private @sparse_dcsc( -// CHECK-SAME: tensor (d1 : compressed, d0 : compressed), crdWidth = 64 }>>) +// CHECK-SAME: tensor) func.func private @sparse_dcsc(tensor) // ----- @@ -58,8 +65,9 @@ func.func private @sparse_dcsc(tensor) map = (d0, d1) -> (d0 : compressed(nonunique, nonordered), d1 : singleton(nonordered)) }> +// CHECK-DAG: #[[$COO:.*]] = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed(nonunique, nonordered), d1 : singleton(nonordered)) }> // CHECK-LABEL: func private @sparse_coo( -// CHECK-SAME: tensor (d0 : compressed(nonunique, nonordered), d1 : singleton(nonordered)) }>>) +// CHECK-SAME: tensor) func.func private @sparse_coo(tensor) // ----- @@ -68,8 +76,9 @@ func.func private @sparse_coo(tensor) map = (d0, d1, d2) -> (d0 : dense, d1 : loose_compressed(nonunique), d2 : singleton) }> +// CHECK-DAG: #[[$BCOO:.*]] = #sparse_tensor.encoding<{ map = (d0, d1, d2) -> (d0 : dense, d1 : loose_compressed(nonunique), d2 : singleton) }> // CHECK-LABEL: func private @sparse_bcoo( -// CHECK-SAME: tensor (d0 : dense, d1 : loose_compressed(nonunique), d2 : singleton) }>>) +// CHECK-SAME: tensor) func.func private @sparse_bcoo(tensor) // ----- @@ -78,8 +87,9 @@ func.func private @sparse_bcoo(tensor) map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton) }> +// CHECK-DAG: #[[$SortedCOO:.*]] = 
#sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton) }> // CHECK-LABEL: func private @sparse_sorted_coo( -// CHECK-SAME: tensor<10x10xf64, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton) }>>) +// CHECK-SAME: tensor<10x10xf64, #[[$SortedCOO]]>) func.func private @sparse_sorted_coo(tensor<10x10xf64, #SortedCOO>) // ----- @@ -93,8 +103,9 @@ func.func private @sparse_sorted_coo(tensor<10x10xf64, #SortedCOO>) ) }> +// CHECK-DAG: #[[$BSR:.*]] = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 floordiv 2 : dense, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }> // CHECK-LABEL: func private @sparse_bsr( -// CHECK-SAME: tensor<10x60xf64, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 floordiv 2 : dense, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>> +// CHECK-SAME: tensor<10x60xf64, #[[$BSR]]> func.func private @sparse_bsr(tensor<10x60xf64, #BSR>) @@ -104,8 +115,9 @@ func.func private @sparse_bsr(tensor<10x60xf64, #BSR>) map = [s0](d0, d1) -> (d0 * (s0 * 4) : dense, d0 : dense, d1 : compressed) }> +// CHECK-DAG: #[[$ELL:.*]] = #sparse_tensor.encoding<{ map = [s0](d0, d1) -> (d0 * (s0 * 4) : dense, d0 : dense, d1 : compressed) }> // CHECK-LABEL: func private @sparse_ell( -// CHECK-SAME: tensor (d0 * (s0 * 4) : dense, d0 : dense, d1 : compressed) }>> +// CHECK-SAME: tensor func.func private @sparse_ell(tensor) // ----- @@ -114,8 +126,9 @@ func.func private @sparse_ell(tensor) map = (d0 : #sparse_tensor, d1 : #sparse_tensor) -> (d0 : dense, d1 : compressed) }> +// CHECK-DAG: #[[$CSR_SLICE:.*]] = #sparse_tensor.encoding<{ map = (d0 : #sparse_tensor, d1 : #sparse_tensor) -> (d0 : dense, d1 : compressed) }> // CHECK-LABEL: func private @sparse_slice( -// CHECK-SAME: tensor, d1 : #sparse_tensor) -> (d0 : dense, d1 : compressed) }>> +// CHECK-SAME: tensor func.func private @sparse_slice(tensor) // ----- @@ -124,8 +137,9 @@ func.func private @sparse_slice(tensor) map = (d0 : #sparse_tensor, d1 : #sparse_tensor) -> (d0 : dense, d1 : compressed) }> +// CHECK-DAG: #[[$CSR_SLICE:.*]] = #sparse_tensor.encoding<{ map = (d0 : #sparse_tensor, d1 : #sparse_tensor) -> (d0 : dense, d1 : compressed) }> // CHECK-LABEL: func private @sparse_slice( -// CHECK-SAME: tensor, d1 : #sparse_tensor) -> (d0 : dense, d1 : compressed) }>> +// CHECK-SAME: tensor func.func private @sparse_slice(tensor) // ----- @@ -139,8 +153,9 @@ func.func private @sparse_slice(tensor) ) }> +// CHECK-DAG: #[[$BSR:.*]] = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 floordiv 2 : dense, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }> // CHECK-LABEL: func private @BSR( -// CHECK-SAME: tensor (d0 floordiv 2 : dense, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>> +// CHECK-SAME: tensor func.func private @BSR(%arg0: tensor) { return } @@ -158,8 +173,9 @@ func.func private @BSR(%arg0: tensor) { ) }> +// CHECK-DAG: #[[$BCSR:.*]] = #sparse_tensor.encoding<{ map = (d0, d1, d2) -> (d0 floordiv 2 : dense, d1 floordiv 3 : dense, d2 floordiv 4 : compressed, d0 mod 2 : dense, d1 mod 3 : dense, d2 mod 4 : dense) }> // CHECK-LABEL: func private @BCSR( -// CHECK-SAME: tensor (d0 floordiv 2 : dense, d1 floordiv 3 : dense, d2 floordiv 4 : compressed, d0 mod 2 : dense, d1 mod 3 : dense, d2 mod 4 : dense) }>> +// CHECK-SAME: tensor func.func private @BCSR(%arg0: tensor) { return } @@ -178,8 +194,9 @@ func.func private @BCSR(%arg0: tensor) { ) }> +// CHECK-DAG: #[[$BSR_explicit:.*]] = #sparse_tensor.encoding<{ map = 
(d0, d1) -> (d0 floordiv 2 : dense, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }> // CHECK-LABEL: func private @BSR_explicit( -// CHECK-SAME: tensor (d0 floordiv 2 : dense, d1 floordiv 3 : compressed, d0 mod 2 : dense, d1 mod 3 : dense) }>> +// CHECK-SAME: tensor func.func private @BSR_explicit(%arg0: tensor) { return } @@ -195,8 +212,9 @@ func.func private @BSR_explicit(%arg0: tensor) { crdWidth = 8 // we would even like just 2-bits }> +// CHECK-DAG: #[[$NV_24:.*]] = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 floordiv 4 : dense, d1 mod 4 : block2_4), crdWidth = 8 }> // CHECK-LABEL: func private @NV_24( -// CHECK-SAME: tensor (d0 : dense, d1 floordiv 4 : dense, d1 mod 4 : block2_4), crdWidth = 8 }>> +// CHECK-SAME: tensor func.func private @NV_24(%arg0: tensor) { return } @@ -212,8 +230,9 @@ func.func private @NV_24(%arg0: tensor) { ) }> +// CHECK-DAG: #[[$NV_24:.*]] = #sparse_tensor.encoding<{ map = (d0, d1, d2) -> (d0 : dense, d1 : dense, d2 floordiv 4 : dense, d2 mod 4 : block2_4) }> // CHECK-LABEL: func private @NV_24( -// CHECK-SAME: tensor (d0 : dense, d1 : dense, d2 floordiv 4 : dense, d2 mod 4 : block2_4) }>> +// CHECK-SAME: tensor func.func private @NV_24(%arg0: tensor) { return } @@ -229,8 +248,9 @@ func.func private @NV_24(%arg0: tensor) { ) }> +// CHECK-DAG: #[[$NV_24:.*]] = #sparse_tensor.encoding<{ map = (d0, d1, d2) -> (d0 : dense, d2 floordiv 4 : dense, d1 : dense, d2 mod 4 : block2_4) }> // CHECK-LABEL: func private @NV_24( -// CHECK-SAME: tensor (d0 : dense, d2 floordiv 4 : dense, d1 : dense, d2 mod 4 : block2_4) }>> +// CHECK-SAME: tensor func.func private @NV_24(%arg0: tensor) { return } diff --git a/mlir/test/Dialect/SparseTensor/semi_ring.mlir b/mlir/test/Dialect/SparseTensor/semi_ring.mlir index 5a936bf..814acd6 100644 --- a/mlir/test/Dialect/SparseTensor/semi_ring.mlir +++ b/mlir/test/Dialect/SparseTensor/semi_ring.mlir @@ -19,13 +19,13 @@ module { // operation, and the values can be change "in-place". 
// // CHECK-LABEL: func.func @add_only_where_nonzero( - // CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> { + // CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xf64, #sparse{{[0-9]*}}>) -> tensor<8x8xf64, #sparse{{[0-9]*}}> { // CHECK-DAG: %[[VAL_1:.*]] = arith.constant 8 : index // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2.000000e+00 : f64 - // CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref - // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref + // CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref + // CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref // CHECK: scf.for %[[VAL_7:.*]] = %[[VAL_2]] to %[[VAL_1]] step %[[VAL_3]] { // CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_7]]] : memref // CHECK: %[[VAL_9:.*]] = arith.addi %[[VAL_7]], %[[VAL_3]] : index @@ -36,8 +36,8 @@ module { // CHECK: memref.store %[[VAL_13]], %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref // CHECK: } {"Emitted from" = "linalg.generic"} // CHECK: } {"Emitted from" = "linalg.generic"} - // CHECK: %[[VAL_14:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> - // CHECK: return %[[VAL_14]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> + // CHECK: %[[VAL_14:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<8x8xf64, #sparse{{[0-9]*}}> + // CHECK: return %[[VAL_14]] : tensor<8x8xf64, #sparse{{[0-9]*}}> // CHECK: } func.func @add_only_where_nonzero(%argA: tensor<8x8xf64, #SM>) -> tensor<8x8xf64, #SM> { %c = arith.constant 2.0 : f64 diff --git a/mlir/test/Dialect/SparseTensor/sorted_coo.mlir b/mlir/test/Dialect/SparseTensor/sorted_coo.mlir index 1a8af35..91e7920 100644 --- a/mlir/test/Dialect/SparseTensor/sorted_coo.mlir +++ b/mlir/test/Dialect/SparseTensor/sorted_coo.mlir @@ -37,14 +37,14 @@ // // CHECK-LABEL: func.func @sparse_scale( -// CHECK-SAME: %[[VAL_0:.*]]: tensor>) -> tensor> { +// CHECK-SAME: %[[VAL_0:.*]]: tensor) -> tensor { // CHECK-DAG: %[[VAL_1:.*]] = arith.constant false // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2.000000e+00 : f32 -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor> to memref> -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor> to memref +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor to memref> +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor to memref // CHECK-DAG: %[[VAL_8:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref // CHECK-DAG: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_10:.*]] = scf.while (%[[VAL_11:.*]] = %[[VAL_8]]) : (index) -> index { @@ -75,8 +75,8 @@ // CHECK: } {"Emitted from" = "linalg.generic"} // CHECK: scf.yield %[[VAL_28:.*]] : index // CHECK: } attributes {"Emitted from" = "linalg.generic"} -// 
CHECK: %[[VAL_29:.*]] = sparse_tensor.load %[[VAL_0]] : tensor> -// CHECK: return %[[VAL_29]] : tensor> +// CHECK: %[[VAL_29:.*]] = sparse_tensor.load %[[VAL_0]] : tensor +// CHECK: return %[[VAL_29]] : tensor // CHECK: } func.func @sparse_scale(%argx: tensor) -> tensor { %c = arith.constant 2.0 : f32 @@ -90,16 +90,16 @@ func.func @sparse_scale(%argx: tensor) -> tensor>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x64xf64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<64xf64>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf64>) -> tensor<32xf64> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant false // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref> -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref> -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref> +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref> +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64> // CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_5]]] : memref @@ -155,20 +155,20 @@ func.func @matvec(%arga: tensor<32x64xf64, #SortedCOO>, } // CHECK-LABEL: func.func @mateltmul( -// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32x64xf64, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor<32x64xf64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_2:.*2]]: tensor<32x64xf64>) -> tensor<32x64xf64> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant false // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0.000000e+00 : f64 // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref> -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref> -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates 
%[[VAL_1]] {level = 0 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref> -// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref> -// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref> +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref> +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref> +// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref> +// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x64xf64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x64xf64> // CHECK: linalg.fill ins(%[[VAL_4]] : f64) outs(%[[VAL_15]] : memref<32x64xf64>) // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_5]]] : memref diff --git a/mlir/test/Dialect/SparseTensor/sparse_1d.mlir b/mlir/test/Dialect/SparseTensor/sparse_1d.mlir index d29758e..8f96f33 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_1d.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_1d.mlir @@ -14,13 +14,13 @@ } // CHECK-LABEL: func @add_d( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: f32, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_2]] // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_8]] : memref<32xf32>) // CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { @@ -43,14 +43,14 @@ func.func @add_d(%arga: tensor<32xf32, #DV>, %argb: f32, %argx: tensor<32xf32>) } // CHECK-LABEL: func @add_d_init( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: f32) -> tensor<32xf32> { // CHECK: %[[VAL_2:.*]] = arith.constant 32 : index // CHECK: %[[VAL_3:.*]] = arith.constant 0.000000e+00 : f32 // CHECK: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK: %[[VAL_INITTENSOR:.*]] = tensor.empty() : tensor<32xf32> -// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, 
#sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_INITTENSOR]] : memref<32xf32> // CHECK: linalg.fill ins(%[[VAL_3]] : f32) outs(%[[VAL_7]] : memref<32xf32>) // CHECK: scf.for %[[VAL_8:.*]] = %[[VAL_4]] to %[[VAL_2]] step %[[VAL_5]] { @@ -74,13 +74,13 @@ func.func @add_d_init(%arga: tensor<32xf32, #DV>, %argb: f32) -> tensor<32xf32> } // CHECK-LABEL: func @mul_d( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: f32, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_2]] // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_8]] : memref<32xf32>) // CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { @@ -103,16 +103,16 @@ func.func @mul_d(%arga: tensor<32xf32, #DV>, %argb: f32, %argx: tensor<32xf32>) } // CHECK-LABEL: func @add_s( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: f32, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant true // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] @@ -158,13 +158,13 @@ func.func @add_s(%arga: tensor<32xf32, #SV>, %argb: f32, %argx: tensor<32xf32>) } // CHECK-LABEL: func @repeated_add_s( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32>) -> tensor<32xf32> { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 
: index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] // CHECK-DAG: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref // CHECK-DAG: %[[VAL_10:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref @@ -197,14 +197,14 @@ func.func @repeated_add_s(%arga: tensor<32xf32, #SV>, %argx: tensor<32xf32>) -> } // CHECK-LABEL: func @mul_s( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: f32, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] // CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>) // CHECK-DAG: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref @@ -240,13 +240,13 @@ func.func @mul_s(%arga: tensor<32xf32, #SV>, %argb: f32, %argx: tensor<32xf32>) } // CHECK-LABEL: func @add_dd( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32> // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>) @@ -271,13 +271,13 @@ func.func @add_dd(%arga: tensor<32xf32, #DV>, %argb: 
tensor<32xf32>, %argx: tens } // CHECK-LABEL: func @mul_dd( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32> // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_9]] : memref<32xf32>) @@ -303,16 +303,16 @@ func.func @mul_dd(%arga: tensor<32xf32, #DV>, %argb: tensor<32xf32>, %argx: tens // CHECK-LABEL: func @add_ds( // CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32>, -// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant true // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : memref<32xf32> -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] // CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>) // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref @@ -362,14 +362,14 @@ func.func @add_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tens // CHECK-LABEL: func @mul_ds( // CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32>, -// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_5:.*]] = bufferization.to_memref %[[VAL_0]] : memref<32xf32> -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// 
CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] // CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32xf32>) // CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref @@ -396,16 +396,16 @@ func.func @mul_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32, #SV>, %argx: tens } // CHECK-LABEL: func @add_sd( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant true // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32> // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] // CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>) @@ -455,14 +455,14 @@ func.func @add_sd(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32>, %argx: tens } // CHECK-LABEL: func @mul_sd( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref 
+// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32> // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] // CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32xf32>) @@ -490,16 +490,16 @@ func.func @mul_sd(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32>, %argx: tens } // CHECK-LABEL: func @add_ss( -// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor<32xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_2:.*2]]: tensor<32xf32>) -> tensor<32xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] // CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>) // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref @@ -572,16 +572,16 @@ func.func @add_ss(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32, #SV>, %argx: } // CHECK-LABEL: func @mul_ss( -// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor<32xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_2:.*2]]: tensor<32xf32>) -> tensor<32xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : 
index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] // CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32xf32>) // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref @@ -632,17 +632,17 @@ func.func @mul_ss(%arga: tensor<32xf32, #SV>, %argb: tensor<32xf32, #SV>, %argx: } // CHECK-LABEL: func @two_way_inv( -// CHECK-SAME: %[[VAL_0:.*0]]: tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*0]]: tensor<16xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor<16xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_2:.*2]]: f32, // CHECK-SAME: %[[VAL_3:.*3]]: tensor<16xf32>) -> tensor<16xf32> { // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<16xf32, 
#sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]] // CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<16xf32>) // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref @@ -724,17 +724,17 @@ func.func @two_way_inv(%arga: tensor<16xf32, #SV>, %argb: tensor<16xf32, #SV>, % } // CHECK-LABEL: func @two_way_inv_alt( -// CHECK-SAME: %[[VAL_0:.*0]]: tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*0]]: tensor<16xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor<16xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_2:.*2]]: f32, // CHECK-SAME: %[[VAL_3:.*3]]: tensor<16xf32>) -> tensor<16xf32> { // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]] // CHECK-DAG: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<16xf32>) // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref @@ -824,12 +824,12 @@ func.func @two_way_inv_alt(%arga: tensor<16xf32, #SV>, } // CHECK-LABEL: func @sum_reduction( -// CHECK-SAME: %[[VAL_0:.*]]: tensor>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor, // CHECK-SAME: %[[VAL_1:.*]]: tensor) -> tensor { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_0]] : tensor> to memref +// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions 
%[[VAL_0]] {level = 0 : index} : tensor to memref +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_0]] : tensor to memref // CHECK-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_1]] : memref // CHECK-DAG: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref // CHECK-DAG: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref @@ -865,16 +865,16 @@ func.func @sum_reduction(%arga: tensor, %argx: tensor) -> tenso } // CHECK-LABEL: func @sum_reduction_ss( -// CHECK-SAME: %[[VAL_0:.*0]]: tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*0]]: tensor<16xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor<16xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_2:.*2]]: tensor) -> tensor { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref // CHECK-DAG: %[[VAL_13:.*]] = memref.load %[[VAL_11]][] : memref // CHECK-DAG: %[[VAL_14:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref @@ -970,17 +970,17 @@ func.func @sum_reduction_ss(%arga: tensor<16xf32, #SV>, } // CHECK-LABEL: func @sum_reduction_inv( -// CHECK-SAME: %[[VAL_0:.*0]]: tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor, %[[VAL_2:.*2]]: tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*0]]: tensor<16xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor, %[[VAL_2:.*2]]: tensor<16xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_3:.*3]]: tensor) -> tensor { // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] 
{level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 0 : index} : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 0 : index} : tensor<16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]] : memref // CHECK-DAG: %[[VAL_15:.*]] = memref.load %[[VAL_13]][] : memref // CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_9]][] : memref @@ -1084,19 +1084,19 @@ func.func @sum_reduction_inv(%arga: tensor<16xf32, #SV>, // CHECK-LABEL: func @four_tensors_op( // CHECK-SAME: %[[VAL_0:.*0]]: tensor, -// CHECK-SAME: %[[VAL_1:.*1]]: tensor>, %[[VAL_2:.*2]]: tensor, %[[VAL_3:.*3]]: tensor>, +// CHECK-SAME: %[[VAL_1:.*1]]: tensor, %[[VAL_2:.*2]]: tensor, %[[VAL_3:.*3]]: tensor, // CHECK-SAME: %[[VAL_4:.*]]: tensor) -> tensor { // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant true // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref -// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_3]] {level = 0 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.coordinates %[[VAL_3]] {level = 0 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.values %[[VAL_3]] : tensor> to memref +// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_3]] {level = 0 : index} : tensor to memref +// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.coordinates %[[VAL_3]] {level = 0 : index} : tensor to memref +// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.values %[[VAL_3]] : tensor to memref 
// CHECK-DAG: %[[VAL_16:.*]] = tensor.dim %[[VAL_0]], %[[VAL_5]] : tensor // CHECK-DAG: %[[VAL_18:.*]] = bufferization.to_memref %[[VAL_4]] // CHECK-DAG: linalg.fill ins(%{{.*}} : f64) outs(%[[VAL_18]] : memref) @@ -1259,19 +1259,19 @@ func.func @four_tensors_op(%arga: tensor, } // CHECK-LABEL: func @red3s( -// CHECK-SAME: %[[VAL_0:.*0]]: tensor>, %[[VAL_1:.*1]]: tensor>, %[[VAL_2:.*2]]: tensor>, +// CHECK-SAME: %[[VAL_0:.*0]]: tensor, %[[VAL_1:.*1]]: tensor, %[[VAL_2:.*2]]: tensor, // CHECK-SAME: %[[VAL_3:.*3]]: tensor) -> tensor { // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> to memref -// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 0 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 0 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_2]] : tensor> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 0 : index} : tensor to memref +// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 0 : index} : tensor to memref +// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_2]] : tensor to memref // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_3]] : memref // CHECK-DAG: %[[VAL_17:.*]] = memref.load %[[VAL_15]][] : memref // CHECK-DAG: %[[VAL_18:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref diff --git a/mlir/test/Dialect/SparseTensor/sparse_2d.mlir b/mlir/test/Dialect/SparseTensor/sparse_2d.mlir index 22681da..57ae183 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_2d.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_2d.mlir @@ -17,14 +17,14 @@ } // CHECK-LABEL: func @add_dd( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf32>) -> tensor<32x16xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, 
#sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32x16xf32>) @@ -53,7 +53,7 @@ func.func @add_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %arg } // CHECK-LABEL: func.func @cmp_dd( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index @@ -61,7 +61,7 @@ func.func @add_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %arg // CHECK-DAG: %[[VAL_5:.*]] = arith.constant false // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1> // CHECK: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_10]] : memref<32x16xi1>) @@ -90,14 +90,14 @@ func.func @cmp_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %arg } // CHECK-LABEL: func @mul_dd( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf32>) -> tensor<32x16xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_10]] : memref<32x16xf32>) @@ -126,7 +126,7 @@ func.func @mul_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %arg } // CHECK-LABEL: func @add_ds( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf32>) -> tensor<32x16xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index @@ -134,9 +134,9 @@ func.func @mul_dd(%arga: tensor<32x16xf32, #Tdd>, %argb: tensor<32x16xf32>, %arg // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant true // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : 
tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<32x16xf32>) @@ -189,7 +189,7 @@ func.func @add_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %arg } // CHECK-LABEL: func.func @cmp_ds( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index @@ -199,9 +199,9 @@ func.func @add_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %arg // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_8:.*]] = arith.constant true // CHECK-DAG: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1> // CHECK: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_14]] : memref<32x16xi1>) @@ -256,15 +256,15 @@ func.func @cmp_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %arg } // CHECK-LABEL: func @mul_ds( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf32>) -> tensor<32x16xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = 
sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_11]] : memref<32x16xf32>) @@ -295,7 +295,7 @@ func.func @mul_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %arg } // CHECK-LABEL: func @add_sd( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf32>) -> tensor<32x16xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index @@ -303,9 +303,9 @@ func.func @mul_ds(%arga: tensor<32x16xf32, #Tds>, %argb: tensor<32x16xf32>, %arg // CHECK-DAG: %[[VAL_5:.*]] = arith.constant true // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_13]] : memref<32x16xf32>) @@ -363,7 +363,7 @@ func.func @add_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %arg } // CHECK-LABEL: func.func @cmp_sd( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 16 : index @@ -373,9 +373,9 @@ func.func @add_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %arg // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_8:.*]] = arith.constant true // CHECK-DAG: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, 
#sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1> // CHECK: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_14]] : memref<32x16xi1>) @@ -435,15 +435,15 @@ func.func @cmp_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %arg } // CHECK-LABEL: func @mul_sd( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf32>) -> tensor<32x16xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 16 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_11]] : memref<32x16xf32>) @@ -475,7 +475,7 @@ func.func @mul_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %arg } // CHECK-LABEL: func @add_ss( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf32>) -> tensor<32x16xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index @@ -483,11 +483,11 @@ func.func @mul_sd(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32>, %arg // CHECK-DAG: %[[VAL_5:.*]] = arith.constant true // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 
0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_15]] : memref<32x16xf32>) @@ -569,7 +569,7 @@ func.func @add_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %arg } // CHECK-LABEL: func.func @cmp_ss( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index @@ -579,11 +579,11 @@ func.func @add_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %arg // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_8:.*]] = arith.constant true // CHECK-DAG: %[[VAL_9:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to 
memref // CHECK: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1> // CHECK: linalg.fill ins(%[[VAL_5]] : i1) outs(%[[VAL_16]] : memref<32x16xi1>) @@ -669,16 +669,16 @@ func.func @cmp_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %arg } // CHECK-LABEL: func @mul_ss( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf32>) -> tensor<32x16xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_12]] : memref<32x16xf32>) @@ -712,20 +712,20 @@ func.func @mul_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32>, %arg } // CHECK-LABEL: func @add_ss_ss( -// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_2:.*2]]: tensor<32x16xf32>) -> tensor<32x16xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : 
index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_16]] : memref<32x16xf32>) // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref @@ -875,22 +875,22 @@ func.func @add_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #T } // CHECK-LABEL: func.func @cmp_ss_ss( -// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xi1>) -> tensor<32x16xi1> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant false // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0.000000e+00 : f32 -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = 
sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xi1> // CHECK: linalg.fill ins(%[[VAL_3]] : i1) outs(%[[VAL_17]] : memref<32x16xi1>) // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref @@ -1051,19 +1051,19 @@ func.func @cmp_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #T map = (d0, d1) -> (d0 : dense, d1 : loose_compressed) }> // CHECK-LABEL: func.func @sub_ss_batched( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<2x3xf64, #{{.*}}>>, -// CHECK-SAME: %[[VAL_1:.*]]: tensor<2x3xf64, #{{.*}}>>) -> tensor<2x3xf64, #{{.*}}>> { +// CHECK-SAME: %[[VAL_0:.*]]: tensor<2x3xf64, #sparse{{[0-9]*}}>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<2x3xf64, #sparse{{[0-9]*}}>) -> tensor<2x3xf64, #sparse{{[0-9]*}}> { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : index // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_5:.*]] = tensor.empty() : tensor<2x3xf64, #{{.*}}>> -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} 
: tensor<2x3xf64, #{{.*}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<2x3xf64, #{{.*}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<2x3xf64, #{{.*}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<2x3xf64, #{{.*}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<2x3xf64, #{{.*}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<2x3xf64, #{{.*}}>> to memref -// CHECK: %[[VAL_12:.*]] = scf.for %[[VAL_13:.*]] = %[[VAL_3]] to %[[VAL_2]] step %[[VAL_4]] iter_args(%[[VAL_14:.*]] = %[[VAL_5]]) -> (tensor<2x3xf64, #{{.*}}>>) { +// CHECK-DAG: %[[VAL_5:.*]] = tensor.empty() : tensor<2x3xf64, #sparse{{[0-9]*}}> +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<2x3xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<2x3xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<2x3xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<2x3xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<2x3xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<2x3xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_12:.*]] = scf.for %[[VAL_13:.*]] = %[[VAL_3]] to %[[VAL_2]] step %[[VAL_4]] iter_args(%[[VAL_14:.*]] = %[[VAL_5]]) -> (tensor<2x3xf64, #sparse{{[0-9]*}}>) { // CHECK: %[[VAL_15:.*]] = arith.muli %[[VAL_13]], %[[VAL_2]] : index // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_15]]] : memref // CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_15]], %[[VAL_4]] : index @@ -1076,9 +1076,9 @@ func.func @cmp_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #T // CHECK: %[[VAL_27:.*]] = arith.cmpi ult, %[[VAL_24]], %[[VAL_18]] : index // CHECK: %[[VAL_28:.*]] = arith.cmpi ult, %[[VAL_25]], %[[VAL_22]] : index // CHECK: %[[VAL_29:.*]] = arith.andi %[[VAL_27]], %[[VAL_28]] : i1 -// CHECK: scf.condition(%[[VAL_29]]) %[[VAL_24]], %[[VAL_25]], %[[VAL_26]] : index, index, tensor<2x3xf64, #{{.*}}>> +// CHECK: scf.condition(%[[VAL_29]]) %[[VAL_24]], %[[VAL_25]], %[[VAL_26]] : index, index, tensor<2x3xf64, #sparse{{[0-9]*}}> // CHECK: } do { -// CHECK: ^bb0(%[[VAL_30:.*]]: index, %[[VAL_31:.*]]: index, %[[VAL_32:.*]]: tensor<2x3xf64, #{{.*}}>>): +// CHECK: ^bb0(%[[VAL_30:.*]]: index, %[[VAL_31:.*]]: index, %[[VAL_32:.*]]: tensor<2x3xf64, #sparse{{[0-9]*}}>): // CHECK: %[[VAL_33:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_30]]] : memref // CHECK: %[[VAL_34:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_31]]] : memref // CHECK: %[[VAL_35:.*]] = arith.cmpi ult, %[[VAL_34]], %[[VAL_33]] : index @@ -1086,31 +1086,31 @@ func.func @cmp_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #T // CHECK: %[[VAL_37:.*]] = arith.cmpi eq, %[[VAL_33]], %[[VAL_36]] : index // CHECK: %[[VAL_38:.*]] = arith.cmpi eq, %[[VAL_34]], %[[VAL_36]] : index // CHECK: %[[VAL_39:.*]] = arith.andi %[[VAL_37]], %[[VAL_38]] : i1 -// CHECK: %[[VAL_40:.*]] = scf.if %[[VAL_39]] -> (tensor<2x3xf64, #{{.*}}>>) { +// CHECK: %[[VAL_40:.*]] = scf.if %[[VAL_39]] -> (tensor<2x3xf64, #sparse{{[0-9]*}}>) { // 
CHECK: %[[VAL_41:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_30]]] : memref // CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_31]]] : memref // CHECK: %[[VAL_43:.*]] = arith.subf %[[VAL_41]], %[[VAL_42]] : f64 -// CHECK: %[[VAL_44:.*]] = sparse_tensor.insert %[[VAL_43]] into %[[VAL_32]]{{\[}}%[[VAL_13]], %[[VAL_36]]] : tensor<2x3xf64, #{{.*}}>> -// CHECK: scf.yield %[[VAL_44]] : tensor<2x3xf64, #{{.*}}>> +// CHECK: %[[VAL_44:.*]] = sparse_tensor.insert %[[VAL_43]] into %[[VAL_32]]{{\[}}%[[VAL_13]], %[[VAL_36]]] : tensor<2x3xf64, #sparse{{[0-9]*}}> +// CHECK: scf.yield %[[VAL_44]] : tensor<2x3xf64, #sparse{{[0-9]*}}> // CHECK: } else { // CHECK: %[[VAL_45:.*]] = arith.cmpi eq, %[[VAL_33]], %[[VAL_36]] : index -// CHECK: %[[VAL_46:.*]] = scf.if %[[VAL_45]] -> (tensor<2x3xf64, #{{.*}}>>) { +// CHECK: %[[VAL_46:.*]] = scf.if %[[VAL_45]] -> (tensor<2x3xf64, #sparse{{[0-9]*}}>) { // CHECK: %[[VAL_47:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_30]]] : memref -// CHECK: %[[VAL_48:.*]] = sparse_tensor.insert %[[VAL_47]] into %[[VAL_32]]{{\[}}%[[VAL_13]], %[[VAL_36]]] : tensor<2x3xf64, #{{.*}}>> -// CHECK: scf.yield %[[VAL_48]] : tensor<2x3xf64, #{{.*}}>> +// CHECK: %[[VAL_48:.*]] = sparse_tensor.insert %[[VAL_47]] into %[[VAL_32]]{{\[}}%[[VAL_13]], %[[VAL_36]]] : tensor<2x3xf64, #sparse{{[0-9]*}}> +// CHECK: scf.yield %[[VAL_48]] : tensor<2x3xf64, #sparse{{[0-9]*}}> // CHECK: } else { // CHECK: %[[VAL_49:.*]] = arith.cmpi eq, %[[VAL_34]], %[[VAL_36]] : index -// CHECK: %[[VAL_50:.*]] = scf.if %[[VAL_49]] -> (tensor<2x3xf64, #{{.*}}>>) { +// CHECK: %[[VAL_50:.*]] = scf.if %[[VAL_49]] -> (tensor<2x3xf64, #sparse{{[0-9]*}}>) { // CHECK: %[[VAL_51:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_31]]] : memref // CHECK: %[[VAL_52:.*]] = arith.negf %[[VAL_51]] : f64 -// CHECK: %[[VAL_53:.*]] = sparse_tensor.insert %[[VAL_52]] into %[[VAL_32]]{{\[}}%[[VAL_13]], %[[VAL_36]]] : tensor<2x3xf64, #{{.*}}>> -// CHECK: scf.yield %[[VAL_53]] : tensor<2x3xf64, #{{.*}}>> +// CHECK: %[[VAL_53:.*]] = sparse_tensor.insert %[[VAL_52]] into %[[VAL_32]]{{\[}}%[[VAL_13]], %[[VAL_36]]] : tensor<2x3xf64, #sparse{{[0-9]*}}> +// CHECK: scf.yield %[[VAL_53]] : tensor<2x3xf64, #sparse{{[0-9]*}}> // CHECK: } else { -// CHECK: scf.yield %[[VAL_32]] : tensor<2x3xf64, #{{.*}}>> +// CHECK: scf.yield %[[VAL_32]] : tensor<2x3xf64, #sparse{{[0-9]*}}> // CHECK: } -// CHECK: scf.yield %[[VAL_54:.*]] : tensor<2x3xf64, #{{.*}}>> +// CHECK: scf.yield %[[VAL_54:.*]] : tensor<2x3xf64, #sparse{{[0-9]*}}> // CHECK: } -// CHECK: scf.yield %[[VAL_55:.*]] : tensor<2x3xf64, #{{.*}}>> +// CHECK: scf.yield %[[VAL_55:.*]] : tensor<2x3xf64, #sparse{{[0-9]*}}> // CHECK: } // CHECK: %[[VAL_56:.*]] = arith.cmpi eq, %[[VAL_33]], %[[VAL_36]] : index // CHECK: %[[VAL_57:.*]] = arith.addi %[[VAL_30]], %[[VAL_4]] : index @@ -1118,25 +1118,25 @@ func.func @cmp_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #T // CHECK: %[[VAL_59:.*]] = arith.cmpi eq, %[[VAL_34]], %[[VAL_36]] : index // CHECK: %[[VAL_60:.*]] = arith.addi %[[VAL_31]], %[[VAL_4]] : index // CHECK: %[[VAL_61:.*]] = arith.select %[[VAL_59]], %[[VAL_60]], %[[VAL_31]] : index -// CHECK: scf.yield %[[VAL_58]], %[[VAL_61]], %[[VAL_62:.*]] : index, index, tensor<2x3xf64, #{{.*}}>> +// CHECK: scf.yield %[[VAL_58]], %[[VAL_61]], %[[VAL_62:.*]] : index, index, tensor<2x3xf64, #sparse{{[0-9]*}}> // CHECK: } attributes // CHECK: %[[VAL_63:.*]] = scf.for %[[VAL_64:.*]] = %[[VAL_65:.*]]#0 to %[[VAL_18]] step %[[VAL_4]] iter_args(%[[VAL_66:.*]] = %[[VAL_65]]#2) // CHECK: %[[VAL_67:.*]] = 
memref.load %[[VAL_7]]{{\[}}%[[VAL_64]]] : memref // CHECK: %[[VAL_68:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_64]]] : memref -// CHECK: %[[VAL_69:.*]] = sparse_tensor.insert %[[VAL_68]] into %[[VAL_66]]{{\[}}%[[VAL_13]], %[[VAL_67]]] : tensor<2x3xf64, #{{.*}}>> -// CHECK: scf.yield %[[VAL_69]] : tensor<2x3xf64, #{{.*}}>> +// CHECK: %[[VAL_69:.*]] = sparse_tensor.insert %[[VAL_68]] into %[[VAL_66]]{{\[}}%[[VAL_13]], %[[VAL_67]]] : tensor<2x3xf64, #sparse{{[0-9]*}}> +// CHECK: scf.yield %[[VAL_69]] : tensor<2x3xf64, #sparse{{[0-9]*}}> // CHECK: } // CHECK: %[[VAL_70:.*]] = scf.for %[[VAL_71:.*]] = %[[VAL_72:.*]]#1 to %[[VAL_22]] step %[[VAL_4]] iter_args(%[[VAL_73:.*]] = %[[VAL_74:.*]]) // CHECK: %[[VAL_75:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_71]]] : memref // CHECK: %[[VAL_76:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_71]]] : memref // CHECK: %[[VAL_77:.*]] = arith.negf %[[VAL_76]] : f64 -// CHECK: %[[VAL_78:.*]] = sparse_tensor.insert %[[VAL_77]] into %[[VAL_73]]{{\[}}%[[VAL_13]], %[[VAL_75]]] : tensor<2x3xf64, #{{.*}}>> -// CHECK: scf.yield %[[VAL_78]] : tensor<2x3xf64, #{{.*}}>> +// CHECK: %[[VAL_78:.*]] = sparse_tensor.insert %[[VAL_77]] into %[[VAL_73]]{{\[}}%[[VAL_13]], %[[VAL_75]]] : tensor<2x3xf64, #sparse{{[0-9]*}}> +// CHECK: scf.yield %[[VAL_78]] : tensor<2x3xf64, #sparse{{[0-9]*}}> // CHECK: } -// CHECK: scf.yield %[[VAL_79:.*]] : tensor<2x3xf64, #{{.*}}>> +// CHECK: scf.yield %[[VAL_79:.*]] : tensor<2x3xf64, #sparse{{[0-9]*}}> // CHECK: } -// CHECK: %[[VAL_80:.*]] = sparse_tensor.load %[[VAL_81:.*]] hasInserts : tensor<2x3xf64, #{{.*}}>> -// CHECK: return %[[VAL_80]] : tensor<2x3xf64, #{{.*}}>> +// CHECK: %[[VAL_80:.*]] = sparse_tensor.load %[[VAL_81:.*]] hasInserts : tensor<2x3xf64, #sparse{{[0-9]*}}> +// CHECK: return %[[VAL_80]] : tensor<2x3xf64, #sparse{{[0-9]*}}> // CHECK: } func.func @sub_ss_batched(%0: tensor<2x3xf64, #BatchedVector>, %1: tensor<2x3xf64, #BatchedVector>) -> tensor<2x3xf64, #BatchedVector> { @@ -1152,20 +1152,20 @@ func.func @sub_ss_batched(%0: tensor<2x3xf64, #BatchedVector>, %1: tensor<2x3xf6 } // CHECK-LABEL: func @mul_ss_ss( -// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_2:.*2]]: tensor<32x16xf32>) -> tensor<32x16xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : 
tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_16]] : memref<32x16xf32>) // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref @@ -1247,19 +1247,19 @@ func.func @mul_ss_ss(%arga: tensor<32x16xf32, #Tss>, %argb: tensor<32x16xf32, #T } // CHECK-LABEL: func @add_sd_ds( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf32>) -> tensor<32x16xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant true // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref 
-// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> // CHECK: linalg.fill ins(%{{.*}} : f32) outs(%[[VAL_15]] : memref<32x16xf32>) // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_5]]] : memref @@ -1352,17 +1352,17 @@ func.func @add_sd_ds(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32, #T } // CHECK-LABEL: func @mul_sd_ds( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf32>) -> tensor<32x16xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 16 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf32> // CHECK: linalg.fill ins(%{{.*}} : 
f32) outs(%[[VAL_13]] : memref<32x16xf32>) // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref @@ -1407,15 +1407,15 @@ func.func @mul_sd_ds(%arga: tensor<32x16xf32, #Tsd>, %argb: tensor<32x16xf32, #T } // CHECK-LABEL: func @matvec( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<16x32xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<16x32xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<16xf32>) -> tensor<16xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 16 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<16x32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<16x32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16x32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<16x32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<16x32xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<16x32xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<16xf32> // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { @@ -1458,13 +1458,13 @@ func.func @matvec(%argA: tensor<16x32xf32, #Tds>, %argb: tensor<32xf32>, %argx: } // CHECK-LABEL: func @sum_reduction( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor) -> tensor { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 10 : index // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref // CHECK: %[[VAL_10:.*]] = scf.for %[[VAL_11:.*]] = %[[VAL_4]] to %[[VAL_2]] step %[[VAL_3]] iter_args(%[[VAL_12:.*]] = %[[VAL_9]]) -> (f32) { @@ -1503,15 +1503,15 @@ func.func @sum_reduction(%arga: tensor<10x20xf32, #Tds>, %argx: tensor) -> } // CHECK-LABEL: func @scale( -// CHECK-SAME: %[[VAL_0:.*]]: tensor>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor, // CHECK-SAME: %[[VAL_1:.*]]: tensor) -> tensor { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2.000000e+00 : f64 // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions 
%[[VAL_0]] {level = 1 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[VAL_3]] : tensor> +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[VAL_3]] : tensor // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref // CHECK: linalg.fill ins(%{{.*}} : f64) outs(%[[VAL_11]] : memref) // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_3]] to %[[VAL_8]] step %[[VAL_4]] { @@ -1552,17 +1552,17 @@ func.func @scale(%arga: tensor, %argx: tensor) -> tensor } // CHECK-LABEL: func.func @sampled_dense_dense( -// CHECK-SAME: %[[VAL_0:.*0]]: tensor>, +// CHECK-SAME: %[[VAL_0:.*0]]: tensor, // CHECK-SAME: %[[VAL_1:.*1]]: tensor, // CHECK-SAME: %[[VAL_2:.*2]]: tensor, // CHECK-SAME: %[[VAL_3:.*3]]: tensor) -> tensor { // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor to memref // CHECK-DAG: %[[VAL_11:.*]] = tensor.dim %[[VAL_1]], %[[VAL_4]] : tensor // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref @@ -1621,24 +1621,24 @@ func.func @sampled_dense_dense(%args: tensor, } // CHECK-LABEL: func @sum_kernel_with_inv( -// CHECK-SAME: %[[VAL_0:.*0]]: tensor>, %[[VAL_1:.*1]]: tensor>, %[[VAL_2:.*2]]: tensor>, +// CHECK-SAME: %[[VAL_0:.*0]]: tensor, %[[VAL_1:.*1]]: tensor, %[[VAL_2:.*2]]: tensor, // CHECK-SAME: %[[VAL_3:.*3]]: tensor, // CHECK-SAME: %[[VAL_4:.*4]]: tensor, // CHECK-SAME: %[[VAL_5:.*5]]: tensor) -> tensor { // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_8:.*]] = arith.constant true -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_12:.*]] = 
sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor> to memref -// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor> to memref -// CHECK: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> to memref -// CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 1 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_18:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 1 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_19:.*]] = sparse_tensor.values %[[VAL_2]] : tensor> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor to memref +// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor to memref +// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor to memref +// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor to memref +// CHECK: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref +// CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 1 : index} : tensor to memref +// CHECK-DAG: %[[VAL_18:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 1 : index} : tensor to memref +// CHECK-DAG: %[[VAL_19:.*]] = sparse_tensor.values %[[VAL_2]] : tensor to memref // CHECK-DAG: %[[VAL_20:.*]] = bufferization.to_memref %[[VAL_3]] : memref // CHECK-DAG: %[[VAL_21:.*]] = bufferization.to_memref %[[VAL_4]] : memref // CHECK-DAG: %[[VAL_22:.*]] = sparse_tensor.lvl %[[VAL_2]], %[[VAL_6]] : tensor>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { // CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 @@ -32,7 +32,7 @@ // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 8 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> // CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_11]] : memref<32x16x8xf32>) @@ -65,7 +65,7 @@ func.func @add_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32> } // CHECK-LABEL: func @mul_ddd( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { // CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 @@ -74,7 +74,7 @@ func.func 
@add_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 8 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> // CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_11]] : memref<32x16x8xf32>) @@ -107,7 +107,7 @@ func.func @mul_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32> } // CHECK-LABEL: func @add_dds( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { // CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 @@ -117,9 +117,9 @@ func.func @mul_ddd(%arga: tensor<32x16x8xf32, #Tddd>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_8:.*]] = arith.constant true // CHECK-DAG: %[[VAL_9:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> // CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_15]] : memref<32x16x8xf32>) @@ -176,7 +176,7 @@ func.func @add_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32> } // CHECK-LABEL: func @mul_dds( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { // CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 @@ -184,9 +184,9 @@ func.func @add_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 16 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 
: index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> // CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_13]] : memref<32x16x8xf32>) @@ -221,7 +221,7 @@ func.func @mul_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32> } // CHECK-LABEL: func @add_dsd( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { // CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 @@ -231,9 +231,9 @@ func.func @mul_dds(%arga: tensor<32x16x8xf32, #Tdds>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_6:.*]] = arith.constant true // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_8:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> // CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) @@ -294,7 +294,7 @@ func.func @add_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32> } // CHECK-LABEL: func @mul_dsd( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { // CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 @@ -302,9 +302,9 @@ func.func @add_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 8 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, 
#sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> // CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_12]] : memref<32x16x8xf32>) @@ -339,7 +339,7 @@ func.func @mul_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32> } // CHECK-LABEL: func @add_dss( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { // CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 @@ -349,11 +349,11 @@ func.func @mul_dsd(%arga: tensor<32x16x8xf32, #Tdsd>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_7:.*]] = arith.constant true // CHECK-DAG: %[[VAL_8:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_9:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> // CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_17]] : memref<32x16x8xf32>) @@ -438,18 +438,18 @@ func.func @add_dss(%arga: tensor<32x16x8xf32, #Tdss>, %argb: tensor<32x16x8xf32> } // CHECK-LABEL: func @mul_dss( -// 
CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { // CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 32 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> // CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) @@ -486,7 +486,7 @@ func.func @mul_dss(%arga: tensor<32x16x8xf32, #Tdss>, %argb: tensor<32x16x8xf32> } // CHECK-LABEL: func @add_sdd( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { // CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 @@ -496,9 +496,9 @@ func.func @mul_dss(%arga: tensor<32x16x8xf32, #Tdss>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_6:.*]] = arith.constant true // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_8:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = 
sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> // CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) @@ -564,7 +564,7 @@ func.func @add_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32> } // CHECK-LABEL: func @mul_sdd( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { // CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 @@ -572,9 +572,9 @@ func.func @add_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 8 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> // CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_12]] : memref<32x16x8xf32>) @@ -610,7 +610,7 @@ func.func @mul_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32> } // CHECK-LABEL: func @add_sds( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { // CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 @@ -620,11 +620,11 @@ func.func @mul_sdd(%arga: tensor<32x16x8xf32, #Tsdd>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_7:.*]] = arith.constant true // CHECK-DAG: %[[VAL_8:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_9:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: 
%[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> // CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_17]] : memref<32x16x8xf32>) @@ -714,18 +714,18 @@ func.func @add_sds(%arga: tensor<32x16x8xf32, #Tsds>, %argb: tensor<32x16x8xf32> } // CHECK-LABEL: func @mul_sds( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { // CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 16 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> // CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_14]] : memref<32x16x8xf32>) @@ -763,7 +763,7 @@ 
func.func @mul_sds(%arga: tensor<32x16x8xf32, #Tsds>, %argb: tensor<32x16x8xf32> } // CHECK-LABEL: func @add_ssd( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { // CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 @@ -773,11 +773,11 @@ func.func @mul_sds(%arga: tensor<32x16x8xf32, #Tsds>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_6:.*]] = arith.constant true // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_8:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> // CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_16]] : memref<32x16x8xf32>) @@ -871,18 +871,18 @@ func.func @add_ssd(%arga: tensor<32x16x8xf32, #Tssd>, %argb: tensor<32x16x8xf32> } // CHECK-LABEL: func @mul_ssd( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { // CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 8 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = 
sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> // CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_13]] : memref<32x16x8xf32>) @@ -920,7 +920,7 @@ func.func @mul_ssd(%arga: tensor<32x16x8xf32, #Tssd>, %argb: tensor<32x16x8xf32> } // CHECK-LABEL: func @add_sss( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { // CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 @@ -930,13 +930,13 @@ func.func @mul_ssd(%arga: tensor<32x16x8xf32, #Tssd>, %argb: tensor<32x16x8xf32> // CHECK-DAG: %[[VAL_7:.*]] = arith.constant true // CHECK-DAG: %[[VAL_8:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_9:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// 
CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_17:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_19:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> // CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_19]] : memref<32x16x8xf32>) @@ -1054,19 +1054,19 @@ func.func @add_sss(%arga: tensor<32x16x8xf32, #Tsss>, %argb: tensor<32x16x8xf32> } // CHECK-LABEL: func @mul_sss( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16x8xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32x16x8xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16x8xf32>) -> tensor<32x16x8xf32> { // CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16x8xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32x16x8xf32> // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16x8xf32> // CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_15]] : memref<32x16x8xf32>) @@ -1118,18 +1118,18 @@ func.func @mul_sss(%arga: 
tensor<32x16x8xf32, #Tsss>, %argb: tensor<32x16x8xf32> // CHECK-LABEL: func @kernel_3d( // CHECK-SAME: %[[VAL_0:.*0]]: tensor, -// CHECK-SAME: %[[VAL_1:.*1]]: tensor>, +// CHECK-SAME: %[[VAL_1:.*1]]: tensor, // CHECK-SAME: %[[VAL_2:.*2]]: tensor, // CHECK-SAME: %[[VAL_3:.*3]]: tensor) -> tensor { // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 2 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 2 : index} : tensor> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.lvl %[[VAL_1]], %[[VAL_6]] : tensor> +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 2 : index} : tensor to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 2 : index} : tensor to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.lvl %[[VAL_1]], %[[VAL_6]] : tensor // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_3]] : memref -// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.lvl %[[VAL_1]], %[[VAL_5]] : tensor> +// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.lvl %[[VAL_1]], %[[VAL_5]] : tensor // CHECK-DAG: %[[VAL_14:.*]] = tensor.dim %[[VAL_2]], %[[VAL_6]] : tensor // CHECK-DAG: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_0]] : memref // CHECK: scf.for %[[VAL_17:.*]] = %[[VAL_5]] to %[[VAL_13]] step %[[VAL_6]] { @@ -1183,14 +1183,14 @@ func.func @kernel_3d(%arga: tensor, } // CHECK-LABEL: func @sum_reduction( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20x30xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor) -> tensor { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<10x20x30xf32, #sparse{{[0-9]*}}> +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<10x20x30xf32, #sparse{{[0-9]*}}> +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor<10x20x30xf32, #sparse{{[0-9]*}}> +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20x30xf32, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref // CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_10]][] : memref // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref @@ -1239,7 +1239,7 @@ func.func @sum_reduction(%arga: tensor<10x20x30xf32, #Tsss>, %argx: tensor) // CHECK-LABEL: func @sum_reduction_inv( // CHECK-SAME: %[[VAL_0:.*]]: 
tensor, -// CHECK-SAME: %[[VAL_1:.*]]: tensor> +// CHECK-SAME: %[[VAL_1:.*]]: tensor // CHECK-SAME: %[[VAL_2:.*]]: tensor) -> tensor { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index @@ -1248,7 +1248,7 @@ func.func @sum_reduction(%arga: tensor<10x20x30xf32, #Tsss>, %argx: tensor) // CHECK-DAG: %[[VAL_7:.*]] = tensor.dim %[[VAL_0]], %[[VAL_4]] : tensor // CHECK-DAG: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref // CHECK-DAG: %[[VAL_9:.*]] = tensor.dim %[[VAL_0]], %[[VAL_5]] : tensor -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_12]][] : memref // CHECK: %[[VAL_14:.*]] = scf.for %[[VAL_15:.*]] = %[[VAL_5]] to %[[VAL_9]] step %[[VAL_3]] iter_args(%[[VAL_16:.*]] = %[[VAL_13]]) -> (f32) { @@ -1294,7 +1294,7 @@ func.func @sum_reduction_inv(%arga: tensor, } // CHECK-LABEL: func @invariants( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<10xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<10xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<20xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<30xf32>, // CHECK-SAME: %[[VAL_3:.*]]: tensor<10x20x30xf32>) -> tensor<10x20x30xf32> { @@ -1304,7 +1304,7 @@ func.func @sum_reduction_inv(%arga: tensor, // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 30 : index // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_8:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<20xf32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<30xf32> // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_3]] : memref<10x20x30xf32> diff --git a/mlir/test/Dialect/SparseTensor/sparse_affine.mlir b/mlir/test/Dialect/SparseTensor/sparse_affine.mlir index ca55d2e..aa75e20 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_affine.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_affine.mlir @@ -16,15 +16,15 @@ } // CHECK-LABEL: func @mul_inv_dense1d( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<4xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf32>) -> tensor<32xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 3 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf32, #sparse{{[0-9]*}}> +// CHECK-DAG: %[[VAL_8:.*]] = 
sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<4xf32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf32> // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref<4xf32> @@ -56,16 +56,16 @@ func.func @mul_inv_dense1d(%arga: tensor<32xf32, #SpVec>, } // CHECK-LABEL: func.func @mul_inv_enc_dense1d( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>>, -// CHECK-SAME: %[[VAL_1:.*]]: tensor<4xf32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> { +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32, #sparse{{[0-9]*}}>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<4xf32, #sparse{{[0-9]*}}>) -> tensor<32xf32, #sparse{{[0-9]*}}> { // CHECK: %[[VAL_2:.*]] = arith.constant 32 : index // CHECK: %[[VAL_3:.*]] = arith.constant 3 : index // CHECK: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_6:.*]] = tensor.empty() : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<4xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_6]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_6:.*]] = tensor.empty() : tensor<32xf32, #sparse{{[0-9]*}}> +// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<4xf32, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_6]] : tensor<32xf32, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_4]] to %[[VAL_2]] step %[[VAL_5]] { // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_11]]] : memref @@ -74,8 +74,8 @@ func.func @mul_inv_dense1d(%arga: tensor<32xf32, #SpVec>, // CHECK: %[[VAL_15:.*]] = arith.addf %[[VAL_12]], %[[VAL_14]] : f32 // CHECK: memref.store %[[VAL_15]], %[[VAL_9]]{{\[}}%[[VAL_11]]] : memref // CHECK: } -// CHECK: %[[VAL_16:.*]] = sparse_tensor.load %[[VAL_6]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: return %[[VAL_16]] : tensor<32xf32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_16:.*]] = sparse_tensor.load %[[VAL_6]] : tensor<32xf32, #sparse{{[0-9]*}}> +// CHECK: return %[[VAL_16]] : tensor<32xf32, #sparse{{[0-9]*}}> // CHECK: } func.func @mul_inv_enc_dense1d(%arga: tensor<32xf32, #EncDenseVec>, %argb: tensor<4xf32, #EncDenseVec>) -> tensor<32xf32, #EncDenseVec> { @@ -102,16 +102,16 @@ func.func @mul_inv_enc_dense1d(%arga: tensor<32xf32, #EncDenseVec>, } // CHECK-LABEL: func @and_affine_dense1d( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<34xi32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32xi32>) -> tensor<32xi32> { // CHECK-DAG: %[[ZERO:.*]] = arith.constant 0 : i32 // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : 
tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi32, #sparse{{[0-9]*}}> +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi32, #sparse{{[0-9]*}}> +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi32, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<34xi32> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi32> // CHECK: linalg.fill ins(%[[ZERO]] : i32) outs(%[[VAL_11]] : memref<32xi32>) @@ -152,7 +152,7 @@ func.func @and_affine_dense1d(%arga: tensor<32xi32, #SpVec>, } // CHECK-LABEL: func @mul_affine_dense2d( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<34x19xf64>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf64>) -> tensor<32x16xf64> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index @@ -160,9 +160,9 @@ func.func @and_affine_dense1d(%arga: tensor<32xi32, #SpVec>, // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 2 : index // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 3 : index -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf64, #sparse{{[0-9]*}}> +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf64, #sparse{{[0-9]*}}> +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf64, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<34x19xf64> // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf64> // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_5]] to %[[VAL_4]] step %[[VAL_3]] { @@ -209,20 +209,20 @@ func.func @mul_affine_dense2d(%arga: tensor<32x16xf64, #CSR>, } // CHECK-LABEL: func.func @mul_affine_dense_dim_2d( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<34x16xf64, #sparse_tensor.encoding -// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x19xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<34x16xf64, #sparse{{[0-9]*}}> +// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x19xf64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf64>) -> tensor<32x16xf64> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 19 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 2 : index // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 3 : index -// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<34x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_9:.*]] 
= sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<34x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<34x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<34x16xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<34x16xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<34x16xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x19xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x19xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x19xf64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf64> // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_5]]] : memref @@ -274,19 +274,19 @@ func.func @mul_affine_dense_dim_2d(%arga: tensor<34x16xf64, #CSR>, // CHECK-LABEL: func.func @mul_const_affine_dense_dim_2d( // CHECK-SAME: %[[VAL_0:.*]]: tensor<34x16xf64, -// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x19xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<32x19xf64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32x16xf64>) -> tensor<32x16xf64> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 19 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 3 : index -// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<34x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<34x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<34x16xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x19xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<34x16xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<34x16xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<34x16xf64, 
#sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<32x19xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<32x19xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<32x19xf64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32x16xf64> // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_5]]] : memref // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_6]]] : memref diff --git a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir index 86dc9a1..a323fa8 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_concat.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_concat.mlir @@ -7,19 +7,19 @@ #DCSR = #sparse_tensor.encoding<{map = (d0, d1) -> (d0 : compressed, d1 : compressed)}> // CHECK-LABEL: @concat_sparse_sparse( -// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<2x4xf64, #sparse_tensor -// CHECK-SAME: %[[TMP_arg1:.*]]: tensor<3x4xf64, #sparse_tensor -// CHECK-SAME: %[[TMP_arg2:.*]]: tensor<4x4xf64, #sparse_tensor +// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<2x4xf64, #sparse> +// CHECK-SAME: %[[TMP_arg1:.*]]: tensor<3x4xf64, #sparse> +// CHECK-SAME: %[[TMP_arg2:.*]]: tensor<4x4xf64, #sparse> // CHECK: %[[TMP_c0:.*]] = arith.constant 0 : index // CHECK: %[[TMP_c1:.*]] = arith.constant 1 : index // CHECK: %[[TMP_c5:.*]] = arith.constant 5 : index // CHECK: %[[TMP_c2:.*]] = arith.constant 2 : index -// CHECK: %[[TMP_0:.*]] = bufferization.alloc_tensor() : tensor<9x4xf64, #sparse_tensor -// CHECK: %[[TMP_1:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_2:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_3:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_4:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[TMP_5:.*]] = sparse_tensor.values %[[TMP_arg0]] : tensor<2x4xf64, #sparse_tensor +// CHECK: %[[TMP_0:.*]] = bufferization.alloc_tensor() : tensor<9x4xf64, #sparse> +// CHECK: %[[TMP_1:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse> +// CHECK: %[[TMP_2:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse> +// CHECK: %[[TMP_3:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse> +// CHECK: %[[TMP_4:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse> +// CHECK: %[[TMP_5:.*]] = sparse_tensor.values %[[TMP_arg0]] : tensor<2x4xf64, #sparse> // CHECK: %[[TMP_6:.*]] = memref.load %[[TMP_1]][%[[TMP_c0]]] : memref // CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_1]][%[[TMP_c1]]] : memref // CHECK: %[[RET_1:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_6]] to %[[TMP_7]] step %[[TMP_c1]] iter_args(%[[A0:.*]] = %[[TMP_0]]) @@ -30,16 +30,16 @@ // CHECK: %[[RET_4:.*]] = scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] iter_args(%[[A1:.*]] = %[[A0]]) // CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_4]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_5]][%[[TMP_arg4]]] : memref -// CHECK: %[[NEW_1:.*]] = tensor.insert %[[TMP_28]] into %[[A1]][%[[TMP_23]], 
%[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor +// CHECK: %[[NEW_1:.*]] = tensor.insert %[[TMP_28]] into %[[A1]][%[[TMP_23]], %[[TMP_27]]] : tensor<9x4xf64, #sparse> // CHECK: scf.yield %[[NEW_1]] // CHECK: } // CHECK: scf.yield %[[RET_4]] // CHECK: } -// CHECK: %[[TMP_8:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_9:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_10:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_11:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_12:.*]] = sparse_tensor.values %[[TMP_arg1]] : tensor<3x4xf64, #sparse_tensor +// CHECK: %[[TMP_8:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse> +// CHECK: %[[TMP_9:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse> +// CHECK: %[[TMP_10:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse> +// CHECK: %[[TMP_11:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse> +// CHECK: %[[TMP_12:.*]] = sparse_tensor.values %[[TMP_arg1]] : tensor<3x4xf64, #sparse> // CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_8]][%[[TMP_c0]]] : memref // CHECK: %[[TMP_14:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref // CHECK: %[[RET_2:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_13]] to %[[TMP_14]] step %[[TMP_c1]] iter_args(%[[A2:.*]] = %[[RET_1]]) @@ -51,16 +51,16 @@ // CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_11]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_12]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index -// CHECK: %[[NEW_2:.*]] = tensor.insert %[[TMP_28]] into %[[A3]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor +// CHECK: %[[NEW_2:.*]] = tensor.insert %[[TMP_28]] into %[[A3]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse> // CHECK: scf.yield %[[NEW_2]] // CHECK: } // CHECK: scf.yield %[[RET_5]] // CHECK: } -// CHECK: %[[TMP_15:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_16:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_17:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_18:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_19:.*]] = sparse_tensor.values %[[TMP_arg2]] : tensor<4x4xf64, #sparse_tensor +// CHECK: %[[TMP_15:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse> +// CHECK: %[[TMP_16:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse> +// CHECK: %[[TMP_17:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse> +// CHECK: %[[TMP_18:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse> +// CHECK: %[[TMP_19:.*]] = sparse_tensor.values %[[TMP_arg2]] : tensor<4x4xf64, #sparse> // CHECK: %[[TMP_20:.*]] = memref.load %[[TMP_15]][%[[TMP_c0]]] : memref // CHECK: %[[TMP_21:.*]] = memref.load %[[TMP_15]][%[[TMP_c1]]] : memref // CHECK: %[[RET_3:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_20]] to 
%[[TMP_21]] step %[[TMP_c1]] iter_args(%[[A4:.*]] = %[[RET_2]]) @@ -72,13 +72,13 @@ // CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_18]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_19]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c5]] : index -// CHECK: %[[NEW_3:.*]] = tensor.insert %[[TMP_28]] into %[[A5]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse_tensor +// CHECK: %[[NEW_3:.*]] = tensor.insert %[[TMP_28]] into %[[A5]][%[[TMP_29]], %[[TMP_27]]] : tensor<9x4xf64, #sparse> // CHECK: scf.yield %[[NEW_3]] // CHECK: } // CHECK: scf.yield %[[RET_6]] // CHECK: } // CHECK: %[[TMP_23:.*]] = sparse_tensor.load %[[RET_3]] hasInserts -// CHECK: return %[[TMP_23]] : tensor<9x4xf64, #sparse_tensor +// CHECK: return %[[TMP_23]] : tensor<9x4xf64, #sparse> func.func @concat_sparse_sparse(%arg0: tensor<2x4xf64, #DCSR>, %arg1: tensor<3x4xf64, #DCSR>, %arg2: tensor<4x4xf64, #DCSR>) @@ -91,21 +91,21 @@ func.func @concat_sparse_sparse(%arg0: tensor<2x4xf64, #DCSR>, } // CHECK-LABEL: @concat_sparse_sparse_dynamic( -// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<2x4xf64, #sparse_tensor -// CHECK-SAME: %[[TMP_arg1:.*]]: tensor<3x4xf64, #sparse_tensor -// CHECK-SAME: %[[TMP_arg2:.*]]: tensor<4x4xf64, #sparse_tensor +// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<2x4xf64, #sparse> +// CHECK-SAME: %[[TMP_arg1:.*]]: tensor<3x4xf64, #sparse> +// CHECK-SAME: %[[TMP_arg2:.*]]: tensor<4x4xf64, #sparse> // CHECK-DAG: %[[TMP_c0:.*]] = arith.constant 0 : index // CHECK-DAG: %[[TMP_c1:.*]] = arith.constant 1 : index // CHECK-DAG: %[[TMP_c5:.*]] = arith.constant 5 : index // CHECK-DAG: %[[TMP_c2:.*]] = arith.constant 2 : index // CHECK-DAG: %[[TMP_c9:.*]] = arith.constant 9 : index // CHECK-DAG: %[[TMP_c4:.*]] = arith.constant 4 : index -// CHECK: %[[TMP_0:.*]] = bufferization.alloc_tensor(%[[TMP_c9]], %[[TMP_c4]]) : tensor +// CHECK: %[[TMP_1:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse> +// CHECK: %[[TMP_2:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 0 : index} : tensor<2x4xf64, #sparse> +// CHECK: %[[TMP_3:.*]] = sparse_tensor.positions %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse> +// CHECK: %[[TMP_4:.*]] = sparse_tensor.coordinates %[[TMP_arg0]] {level = 1 : index} : tensor<2x4xf64, #sparse> +// CHECK: %[[TMP_5:.*]] = sparse_tensor.values %[[TMP_arg0]] : tensor<2x4xf64, #sparse> // CHECK: %[[TMP_6:.*]] = memref.load %[[TMP_1]][%[[TMP_c0]]] : memref // CHECK: %[[TMP_7:.*]] = memref.load %[[TMP_1]][%[[TMP_c1]]] : memref // CHECK: %[[RET_1:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_6]] to %[[TMP_7]] step %[[TMP_c1]] iter_args(%[[A0:.*]] = %[[TMP_0]]) @@ -116,16 +116,16 @@ func.func @concat_sparse_sparse(%arg0: tensor<2x4xf64, #DCSR>, // CHECK: %[[RET_4:.*]] = scf.for %[[TMP_arg4:.*]] = %[[TMP_25]] to %[[TMP_26]] step %[[TMP_c1]] iter_args(%[[A1:.*]] = %[[A0]]) // CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_4]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_5]][%[[TMP_arg4]]] : memref -// CHECK: %[[NEW_1:.*]] = tensor.insert %[[TMP_28]] into %[[A1]][%[[TMP_23]], %[[TMP_27]]] : tensor // CHECK: scf.yield %[[NEW_1]] // CHECK: } // CHECK: scf.yield %[[RET_4]] // CHECK: } -// CHECK: %[[TMP_8:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_9:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_10:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 1 
: index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_11:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[TMP_12:.*]] = sparse_tensor.values %[[TMP_arg1]] : tensor<3x4xf64, #sparse_tensor +// CHECK: %[[TMP_8:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse> +// CHECK: %[[TMP_9:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 0 : index} : tensor<3x4xf64, #sparse> +// CHECK: %[[TMP_10:.*]] = sparse_tensor.positions %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse> +// CHECK: %[[TMP_11:.*]] = sparse_tensor.coordinates %[[TMP_arg1]] {level = 1 : index} : tensor<3x4xf64, #sparse> +// CHECK: %[[TMP_12:.*]] = sparse_tensor.values %[[TMP_arg1]] : tensor<3x4xf64, #sparse> // CHECK: %[[TMP_13:.*]] = memref.load %[[TMP_8]][%[[TMP_c0]]] : memref // CHECK: %[[TMP_14:.*]] = memref.load %[[TMP_8]][%[[TMP_c1]]] : memref // CHECK: %[[RET_2:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_13]] to %[[TMP_14]] step %[[TMP_c1]] iter_args(%[[A2:.*]] = %[[RET_1]]) @@ -137,16 +137,16 @@ func.func @concat_sparse_sparse(%arg0: tensor<2x4xf64, #DCSR>, // CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_11]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_12]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c2]] : index -// CHECK: %[[NEW_2:.*]] = tensor.insert %[[TMP_28]] into %[[A3]][%[[TMP_29]], %[[TMP_27]]] : tensor // CHECK: scf.yield %[[NEW_2]] // CHECK: } // CHECK: scf.yield %[[RET_5]] // CHECK: } -// CHECK: %[[TMP_15:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_16:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_17:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_18:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[TMP_19:.*]] = sparse_tensor.values %[[TMP_arg2]] : tensor<4x4xf64, #sparse_tensor +// CHECK: %[[TMP_15:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse> +// CHECK: %[[TMP_16:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 0 : index} : tensor<4x4xf64, #sparse> +// CHECK: %[[TMP_17:.*]] = sparse_tensor.positions %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse> +// CHECK: %[[TMP_18:.*]] = sparse_tensor.coordinates %[[TMP_arg2]] {level = 1 : index} : tensor<4x4xf64, #sparse> +// CHECK: %[[TMP_19:.*]] = sparse_tensor.values %[[TMP_arg2]] : tensor<4x4xf64, #sparse> // CHECK: %[[TMP_20:.*]] = memref.load %[[TMP_15]][%[[TMP_c0]]] : memref // CHECK: %[[TMP_21:.*]] = memref.load %[[TMP_15]][%[[TMP_c1]]] : memref // CHECK: %[[RET_3:.*]] = scf.for %[[TMP_arg3:.*]] = %[[TMP_20]] to %[[TMP_21]] step %[[TMP_c1]] iter_args(%[[A4:.*]] = %[[RET_2]]) @@ -158,13 +158,13 @@ func.func @concat_sparse_sparse(%arg0: tensor<2x4xf64, #DCSR>, // CHECK: %[[TMP_27:.*]] = memref.load %[[TMP_18]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_28:.*]] = memref.load %[[TMP_19]][%[[TMP_arg4]]] : memref // CHECK: %[[TMP_29:.*]] = arith.addi %[[TMP_23]], %[[TMP_c5]] : index -// CHECK: %[[NEW_3:.*]] = tensor.insert %[[TMP_28]] into %[[A5]][%[[TMP_29]], %[[TMP_27]]] : tensor // CHECK: scf.yield %[[NEW_3]] // CHECK: } // CHECK: scf.yield %[[RET_6]] // CHECK: } // CHECK: %[[TMP_23:.*]] = sparse_tensor.load %[[RET_3]] hasInserts -// CHECK: return 
%[[TMP_23]] : tensor func.func @concat_sparse_sparse_dynamic(%arg0: tensor<2x4xf64, #DCSR>, %arg1: tensor<3x4xf64, #DCSR>, %arg2: tensor<4x4xf64, #DCSR>) @@ -177,9 +177,9 @@ func.func @concat_sparse_sparse_dynamic(%arg0: tensor<2x4xf64, #DCSR>, } // CHECK-LABEL: func.func @concat_sparse_sparse_dense( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<2x4xf64, #sparse_tensor -// CHECK-SAME: %[[VAL_1:.*]]: tensor<3x4xf64, #sparse_tensor -// CHECK-SAME: %[[VAL_2:.*]]: tensor<4x4xf64, #sparse_tensor +// CHECK-SAME: %[[VAL_0:.*]]: tensor<2x4xf64, #sparse> +// CHECK-SAME: %[[VAL_1:.*]]: tensor<3x4xf64, #sparse> +// CHECK-SAME: %[[VAL_2:.*]]: tensor<4x4xf64, #sparse> // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 4 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 9 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 5 : index @@ -189,11 +189,11 @@ func.func @concat_sparse_sparse_dynamic(%arg0: tensor<2x4xf64, #DCSR>, // CHECK-DAG: %[[VAL_9:.*]] = arith.constant 2 : index // CHECK: %[[VAL_10:.*]] = bufferization.alloc_tensor(%[[VAL_4]], %[[VAL_3]]) : tensor // CHECK: %[[VAL_11:.*]] = linalg.fill ins(%[[VAL_6]] : f64) outs(%[[VAL_10]] : tensor) -> tensor -// CHECK: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<2x4xf64, #sparse_tensor -// CHECK: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<2x4xf64, #sparse_tensor +// CHECK: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<2x4xf64, #sparse> +// CHECK: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<2x4xf64, #sparse> +// CHECK: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<2x4xf64, #sparse> +// CHECK: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<2x4xf64, #sparse> +// CHECK: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<2x4xf64, #sparse> // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_7]]] : memref // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_8]]] : memref // CHECK: %[[VAL_19:.*]] = scf.for %[[VAL_20:.*]] = %[[VAL_17]] to %[[VAL_18]] step %[[VAL_8]] iter_args(%[[VAL_21:.*]] = %[[VAL_11]]) -> (tensor) { @@ -209,11 +209,11 @@ func.func @concat_sparse_sparse_dynamic(%arg0: tensor<2x4xf64, #DCSR>, // CHECK: } // CHECK: scf.yield %[[VAL_26]] : tensor // CHECK: } -// CHECK: %[[VAL_32:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[VAL_33:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[VAL_34:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[VAL_35:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<3x4xf64, #sparse_tensor -// CHECK: %[[VAL_36:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<3x4xf64, #sparse_tensor +// CHECK: %[[VAL_32:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<3x4xf64, #sparse> +// CHECK: %[[VAL_33:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<3x4xf64, #sparse> +// CHECK: %[[VAL_34:.*]] = sparse_tensor.positions %[[VAL_1]] 
{level = 1 : index} : tensor<3x4xf64, #sparse> +// CHECK: %[[VAL_35:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<3x4xf64, #sparse> +// CHECK: %[[VAL_36:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<3x4xf64, #sparse> // CHECK: %[[VAL_37:.*]] = memref.load %[[VAL_32]]{{\[}}%[[VAL_7]]] : memref // CHECK: %[[VAL_38:.*]] = memref.load %[[VAL_32]]{{\[}}%[[VAL_8]]] : memref // CHECK: %[[VAL_39:.*]] = scf.for %[[VAL_40:.*]] = %[[VAL_37]] to %[[VAL_38]] step %[[VAL_8]] iter_args(%[[VAL_41:.*]] = %[[VAL_19]]) -> (tensor) { @@ -230,11 +230,11 @@ func.func @concat_sparse_sparse_dynamic(%arg0: tensor<2x4xf64, #DCSR>, // CHECK: } // CHECK: scf.yield %[[VAL_46]] : tensor // CHECK: } -// CHECK: %[[VAL_53:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[VAL_54:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 0 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[VAL_55:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[VAL_56:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 1 : index} : tensor<4x4xf64, #sparse_tensor -// CHECK: %[[VAL_57:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<4x4xf64, #sparse_tensor +// CHECK: %[[VAL_53:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 0 : index} : tensor<4x4xf64, #sparse> +// CHECK: %[[VAL_54:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 0 : index} : tensor<4x4xf64, #sparse> +// CHECK: %[[VAL_55:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 1 : index} : tensor<4x4xf64, #sparse> +// CHECK: %[[VAL_56:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 1 : index} : tensor<4x4xf64, #sparse> +// CHECK: %[[VAL_57:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<4x4xf64, #sparse> // CHECK: %[[VAL_58:.*]] = memref.load %[[VAL_53]]{{\[}}%[[VAL_7]]] : memref // CHECK: %[[VAL_59:.*]] = memref.load %[[VAL_53]]{{\[}}%[[VAL_8]]] : memref // CHECK: %[[VAL_60:.*]] = scf.for %[[VAL_61:.*]] = %[[VAL_58]] to %[[VAL_59]] step %[[VAL_8]] iter_args(%[[VAL_62:.*]] = %[[VAL_39]]) -> (tensor) { diff --git a/mlir/test/Dialect/SparseTensor/sparse_conv_2d_slice_based.mlir b/mlir/test/Dialect/SparseTensor/sparse_conv_2d_slice_based.mlir index de0582e..0cd57ac 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_conv_2d_slice_based.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_conv_2d_slice_based.mlir @@ -7,8 +7,8 @@ #DCSR = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed, d1 : compressed) }> // CHECK-LABEL: func.func @conv2d_all_sparse_CSR( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xi32, #sparse_tensor.encoding<{{.*}}>>, -// CHECK-SAME: %[[VAL_1:.*]]: tensor<3x3xi32>) -> tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>> { +// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xi32, #sparse{{[0-9]*}}>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<3x3xi32>) -> tensor<6x6xi32, #sparse{{[0-9]*}}> { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant true // CHECK-DAG: %[[VAL_3:.*]] = arith.constant -2 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 8 : index @@ -19,12 +19,12 @@ // CHECK-DAG: %[[VAL_9:.*]] = arith.constant 4 : index // CHECK-DAG: %[[VAL_10:.*]] = arith.constant 0 : i32 // CHECK-DAG: %[[VAL_11:.*]] = arith.constant false -// CHECK-DAG: %[[VAL_12:.*]] = tensor.empty() : tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>> -// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8x8xi32, #sparse_tensor.encoding<{{.*}}>> to memref -// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.coordinates 
%[[VAL_0]] {level = 0 : index} : tensor<8x8xi32, #sparse_tensor.encoding<{{.*}}>> to memref -// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xi32, #sparse_tensor.encoding<{{.*}}>> to memref -// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<8x8xi32, #sparse_tensor.encoding<{{.*}}>> to memref -// CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xi32, #sparse_tensor.encoding<{{.*}}>> to memref +// CHECK-DAG: %[[VAL_12:.*]] = tensor.empty() : tensor<6x6xi32, #sparse{{[0-9]*}}> +// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8x8xi32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8x8xi32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xi32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<8x8xi32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xi32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_18:.*]] = memref.alloca() : memref<11xindex> // CHECK-DAG: %[[VAL_19:.*]] = memref.alloca() : memref<5xindex> // CHECK-DAG: %[[VAL_20:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_7]]] : memref @@ -38,10 +38,10 @@ // CHECK: %[[VAL_24:.*]] = arith.andi %[[VAL_21]], %[[VAL_23]] : i1 // CHECK: %[[VAL_25:.*]] = arith.addi %[[VAL_22]], %[[VAL_3]] : index // CHECK: %[[VAL_26:.*]] = arith.select %[[VAL_24]], %[[VAL_25]], %[[VAL_6]] : index -// CHECK: %[[VAL_27:.*]]:3 = scf.while (%[[VAL_28:.*]] = %[[VAL_21]], %[[VAL_29:.*]] = %[[VAL_22]], %[[VAL_30:.*]] = %[[VAL_26]], %[[VAL_31:.*]] = %[[VAL_12]]) : (i1, index, index, tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>) -> (index, index, tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>) { -// CHECK: scf.condition(%[[VAL_28]]) %[[VAL_29]], %[[VAL_30]], %[[VAL_31]] : index, index, tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>> +// CHECK: %[[VAL_27:.*]]:3 = scf.while (%[[VAL_28:.*]] = %[[VAL_21]], %[[VAL_29:.*]] = %[[VAL_22]], %[[VAL_30:.*]] = %[[VAL_26]], %[[VAL_31:.*]] = %[[VAL_12]]) : (i1, index, index, tensor<6x6xi32, #sparse{{[0-9]*}}>) -> (index, index, tensor<6x6xi32, #sparse{{[0-9]*}}>) { +// CHECK: scf.condition(%[[VAL_28]]) %[[VAL_29]], %[[VAL_30]], %[[VAL_31]] : index, index, tensor<6x6xi32, #sparse{{[0-9]*}}> // CHECK: } do { -// CHECK: ^bb0(%[[VAL_32:.*]]: index, %[[VAL_33:.*]]: index, %[[VAL_34:.*]]: tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>): +// CHECK: ^bb0(%[[VAL_32:.*]]: index, %[[VAL_33:.*]]: index, %[[VAL_34:.*]]: tensor<6x6xi32, #sparse{{[0-9]*}}>): // CHECK: %[[VAL_35:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_6]]] : memref<5xindex> // CHECK: %[[VAL_36:.*]]:4 = scf.for %[[VAL_37:.*]] = %[[VAL_8]] to %[[VAL_35]] step %[[VAL_5]] iter_args(%[[VAL_38:.*]] = %[[VAL_11]], %[[VAL_39:.*]] = %[[VAL_4]], %[[VAL_40:.*]] = %[[VAL_8]], %[[VAL_41:.*]] = %[[VAL_6]]) -> (i1, index, index, index) { // CHECK: %[[VAL_42:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_37]]] : memref<5xindex> @@ -90,10 +90,10 @@ // CHECK: %[[VAL_79:.*]] = arith.andi %[[VAL_77]]#0, %[[VAL_78]] : i1 // CHECK: %[[VAL_80:.*]] = arith.addi %[[VAL_77]]#1, %[[VAL_3]] : index // CHECK: %[[VAL_81:.*]] = arith.select %[[VAL_79]], %[[VAL_80]], %[[VAL_6]] : index -// CHECK: %[[VAL_82:.*]]:3 = scf.while (%[[VAL_83:.*]] = 
%[[VAL_77]]#0, %[[VAL_84:.*]] = %[[VAL_77]]#1, %[[VAL_85:.*]] = %[[VAL_81]], %[[VAL_86:.*]] = %[[VAL_34]]) : (i1, index, index, tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>) -> (index, index, tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>) { -// CHECK: scf.condition(%[[VAL_83]]) %[[VAL_84]], %[[VAL_85]], %[[VAL_86]] : index, index, tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>> +// CHECK: %[[VAL_82:.*]]:3 = scf.while (%[[VAL_83:.*]] = %[[VAL_77]]#0, %[[VAL_84:.*]] = %[[VAL_77]]#1, %[[VAL_85:.*]] = %[[VAL_81]], %[[VAL_86:.*]] = %[[VAL_34]]) : (i1, index, index, tensor<6x6xi32, #sparse{{[0-9]*}}>) -> (index, index, tensor<6x6xi32, #sparse{{[0-9]*}}>) { +// CHECK: scf.condition(%[[VAL_83]]) %[[VAL_84]], %[[VAL_85]], %[[VAL_86]] : index, index, tensor<6x6xi32, #sparse{{[0-9]*}}> // CHECK: } do { -// CHECK: ^bb0(%[[VAL_87:.*]]: index, %[[VAL_88:.*]]: index, %[[VAL_89:.*]]: tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>): +// CHECK: ^bb0(%[[VAL_87:.*]]: index, %[[VAL_88:.*]]: index, %[[VAL_89:.*]]: tensor<6x6xi32, #sparse{{[0-9]*}}>): // CHECK: %[[VAL_90:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_7]]] : memref<5xindex> // CHECK: %[[VAL_91:.*]] = arith.addi %[[VAL_90]], %[[VAL_8]] : index // CHECK: %[[VAL_92:.*]] = arith.addi %[[VAL_90]], %[[VAL_5]] : index @@ -145,11 +145,11 @@ // CHECK: memref.store %[[VAL_112]], %[[VAL_18]]{{\[}}%[[VAL_7]]] : memref<11xindex> // CHECK: scf.yield %[[VAL_133]], %[[VAL_136:.*]]#1, %[[VAL_2]] : index, i32, i1 // CHECK: } -// CHECK: %[[VAL_137:.*]] = scf.if %[[VAL_138:.*]]#2 -> (tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>>) { -// CHECK: %[[VAL_139:.*]] = sparse_tensor.insert %[[VAL_138]]#1 into %[[VAL_89]]{{\[}}%[[VAL_33]], %[[VAL_88]]] : tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>> -// CHECK: scf.yield %[[VAL_139]] : tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>> +// CHECK: %[[VAL_137:.*]] = scf.if %[[VAL_138:.*]]#2 -> (tensor<6x6xi32, #sparse{{[0-9]*}}>) { +// CHECK: %[[VAL_139:.*]] = sparse_tensor.insert %[[VAL_138]]#1 into %[[VAL_89]]{{\[}}%[[VAL_33]], %[[VAL_88]]] : tensor<6x6xi32, #sparse{{[0-9]*}}> +// CHECK: scf.yield %[[VAL_139]] : tensor<6x6xi32, #sparse{{[0-9]*}}> // CHECK: } else { -// CHECK: scf.yield %[[VAL_89]] : tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>> +// CHECK: scf.yield %[[VAL_89]] : tensor<6x6xi32, #sparse{{[0-9]*}}> // CHECK: } // CHECK: memref.store %[[VAL_6]], %[[VAL_19]]{{\[}}%[[VAL_7]]] : memref<5xindex> // CHECK: memref.store %[[VAL_6]], %[[VAL_18]]{{\[}}%[[VAL_7]]] : memref<11xindex> @@ -202,7 +202,7 @@ // CHECK: %[[VAL_175:.*]] = arith.addi %[[VAL_174]], %[[VAL_5]] : index // CHECK: %[[VAL_176:.*]] = arith.cmpi ule, %[[VAL_175]], %[[VAL_4]] : index // CHECK: %[[VAL_177:.*]] = arith.andi %[[VAL_173]]#1, %[[VAL_176]] : i1 -// CHECK: scf.yield %[[VAL_177]], %[[VAL_173]]#0, %[[VAL_174]], %[[VAL_178:.*]] : i1, index, index, tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>> +// CHECK: scf.yield %[[VAL_177]], %[[VAL_173]]#0, %[[VAL_174]], %[[VAL_178:.*]] : i1, index, index, tensor<6x6xi32, #sparse{{[0-9]*}}> // CHECK: } // CHECK: memref.store %[[VAL_6]], %[[VAL_19]]{{\[}}%[[VAL_7]]] : memref<5xindex> // CHECK: %[[VAL_179:.*]] = arith.cmpi ugt, %[[VAL_32]], %[[VAL_33]] : index @@ -254,10 +254,10 @@ // CHECK: %[[VAL_214:.*]] = arith.addi %[[VAL_213]], %[[VAL_5]] : index // CHECK: %[[VAL_215:.*]] = arith.cmpi ule, %[[VAL_214]], %[[VAL_4]] : index // CHECK: %[[VAL_216:.*]] = arith.andi %[[VAL_212]]#1, %[[VAL_215]] : i1 -// CHECK: scf.yield %[[VAL_216]], %[[VAL_212]]#0, %[[VAL_213]], %[[VAL_217:.*]]#2 : i1, index, index, 
tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>> +// CHECK: scf.yield %[[VAL_216]], %[[VAL_212]]#0, %[[VAL_213]], %[[VAL_217:.*]]#2 : i1, index, index, tensor<6x6xi32, #sparse{{[0-9]*}}> // CHECK: } -// CHECK: %[[VAL_218:.*]] = sparse_tensor.load %[[VAL_219:.*]]#2 hasInserts : tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>> -// CHECK: return %[[VAL_218]] : tensor<6x6xi32, #sparse_tensor.encoding<{{.*}}>> +// CHECK: %[[VAL_218:.*]] = sparse_tensor.load %[[VAL_219:.*]]#2 hasInserts : tensor<6x6xi32, #sparse{{[0-9]*}}> +// CHECK: return %[[VAL_218]] : tensor<6x6xi32, #sparse{{[0-9]*}}> // CHECK: } func.func @conv2d_all_sparse_CSR(%arg0: tensor<8x8xi32, #DCSR>, %arg1: tensor<3x3xi32>) -> tensor<6x6xi32, #DCSR> { diff --git a/mlir/test/Dialect/SparseTensor/sparse_extract_slice.mlir b/mlir/test/Dialect/SparseTensor/sparse_extract_slice.mlir index ae53f4d..c501a09 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_extract_slice.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_extract_slice.mlir @@ -12,7 +12,7 @@ // CHECK-SAME: %[[VAL_0:.*0]]: memref, // CHECK-SAME: %[[VAL_1:.*1]]: memref, // CHECK-SAME: %[[VAL_2:.*2]]: memref, -// CHECK-SAME: %[[VAL_3:.*3]]: !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>) +// CHECK-SAME: %[[VAL_3:.*3]]: !sparse_tensor.storage_specifier<#sparse{{[0-9]*}}>) // CHECK: %[[VAL_4:.*]] = sparse_tensor.storage_specifier.init with %[[VAL_3]] // CHECK: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK: %[[VAL_6:.*]] = arith.constant 4 : index diff --git a/mlir/test/Dialect/SparseTensor/sparse_foreach.mlir b/mlir/test/Dialect/SparseTensor/sparse_foreach.mlir index 5983289..eb61115 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_foreach.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_foreach.mlir @@ -143,13 +143,13 @@ func.func @foreach_print_slice(%A: tensor<4x4xf64, #CSR_SLICE>) { }> // CHECK-LABEL: func.func @foreach_bcoo( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<4x4x4xf64, #{{.*}}>>) { +// CHECK-SAME: %[[VAL_0:.*]]: tensor<4x4x4xf64, #sparse{{[0-9]*}}>) { // CHECK-DAG: %[[VAL_1:.*]] = arith.constant 4 : index // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<4x4x4xf64, #{{.*}}>> to memref -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<4x4x4xf64, #{{.*}}>> to memref +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<4x4x4xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<4x4x4xf64, #sparse{{[0-9]*}}> to memref // CHECK: scf.for %[[VAL_7:.*]] = %[[VAL_2]] to %[[VAL_1]] step %[[VAL_3]] { // CHECK: %[[VAL_8:.*]] = arith.muli %[[VAL_7]], %[[VAL_4]] : index // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_8]]] : memref diff --git a/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir b/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir index f25a564..8c09523 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_fp_ops.mlir @@ -31,13 +31,13 @@ } // CHECK-LABEL: func @abs( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf64>) -> tensor<32xf64> { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_3:.*]] = 
arith.constant 1 : index -// CHECK: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64> // CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref @@ -63,13 +63,13 @@ func.func @abs(%arga: tensor<32xf64, #SV>, } // CHECK-LABEL: func @ceil( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf64>) -> tensor<32xf64> { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64> // CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref @@ -95,13 +95,13 @@ func.func @ceil(%arga: tensor<32xf64, #SV>, } // CHECK-LABEL: func @floor( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf64>) -> tensor<32xf64> { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref +// CHECK: 
%[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64> // CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref @@ -127,13 +127,13 @@ func.func @floor(%arga: tensor<32xf64, #SV>, } // CHECK-LABEL: func @neg( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf64>) -> tensor<32xf64> { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64> // CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref @@ -159,16 +159,16 @@ func.func @neg(%arga: tensor<32xf64, #SV>, } // CHECK-LABEL: func @add( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf64>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf64>) -> tensor<32xf64> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant true // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> +// CHECK: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> +// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> // CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64> // CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64> // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref @@ -219,16 +219,16 @@ func.func @add(%arga: tensor<32xf64, #SV>, } // CHECK-LABEL: func @sub( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf64>, // 
CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf64>) -> tensor<32xf64> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant true // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64> // CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64> // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref @@ -281,14 +281,14 @@ func.func @sub(%arga: tensor<32xf64, #SV>, } // CHECK-LABEL: func @mul( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf64>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32xf64>) -> tensor<32xf64> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> +// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> +// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> // CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64> // CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64> // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref @@ -317,14 +317,14 @@ func.func @mul(%arga: tensor<32xf64, #SV>, } // CHECK-LABEL: func @divbyc( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf64>) -> tensor<32xf64> { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2.000000e+00 : f64 // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, 
#sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> +// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> +// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> // CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xf64> // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref @@ -351,13 +351,13 @@ func.func @divbyc(%arga: tensor<32xf64, #SV>, } // CHECK-LABEL: func.func @zero_preserving_math( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> { +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf64, #sparse{{[0-9]*}}>) -> tensor<32xf64, #sparse{{[0-9]*}}> { // CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_3:.*]] = tensor.empty() : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_3:.*]] = tensor.empty() : tensor<32xf64, #sparse{{[0-9]*}}> +// CHECK: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_5:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xf64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_7:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_1]]] : memref // CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[T:.*]] = scf.for %[[VAL_9:.*]] = %[[VAL_7]] to %[[VAL_8]] step %[[VAL_2]] {{.*}} { @@ -371,11 +371,11 @@ func.func @divbyc(%arga: tensor<32xf64, #SV>, // CHECK: %[[VAL_17:.*]] = math.log1p %[[VAL_16]] : f64 // CHECK: %[[VAL_18:.*]] = math.sin %[[VAL_17]] : f64 // CHECK: %[[VAL_19:.*]] = math.tanh %[[VAL_18]] : f64 -// CHECK: %[[Y:.*]] = sparse_tensor.insert %[[VAL_19]] into %{{.*}}[%[[VAL_10]]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[Y:.*]] = sparse_tensor.insert %[[VAL_19]] into %{{.*}}[%[[VAL_10]]] : tensor<32xf64, #sparse{{[0-9]*}}> // CHECK: scf.yield %[[Y]] // CHECK: } -// CHECK: %[[VAL_20:.*]] = sparse_tensor.load %[[T]] hasInserts : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: return %[[VAL_20]] : tensor<32xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_20:.*]] = sparse_tensor.load %[[T]] hasInserts : tensor<32xf64, #sparse{{[0-9]*}}> +// CHECK: return %[[VAL_20]] : tensor<32xf64, #sparse{{[0-9]*}}> // CHECK: } func.func @zero_preserving_math(%arga: tensor<32xf64, #SV>) -> tensor<32xf64, #SV> { %c32 = arith.constant 32 : index @@ -398,25 +398,25 @@ func.func @zero_preserving_math(%arga: tensor<32xf64, #SV>) -> tensor<32xf64, #S } // CHECK-LABEL: func.func @complex_divbyc( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xcomplex, #sparse_tensor.encoding<{{{.*}}}>> { +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xcomplex, #sparse{{.*}}> { // CHECK-DAG: 
%[[VAL_1:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index // CHECK: %[[VAL_3:.*]] = complex.constant [0.000000e+00, 1.000000e+00] : complex -// CHECK: %[[VAL_4:.*]] = tensor.empty() : tensor<32xcomplex, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xcomplex, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xcomplex, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xcomplex, #sparse_tensor.encoding<{{{.*}}}>> to memref> +// CHECK: %[[VAL_4:.*]] = tensor.empty() : tensor<32xcomplex, #sparse{{[0-9]*}}> +// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xcomplex, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xcomplex, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xcomplex, #sparse{{[0-9]*}}> to memref> // CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_1]]] : memref // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[T:.*]] = scf.for %[[VAL_10:.*]] = %[[VAL_8]] to %[[VAL_9]] step %[[VAL_2]] {{.*}} { // CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_10]]] : memref> // CHECK: %[[VAL_13:.*]] = complex.div %[[VAL_12]], %[[VAL_3]] : complex -// CHECK: %[[Y:.*]] = sparse_tensor.insert %[[VAL_13]] into %{{.*}}[%[[VAL_11]]] : tensor<32xcomplex, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[Y:.*]] = sparse_tensor.insert %[[VAL_13]] into %{{.*}}[%[[VAL_11]]] : tensor<32xcomplex, #sparse{{[0-9]*}}> // CHECK: scf.yield %[[Y]] // CHECK: } -// CHECK: %[[VAL_14:.*]] = sparse_tensor.load %[[T]] hasInserts : tensor<32xcomplex, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: return %[[VAL_14]] : tensor<32xcomplex, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_14:.*]] = sparse_tensor.load %[[T]] hasInserts : tensor<32xcomplex, #sparse{{[0-9]*}}> +// CHECK: return %[[VAL_14]] : tensor<32xcomplex, #sparse{{[0-9]*}}> // CHECK: } func.func @complex_divbyc(%arg0: tensor<32xcomplex, #SV>) -> tensor<32xcomplex, #SV> { %c = complex.constant [0.0, 1.0] : complex diff --git a/mlir/test/Dialect/SparseTensor/sparse_index.mlir b/mlir/test/Dialect/SparseTensor/sparse_index.mlir index e49c89e..b09bd0a 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_index.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_index.mlir @@ -18,17 +18,17 @@ } // CHECK-LABEL: func.func @dense_index( -// CHECK-SAME: %[[VAL_0:.*]]: tensor // CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_3:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[VAL_1]] : tensor +// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[VAL_1]] : tensor +// CHECK-DAG: %[[VAL_5:.*]] = tensor.empty(%[[VAL_3]], %[[VAL_4]]) : tensor +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[VAL_1]] : tensor +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[VAL_2]] : tensor +// CHECK-DAG: %[[VAL_24:.*]] = sparse_tensor.lvl %[[VAL_5]], %[[VAL_2]] : tensor +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_5]] : tensor // 
CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_1]] to %[[VAL_7]] step %[[VAL_2]] { // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_1]] to %[[VAL_8]] step %[[VAL_2]] { // CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_8]], %[[VAL_10]] : index @@ -43,8 +43,8 @@ // CHECK: memref.store %[[VAL_20]], %[[VAL_9]]{{\[}}%[[VAL_15]]] : memref // CHECK: } // CHECK: } -// CHECK: %[[VAL_21:.*]] = sparse_tensor.load %[[VAL_5]] : tensor +// CHECK: return %[[VAL_21]] : tensor // CHECK: } func.func @dense_index(%arga: tensor) -> tensor { @@ -70,17 +70,17 @@ func.func @dense_index(%arga: tensor) // CHECK-LABEL: func.func @sparse_index( -// CHECK-SAME: %[[VAL_0:.*]]: tensor // CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_3:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[VAL_1]] : tensor +// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.lvl %[[VAL_0]], %[[VAL_1]] : tensor +// CHECK-DAG: %[[VAL_5:.*]] = tensor.empty(%[[VAL_3]], %[[VAL_4]]) : tensor +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor // CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_1]]] : memref // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[T:.*]] = scf.for %[[VAL_13:.*]] = %[[VAL_11]] to %[[VAL_12]] step %[[VAL_2]] {{.*}} { @@ -95,13 +95,13 @@ func.func @dense_index(%arga: tensor) // CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_18]]] : memref // CHECK: %[[VAL_23:.*]] = arith.muli %[[VAL_21]], %[[VAL_22]] : i64 // CHECK: %[[VAL_24:.*]] = arith.muli %[[VAL_20]], %[[VAL_23]] : i64 -// CHECK: %[[Y:.*]] = sparse_tensor.insert %[[VAL_24]] into %{{.*}}[%[[VAL_14]], %[[VAL_19]]] : tensor // CHECK: scf.yield %[[Y]] // CHECK: } // CHECK: scf.yield %[[L]] // CHECK: } -// CHECK: %[[VAL_25:.*]] = sparse_tensor.load %[[T]] hasInserts : tensor +// CHECK: return %[[VAL_25]] : tensor // CHECK: } func.func @sparse_index(%arga: tensor) -> tensor { diff --git a/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir b/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir index da7d45b..868b11c 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_int_ops.mlir @@ -23,16 +23,16 @@ } // CHECK-LABEL: func @add( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32xi64>) -> tensor<32xi64> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant true // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_7:.*]] = sparse_tensor.positions 
%[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> +// CHECK: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> +// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> // CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64> // CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64> // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref @@ -83,7 +83,7 @@ func.func @add(%arga: tensor<32xi64, #SV>, } // CHECK-LABEL: func @sub( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32xi64>) -> tensor<32xi64> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index @@ -91,9 +91,9 @@ func.func @add(%arga: tensor<32xi64, #SV>, // CHECK-DAG: %[[VAL_5:.*]] = arith.constant true // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 0 : i64 -// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> +// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> +// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> // CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64> // CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64> // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref @@ -146,14 +146,14 @@ func.func @sub(%arga: tensor<32xi64, #SV>, } // CHECK-LABEL: func @mul( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32xi64>) -> tensor<32xi64> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> +// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> +// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> // CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64> // CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64> // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref @@ -182,14 +182,14 @@ func.func @mul(%arga: 
tensor<32xi64, #SV>, } // CHECK-LABEL: func @divsbyc( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>) -> tensor<32xi64> { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i64 // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> +// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> +// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> // CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64> // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref @@ -216,14 +216,14 @@ func.func @divsbyc(%arga: tensor<32xi64, #SV>, } // CHECK-LABEL: func @divubyc( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>) -> tensor<32xi64> { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i64 // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{.*}}}>> -// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> +// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> +// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> // CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64> // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref @@ -250,14 +250,14 @@ func.func @divubyc(%arga: tensor<32xi64, #SV>, } // CHECK-LABEL: func @and( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32xi64>) -> tensor<32xi64> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref 
-// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64> // CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64> // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref @@ -286,16 +286,16 @@ func.func @and(%arga: tensor<32xi64, #SV>, } // CHECK-LABEL: func @or( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32xi64>) -> tensor<32xi64> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant true // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64> // CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64> // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref @@ -346,16 +346,16 @@ func.func @or(%arga: tensor<32xi64, #SV>, } // CHECK-LABEL: func @xor( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<32xi64>) -> tensor<32xi64> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant true // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref +// CHECK: 
%[[VAL_9:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64> // CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xi64> // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref @@ -406,14 +406,14 @@ func.func @xor(%arga: tensor<32xi64, #SV>, } // CHECK-LABEL: func @ashrbyc( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>) -> tensor<32xi64> { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i64 // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64> // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref @@ -440,14 +440,14 @@ func.func @ashrbyc(%arga: tensor<32xi64, #SV>, } // CHECK-LABEL: func @lsrbyc( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<32xi64>) -> tensor<32xi64> { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i64 // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64> // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref @@ -474,14 +474,14 @@ func.func @lsrbyc(%arga: tensor<32xi64, #SV>, } // CHECK-LABEL: func @lslbyc( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xi64, #sparse{{[0-9]*}}>, // CHECK-SAME: 
%[[VAL_1:.*]]: tensor<32xi64>) -> tensor<32xi64> { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : i64 // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32xi64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32xi64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_1]] : memref<32xi64> // CHECK: %[[VAL_9:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref diff --git a/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir b/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir index 276a864..912d8bf 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_kernels.mlir @@ -7,17 +7,17 @@ #DCSR = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed, d1 : compressed) }> // CHECK-LABEL: func.func @matmul1( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<20x30xf32>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<10x30xf32>) -> tensor<10x30xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 30 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<20x30xf32> // CHECK: 
%[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<10x30xf32> // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref @@ -53,7 +53,7 @@ func.func @matmul1(%a: tensor<10x20xf32, #DCSR>, // CHECK-LABEL: func.func @matmul_sparse_rhs( // CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20xf32>, -// CHECK-SAME: %[[VAL_1:.*]]: tensor<20x30xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<20x30xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<10x30xf32>) -> tensor<10x30xf32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 10 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index @@ -102,40 +102,40 @@ func.func @matmul_sparse_rhs(%a: tensor<10x20xf32>, // Computes C = A x B with all matrices sparse (SpMSpM) in DCSR. // // CHECK-LABEL: func.func @matmul2( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<4x8xf64, #sparse_tensor.encoding<{{{.*}}}>>, -// CHECK-SAME: %[[VAL_1:.*]]: tensor<8x4xf64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>> { +// CHECK-SAME: %[[VAL_0:.*]]: tensor<4x8xf64, #sparse{{[0-9]*}}>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<8x4xf64, #sparse{{[0-9]*}}>) -> tensor<4x4xf64, #sparse{{[0-9]*}}> { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant false // CHECK-DAG: %[[VAL_5:.*]] = arith.constant true -// CHECK-DAG: %[[VAL_6:.*]] = tensor.empty() : tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<4x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<4x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<4x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<4x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<4x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<8x4xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<8x4xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<8x4xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<8x4xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<8x4xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_6:.*]] = tensor.empty() : tensor<4x4xf64, #sparse{{[0-9]*}}> +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<4x8xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<4x8xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<4x8xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 
: index} : tensor<4x8xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<4x8xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<8x4xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<8x4xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<8x4xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<8x4xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<8x4xf64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_19:.*]] = scf.for %[[VAL_20:.*]] = %[[VAL_17]] to %[[VAL_18]] step %[[VAL_3]] iter_args(%[[VAL_21:.*]] = %[[VAL_6]]) -> (tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK: %[[VAL_19:.*]] = scf.for %[[VAL_20:.*]] = %[[VAL_17]] to %[[VAL_18]] step %[[VAL_3]] iter_args(%[[VAL_21:.*]] = %[[VAL_6]]) -> (tensor<4x4xf64, #sparse{{[0-9]*}}>) { // CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_20]]] : memref -// CHECK: %[[VAL_23:.*]], %[[VAL_24:.*]], %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.expand %[[VAL_6]] : tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref, memref, memref +// CHECK: %[[VAL_23:.*]], %[[VAL_24:.*]], %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.expand %[[VAL_6]] : tensor<4x4xf64, #sparse{{[0-9]*}}> to memref, memref, memref // CHECK: %[[VAL_27:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_20]]] : memref // CHECK: %[[VAL_28:.*]] = arith.addi %[[VAL_20]], %[[VAL_3]] : index // CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_28]]] : memref // CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_31:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_32:.*]]:4 = scf.while (%[[VAL_33:.*]] = %[[VAL_27]], %[[VAL_34:.*]] = %[[VAL_30]], %[[VAL_35:.*]] = %[[VAL_26]], %[[VAL_36:.*]] = %[[VAL_21]]) : (index, index, index, tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>>) -> (index, index, index, tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK: %[[VAL_32:.*]]:4 = scf.while (%[[VAL_33:.*]] = %[[VAL_27]], %[[VAL_34:.*]] = %[[VAL_30]], %[[VAL_35:.*]] = %[[VAL_26]], %[[VAL_36:.*]] = %[[VAL_21]]) : (index, index, index, tensor<4x4xf64, #sparse{{[0-9]*}}>) -> (index, index, index, tensor<4x4xf64, #sparse{{[0-9]*}}>) { // CHECK: %[[VAL_37:.*]] = arith.cmpi ult, %[[VAL_33]], %[[VAL_29]] : index // CHECK: %[[VAL_38:.*]] = arith.cmpi ult, %[[VAL_34]], %[[VAL_31]] : index // CHECK: %[[VAL_39:.*]] = arith.andi %[[VAL_37]], %[[VAL_38]] : i1 -// CHECK: scf.condition(%[[VAL_39]]) %[[VAL_33]], %[[VAL_34]], %[[VAL_35]], %[[VAL_36]] : index, index, index, tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: scf.condition(%[[VAL_39]]) %[[VAL_33]], %[[VAL_34]], %[[VAL_35]], %[[VAL_36]] : index, index, index, tensor<4x4xf64, #sparse{{[0-9]*}}> // CHECK: } do { -// CHECK: ^bb0(%[[VAL_40:.*]]: index, %[[VAL_41:.*]]: index, %[[VAL_42:.*]]: index, %[[VAL_43:.*]]: tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>>): +// CHECK: ^bb0(%[[VAL_40:.*]]: index, %[[VAL_41:.*]]: index, %[[VAL_42:.*]]: index, %[[VAL_43:.*]]: 
tensor<4x4xf64, #sparse{{[0-9]*}}>): // CHECK: %[[VAL_44:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_40]]] : memref // CHECK: %[[VAL_45:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_41]]] : memref // CHECK: %[[VAL_46:.*]] = arith.cmpi ult, %[[VAL_45]], %[[VAL_44]] : index @@ -143,7 +143,7 @@ func.func @matmul_sparse_rhs(%a: tensor<10x20xf32>, // CHECK: %[[VAL_48:.*]] = arith.cmpi eq, %[[VAL_44]], %[[VAL_47]] : index // CHECK: %[[VAL_49:.*]] = arith.cmpi eq, %[[VAL_45]], %[[VAL_47]] : index // CHECK: %[[VAL_50:.*]] = arith.andi %[[VAL_48]], %[[VAL_49]] : i1 -// CHECK: %[[VAL_51:.*]]:2 = scf.if %[[VAL_50]] -> (index, tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK: %[[VAL_51:.*]]:2 = scf.if %[[VAL_50]] -> (index, tensor<4x4xf64, #sparse{{[0-9]*}}>) { // CHECK: %[[VAL_52:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_40]]] : memref // CHECK: %[[VAL_53:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_41]]] : memref // CHECK: %[[VAL_54:.*]] = arith.addi %[[VAL_41]], %[[VAL_3]] : index @@ -167,9 +167,9 @@ func.func @matmul_sparse_rhs(%a: tensor<10x20xf32>, // CHECK: memref.store %[[VAL_63]], %[[VAL_23]]{{\[}}%[[VAL_59]]] : memref // CHECK: scf.yield %[[VAL_68:.*]] : index // CHECK: } -// CHECK: scf.yield %[[VAL_69:.*]], %[[VAL_43]] : index, tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: scf.yield %[[VAL_69:.*]], %[[VAL_43]] : index, tensor<4x4xf64, #sparse{{[0-9]*}}> // CHECK: } else { -// CHECK: scf.yield %[[VAL_42]], %[[VAL_43]] : index, tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: scf.yield %[[VAL_42]], %[[VAL_43]] : index, tensor<4x4xf64, #sparse{{[0-9]*}}> // CHECK: } // CHECK: %[[VAL_70:.*]] = arith.cmpi eq, %[[VAL_44]], %[[VAL_47]] : index // CHECK: %[[VAL_71:.*]] = arith.addi %[[VAL_40]], %[[VAL_3]] : index @@ -177,13 +177,13 @@ func.func @matmul_sparse_rhs(%a: tensor<10x20xf32>, // CHECK: %[[VAL_73:.*]] = arith.cmpi eq, %[[VAL_45]], %[[VAL_47]] : index // CHECK: %[[VAL_74:.*]] = arith.addi %[[VAL_41]], %[[VAL_3]] : index // CHECK: %[[VAL_75:.*]] = arith.select %[[VAL_73]], %[[VAL_74]], %[[VAL_41]] : index -// CHECK: scf.yield %[[VAL_72]], %[[VAL_75]], %[[VAL_76:.*]]#0, %[[VAL_76]]#1 : index, index, index, tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: scf.yield %[[VAL_72]], %[[VAL_75]], %[[VAL_76:.*]]#0, %[[VAL_76]]#1 : index, index, index, tensor<4x4xf64, #sparse{{[0-9]*}}> // CHECK: } -// CHECK: %[[VAL_77:.*]] = sparse_tensor.compress %[[VAL_23]], %[[VAL_24]], %[[VAL_25]], %[[VAL_78:.*]]#2 into %[[VAL_78]]#3{{\[}}%[[VAL_22]]] : memref, memref, memref, tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: scf.yield %[[VAL_77]] : tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_77:.*]] = sparse_tensor.compress %[[VAL_23]], %[[VAL_24]], %[[VAL_25]], %[[VAL_78:.*]]#2 into %[[VAL_78]]#3{{\[}}%[[VAL_22]]] : memref, memref, memref, tensor<4x4xf64, #sparse{{[0-9]*}}> +// CHECK: scf.yield %[[VAL_77]] : tensor<4x4xf64, #sparse{{[0-9]*}}> // CHECK: } -// CHECK: %[[VAL_79:.*]] = sparse_tensor.load %[[VAL_80:.*]] hasInserts : tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: return %[[VAL_79]] : tensor<4x4xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_79:.*]] = sparse_tensor.load %[[VAL_80:.*]] hasInserts : tensor<4x4xf64, #sparse{{[0-9]*}}> +// CHECK: return %[[VAL_79]] : tensor<4x4xf64, #sparse{{[0-9]*}}> // CHECK: } func.func @matmul2(%A: tensor<4x8xf64, #DCSR>, %B: tensor<8x4xf64, #DCSR>) -> tensor<4x4xf64, #DCSR> { @@ -198,18 +198,18 @@ func.func @matmul2(%A: tensor<4x8xf64, #DCSR>, // 
CHECK-LABEL: func.func @conv2d( // CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xi32>, -// CHECK-SAME: %[[VAL_1:.*]]: tensor<3x3xi32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<3x3xi32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<6x6xi32>) -> tensor<6x6xi32> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 6 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref<8x8xi32> -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<3x3xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<3x3xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<3x3xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<3x3xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<3x3xi32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<6x6xi32> +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<3x3xi32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<3x3xi32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<3x3xi32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<3x3xi32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<3x3xi32, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<6x6xi32> // CHECK: scf.for %[[VAL_13:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_13]], %[[VAL_14]]] : memref<6x6xi32> @@ -249,18 +249,18 @@ func.func @conv2d(%input: tensor<8x8xi32>, // CHECK-LABEL: func.func @quantized_matmul( // CHECK-SAME: %[[VAL_0:.*]]: tensor<5x3xi8>, -// CHECK-SAME: %[[VAL_1:.*]]: tensor<3x6xi8, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<3x6xi8, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<5x6xi64>) -> tensor<5x6xi64> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 5 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 2 : i64 // CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_0]] : memref<5x3xi8> -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<3x6xi8, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<3x6xi8, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<3x6xi8, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : 
tensor<3x6xi8, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<3x6xi8, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<3x6xi8, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<3x6xi8, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<3x6xi8, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<3x6xi8, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<3x6xi8, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_2]] : memref<5x6xi64> // CHECK: scf.for %[[VAL_14:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref @@ -299,16 +299,16 @@ func.func @quantized_matmul(%input1: tensor<5x3xi8>, } // CHECK-LABEL: func.func @sparse_dot( -// CHECK-SAME: %[[VAL_0:.*0]]: tensor<1024xf32, #sparse_tensor.encoding<{{{.*}}}>>, %[[VAL_1:.*1]]: tensor<1024xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*0]]: tensor<1024xf32, #sparse{{[0-9]*}}>, %[[VAL_1:.*1]]: tensor<1024xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_2:.*2]]: tensor) -> tensor { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<1024xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<1024xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<1024xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<1024xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<1024xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<1024xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<1024xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<1024xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<1024xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor<1024xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor<1024xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<1024xf32, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_11]][] : memref // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower.mlir 
b/mlir/test/Dialect/SparseTensor/sparse_lower.mlir index 788dd9c..6112856 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_lower.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower.mlir @@ -21,15 +21,15 @@ } // CHECK-HIR-LABEL: func @matvec( -// CHECK-HIR-SAME: %[[VAL_0:.*]]: tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK-HIR-SAME: %[[VAL_0:.*]]: tensor<32x64xf64, #sparse{{[0-9]*}}> // CHECK-HIR-SAME: %[[VAL_1:.*]]: tensor<64xf64>, // CHECK-HIR-SAME: %[[VAL_2:.*]]: tensor<32xf64>) -> tensor<32xf64> { // CHECK-HIR-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-HIR-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-HIR-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-HIR-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK-HIR-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK-HIR-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK-HIR-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> +// CHECK-HIR-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> +// CHECK-HIR-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse{{[0-9]*}}> // CHECK-HIR-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<64xf64> // CHECK-HIR-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64> // CHECK-HIR: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir index da08f53..401da15 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower_col.mlir @@ -23,7 +23,7 @@ } // CHECK-HIR-LABEL: func @matvec( -// CHECK-HIR-SAME: %[[VAL_0:.*]]: tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-HIR-SAME: %[[VAL_0:.*]]: tensor<32x64xf64, #sparse{{[0-9]*}}>, // CHECK-HIR-SAME: %[[VAL_1:.*]]: tensor<64xf64>, // CHECK-HIR-SAME: %[[VAL_2:.*]]: tensor<32xf64>) -> tensor<32xf64> { // CHECK-HIR-DAG: %[[VAL_3:.*]] = arith.constant 64 : index diff --git a/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir b/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir index eb69203..d769876 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_lower_inplace.mlir @@ -21,15 +21,15 @@ } // CHECK-HIR-LABEL: func @matvec( -// CHECK-HIR-SAME: %[[VAL_0:.*]]: tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-HIR-SAME: %[[VAL_0:.*]]: tensor<32x64xf64, #sparse{{[0-9]*}}>, // CHECK-HIR-SAME: %[[VAL_1:.*]]: tensor<64xf64>, // CHECK-HIR-SAME: %[[VAL_2:.*]]: tensor<32xf64>) -> tensor<32xf64> { // CHECK-HIR-DAG: %[[VAL_3:.*]] = arith.constant 32 : index // CHECK-HIR-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-HIR-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-HIR: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK-HIR: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK-HIR: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : 
tensor<32x64xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK-HIR: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> +// CHECK-HIR: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x64xf64, #sparse{{[0-9]*}}> +// CHECK-HIR: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x64xf64, #sparse{{[0-9]*}}> // CHECK-HIR: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<64xf64> // CHECK-HIR: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<32xf64> // CHECK-HIR: scf.for %[[VAL_11:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { diff --git a/mlir/test/Dialect/SparseTensor/sparse_nd.mlir b/mlir/test/Dialect/SparseTensor/sparse_nd.mlir index 27716a8..50fec5b 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_nd.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_nd.mlir @@ -24,7 +24,7 @@ // CHECK-LABEL: func @mul( // CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20x30x40x50x60x70x80xf32>, -// CHECK-SAME: %[[VAL_1:.*]]: tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<80x70x60x50x40x30x20x10xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<10x20x30x40x50x60x70x80xf32>) -> tensor<10x20x30x40x50x60x70x80xf32> { // CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 10 : index @@ -36,11 +36,11 @@ // CHECK-DAG: %[[VAL_11:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_12:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_13:.*]] = bufferization.to_memref %[[VAL_0]] : memref<10x20x30x40x50x60x70x80xf32> -// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 3 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 3 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 4 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 4 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_18:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<80x70x60x50x40x30x20x10xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 3 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 3 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 4 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 4 : index} : tensor<80x70x60x50x40x30x20x10xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_18:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<80x70x60x50x40x30x20x10xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_20:.*]] = bufferization.to_memref %[[VAL_2]] : memref<10x20x30x40x50x60x70x80xf32> // CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_20]] : memref<10x20x30x40x50x60x70x80xf32> // CHECK: scf.for %[[VAL_21:.*]] = %[[VAL_11]] to %[[VAL_10]] step %[[VAL_12]] { diff --git 
a/mlir/test/Dialect/SparseTensor/sparse_out.mlir b/mlir/test/Dialect/SparseTensor/sparse_out.mlir index 8bf0625..b1795ff 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_out.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_out.mlir @@ -21,13 +21,13 @@ } // CHECK-LABEL: func.func @sparse_simply_dynamic1( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> { +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>) -> tensor<32x16xf32, #sparse{{[0-9]*}}> { // CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 2.000000e+00 : f32 -// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_7:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_1]]] : memref // CHECK: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref // CHECK: scf.for %[[VAL_9:.*]] = %[[VAL_7]] to %[[VAL_8]] step %[[VAL_2]] { @@ -40,8 +40,8 @@ // CHECK: memref.store %[[VAL_15]], %[[VAL_6]]{{\[}}%[[VAL_13]]] : memref // CHECK: } // CHECK: } -// CHECK: %[[VAL_16:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: return %[[VAL_16]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_16:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> +// CHECK: return %[[VAL_16]] : tensor<32x16xf32, #sparse{{[0-9]*}}> // CHECK: } func.func @sparse_simply_dynamic1(%argx: tensor<32x16xf32, #DCSR>) -> tensor<32x16xf32, #DCSR> { %c = arith.constant 2.0 : f32 @@ -55,12 +55,12 @@ func.func @sparse_simply_dynamic1(%argx: tensor<32x16xf32, #DCSR>) -> tensor<32x } // CHECK-LABEL: func.func @sparse_simply_dynamic2( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> { +// CHECK-SAME: %[[VAL_0:.*]]: tensor<32x16xf32, #sparse{{[0-9]*}}>) -> tensor<32x16xf32, #sparse{{[0-9]*}}> { // CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_3:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_3:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : 
index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_6:.*]] = memref.load %[[VAL_3]]{{\[}}%[[VAL_1]]] : memref // CHECK: %[[VAL_7:.*]] = memref.load %[[VAL_3]]{{\[}}%[[VAL_2]]] : memref // CHECK: scf.for %[[VAL_8:.*]] = %[[VAL_6]] to %[[VAL_7]] step %[[VAL_2]] { @@ -74,8 +74,8 @@ func.func @sparse_simply_dynamic1(%argx: tensor<32x16xf32, #DCSR>) -> tensor<32x // CHECK: memref.store %[[VAL_15]], %[[VAL_5]]{{\[}}%[[VAL_12]]] : memref // CHECK: } // CHECK: } -// CHECK: %[[VAL_16:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: return %[[VAL_16]] : tensor<32x16xf32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_16:.*]] = sparse_tensor.load %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> +// CHECK: return %[[VAL_16]] : tensor<32x16xf32, #sparse{{[0-9]*}}> // CHECK: } func.func @sparse_simply_dynamic2(%argx: tensor<32x16xf32, #DCSR>) -> tensor<32x16xf32, #DCSR> { %0 = linalg.generic #trait_scale_inpl @@ -97,30 +97,30 @@ func.func @sparse_simply_dynamic2(%argx: tensor<32x16xf32, #DCSR>) -> tensor<32x } // CHECK-LABEL: func.func @sparse_truly_dynamic( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> { +// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20xf32, #sparse{{[0-9]*}}>) -> tensor<10x20xf32, #sparse{{[0-9]*}}> { // CHECK-DAG: %[[VAL_1:.*]] = arith.constant 10 : index // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2.000000e+00 : f32 -// CHECK-DAG: %[[VAL_5:.*]] = tensor.empty() : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_9:.*]] = scf.for %[[VAL_10:.*]] = %[[VAL_2]] to %[[VAL_1]] step %[[VAL_3]] iter_args(%[[VAL_11:.*]] = %[[VAL_5]]) -> (tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK-DAG: %[[VAL_5:.*]] = tensor.empty() : tensor<10x20xf32, #sparse{{[0-9]*}}> +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<10x20xf32, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_9:.*]] = scf.for %[[VAL_10:.*]] = %[[VAL_2]] to %[[VAL_1]] step %[[VAL_3]] iter_args(%[[VAL_11:.*]] = %[[VAL_5]]) -> (tensor<10x20xf32, #sparse{{[0-9]*}}>) { // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref // CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_10]], %[[VAL_3]] : index // CHECK: %[[VAL_14:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_13]]] : memref -// CHECK: %[[VAL_15:.*]] = scf.for %[[VAL_16:.*]] = %[[VAL_12]] to %[[VAL_14]] step %[[VAL_3]] iter_args(%[[VAL_17:.*]] = %[[VAL_11]]) -> (tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK: %[[VAL_15:.*]] 
= scf.for %[[VAL_16:.*]] = %[[VAL_12]] to %[[VAL_14]] step %[[VAL_3]] iter_args(%[[VAL_17:.*]] = %[[VAL_11]]) -> (tensor<10x20xf32, #sparse{{[0-9]*}}>) { // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_16]]] : memref // CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_16]]] : memref // CHECK: %[[VAL_20:.*]] = arith.mulf %[[VAL_19]], %[[VAL_4]] : f32 -// CHECK: %[[VAL_21:.*]] = sparse_tensor.insert %[[VAL_20]] into %[[VAL_17]]{{\[}}%[[VAL_10]], %[[VAL_18]]] : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: scf.yield %[[VAL_21]] : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_21:.*]] = sparse_tensor.insert %[[VAL_20]] into %[[VAL_17]]{{\[}}%[[VAL_10]], %[[VAL_18]]] : tensor<10x20xf32, #sparse{{[0-9]*}}> +// CHECK: scf.yield %[[VAL_21]] : tensor<10x20xf32, #sparse{{[0-9]*}}> // CHECK: } -// CHECK: scf.yield %[[VAL_22:.*]] : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: scf.yield %[[VAL_22:.*]] : tensor<10x20xf32, #sparse{{[0-9]*}}> // CHECK: } -// CHECK: %[[VAL_23:.*]] = sparse_tensor.load %[[VAL_24:.*]] hasInserts : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: return %[[VAL_23]] : tensor<10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_23:.*]] = sparse_tensor.load %[[VAL_24:.*]] hasInserts : tensor<10x20xf32, #sparse{{[0-9]*}}> +// CHECK: return %[[VAL_23]] : tensor<10x20xf32, #sparse{{[0-9]*}}> // CHECK: } func.func @sparse_truly_dynamic(%arga: tensor<10x20xf32, #CSR>) -> tensor<10x20xf32, #DCSR> { %s = arith.constant 2.0 : f32 @@ -146,41 +146,41 @@ func.func @sparse_truly_dynamic(%arga: tensor<10x20xf32, #CSR>) -> tensor<10x20x } // CHECK-LABEL: func.func @sumred( -// CHECK-SAME: %[[VAL_0:.*]]: tensor>, -// CHECK-SAME: %[[VAL_1:.*]]: tensor>) -> tensor> { +// CHECK-SAME: %[[VAL_0:.*]]: tensor, +// CHECK-SAME: %[[VAL_1:.*]]: tensor) -> tensor { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : i32 // CHECK-DAG: %[[VAL_FALSE:.*]] = arith.constant false // CHECK-DAG: %[[VAL_TRUE:.*]] = arith.constant true -// CHECK: %[[VAL_5:.*]] = tensor.dim %[[VAL_0]], %[[VAL_2]] : tensor> -// CHECK: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor> -// CHECK: %[[VAL_7:.*]] = tensor.empty(%[[VAL_5]], %[[VAL_6]]) : tensor> -// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor> to memref -// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor> to memref -// CHECK: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor> to memref -// CHECK: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor> to memref -// CHECK: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor> to memref -// CHECK: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor> to memref -// CHECK: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor> to memref -// CHECK: %[[VAL_15:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor> to memref -// CHECK: %[[VAL_16:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor> to memref -// CHECK: %[[VAL_17:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor> to memref -// CHECK: %[[VAL_18:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor> to memref -// CHECK: %[[VAL_19:.*]] = sparse_tensor.positions 
%[[VAL_1]] {level = 2 : index} : tensor> to memref -// CHECK: %[[VAL_20:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 2 : index} : tensor> to memref -// CHECK: %[[VAL_21:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> to memref +// CHECK: %[[VAL_5:.*]] = tensor.dim %[[VAL_0]], %[[VAL_2]] : tensor +// CHECK: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_3]] : tensor +// CHECK: %[[VAL_7:.*]] = tensor.empty(%[[VAL_5]], %[[VAL_6]]) : tensor +// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor to memref +// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor to memref +// CHECK: %[[VAL_10:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor to memref +// CHECK: %[[VAL_11:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor to memref +// CHECK: %[[VAL_12:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 2 : index} : tensor to memref +// CHECK: %[[VAL_13:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 2 : index} : tensor to memref +// CHECK: %[[VAL_14:.*]] = sparse_tensor.values %[[VAL_0]] : tensor to memref +// CHECK: %[[VAL_15:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref +// CHECK: %[[VAL_16:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor to memref +// CHECK: %[[VAL_17:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor to memref +// CHECK: %[[VAL_18:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor to memref +// CHECK: %[[VAL_19:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 2 : index} : tensor to memref +// CHECK: %[[VAL_20:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 2 : index} : tensor to memref +// CHECK: %[[VAL_21:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref // CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref // CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_25:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_26:.*]]:3 = scf.while (%[[VAL_27:.*]] = %[[VAL_22]], %[[VAL_28:.*]] = %[[VAL_24]], %[[VAL_29:.*]] = %[[VAL_7]]) : (index, index, tensor>) -> (index, index, tensor>) { +// CHECK: %[[VAL_26:.*]]:3 = scf.while (%[[VAL_27:.*]] = %[[VAL_22]], %[[VAL_28:.*]] = %[[VAL_24]], %[[VAL_29:.*]] = %[[VAL_7]]) : (index, index, tensor) -> (index, index, tensor) { // CHECK: %[[VAL_30:.*]] = arith.cmpi ult, %[[VAL_27]], %[[VAL_23]] : index // CHECK: %[[VAL_31:.*]] = arith.cmpi ult, %[[VAL_28]], %[[VAL_25]] : index // CHECK: %[[VAL_32:.*]] = arith.andi %[[VAL_30]], %[[VAL_31]] : i1 -// CHECK: scf.condition(%[[VAL_32]]) %[[VAL_27]], %[[VAL_28]], %[[VAL_29]] : index, index, tensor> +// CHECK: scf.condition(%[[VAL_32]]) %[[VAL_27]], %[[VAL_28]], %[[VAL_29]] : index, index, tensor // CHECK: } do { -// CHECK: ^bb0(%[[VAL_33:.*]]: index, %[[VAL_34:.*]]: index, %[[VAL_35:.*]]: tensor>): +// CHECK: ^bb0(%[[VAL_33:.*]]: index, %[[VAL_34:.*]]: index, %[[VAL_35:.*]]: tensor): // CHECK: %[[VAL_36:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_33]]] : memref // CHECK: %[[VAL_37:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_34]]] : memref // CHECK: %[[VAL_38:.*]] = arith.cmpi ult, %[[VAL_37]], %[[VAL_36]] : index @@ -188,20 +188,20 @@ func.func @sparse_truly_dynamic(%arga: tensor<10x20xf32, #CSR>) -> tensor<10x20x // CHECK: %[[VAL_40:.*]] = arith.cmpi eq, %[[VAL_36]], %[[VAL_39]] : index // CHECK: 
%[[VAL_41:.*]] = arith.cmpi eq, %[[VAL_37]], %[[VAL_39]] : index // CHECK: %[[VAL_42:.*]] = arith.andi %[[VAL_40]], %[[VAL_41]] : i1 -// CHECK: %[[VAL_43:.*]] = scf.if %[[VAL_42]] -> (tensor>) { +// CHECK: %[[VAL_43:.*]] = scf.if %[[VAL_42]] -> (tensor) { // CHECK: %[[VAL_44:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_33]]] : memref // CHECK: %[[VAL_45:.*]] = arith.addi %[[VAL_33]], %[[VAL_3]] : index // CHECK: %[[VAL_46:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_45]]] : memref // CHECK: %[[VAL_47:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_34]]] : memref // CHECK: %[[VAL_48:.*]] = arith.addi %[[VAL_34]], %[[VAL_3]] : index // CHECK: %[[VAL_49:.*]] = memref.load %[[VAL_17]]{{\[}}%[[VAL_48]]] : memref -// CHECK: %[[VAL_50:.*]]:3 = scf.while (%[[VAL_51:.*]] = %[[VAL_44]], %[[VAL_52:.*]] = %[[VAL_47]], %[[VAL_53:.*]] = %[[VAL_35]]) : (index, index, tensor>) -> (index, index, tensor>) { +// CHECK: %[[VAL_50:.*]]:3 = scf.while (%[[VAL_51:.*]] = %[[VAL_44]], %[[VAL_52:.*]] = %[[VAL_47]], %[[VAL_53:.*]] = %[[VAL_35]]) : (index, index, tensor) -> (index, index, tensor) { // CHECK: %[[VAL_54:.*]] = arith.cmpi ult, %[[VAL_51]], %[[VAL_46]] : index // CHECK: %[[VAL_55:.*]] = arith.cmpi ult, %[[VAL_52]], %[[VAL_49]] : index // CHECK: %[[VAL_56:.*]] = arith.andi %[[VAL_54]], %[[VAL_55]] : i1 -// CHECK: scf.condition(%[[VAL_56]]) %[[VAL_51]], %[[VAL_52]], %[[VAL_53]] : index, index, tensor> +// CHECK: scf.condition(%[[VAL_56]]) %[[VAL_51]], %[[VAL_52]], %[[VAL_53]] : index, index, tensor // CHECK: } do { -// CHECK: ^bb0(%[[VAL_57:.*]]: index, %[[VAL_58:.*]]: index, %[[VAL_59:.*]]: tensor>): +// CHECK: ^bb0(%[[VAL_57:.*]]: index, %[[VAL_58:.*]]: index, %[[VAL_59:.*]]: tensor): // CHECK: %[[VAL_60:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_57]]] : memref // CHECK: %[[VAL_61:.*]] = memref.load %[[VAL_18]]{{\[}}%[[VAL_58]]] : memref // CHECK: %[[VAL_62:.*]] = arith.cmpi ult, %[[VAL_61]], %[[VAL_60]] : index @@ -209,20 +209,20 @@ func.func @sparse_truly_dynamic(%arga: tensor<10x20xf32, #CSR>) -> tensor<10x20x // CHECK: %[[VAL_64:.*]] = arith.cmpi eq, %[[VAL_60]], %[[VAL_63]] : index // CHECK: %[[VAL_65:.*]] = arith.cmpi eq, %[[VAL_61]], %[[VAL_63]] : index // CHECK: %[[VAL_66:.*]] = arith.andi %[[VAL_64]], %[[VAL_65]] : i1 -// CHECK: %[[VAL_67:.*]] = scf.if %[[VAL_66]] -> (tensor>) { +// CHECK: %[[VAL_67:.*]] = scf.if %[[VAL_66]] -> (tensor) { // CHECK: %[[VAL_68:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_57]]] : memref // CHECK: %[[VAL_69:.*]] = arith.addi %[[VAL_57]], %[[VAL_3]] : index // CHECK: %[[VAL_70:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_69]]] : memref // CHECK: %[[VAL_71:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_58]]] : memref // CHECK: %[[VAL_72:.*]] = arith.addi %[[VAL_58]], %[[VAL_3]] : index // CHECK: %[[VAL_73:.*]] = memref.load %[[VAL_19]]{{\[}}%[[VAL_72]]] : memref -// CHECK: %[[VAL_74:.*]]:5 = scf.while (%[[VAL_75:.*]] = %[[VAL_68]], %[[VAL_76:.*]] = %[[VAL_71]], %[[VAL_77:.*]] = %[[VAL_4]], %[[VAL_200:.*]] = %[[VAL_FALSE]], %[[VAL_78:.*]] = %[[VAL_59]]) : (index, index, i32, i1, tensor>) -> (index, index, i32, i1, tensor>) { +// CHECK: %[[VAL_74:.*]]:5 = scf.while (%[[VAL_75:.*]] = %[[VAL_68]], %[[VAL_76:.*]] = %[[VAL_71]], %[[VAL_77:.*]] = %[[VAL_4]], %[[VAL_200:.*]] = %[[VAL_FALSE]], %[[VAL_78:.*]] = %[[VAL_59]]) : (index, index, i32, i1, tensor) -> (index, index, i32, i1, tensor) { // CHECK: %[[VAL_79:.*]] = arith.cmpi ult, %[[VAL_75]], %[[VAL_70]] : index // CHECK: %[[VAL_80:.*]] = arith.cmpi ult, %[[VAL_76]], %[[VAL_73]] : index // CHECK: %[[VAL_81:.*]] = arith.andi %[[VAL_79]], 
%[[VAL_80]] : i1 -// CHECK: scf.condition(%[[VAL_81]]) %[[VAL_75]], %[[VAL_76]], %[[VAL_77]], %[[VAL_200]], %[[VAL_78]] : index, index, i32, i1, tensor> +// CHECK: scf.condition(%[[VAL_81]]) %[[VAL_75]], %[[VAL_76]], %[[VAL_77]], %[[VAL_200]], %[[VAL_78]] : index, index, i32, i1, tensor // CHECK: } do { -// CHECK: ^bb0(%[[VAL_82:.*]]: index, %[[VAL_83:.*]]: index, %[[VAL_84:.*]]: i32, %[[VAL_201:.*]]: i1, %[[VAL_85:.*]]: tensor>): +// CHECK: ^bb0(%[[VAL_82:.*]]: index, %[[VAL_83:.*]]: index, %[[VAL_84:.*]]: i32, %[[VAL_201:.*]]: i1, %[[VAL_85:.*]]: tensor): // CHECK: %[[VAL_86:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_82]]] : memref // CHECK: %[[VAL_87:.*]] = memref.load %[[VAL_20]]{{\[}}%[[VAL_83]]] : memref // CHECK: %[[VAL_88:.*]] = arith.cmpi ult, %[[VAL_87]], %[[VAL_86]] : index @@ -230,14 +230,14 @@ func.func @sparse_truly_dynamic(%arga: tensor<10x20xf32, #CSR>) -> tensor<10x20x // CHECK: %[[VAL_90:.*]] = arith.cmpi eq, %[[VAL_86]], %[[VAL_89]] : index // CHECK: %[[VAL_91:.*]] = arith.cmpi eq, %[[VAL_87]], %[[VAL_89]] : index // CHECK: %[[VAL_92:.*]] = arith.andi %[[VAL_90]], %[[VAL_91]] : i1 -// CHECK: %[[VAL_93:.*]]:3 = scf.if %[[VAL_92]] -> (i32, i1, tensor>) { +// CHECK: %[[VAL_93:.*]]:3 = scf.if %[[VAL_92]] -> (i32, i1, tensor) { // CHECK: %[[VAL_94:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_82]]] : memref // CHECK: %[[VAL_95:.*]] = memref.load %[[VAL_21]]{{\[}}%[[VAL_83]]] : memref // CHECK: %[[VAL_96:.*]] = arith.muli %[[VAL_94]], %[[VAL_95]] : i32 // CHECK: %[[VAL_97:.*]] = arith.addi %[[VAL_84]], %[[VAL_96]] : i32 -// CHECK: scf.yield %[[VAL_97]], %[[VAL_TRUE]], %[[VAL_85]] : i32, i1, tensor> +// CHECK: scf.yield %[[VAL_97]], %[[VAL_TRUE]], %[[VAL_85]] : i32, i1, tensor // CHECK: } else { -// CHECK: scf.yield %[[VAL_84]], %[[VAL_201]], %[[VAL_85]] : i32, i1, tensor> +// CHECK: scf.yield %[[VAL_84]], %[[VAL_201]], %[[VAL_85]] : i32, i1, tensor // CHECK: } // CHECK: %[[VAL_98:.*]] = arith.cmpi eq, %[[VAL_86]], %[[VAL_89]] : index // CHECK: %[[VAL_99:.*]] = arith.addi %[[VAL_82]], %[[VAL_3]] : index @@ -245,17 +245,17 @@ func.func @sparse_truly_dynamic(%arga: tensor<10x20xf32, #CSR>) -> tensor<10x20x // CHECK: %[[VAL_101:.*]] = arith.cmpi eq, %[[VAL_87]], %[[VAL_89]] : index // CHECK: %[[VAL_102:.*]] = arith.addi %[[VAL_83]], %[[VAL_3]] : index // CHECK: %[[VAL_103:.*]] = arith.select %[[VAL_101]], %[[VAL_102]], %[[VAL_83]] : index -// CHECK: scf.yield %[[VAL_100]], %[[VAL_103]], %[[VAL_104:.*]]#0, %[[VAL_104]]#1, %[[VAL_104]]#2 : index, index, i32, i1, tensor> +// CHECK: scf.yield %[[VAL_100]], %[[VAL_103]], %[[VAL_104:.*]]#0, %[[VAL_104]]#1, %[[VAL_104]]#2 : index, index, i32, i1, tensor // CHECK: } -// CHECK: %[[VAL_202:.*]] = scf.if %[[VAL_74]]#3 -> (tensor>) { -// CHECK: %[[VAL_105:.*]] = sparse_tensor.insert %[[VAL_74]]#2 into %[[VAL_74]]#4{{\[}}%[[VAL_39]], %[[VAL_63]]] : tensor> -// CHECK: scf.yield %[[VAL_105]] : tensor> +// CHECK: %[[VAL_202:.*]] = scf.if %[[VAL_74]]#3 -> (tensor) { +// CHECK: %[[VAL_105:.*]] = sparse_tensor.insert %[[VAL_74]]#2 into %[[VAL_74]]#4{{\[}}%[[VAL_39]], %[[VAL_63]]] : tensor +// CHECK: scf.yield %[[VAL_105]] : tensor // CHECK: } else { -// CHECK: scf.yield %[[VAL_74]]#4 : tensor> +// CHECK: scf.yield %[[VAL_74]]#4 : tensor // CHECK: } -// CHECK: scf.yield %[[VAL_202]] : tensor> +// CHECK: scf.yield %[[VAL_202]] : tensor // CHECK: } else { -// CHECK: scf.yield %[[VAL_59]] : tensor> +// CHECK: scf.yield %[[VAL_59]] : tensor // CHECK: } // CHECK: %[[VAL_107:.*]] = arith.cmpi eq, %[[VAL_60]], %[[VAL_63]] : index // CHECK: %[[VAL_108:.*]] = 
arith.addi %[[VAL_57]], %[[VAL_3]] : index @@ -263,11 +263,11 @@ func.func @sparse_truly_dynamic(%arga: tensor<10x20xf32, #CSR>) -> tensor<10x20x // CHECK: %[[VAL_110:.*]] = arith.cmpi eq, %[[VAL_61]], %[[VAL_63]] : index // CHECK: %[[VAL_111:.*]] = arith.addi %[[VAL_58]], %[[VAL_3]] : index // CHECK: %[[VAL_112:.*]] = arith.select %[[VAL_110]], %[[VAL_111]], %[[VAL_58]] : index -// CHECK: scf.yield %[[VAL_109]], %[[VAL_112]], %[[VAL_113:.*]] : index, index, tensor> +// CHECK: scf.yield %[[VAL_109]], %[[VAL_112]], %[[VAL_113:.*]] : index, index, tensor // CHECK: } -// CHECK: scf.yield %[[VAL_114:.*]]#2 : tensor> +// CHECK: scf.yield %[[VAL_114:.*]]#2 : tensor // CHECK: } else { -// CHECK: scf.yield %[[VAL_35]] : tensor> +// CHECK: scf.yield %[[VAL_35]] : tensor // CHECK: } // CHECK: %[[VAL_115:.*]] = arith.cmpi eq, %[[VAL_36]], %[[VAL_39]] : index // CHECK: %[[VAL_116:.*]] = arith.addi %[[VAL_33]], %[[VAL_3]] : index @@ -275,10 +275,10 @@ func.func @sparse_truly_dynamic(%arga: tensor<10x20xf32, #CSR>) -> tensor<10x20x // CHECK: %[[VAL_118:.*]] = arith.cmpi eq, %[[VAL_37]], %[[VAL_39]] : index // CHECK: %[[VAL_119:.*]] = arith.addi %[[VAL_34]], %[[VAL_3]] : index // CHECK: %[[VAL_120:.*]] = arith.select %[[VAL_118]], %[[VAL_119]], %[[VAL_34]] : index -// CHECK: scf.yield %[[VAL_117]], %[[VAL_120]], %[[VAL_121:.*]] : index, index, tensor> +// CHECK: scf.yield %[[VAL_117]], %[[VAL_120]], %[[VAL_121:.*]] : index, index, tensor // CHECK: } -// CHECK: %[[VAL_122:.*]] = sparse_tensor.load %[[VAL_123:.*]]#2 hasInserts : tensor> -// CHECK: return %[[VAL_122]] : tensor> +// CHECK: %[[VAL_122:.*]] = sparse_tensor.load %[[VAL_123:.*]]#2 hasInserts : tensor +// CHECK: return %[[VAL_122]] : tensor // CHECK: } func.func @sumred(%arga: tensor, %argb: tensor) -> tensor { @@ -310,42 +310,42 @@ func.func @sumred(%arga: tensor, } // CHECK-LABEL: func.func @matmat( -// CHECK-SAME: %[[VAL_0:.*]]: tensor>, -// CHECK-SAME: %[[VAL_1:.*]]: tensor>) -> tensor> { +// CHECK-SAME: %[[VAL_0:.*]]: tensor, +// CHECK-SAME: %[[VAL_1:.*]]: tensor) -> tensor { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant false // CHECK-DAG: %[[VAL_5:.*]] = arith.constant true -// CHECK: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_2]] : tensor> -// CHECK: %[[VAL_7:.*]] = tensor.dim %[[VAL_1]], %[[VAL_3]] : tensor> -// CHECK: %[[VAL_8:.*]] = tensor.empty(%[[VAL_6]], %[[VAL_7]]) : tensor> -// CHECK: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor> to memref -// CHECK: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor> to memref -// CHECK: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor> to memref -// CHECK: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor> to memref -// CHECK: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor> to memref -// CHECK: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor> to memref -// CHECK: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor> to memref -// CHECK: %[[VAL_16:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor> to memref -// CHECK: %[[VAL_17:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor> to memref -// CHECK: %[[VAL_18:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> to memref +// CHECK: %[[VAL_6:.*]] = tensor.dim %[[VAL_0]], %[[VAL_2]] : tensor +// 
CHECK: %[[VAL_7:.*]] = tensor.dim %[[VAL_1]], %[[VAL_3]] : tensor +// CHECK: %[[VAL_8:.*]] = tensor.empty(%[[VAL_6]], %[[VAL_7]]) : tensor +// CHECK: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor to memref +// CHECK: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor to memref +// CHECK: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor to memref +// CHECK: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor to memref +// CHECK: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor to memref +// CHECK: %[[VAL_14:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref +// CHECK: %[[VAL_15:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 0 : index} : tensor to memref +// CHECK: %[[VAL_16:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor to memref +// CHECK: %[[VAL_17:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor to memref +// CHECK: %[[VAL_18:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref // CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_21:.*]] = scf.for %[[VAL_22:.*]] = %[[VAL_19]] to %[[VAL_20]] step %[[VAL_3]] iter_args(%[[VAL_23:.*]] = %[[VAL_8]]) -> (tensor>) { +// CHECK: %[[VAL_21:.*]] = scf.for %[[VAL_22:.*]] = %[[VAL_19]] to %[[VAL_20]] step %[[VAL_3]] iter_args(%[[VAL_23:.*]] = %[[VAL_8]]) -> (tensor) { // CHECK: %[[VAL_24:.*]] = memref.load %[[VAL_10]]{{\[}}%[[VAL_22]]] : memref -// CHECK: %[[VAL_25:.*]], %[[VAL_26:.*]], %[[VAL_27:.*]], %[[VAL_28:.*]] = sparse_tensor.expand %[[VAL_8]] : tensor> to memref, memref, memref +// CHECK: %[[VAL_25:.*]], %[[VAL_26:.*]], %[[VAL_27:.*]], %[[VAL_28:.*]] = sparse_tensor.expand %[[VAL_8]] : tensor to memref, memref, memref // CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_22]]] : memref // CHECK: %[[VAL_30:.*]] = arith.addi %[[VAL_22]], %[[VAL_3]] : index // CHECK: %[[VAL_31:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_30]]] : memref // CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_33:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_34:.*]]:4 = scf.while (%[[VAL_35:.*]] = %[[VAL_29]], %[[VAL_36:.*]] = %[[VAL_32]], %[[VAL_37:.*]] = %[[VAL_28]], %[[VAL_38:.*]] = %[[VAL_23]]) : (index, index, index, tensor>) -> (index, index, index, tensor>) { +// CHECK: %[[VAL_34:.*]]:4 = scf.while (%[[VAL_35:.*]] = %[[VAL_29]], %[[VAL_36:.*]] = %[[VAL_32]], %[[VAL_37:.*]] = %[[VAL_28]], %[[VAL_38:.*]] = %[[VAL_23]]) : (index, index, index, tensor) -> (index, index, index, tensor) { // CHECK: %[[VAL_39:.*]] = arith.cmpi ult, %[[VAL_35]], %[[VAL_31]] : index // CHECK: %[[VAL_40:.*]] = arith.cmpi ult, %[[VAL_36]], %[[VAL_33]] : index // CHECK: %[[VAL_41:.*]] = arith.andi %[[VAL_39]], %[[VAL_40]] : i1 -// CHECK: scf.condition(%[[VAL_41]]) %[[VAL_35]], %[[VAL_36]], %[[VAL_37]], %[[VAL_38]] : index, index, index, tensor> +// CHECK: scf.condition(%[[VAL_41]]) %[[VAL_35]], %[[VAL_36]], %[[VAL_37]], %[[VAL_38]] : index, index, index, tensor // CHECK: } do { -// CHECK: ^bb0(%[[VAL_42:.*]]: index, %[[VAL_43:.*]]: index, %[[VAL_44:.*]]: index, %[[VAL_45:.*]]: tensor>): +// CHECK: ^bb0(%[[VAL_42:.*]]: index, %[[VAL_43:.*]]: index, %[[VAL_44:.*]]: index, %[[VAL_45:.*]]: tensor): // CHECK: %[[VAL_46:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_42]]] : memref // 
CHECK: %[[VAL_47:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_43]]] : memref // CHECK: %[[VAL_48:.*]] = arith.cmpi ult, %[[VAL_47]], %[[VAL_46]] : index @@ -353,7 +353,7 @@ func.func @sumred(%arga: tensor, // CHECK: %[[VAL_50:.*]] = arith.cmpi eq, %[[VAL_46]], %[[VAL_49]] : index // CHECK: %[[VAL_51:.*]] = arith.cmpi eq, %[[VAL_47]], %[[VAL_49]] : index // CHECK: %[[VAL_52:.*]] = arith.andi %[[VAL_50]], %[[VAL_51]] : i1 -// CHECK: %[[VAL_53:.*]]:2 = scf.if %[[VAL_52]] -> (index, tensor>) { +// CHECK: %[[VAL_53:.*]]:2 = scf.if %[[VAL_52]] -> (index, tensor) { // CHECK: %[[VAL_54:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_42]]] : memref // CHECK: %[[VAL_55:.*]] = memref.load %[[VAL_16]]{{\[}}%[[VAL_43]]] : memref // CHECK: %[[VAL_56:.*]] = arith.addi %[[VAL_43]], %[[VAL_3]] : index @@ -377,9 +377,9 @@ func.func @sumred(%arga: tensor, // CHECK: memref.store %[[VAL_65]], %[[VAL_25]]{{\[}}%[[VAL_61]]] : memref // CHECK: scf.yield %[[VAL_70:.*]] : index // CHECK: } -// CHECK: scf.yield %[[VAL_71:.*]], %[[VAL_45]] : index, tensor> +// CHECK: scf.yield %[[VAL_71:.*]], %[[VAL_45]] : index, tensor // CHECK: } else { -// CHECK: scf.yield %[[VAL_44]], %[[VAL_45]] : index, tensor> +// CHECK: scf.yield %[[VAL_44]], %[[VAL_45]] : index, tensor // CHECK: } // CHECK: %[[VAL_72:.*]] = arith.cmpi eq, %[[VAL_46]], %[[VAL_49]] : index // CHECK: %[[VAL_73:.*]] = arith.addi %[[VAL_42]], %[[VAL_3]] : index @@ -387,13 +387,13 @@ func.func @sumred(%arga: tensor, // CHECK: %[[VAL_75:.*]] = arith.cmpi eq, %[[VAL_47]], %[[VAL_49]] : index // CHECK: %[[VAL_76:.*]] = arith.addi %[[VAL_43]], %[[VAL_3]] : index // CHECK: %[[VAL_77:.*]] = arith.select %[[VAL_75]], %[[VAL_76]], %[[VAL_43]] : index -// CHECK: scf.yield %[[VAL_74]], %[[VAL_77]], %[[VAL_78:.*]]#0, %[[VAL_78]]#1 : index, index, index, tensor> +// CHECK: scf.yield %[[VAL_74]], %[[VAL_77]], %[[VAL_78:.*]]#0, %[[VAL_78]]#1 : index, index, index, tensor // CHECK: } -// CHECK: %[[VAL_79:.*]] = sparse_tensor.compress %[[VAL_25]], %[[VAL_26]], %[[VAL_27]], %[[VAL_80:.*]]#2 into %[[VAL_80]]#3{{\[}}%[[VAL_24]]] : memref, memref, memref, tensor> -// CHECK: scf.yield %[[VAL_79]] : tensor> +// CHECK: %[[VAL_79:.*]] = sparse_tensor.compress %[[VAL_25]], %[[VAL_26]], %[[VAL_27]], %[[VAL_80:.*]]#2 into %[[VAL_80]]#3{{\[}}%[[VAL_24]]] : memref, memref, memref, tensor +// CHECK: scf.yield %[[VAL_79]] : tensor // CHECK: } -// CHECK: %[[VAL_81:.*]] = sparse_tensor.load %[[VAL_82:.*]] hasInserts : tensor> -// CHECK: return %[[VAL_81]] : tensor> +// CHECK: %[[VAL_81:.*]] = sparse_tensor.load %[[VAL_82:.*]] hasInserts : tensor +// CHECK: return %[[VAL_81]] : tensor // CHECK: } func.func @matmat(%arga: tensor, %argb: tensor) -> tensor { diff --git a/mlir/test/Dialect/SparseTensor/sparse_pack.mlir b/mlir/test/Dialect/SparseTensor/sparse_pack.mlir index 80cfa3c..7cb6990 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_pack.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_pack.mlir @@ -40,7 +40,7 @@ func.func @sparse_pack(%values: tensor<6xf64>, %pos:tensor<2xindex>, %coordinate // CHECK-SAME: %[[VAL_0:.*]]: memref, // CHECK-SAME: %[[VAL_1:.*]]: memref, // CHECK-SAME: %[[VAL_2:.*]]: memref, -// CHECK-SAME: %[[VAL_3:.*]]: !sparse_tensor.storage_specifier<#sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_3:.*]]: !sparse_tensor.storage_specifier<#sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_4:.*]]: tensor<6xf64>, // CHECK-SAME: %[[VAL_5:.*]]: tensor<2xindex>, // CHECK-SAME: %[[VAL_6:.*]]: tensor<6x2xi32>) -> (tensor<6xf64>, tensor<2xindex>, tensor<6x2xi32>) { diff --git 
a/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir b/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir index 8118eec..7a35e0f 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_parallel_reduce.mlir @@ -15,7 +15,7 @@ doc = "x(i) += A(i,j) * b(j)" } // CHECK-LABEL: func.func @matvec( -// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<16x32xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[TMP_arg0:.*]]: tensor<16x32xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[TMP_arg1:.*]]: tensor<32xf32>, // CHECK-SAME: %[[TMP_arg2:.*]]: tensor<16xf32>) -> tensor<16xf32> { // CHECK-DAG: %[[TMP_c16:.*]] = arith.constant 16 : index diff --git a/mlir/test/Dialect/SparseTensor/sparse_perm.mlir b/mlir/test/Dialect/SparseTensor/sparse_perm.mlir index 83cbfa7..e1e474e 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_perm.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_perm.mlir @@ -14,7 +14,7 @@ } // CHECK-LABEL: func @sparse_static_dims( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20x30xf32, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<10x20x30xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<20x30x10xf32>) -> tensor<20x30x10xf32> { // CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 20 : index @@ -23,7 +23,7 @@ // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index // CHECK: %[[DEMAP:.*]] = sparse_tensor.reinterpret_map %[[VAL_0]] -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[DEMAP]] : tensor<30x10x20xf32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.values %[[DEMAP]] : tensor<30x10x20xf32, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<20x30x10xf32> // CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_9]] : memref<20x30x10xf32>) // CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_5]] to %[[VAL_3]] step %[[VAL_6]] { @@ -53,17 +53,17 @@ func.func @sparse_static_dims(%arga: tensor<10x20x30xf32, #X>, } // CHECK-LABEL: func @sparse_dynamic_dims( -// CHECK-SAME: %[[VAL_0:.*]]: tensor>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor, // CHECK-SAME: %[[VAL_1:.*]]: tensor) -> tensor { // CHECK-DAG: %[[ZERO:.*]] = arith.constant 0.000000e+00 : f32 // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 2 : index // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 1 : index // CHECK: %[[DEMAP:.*]] = sparse_tensor.reinterpret_map %[[VAL_0]] -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.values %[[DEMAP]] : tensor> -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_2]] : tensor> -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_3]] : tensor> -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_4]] : tensor> +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.values %[[DEMAP]] : tensor +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_2]] : tensor +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_3]] : tensor +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_4]] : tensor // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref // CHECK: linalg.fill ins(%[[ZERO]] : f32) outs(%[[VAL_10]] : memref) // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_3]] to %[[VAL_7]] step %[[VAL_4]] { diff --git a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir 
b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir index 1078e5c..3ec2c89 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_perm_lower.mlir @@ -16,16 +16,16 @@ } // CHECK-HIR-LABEL: func @sparse_dynamic_dims( -// CHECK-HIR-SAME: %[[VAL_0:.*]]: tensor>, +// CHECK-HIR-SAME: %[[VAL_0:.*]]: tensor, // CHECK-HIR-SAME: %[[VAL_1:.*]]: tensor) -> tensor { // CHECK-HIR-DAG: %[[VAL_2:.*]] = arith.constant 1 : index // CHECK-HIR-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-HIR-DAG: %[[VAL_4:.*]] = arith.constant 2 : index // CHECK-HIR: %[[DEMAP:. *]] = sparse_tensor.reinterpret_map %[[VAL_0]] -// CHECK-HIR-DAG: %[[VAL_5:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_3]] : tensor> -// CHECK-HIR-DAG: %[[VAL_6:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_2]] : tensor> -// CHECK-HIR-DAG: %[[VAL_7:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_4]] : tensor> -// CHECK-HIR-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[DEMAP]] : tensor> +// CHECK-HIR-DAG: %[[VAL_5:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_3]] : tensor +// CHECK-HIR-DAG: %[[VAL_6:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_2]] : tensor +// CHECK-HIR-DAG: %[[VAL_7:.*]] = sparse_tensor.lvl %[[DEMAP]], %[[VAL_4]] : tensor +// CHECK-HIR-DAG: %[[VAL_8:.*]] = sparse_tensor.values %[[DEMAP]] : tensor // CHECK-HIR-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_1]] : memref // CHECK-HIR: %[[VAL_11:.*]] = tensor.extract %[[VAL_1]][] : tensor // CHECK-HIR: %[[VAL_12:.*]] = scf.for %[[VAL_13:.*]] = %[[VAL_3]] to %[[VAL_5]] step %[[VAL_2]] iter_args(%[[VAL_14:.*]] = %[[VAL_11]]) -> (f32) { diff --git a/mlir/test/Dialect/SparseTensor/sparse_reinterpret_map.mlir b/mlir/test/Dialect/SparseTensor/sparse_reinterpret_map.mlir index c4931c6..c6a3a36 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_reinterpret_map.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_reinterpret_map.mlir @@ -25,11 +25,11 @@ // CHECK-LABEL: func @mul( // CHECK-SAME: %[[A0:.*0]]: tensor<32x32xf32>, // CHECK-SAME: %[[A1:.*1]]: tensor<32x32xf32>, -// CHECK-SAME: %[[A2:.*2]]: tensor<32x32xf32, #sparse_tensor.encoding<{{{.*}}}>>) +// CHECK-SAME: %[[A2:.*2]]: tensor<32x32xf32, #sparse{{[0-9]*}}>) // CHECK: %[[T0:.*]] = sparse_tensor.reinterpret_map %[[A2]] // CHECK: %[[T1:.*]] = linalg.generic {doc = {{.*}} indexing_maps = [#[[$map0]], #[[$map1]], #[[$map2]]], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} // CHECK: %[[T2:.*]] = sparse_tensor.reinterpret_map %[[T1]] -// CHECK: return %[[T2]] : tensor<32x32xf32, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: return %[[T2]] : tensor<32x32xf32, #sparse{{[0-9]*}}> func.func @mul(%arg0: tensor<32x32xf32>, %arg1: tensor<32x32xf32>, %arg2: tensor<32x32xf32, #BSR>) -> tensor<32x32xf32, #BSR> { @@ -56,17 +56,17 @@ func.func @mul(%arg0: tensor<32x32xf32>, }> // CHECK-LABEL: func.func @sparse_foreach_reinterpret_map( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<2x4xf64 -// CHECK: %[[VAL_1:.*]] = bufferization.alloc_tensor() : tensor<1x2x2x2xf64 -// CHECK: %[[VAL_2:.*]] = sparse_tensor.reinterpret_map %[[VAL_0]] : tensor<2x4xf64 +// CHECK-SAME: %[[VAL_0:.*]]: tensor<2x4xf64, #sparse{{[0-9]*}}> +// CHECK: %[[VAL_1:.*]] = bufferization.alloc_tensor() : tensor<1x2x2x2xf64, #sparse{{[0-9]*}}> +// CHECK: %[[VAL_2:.*]] = sparse_tensor.reinterpret_map %[[VAL_0]] : tensor<2x4xf64, #sparse{{[0-9]*}}> // CHECK: %[[VAL_4:.*]] = sparse_tensor.foreach in %[[VAL_2]] init(%[[VAL_1]]) -// CHECK: ^bb0(%[[VAL_5:.*]]: index, %[[VAL_6:.*]]: index, %[[VAL_7:.*]]: index, %[[VAL_8:.*]]: 
index, %[[VAL_9:.*]]: f64, %[[VAL_10:.*]]: tensor<1x2x2x2xf64 -// CHECK: %[[VAL_11:.*]] = sparse_tensor.insert %[[VAL_9]] into %[[VAL_10]]{{\[}}%[[VAL_5]], %[[VAL_6]], %[[VAL_7]], %[[VAL_8]]] : tensor<1x2x2x2xf64 -// CHECK: sparse_tensor.yield %[[VAL_11]] : tensor<1x2x2x2xf64 +// CHECK: ^bb0(%[[VAL_5:.*]]: index, %[[VAL_6:.*]]: index, %[[VAL_7:.*]]: index, %[[VAL_8:.*]]: index, %[[VAL_9:.*]]: f64, %[[VAL_10:.*]]: tensor<1x2x2x2xf64, #sparse{{[0-9]*}}> +// CHECK: %[[VAL_11:.*]] = sparse_tensor.insert %[[VAL_9]] into %[[VAL_10]]{{\[}}%[[VAL_5]], %[[VAL_6]], %[[VAL_7]], %[[VAL_8]]] : tensor<1x2x2x2xf64, #sparse{{[0-9]*}}> +// CHECK: sparse_tensor.yield %[[VAL_11]] : tensor<1x2x2x2xf64, #sparse{{[0-9]*}}> // CHECK: } -// CHECK: %[[VAL_12:.*]] = sparse_tensor.reinterpret_map %[[VAL_4]] : tensor<1x2x2x2xf64 -// CHECK: %[[VAL_13:.*]] = sparse_tensor.load %[[VAL_12]] hasInserts : tensor<2x4xf64 -// CHECK: return %[[VAL_13]] : tensor<2x4xf64 +// CHECK: %[[VAL_12:.*]] = sparse_tensor.reinterpret_map %[[VAL_4]] : tensor<1x2x2x2xf64, #sparse{{[0-9]*}}> +// CHECK: %[[VAL_13:.*]] = sparse_tensor.load %[[VAL_12]] hasInserts : tensor<2x4xf64, #sparse{{[0-9]*}}> +// CHECK: return %[[VAL_13]] : tensor<2x4xf64, #sparse{{[0-9]*}}> // CHECK: } func.func @sparse_foreach_reinterpret_map(%6 : tensor<2x4xf64, #BSR>) -> tensor<2x4xf64, #BSR> { %7 = bufferization.alloc_tensor() : tensor<2x4xf64, #BSR> diff --git a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir index d3d6d8c..eea77c6 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_reshape.mlir @@ -11,9 +11,9 @@ // roundtrip: // // CHECK-ROUND-LABEL: func.func @sparse_expand( -// CHECK-ROUND-SAME: %[[A:.*]]: tensor<100xf64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<10x10xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK-ROUND: %[[E:.*]] = tensor.expand_shape %[[A]] {{\[\[}}0, 1]] : tensor<100xf64, #sparse_tensor.encoding<{{{.*}}}>> into tensor<10x10xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK-ROUND: return %[[E]] : tensor<10x10xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK-ROUND-SAME: %[[A:.*]]: tensor<100xf64, #sparse{{[0-9]*}}>) -> tensor<10x10xf64, #sparse{{[0-9]*}}> +// CHECK-ROUND: %[[E:.*]] = tensor.expand_shape %[[A]] {{\[\[}}0, 1]] : tensor<100xf64, #sparse{{[0-9]*}}> into tensor<10x10xf64, #sparse{{[0-9]*}}> +// CHECK-ROUND: return %[[E]] : tensor<10x10xf64, #sparse{{[0-9]*}}> // // CHECK-LABEL: func.func @sparse_expand( // CHECK-SAME: %[[S:.*0]]: @@ -36,7 +36,7 @@ // CHECK: } // CHECK: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts // CHECK-NOT: sparse_tensor.convert -// CHECK: return %[[NT1]] : tensor<10x10xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: return %[[NT1]] : tensor<10x10xf64, #sparse{{[0-9]*}}> // func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> tensor<10x10xf64, #SparseMatrix> { %0 = tensor.expand_shape %arg0 [[0, 1]] : @@ -48,9 +48,9 @@ func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> tensor<10x10x // roundtrip: // // CHECK-ROUND-LABEL: func.func @sparse_collapse( -// CHECK-ROUND-SAME: %[[A:.*]]: tensor<10x10xf64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<100xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK-ROUND: %[[C:.*]] = tensor.collapse_shape %[[A]] {{\[\[}}0, 1]] : tensor<10x10xf64, #sparse_tensor.encoding<{{{.*}}}>> into tensor<100xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK-ROUND: return %[[C]] : tensor<100xf64, #sparse_tensor.encoding<{{{.*}}}>> +// 
CHECK-ROUND-SAME: %[[A:.*]]: tensor<10x10xf64, #sparse{{[0-9]*}}>) -> tensor<100xf64, #sparse{{[0-9]*}}> +// CHECK-ROUND: %[[C:.*]] = tensor.collapse_shape %[[A]] {{\[\[}}0, 1]] : tensor<10x10xf64, #sparse{{[0-9]*}}> into tensor<100xf64, #sparse{{[0-9]*}}> +// CHECK-ROUND: return %[[C]] : tensor<100xf64, #sparse{{[0-9]*}}> // // CHECK-LABEL: func.func @sparse_collapse( // CHECK-SAME: %[[S:.*0]]: @@ -82,7 +82,7 @@ func.func @sparse_expand(%arg0: tensor<100xf64, #SparseVector>) -> tensor<10x10x // CHECK: } // CHECK: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts // CHECK-NOT: sparse_tensor.convert -// CHECK: return %[[NT1]] : tensor<100xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: return %[[NT1]] : tensor<100xf64, #sparse{{[0-9]*}}> // func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<100xf64, #SparseVector> { %0 = tensor.collapse_shape %arg0 [[0, 1]] : @@ -94,9 +94,9 @@ func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<10 // roundtrip: // // CHECK-ROUND-LABEL: func.func @dynamic_sparse_expand( -// CHECK-ROUND-SAME: %[[A:.*]]: tensor>) -> tensor> -// CHECK-ROUND: %[[E:.*]] = tensor.expand_shape %[[A]] {{\[\[}}0, 1]] : tensor> into tensor> -// CHECK-ROUND: return %[[E]] : tensor> +// CHECK-ROUND-SAME: %[[A:.*]]: tensor) -> tensor +// CHECK-ROUND: %[[E:.*]] = tensor.expand_shape %[[A]] {{\[\[}}0, 1]] : tensor into tensor +// CHECK-ROUND: return %[[E]] : tensor // // CHECK-LABEL: func.func @dynamic_sparse_expand( // CHECK-SAME: %[[S:.*0]]: @@ -125,7 +125,7 @@ func.func @sparse_collapse(%arg0: tensor<10x10xf64, #SparseMatrix>) -> tensor<10 // CHECK: } // CHECK: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts // CHECK-NOT: sparse_tensor.convert -// CHECK: return %[[NT1]] : tensor> +// CHECK: return %[[NT1]] : tensor // func.func @dynamic_sparse_expand(%arg0: tensor) -> tensor { %0 = tensor.expand_shape %arg0 [[0, 1]] : @@ -137,9 +137,9 @@ func.func @dynamic_sparse_expand(%arg0: tensor) -> tensor< // roundtrip: // // CHECK-ROUND-LABEL: func.func @dynamic_sparse_collapse( -// CHECK-ROUND-SAME: %[[A:.*]]: tensor<10x?xf64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor> -// CHECK-ROUND: %[[C:.*]] = tensor.collapse_shape %[[A]] {{\[\[}}0, 1]] : tensor<10x?xf64, #sparse_tensor.encoding<{{{.*}}}>> into tensor> -// CHECK-ROUND: return %[[C]] : tensor> +// CHECK-ROUND-SAME: %[[A:.*]]: tensor<10x?xf64, #sparse{{[0-9]*}}>) -> tensor +// CHECK-ROUND: %[[C:.*]] = tensor.collapse_shape %[[A]] {{\[\[}}0, 1]] : tensor<10x?xf64, #sparse{{[0-9]*}}> into tensor +// CHECK-ROUND: return %[[C]] : tensor // // CHECK-LABEL: func.func @dynamic_sparse_collapse( // CHECK-SAME: %[[S:.*0]]: @@ -176,7 +176,7 @@ func.func @dynamic_sparse_expand(%arg0: tensor) -> tensor< // CHECK: } // CHECK: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts // CHECK-NOT: sparse_tensor.convert -// CHECK: return %[[NT1]] : tensor> +// CHECK: return %[[NT1]] : tensor // func.func @dynamic_sparse_collapse(%arg0: tensor<10x?xf64, #SparseMatrix>) -> tensor { %0 = tensor.collapse_shape %arg0 [[0, 1]] : diff --git a/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir b/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir index 4ca577b..666882e 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_scalars.mlir @@ -19,7 +19,7 @@ } // CHECK-LABEL: func @mul( -// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>>, +// CHECK-SAME: %[[VAL_0:.*0]]: tensor<32x16xf32, #sparse{{[0-9]*}}>, // CHECK-SAME: 
%[[VAL_1:.*1]]: tensor, // CHECK-SAME: %[[VAL_2:.*2]]: f32, // CHECK-SAME: %[[VAL_3:.*3]]: f32, @@ -28,11 +28,11 @@ // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_8:.*]] = arith.addf %[[VAL_2]], %[[VAL_3]] : f32 -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref -// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref -// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse_tensor.encoding<{{.*}}>> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<32x16xf32, #sparse{{[0-9]*}}> to memref // CHECK-DAG: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_1]] : memref // CHECK-DAG: %[[VAL_15:.*]] = bufferization.to_memref %[[VAL_4]] : memref<32x16xf32> // CHECK-DAG: %[[VAL_16:.*]] = memref.load %[[VAL_14]][] : memref diff --git a/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir b/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir index 1d95fe8..6fc2db4 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_sddmm.mlir @@ -55,7 +55,7 @@ func.func @fold_yield_direct_zero() -> tensor<32xf64> { } // CHECK-LABEL: func.func @sampled_dd_unfused( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xf64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<8x8xf64>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<8x8xf64>) -> tensor<8x8xf64> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 8 : index @@ -66,11 +66,11 @@ func.func @fold_yield_direct_zero() -> tensor<32xf64> { // CHECK-DAG: %[[VAL_8:.*]] = bufferization.alloc_tensor() copy(%[[VAL_6]]) {memory_space = 0 : i64} : tensor<8x8xf64> // CHECK-DAG: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<8x8xf64> // CHECK-DAG: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<8x8xf64> -// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{.*}}>> to memref -// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{.*}}>> to memref -// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{.*}}>> to memref -// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, 
#sparse_tensor.encoding<{{.*}}>> to memref -// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xf64, #sparse_tensor.encoding<{{.*}}>> to memref +// CHECK-DAG: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_16:.*]] = bufferization.to_memref %[[VAL_8]] : memref<8x8xf64> // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_5]]] : memref @@ -121,9 +121,9 @@ func.func @sampled_dd_unfused(%args: tensor<8x8xf64, #SM>, } // CHECK-LABEL: func.func @sparse_sampled_dd_unfused( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xf64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<8x8xf64>, -// CHECK-SAME: %[[VAL_2:.*]]: tensor<8x8xf64>) -> tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> { +// CHECK-SAME: %[[VAL_2:.*]]: tensor<8x8xf64>) -> tensor<8x8xf64, #sparse{{[0-9]*}}> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 8 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index @@ -131,19 +131,19 @@ func.func @sampled_dd_unfused(%args: tensor<8x8xf64, #SM>, // CHECK-DAG: %[[VAL_7:.*]] = arith.constant true // CHECK-DAG: %[[VAL_8:.*]] = arith.constant dense<0.000000e+00> : tensor<8x8xf64> // CHECK-DAG: %[[VAL_9:.*]] = bufferization.alloc_tensor() copy(%[[VAL_8]]) : tensor<8x8xf64> -// CHECK-DAG: %[[VAL_10:.*]] = tensor.empty() : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK-DAG: %[[VAL_10:.*]] = tensor.empty() : tensor<8x8xf64, #sparse{{[0-9]*}}> // CHECK-DAG: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_1]] : memref<8x8xf64> // CHECK-DAG: %[[VAL_12:.*]] = bufferization.to_memref %[[VAL_2]] : memref<8x8xf64> -// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_16:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_14:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_15:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: 
%[[VAL_16:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_19:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_5]]] : memref -// CHECK: %[[VAL_20:.*]] = scf.for %[[VAL_21:.*]] = %[[VAL_18]] to %[[VAL_19]] step %[[VAL_5]] iter_args(%[[VAL_22:.*]] = %[[VAL_10]]) -> (tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK: %[[VAL_20:.*]] = scf.for %[[VAL_21:.*]] = %[[VAL_18]] to %[[VAL_19]] step %[[VAL_5]] iter_args(%[[VAL_22:.*]] = %[[VAL_10]]) -> (tensor<8x8xf64, #sparse{{[0-9]*}}>) { // CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_14]]{{\[}}%[[VAL_21]]] : memref -// CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]], %[[VAL_26:.*]], %[[VAL_27:.*]] = sparse_tensor.expand %[[VAL_10]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref, memref, memref +// CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]], %[[VAL_26:.*]], %[[VAL_27:.*]] = sparse_tensor.expand %[[VAL_10]] : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref, memref, memref // CHECK: %[[VAL_28:.*]] = scf.for %[[VAL_29:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] iter_args(%[[VAL_30:.*]] = %[[VAL_27]]) -> (index) { // CHECK: %[[VAL_31:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_23]], %[[VAL_29]]] : memref<8x8xf64> // CHECK: %[[VAL_32:.*]] = memref.load %[[VAL_15]]{{\[}}%[[VAL_21]]] : memref @@ -172,11 +172,11 @@ func.func @sampled_dd_unfused(%args: tensor<8x8xf64, #SM>, // CHECK: } // CHECK: scf.yield %[[VAL_35]] : index // CHECK: } -// CHECK: %[[VAL_49:.*]] = sparse_tensor.compress %[[VAL_24]], %[[VAL_25]], %[[VAL_26]], %[[VAL_28]] into %[[VAL_22]]{{\[}}%[[VAL_23]]] : memref, memref, memref, tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: scf.yield %[[VAL_49]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_49:.*]] = sparse_tensor.compress %[[VAL_24]], %[[VAL_25]], %[[VAL_26]], %[[VAL_28]] into %[[VAL_22]]{{\[}}%[[VAL_23]]] : memref, memref, memref, tensor<8x8xf64, #sparse{{[0-9]*}}> +// CHECK: scf.yield %[[VAL_49]] : tensor<8x8xf64, #sparse{{[0-9]*}}> // CHECK: } -// CHECK: %[[VAL_50:.*]] = sparse_tensor.load %[[VAL_20]] hasInserts : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: return %[[VAL_50]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_50:.*]] = sparse_tensor.load %[[VAL_20]] hasInserts : tensor<8x8xf64, #sparse{{[0-9]*}}> +// CHECK: return %[[VAL_50]] : tensor<8x8xf64, #sparse{{[0-9]*}}> // CHECK: } func.func @sparse_sampled_dd_unfused(%args: tensor<8x8xf64, #SM>, %arga: tensor<8x8xf64>, diff --git a/mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir b/mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir index 79bedcf..5fa332f 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_sddmm_org.mlir @@ -21,27 +21,27 @@ } // CHECK-LABEL: func.func @sparse_sampled_dd_unfused( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>, +// CHECK-SAME: %[[VAL_0:.*]]: tensor<8x8xf64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_1:.*]]: tensor<8x8xf64>, -// CHECK-SAME: %[[VAL_2:.*]]: tensor<8x8xf64>) -> tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> { +// CHECK-SAME: %[[VAL_2:.*]]: tensor<8x8xf64>) -> tensor<8x8xf64, #sparse{{[0-9]*}}> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 8 : index // CHECK-DAG: %[[VAL_4:.*]] = 
arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant false // CHECK-DAG: %[[VAL_7:.*]] = arith.constant true -// CHECK: %[[VAL_8:.*]] = tensor.empty() : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_8:.*]] = tensor.empty() : tensor<8x8xf64, #sparse{{[0-9]*}}> // CHECK: %[[VAL_9:.*]] = bufferization.to_memref %[[VAL_1]] : memref<8x8xf64> // CHECK: %[[VAL_10:.*]] = bufferization.to_memref %[[VAL_2]] : memref<8x8xf64> -// CHECK: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_14:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_15:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_13:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_14:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_15:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_4]]] : memref // CHECK: %[[VAL_17:.*]] = memref.load %[[VAL_11]]{{\[}}%[[VAL_5]]] : memref -// CHECK: %[[VAL_18:.*]] = scf.for %[[VAL_19:.*]] = %[[VAL_16]] to %[[VAL_17]] step %[[VAL_5]] iter_args(%[[VAL_20:.*]] = %[[VAL_8]]) -> (tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK: %[[VAL_18:.*]] = scf.for %[[VAL_19:.*]] = %[[VAL_16]] to %[[VAL_17]] step %[[VAL_5]] iter_args(%[[VAL_20:.*]] = %[[VAL_8]]) -> (tensor<8x8xf64, #sparse{{[0-9]*}}>) { // CHECK: %[[VAL_21:.*]] = memref.load %[[VAL_12]]{{\[}}%[[VAL_19]]] : memref -// CHECK: %[[VAL_22:.*]], %[[VAL_23:.*]], %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.expand %[[VAL_8]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref, memref, memref +// CHECK: %[[VAL_22:.*]], %[[VAL_23:.*]], %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.expand %[[VAL_8]] : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref, memref, memref // CHECK: %[[VAL_26:.*]] = scf.for %[[VAL_27:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] iter_args(%[[VAL_28:.*]] = %[[VAL_25]]) -> (index) { // CHECK: %[[VAL_29:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_21]], %[[VAL_27]]] : memref<8x8xf64> // CHECK: %[[VAL_30:.*]] = memref.load %[[VAL_13]]{{\[}}%[[VAL_19]]] : memref @@ -70,11 +70,11 @@ // CHECK: } {"Emitted from" = "linalg.generic"} // CHECK: scf.yield %[[VAL_48:.*]] : index // CHECK: } {"Emitted from" = "linalg.generic"} -// CHECK: %[[VAL_49:.*]] = sparse_tensor.compress %[[VAL_22]], %[[VAL_23]], %[[VAL_24]], %[[VAL_50:.*]] into %[[VAL_20]]{{\[}}%[[VAL_21]]] : memref, memref, memref, tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: scf.yield %[[VAL_49]] : 
tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_49:.*]] = sparse_tensor.compress %[[VAL_22]], %[[VAL_23]], %[[VAL_24]], %[[VAL_50:.*]] into %[[VAL_20]]{{\[}}%[[VAL_21]]] : memref, memref, memref, tensor<8x8xf64, #sparse{{[0-9]*}}> +// CHECK: scf.yield %[[VAL_49]] : tensor<8x8xf64, #sparse{{[0-9]*}}> // CHECK: } {"Emitted from" = "linalg.generic"} -// CHECK: %[[VAL_51:.*]] = sparse_tensor.load %[[VAL_52:.*]] hasInserts : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: return %[[VAL_51]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_51:.*]] = sparse_tensor.load %[[VAL_52:.*]] hasInserts : tensor<8x8xf64, #sparse{{[0-9]*}}> +// CHECK: return %[[VAL_51]] : tensor<8x8xf64, #sparse{{[0-9]*}}> // CHECK: } func.func @sparse_sampled_dd_unfused(%args: tensor<8x8xf64, #SM>, %arga: tensor<8x8xf64>, diff --git a/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir b/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir index 339d65c..47d24eb 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_tensor_reshape.mlir @@ -35,7 +35,7 @@ // CHECK: scf.yield %[[RET_1]] // CHECK: } // CHECK: %[[NT1:.*]] = sparse_tensor.load %[[RET]] hasInserts -// CHECK: return %[[NT1]] : tensor<10x10xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: return %[[NT1]] : tensor<10x10xf64, #sparse{{[0-9]*}}> // func.func @sparse_reshape(%arg0: tensor<4x25xf64, #SparseMatrix>) -> tensor<10x10xf64, #SparseMatrix> { %shape = arith.constant dense <[ 10, 10 ]> : tensor<2xi32> diff --git a/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir b/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir index fd41fed..80a989d 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_transpose.mlir @@ -16,34 +16,34 @@ // TODO: improve auto-conversion followed by yield // CHECK-LABEL: func.func @sparse_transpose_auto( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<3x4xf64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>> { +// CHECK-SAME: %[[VAL_0:.*]]: tensor<3x4xf64, #sparse{{[0-9]*}}>) -> tensor<4x3xf64, #sparse{{[0-9]*}}> { // CHECK-DAG: %[[VAL_1:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 1 : index -// CHECK-DAG: %[[VAL_3:.*]] = tensor.empty() : tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.convert %[[VAL_0]] : tensor<3x4xf64, #sparse_tensor.encoding<{{{.*}}}>> to tensor<3x4xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: %[[DEMAP:.*]] = sparse_tensor.reinterpret_map %[[VAL_4]] : tensor<3x4xf64, #sparse_tensor.encoding<{{{.*}}}>> to tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[DEMAP]] {level = 0 : index} : tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[DEMAP]] {level = 0 : index} : tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[DEMAP]] {level = 1 : index} : tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[DEMAP]] {level = 1 : index} : tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[DEMAP]] : tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_3:.*]] = tensor.empty() : 
tensor<4x3xf64, #sparse{{[0-9]*}}> +// CHECK-DAG: %[[VAL_4:.*]] = sparse_tensor.convert %[[VAL_0]] : tensor<3x4xf64, #sparse{{[0-9]*}}> to tensor<3x4xf64, #sparse{{[0-9]*}}> +// CHECK: %[[DEMAP:.*]] = sparse_tensor.reinterpret_map %[[VAL_4]] : tensor<3x4xf64, #sparse{{[0-9]*}}> to tensor<4x3xf64, #sparse{{[0-9]*}}> +// CHECK-DAG: %[[VAL_5:.*]] = sparse_tensor.positions %[[DEMAP]] {level = 0 : index} : tensor<4x3xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_6:.*]] = sparse_tensor.coordinates %[[DEMAP]] {level = 0 : index} : tensor<4x3xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_7:.*]] = sparse_tensor.positions %[[DEMAP]] {level = 1 : index} : tensor<4x3xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.coordinates %[[DEMAP]] {level = 1 : index} : tensor<4x3xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.values %[[DEMAP]] : tensor<4x3xf64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_1]]] : memref // CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref -// CHECK: %[[VAL_12:.*]] = scf.for %[[VAL_13:.*]] = %[[VAL_10]] to %[[VAL_11]] step %[[VAL_2]] iter_args(%[[VAL_14:.*]] = %[[VAL_3]]) -> (tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK: %[[VAL_12:.*]] = scf.for %[[VAL_13:.*]] = %[[VAL_10]] to %[[VAL_11]] step %[[VAL_2]] iter_args(%[[VAL_14:.*]] = %[[VAL_3]]) -> (tensor<4x3xf64, #sparse{{[0-9]*}}>) { // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_13]]] : memref // CHECK: %[[VAL_16:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_13]]] : memref // CHECK: %[[VAL_17:.*]] = arith.addi %[[VAL_13]], %[[VAL_2]] : index // CHECK: %[[VAL_18:.*]] = memref.load %[[VAL_7]]{{\[}}%[[VAL_17]]] : memref -// CHECK: %[[VAL_19:.*]] = scf.for %[[VAL_20:.*]] = %[[VAL_16]] to %[[VAL_18]] step %[[VAL_2]] iter_args(%[[VAL_21:.*]] = %[[VAL_14]]) -> (tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>>) { +// CHECK: %[[VAL_19:.*]] = scf.for %[[VAL_20:.*]] = %[[VAL_16]] to %[[VAL_18]] step %[[VAL_2]] iter_args(%[[VAL_21:.*]] = %[[VAL_14]]) -> (tensor<4x3xf64, #sparse{{[0-9]*}}>) { // CHECK: %[[VAL_22:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_20]]] : memref // CHECK: %[[VAL_23:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_20]]] : memref -// CHECK: %[[VAL_24:.*]] = sparse_tensor.insert %[[VAL_23]] into %[[VAL_21]]{{\[}}%[[VAL_15]], %[[VAL_22]]] : tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: scf.yield %[[VAL_24]] : tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_24:.*]] = sparse_tensor.insert %[[VAL_23]] into %[[VAL_21]]{{\[}}%[[VAL_15]], %[[VAL_22]]] : tensor<4x3xf64, #sparse{{[0-9]*}}> +// CHECK: scf.yield %[[VAL_24]] : tensor<4x3xf64, #sparse{{[0-9]*}}> // CHECK: } -// CHECK: scf.yield %[[VAL_25:.*]] : tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: scf.yield %[[VAL_25:.*]] : tensor<4x3xf64, #sparse{{[0-9]*}}> // CHECK: } -// CHECK: %[[VAL_26:.*]] = sparse_tensor.load %[[VAL_27:.*]] hasInserts : tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: return %[[VAL_26]] : tensor<4x3xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_26:.*]] = sparse_tensor.load %[[VAL_27:.*]] hasInserts : tensor<4x3xf64, #sparse{{[0-9]*}}> +// CHECK: return %[[VAL_26]] : tensor<4x3xf64, #sparse{{[0-9]*}}> // CHECK: } func.func @sparse_transpose_auto(%arga: tensor<3x4xf64, #DCSR>) -> tensor<4x3xf64, #DCSR> { diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir 
b/mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir index 269a492..e3508f1 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_vector_chain.mlir @@ -18,19 +18,19 @@ // // CHECK-LABEL: func.func @sparse_matrix_sum( // CHECK-SAME: %[[VAL_0:.*]]: tensor, -// CHECK-SAME: %[[VAL_1:.*]]: tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>>, -// CHECK-SAME: %[[VAL_2:.*]]: tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor { +// CHECK-SAME: %[[VAL_1:.*]]: tensor<64x32xf64, #sparse{{[0-9]*}}>, +// CHECK-SAME: %[[VAL_2:.*]]: tensor<64x32xf64, #sparse{{[0-9]*}}>) -> tensor { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 8 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant dense<0.000000e+00> : vector<8xf64> // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 64 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_7:.*]] = arith.constant 1 : index -// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 1 : index} : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 1 : index} : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<64x32xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 1 : index} : tensor<64x32xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_1]] {level = 1 : index} : tensor<64x32xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_1]] : tensor<64x32xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_11:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 1 : index} : tensor<64x32xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_12:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 1 : index} : tensor<64x32xf64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_13:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<64x32xf64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_14:.*]] = bufferization.to_memref %[[VAL_0]] : memref // CHECK: %[[VAL_15:.*]] = memref.load %[[VAL_14]][] : memref // CHECK: %[[VAL_16:.*]] = scf.for %[[VAL_17:.*]] = %[[VAL_6]] to %[[VAL_5]] step %[[VAL_7]] iter_args(%[[VAL_18:.*]] = %[[VAL_15]]) -> (f64) { diff --git a/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir b/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir index 9bc24fd..c9d4329 100644 --- a/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir +++ b/mlir/test/Dialect/SparseTensor/sparse_vector_index.mlir @@ -17,7 +17,7 @@ } // CHECK-LABEL: func.func @sparse_index_1d_conj( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<8xi64> { +// CHECK-SAME: %[[VAL_0:.*]]: tensor<8xi64, #sparse{{[0-9]*}}>) -> tensor<8xi64> { // CHECK-DAG: %[[VAL_1:.*]] = arith.constant 8 : index // CHECK-DAG: %[[VAL_2:.*]] = arith.constant dense<0> : vector<8xi64> // CHECK-DAG: %[[VAL_3:.*]] = arith.constant dense<0> : vector<8xindex> 
@@ -25,9 +25,9 @@ // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_7:.*]] = tensor.empty() : tensor<8xi64> -// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8xi64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_7]] : memref<8xi64> // CHECK: linalg.fill ins(%[[VAL_4]] : i64) outs(%[[VAL_11]] : memref<8xi64>) // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_5]]] : memref @@ -59,7 +59,7 @@ func.func @sparse_index_1d_conj(%arga: tensor<8xi64, #SparseVector>) -> tensor<8 } // CHECK-LABEL: func.func @sparse_index_1d_disj( -// CHECK-SAME: %[[VAL_0:.*]]: tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>>) -> tensor<8xi64> { +// CHECK-SAME: %[[VAL_0:.*]]: tensor<8xi64, #sparse{{[0-9]*}}>) -> tensor<8xi64> { // CHECK-DAG: %[[VAL_1:.*]] = arith.constant 8 : index // CHECK-DAG: %[[VAL_2:.*]] = arith.constant dense<[0, 1, 2, 3, 4, 5, 6, 7]> : vector<8xindex> // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : i64 @@ -67,9 +67,9 @@ func.func @sparse_index_1d_conj(%arga: tensor<8xi64, #SparseVector>) -> tensor<8 // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_6:.*]] = arith.constant true // CHECK-DAG: %[[VAL_7:.*]] = tensor.empty() : tensor<8xi64> -// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8xi64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_0]] {level = 0 : index} : tensor<8xi64, #sparse{{[0-9]*}}> to memref +// CHECK: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_0]] : tensor<8xi64, #sparse{{[0-9]*}}> to memref // CHECK: %[[VAL_11:.*]] = bufferization.to_memref %[[VAL_7]] : memref<8xi64> // CHECK: linalg.fill ins(%[[VAL_3]] : i64) outs(%[[VAL_11]] : memref<8xi64>) // CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref diff --git a/mlir/test/Dialect/SparseTensor/spy_sddmm.mlir b/mlir/test/Dialect/SparseTensor/spy_sddmm.mlir index a421199..287b62e 100644 --- a/mlir/test/Dialect/SparseTensor/spy_sddmm.mlir +++ b/mlir/test/Dialect/SparseTensor/spy_sddmm.mlir @@ -20,15 +20,15 @@ // CHECK-LABEL: func.func @sparse_sampled_dd( // CHECK-SAME: %[[VAL_0:.*0]]: tensor<8x8xf64>, // CHECK-SAME: %[[VAL_1:.*1]]: tensor<8x8xf64>, -// CHECK-SAME: %[[VAL_2:.*2]]: tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> { +// CHECK-SAME: %[[VAL_2:.*2]]: tensor<8x8xf64, #sparse{{[0-9]*}}>) -> 
tensor<8x8xf64, #sparse{{[0-9]*}}> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 8 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-DAG: %[[VAL_5:.*]] = arith.constant 1 : index // CHECK-DAG: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref<8x8xf64> // CHECK-DAG: %[[VAL_7:.*]] = bufferization.to_memref %[[VAL_1]] : memref<8x8xf64> -// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 1 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 1 : index} : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref -// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> to memref +// CHECK-DAG: %[[VAL_8:.*]] = sparse_tensor.positions %[[VAL_2]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_9:.*]] = sparse_tensor.coordinates %[[VAL_2]] {level = 1 : index} : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref +// CHECK-DAG: %[[VAL_10:.*]] = sparse_tensor.values %[[VAL_2]] : tensor<8x8xf64, #sparse{{[0-9]*}}> to memref // CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_4]] to %[[VAL_3]] step %[[VAL_5]] { // CHECK: %[[VAL_13:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_11]]] : memref @@ -45,8 +45,8 @@ // CHECK: } {"Emitted from" = "linalg.generic"} // CHECK: } {"Emitted from" = "linalg.generic"} // CHECK: } {"Emitted from" = "linalg.generic"} -// CHECK: %[[VAL_23:.*]] = sparse_tensor.load %[[VAL_2]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> -// CHECK: return %[[VAL_23]] : tensor<8x8xf64, #sparse_tensor.encoding<{{{.*}}}>> +// CHECK: %[[VAL_23:.*]] = sparse_tensor.load %[[VAL_2]] : tensor<8x8xf64, #sparse{{[0-9]*}}> +// CHECK: return %[[VAL_23]] : tensor<8x8xf64, #sparse{{[0-9]*}}> // CHECK: } func.func @sparse_sampled_dd(%argA: tensor<8x8xf64>, %argB: tensor<8x8xf64>, diff --git a/mlir/test/Dialect/SparseTensor/unused-tensor.mlir b/mlir/test/Dialect/SparseTensor/unused-tensor.mlir index 330bb9f..f85acb9 100644 --- a/mlir/test/Dialect/SparseTensor/unused-tensor.mlir +++ b/mlir/test/Dialect/SparseTensor/unused-tensor.mlir @@ -21,7 +21,7 @@ // CHECK-LABEL: func.func @b_ununsed( // CHECK-SAME: %[[VAL_0:.*]]: tensor<2x4xf64>, -// CHECK-SAME: %[[VAL_1:.*]]: tensor<8x4xf64, #sparse_tensor.encoding<{{.*}}>>, +// CHECK-SAME: %[[VAL_1:.*]]: tensor<8x4xf64, #sparse{{[0-9]*}}>, // CHECK-SAME: %[[VAL_2:.*]]: tensor<2x4xf64>) -> tensor<2x4xf64> { // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 8 : index // CHECK-DAG: %[[VAL_4:.*]] = arith.constant 2 : index diff --git a/mlir/test/Dialect/SparseTensor/vectorize_reduction.mlir b/mlir/test/Dialect/SparseTensor/vectorize_reduction.mlir index ef3c0f8..d2b5132 100644 --- a/mlir/test/Dialect/SparseTensor/vectorize_reduction.mlir +++ b/mlir/test/Dialect/SparseTensor/vectorize_reduction.mlir @@ -9,13 +9,13 @@ // CHECK-ON-LABEL: func.func @sparse_reduction_ori( // CHECK-ON-SAME: %[[VAL_0:.*]]: tensor, -// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor>) -> tensor { +// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor) -> tensor { // CHECK-ON-DAG: %[[VAL_2:.*]] = arith.constant 8 : index // CHECK-ON-DAG: %[[VAL_3:.*]] = arith.constant dense<0> : vector<8xi13> // CHECK-ON-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-ON-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor> to memref 
-// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> to memref +// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref +// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref // CHECK-ON: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref // CHECK-ON: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref // CHECK-ON: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref @@ -37,11 +37,11 @@ // // CHECK-OFF-LABEL: func.func @sparse_reduction_ori( // CHECK-OFF-SAME: %[[VAL_0:.*]]: tensor, -// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor>) -> tensor { +// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor) -> tensor { // CHECK-OFF-DAG: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK-OFF-DAG: %[[VAL_3:.*]] = arith.constant 1 : index -// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor> to memref -// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> to memref +// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref +// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref // CHECK-OFF: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref // CHECK-OFF: %[[VAL_7:.*]] = memref.load %[[VAL_6]][] : memref // CHECK-OFF: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref @@ -86,13 +86,13 @@ func.func @sparse_reduction_ori(%argx: tensor, // CHECK-ON-LABEL: func.func @sparse_reduction_ori_accumulator_on_rhs( // CHECK-ON-SAME: %[[VAL_0:.*]]: tensor, -// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor>) -> tensor { +// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor) -> tensor { // CHECK-ON-DAG: %[[VAL_2:.*]] = arith.constant 8 : index // CHECK-ON-DAG: %[[VAL_3:.*]] = arith.constant dense<0> : vector<8xi13> // CHECK-ON-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-ON-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor> to memref -// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> to memref +// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref +// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref // CHECK-ON: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref // CHECK-ON: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref // CHECK-ON: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref @@ -114,11 +114,11 @@ func.func @sparse_reduction_ori(%argx: tensor, // // CHECK-OFF-LABEL: func.func @sparse_reduction_ori_accumulator_on_rhs( // CHECK-OFF-SAME: %[[VAL_0:.*]]: tensor, -// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor>) -> tensor { +// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor) -> tensor { // CHECK-OFF-DAG: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK-OFF-DAG: %[[VAL_3:.*]] = arith.constant 1 : index -// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor> to memref -// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> to memref +// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref +// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref // CHECK-OFF: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref // CHECK-OFF: %[[VAL_7:.*]] = memref.load %[[VAL_6]][] : memref // CHECK-OFF: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref @@ 
-161,13 +161,13 @@ func.func @sparse_reduction_ori_accumulator_on_rhs(%argx: tensor, // // CHECK-ON-LABEL: func.func @sparse_reduction_subi( // CHECK-ON-SAME: %[[VAL_0:.*]]: tensor, -// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor>) -> tensor { +// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor) -> tensor { // CHECK-ON-DAG: %[[VAL_2:.*]] = arith.constant 8 : index // CHECK-ON-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK-ON-DAG: %[[VAL_4:.*]] = arith.constant dense<0> : vector<8xi32> // CHECK-ON-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor> to memref -// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> to memref +// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref +// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref // CHECK-ON: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref // CHECK-ON: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref // CHECK-ON: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref @@ -189,11 +189,11 @@ func.func @sparse_reduction_ori_accumulator_on_rhs(%argx: tensor, // // CHECK-OFF-LABEL: func.func @sparse_reduction_subi( // CHECK-OFF-SAME: %[[VAL_0:.*]]: tensor, -// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor>) -> tensor { +// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor) -> tensor { // CHECK-OFF-DAG: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK-OFF-DAG: %[[VAL_3:.*]] = arith.constant 1 : index -// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor> to memref -// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> to memref +// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref +// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref // CHECK-OFF: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref // CHECK-OFF: %[[VAL_7:.*]] = memref.load %[[VAL_6]][] : memref // CHECK-OFF: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref @@ -236,13 +236,13 @@ func.func @sparse_reduction_subi(%argx: tensor, // CHECK-ON-LABEL: func.func @sparse_reduction_xor( // CHECK-ON-SAME: %[[VAL_0:.*]]: tensor, -// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor>) -> tensor { +// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor) -> tensor { // CHECK-ON-DAG: %[[VAL_2:.*]] = arith.constant 8 : index // CHECK-ON-DAG: %[[VAL_3:.*]] = arith.constant dense<0> : vector<8xi32> // CHECK-ON-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-ON-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor> to memref -// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> to memref +// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref +// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref // CHECK-ON: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref // CHECK-ON: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref // CHECK-ON: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref @@ -264,11 +264,11 @@ func.func @sparse_reduction_subi(%argx: tensor, // // CHECK-OFF-LABEL: func.func @sparse_reduction_xor( // CHECK-OFF-SAME: %[[VAL_0:.*]]: tensor, -// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor>) -> tensor { +// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor) -> tensor { // CHECK-OFF-DAG: %[[VAL_2:.*]] = 
arith.constant 0 : index // CHECK-OFF-DAG: %[[VAL_3:.*]] = arith.constant 1 : index -// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor> to memref -// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> to memref +// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref +// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref // CHECK-OFF: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref // CHECK-OFF: %[[VAL_7:.*]] = memref.load %[[VAL_6]][] : memref // CHECK-OFF: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref @@ -312,13 +312,13 @@ func.func @sparse_reduction_xor(%argx: tensor, // CHECK-ON-LABEL: func.func @sparse_reduction_addi( // CHECK-ON-SAME: %[[VAL_0:.*]]: tensor, -// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor>) -> tensor { +// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor) -> tensor { // CHECK-ON-DAG: %[[VAL_2:.*]] = arith.constant 8 : index // CHECK-ON-DAG: %[[VAL_3:.*]] = arith.constant dense<0> : vector<8xi32> // CHECK-ON-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-ON-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor> to memref -// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> to memref +// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref +// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref // CHECK-ON: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref // CHECK-ON: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref // CHECK-ON: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref @@ -340,11 +340,11 @@ func.func @sparse_reduction_xor(%argx: tensor, // // CHECK-OFF-LABEL: func.func @sparse_reduction_addi( // CHECK-OFF-SAME: %[[VAL_0:.*]]: tensor, -// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor>) -> tensor { +// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor) -> tensor { // CHECK-OFF-DAG: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK-OFF-DAG: %[[VAL_3:.*]] = arith.constant 1 : index -// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor> to memref -// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> to memref +// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref +// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref // CHECK-OFF: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref // CHECK-OFF: %[[VAL_7:.*]] = memref.load %[[VAL_6]][] : memref // CHECK-OFF: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref @@ -388,13 +388,13 @@ func.func @sparse_reduction_addi(%argx: tensor, // CHECK-ON-LABEL: func.func @sparse_reduction_subf( // CHECK-ON-SAME: %[[VAL_0:.*]]: tensor, -// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor>) -> tensor { +// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor) -> tensor { // CHECK-ON-DAG: %[[VAL_2:.*]] = arith.constant 8 : index // CHECK-ON-DAG: %[[VAL_3:.*]] = arith.constant dense<0.000000e+00> : vector<8xf32> // CHECK-ON-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-ON-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor> to memref -// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> to memref +// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions 
%[[VAL_1]] {level = 0 : index} : tensor to memref +// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref // CHECK-ON: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref // CHECK-ON: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref // CHECK-ON: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref @@ -416,11 +416,11 @@ func.func @sparse_reduction_addi(%argx: tensor, // // CHECK-OFF-LABEL: func.func @sparse_reduction_subf( // CHECK-OFF-SAME: %[[VAL_0:.*]]: tensor, -// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor>) -> tensor { +// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor) -> tensor { // CHECK-OFF-DAG: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK-OFF-DAG: %[[VAL_3:.*]] = arith.constant 1 : index -// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor> to memref -// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> to memref +// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref +// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref // CHECK-OFF: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref // CHECK-OFF: %[[VAL_7:.*]] = memref.load %[[VAL_6]][] : memref // CHECK-OFF: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref @@ -464,13 +464,13 @@ func.func @sparse_reduction_subf(%argx: tensor, // CHECK-ON-LABEL: func.func @sparse_reduction_addf( // CHECK-ON-SAME: %[[VAL_0:.*]]: tensor, -// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor>) -> tensor { +// CHECK-ON-SAME: %[[VAL_1:.*]]: tensor) -> tensor { // CHECK-ON-DAG: %[[VAL_2:.*]] = arith.constant 8 : index // CHECK-ON-DAG: %[[VAL_3:.*]] = arith.constant dense<0.000000e+00> : vector<8xf32> // CHECK-ON-DAG: %[[VAL_4:.*]] = arith.constant 0 : index // CHECK-ON-DAG: %[[VAL_5:.*]] = arith.constant 1 : index -// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor> to memref -// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> to memref +// CHECK-ON: %[[VAL_6:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref +// CHECK-ON: %[[VAL_7:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref // CHECK-ON: %[[VAL_8:.*]] = bufferization.to_memref %[[VAL_0]] : memref // CHECK-ON: %[[VAL_9:.*]] = memref.load %[[VAL_8]][] : memref // CHECK-ON: %[[VAL_10:.*]] = memref.load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref @@ -492,11 +492,11 @@ func.func @sparse_reduction_subf(%argx: tensor, // // CHECK-OFF-LABEL: func.func @sparse_reduction_addf( // CHECK-OFF-SAME: %[[VAL_0:.*]]: tensor, -// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor>) -> tensor { +// CHECK-OFF-SAME: %[[VAL_1:.*]]: tensor) -> tensor { // CHECK-OFF-DAG: %[[VAL_2:.*]] = arith.constant 0 : index // CHECK-OFF-DAG: %[[VAL_3:.*]] = arith.constant 1 : index -// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor> to memref -// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor> to memref +// CHECK-OFF: %[[VAL_4:.*]] = sparse_tensor.positions %[[VAL_1]] {level = 0 : index} : tensor to memref +// CHECK-OFF: %[[VAL_5:.*]] = sparse_tensor.values %[[VAL_1]] : tensor to memref // CHECK-OFF: %[[VAL_6:.*]] = bufferization.to_memref %[[VAL_0]] : memref // CHECK-OFF: %[[VAL_7:.*]] = memref.load %[[VAL_6]][] : memref // CHECK-OFF: %[[VAL_8:.*]] = memref.load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref -- cgit v1.1 From cb678708e620c21aaf4bb75823b18943937902d7 Mon Sep 17 00:00:00 2001 From: Alexey Bataev 
Date: Thu, 16 Nov 2023 16:26:43 -0800 Subject: [SLP][NFC]Add TreeEntry-based add member functions and use them, where possible, NFC. --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 3af683d..d7967175 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10276,6 +10276,16 @@ public: VecTy, PoisonValue::get(PointerType::getUnqual(VecTy->getContext())), MaybeAlign()); } + /// Adds 2 input vectors (in form of tree entries) and the mask for their + /// shuffling. + void add(const TreeEntry &E1, const TreeEntry &E2, ArrayRef Mask) { + add(E1.VectorizedValue, E2.VectorizedValue, Mask); + } + /// Adds single input vector (in form of tree entry) and the mask for its + /// shuffling. + void add(const TreeEntry &E1, ArrayRef Mask) { + add(E1.VectorizedValue, Mask); + } /// Adds 2 input vectors and the mask for their shuffling. void add(Value *V1, Value *V2, ArrayRef Mask) { assert(V1 && V2 && !Mask.empty() && "Expected non-empty input vectors."); @@ -10690,7 +10700,7 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { Mask[I] = FrontTE->findLaneForValue(V); } } - ShuffleBuilder.add(FrontTE->VectorizedValue, Mask); + ShuffleBuilder.add(*FrontTE, Mask); Res = ShuffleBuilder.finalize(E->getCommonMask()); return Res; } @@ -10868,17 +10878,14 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) { VecMask.assign(VecMask.size(), PoisonMaskElem); copy(SubMask, std::next(VecMask.begin(), I * SliceSize)); if (TEs.size() == 1) { - IsUsedInExpr &= FindReusedSplat( - VecMask, - cast(TEs.front()->VectorizedValue->getType()) - ->getNumElements()); - ShuffleBuilder.add(TEs.front()->VectorizedValue, VecMask); + IsUsedInExpr &= + FindReusedSplat(VecMask, TEs.front()->getVectorFactor()); + ShuffleBuilder.add(*TEs.front(), VecMask); IsNonPoisoned &= isGuaranteedNotToBePoison(TEs.front()->VectorizedValue); } else { IsUsedInExpr = false; - ShuffleBuilder.add(TEs.front()->VectorizedValue, - TEs.back()->VectorizedValue, VecMask); + ShuffleBuilder.add(*TEs.front(), *TEs.back(), VecMask); IsNonPoisoned &= isGuaranteedNotToBePoison(TEs.front()->VectorizedValue) && isGuaranteedNotToBePoison(TEs.back()->VectorizedValue); -- cgit v1.1 From c6f7b631a9c0757130a8a7bb6b0ccc10da340e42 Mon Sep 17 00:00:00 2001 From: Jakub Kuderski Date: Thu, 16 Nov 2023 19:34:00 -0500 Subject: [mlir][spirv] Fix VectorShuffle assembly format (#72568) Align with the rest of the spirv dialect by using a functional type syntax. 
Regex for updating existing code: `spirv\.VectorShuffle (\[.+\]) (%[^:]+): ([^,]+), (%[^:]+): ([^\s]+) ->(.+)` ==> `spirv.VectorShuffle $1 $2, $4 : $3, $5 ->$6` --- mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCompositeOps.td | 13 ++++++------- mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir | 4 ++-- mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir | 6 +++--- mlir/test/Dialect/SPIRV/IR/composite-ops.mlir | 8 ++++---- mlir/test/Target/SPIRV/composite-op.mlir | 4 ++-- 5 files changed, 17 insertions(+), 18 deletions(-) diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCompositeOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCompositeOps.td index 33078b7..74fbea6 100644 --- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCompositeOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVCompositeOps.td @@ -308,8 +308,8 @@ def SPIRV_VectorShuffleOp : SPIRV_Op<"VectorShuffle", [ Vector 1 and Vector 2 must both have vector types, with the same Component Type as Result Type. They do not have to have the same number of components as Result Type or with each other. They are logically - concatenated, forming a single vector with Vector 1’s components - appearing before Vector 2’s. The components of this logical vector are + concatenated, forming a single vector with Vector 1's components + appearing before Vector 2's. The components of this logical vector are logically numbered with a single consecutive set of numbers from 0 to N - 1, where N is the total number of components. @@ -331,9 +331,8 @@ def SPIRV_VectorShuffleOp : SPIRV_Op<"VectorShuffle", [ #### Example: ```mlir - %0 = spirv.VectorShuffle [1: i32, 3: i32, 5: i32] - %vector1: vector<4xf32>, %vector2: vector<2xf32> - -> vector<3xf32> + %0 = spirv.VectorShuffle [1: i32, 3: i32, 5: i32] %vector1, %vector2 : + vector<4xf32>, vector<2xf32> -> vector<3xf32> ``` }]; @@ -348,8 +347,8 @@ def SPIRV_VectorShuffleOp : SPIRV_Op<"VectorShuffle", [ ); let assemblyFormat = [{ - attr-dict $components $vector1 `:` type($vector1) `,` - $vector2 `:` type($vector2) `->` type($result) + attr-dict $components $vector1 `,` $vector2 `:` + type($vector1) `,` type($vector2) `->` type($result) }]; } diff --git a/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir b/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir index 13bde6e..9fe1e53 100644 --- a/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir +++ b/mlir/test/Conversion/SPIRVToLLVM/misc-ops-to-llvm.mlir @@ -65,7 +65,7 @@ spirv.func @select_vector(%arg0: vector<2xi1>, %arg1: vector<2xi32>) "None" { spirv.func @vector_shuffle_same_size(%vector1: vector<2xf32>, %vector2: vector<2xf32>) -> vector<3xf32> "None" { // CHECK: %[[res:.*]] = llvm.shufflevector {{.*}} [0, 2, -1] : vector<2xf32> // CHECK-NEXT: return %[[res]] : vector<3xf32> - %0 = spirv.VectorShuffle [0: i32, 2: i32, 0xffffffff: i32] %vector1: vector<2xf32>, %vector2: vector<2xf32> -> vector<3xf32> + %0 = spirv.VectorShuffle [0: i32, 2: i32, 0xffffffff: i32] %vector1, %vector2 : vector<2xf32>, vector<2xf32> -> vector<3xf32> spirv.ReturnValue %0: vector<3xf32> } @@ -80,7 +80,7 @@ spirv.func @vector_shuffle_different_size(%vector1: vector<3xf32>, %vector2: vec // CHECK-NEXT: %[[EXT1:.*]] = llvm.extractelement {{.*}}[%[[C1_1]] : i32] : vector<2xf32> // CHECK-NEXT: %[[RES:.*]] = llvm.insertelement %[[EXT1]], %[[INSERT0]][%[[C1_0]] : i32] : vector<3xf32> // CHECK-NEXT: llvm.return %[[RES]] : vector<3xf32> - %0 = spirv.VectorShuffle [0: i32, 4: i32, 0xffffffff: i32] %vector1: vector<3xf32>, %vector2: vector<2xf32> -> vector<3xf32> + %0 = 
spirv.VectorShuffle [0: i32, 4: i32, 0xffffffff: i32] %vector1, %vector2 : vector<3xf32>, vector<2xf32> -> vector<3xf32> spirv.ReturnValue %0: vector<3xf32> } diff --git a/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir b/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir index eba763e..6265a05 100644 --- a/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir +++ b/mlir/test/Conversion/VectorToSPIRV/vector-to-spirv.mlir @@ -266,7 +266,7 @@ func.func @extract_element_0d_vector(%arg0 : f32) -> f32 { // CHECK-LABEL: @extract_strided_slice // CHECK-SAME: %[[ARG:.+]]: vector<4xf32> -// CHECK: spirv.VectorShuffle [1 : i32, 2 : i32] %[[ARG]] : vector<4xf32>, %[[ARG]] : vector<4xf32> -> vector<2xf32> +// CHECK: spirv.VectorShuffle [1 : i32, 2 : i32] %[[ARG]], %[[ARG]] : vector<4xf32>, vector<4xf32> -> vector<2xf32> // CHECK: spirv.CompositeExtract %[[ARG]][1 : i32] : vector<4xf32> func.func @extract_strided_slice(%arg0: vector<4xf32>) -> (vector<2xf32>, vector<1xf32>) { %0 = vector.extract_strided_slice %arg0 {offsets = [1], sizes = [2], strides = [1]} : vector<4xf32> to vector<2xf32> @@ -339,7 +339,7 @@ func.func @insert_element_0d_vector(%scalar: f32, %vector : vector) -> vect // CHECK-LABEL: @insert_strided_slice // CHECK-SAME: %[[PART:.+]]: vector<2xf32>, %[[ALL:.+]]: vector<4xf32> -// CHECK: spirv.VectorShuffle [0 : i32, 4 : i32, 5 : i32, 3 : i32] %[[ALL]] : vector<4xf32>, %[[PART]] : vector<2xf32> -> vector<4xf32> +// CHECK: spirv.VectorShuffle [0 : i32, 4 : i32, 5 : i32, 3 : i32] %[[ALL]], %[[PART]] : vector<4xf32>, vector<2xf32> -> vector<4xf32> func.func @insert_strided_slice(%arg0: vector<2xf32>, %arg1: vector<4xf32>) -> vector<4xf32> { %0 = vector.insert_strided_slice %arg0, %arg1 {offsets = [1], strides = [1]} : vector<2xf32> into vector<4xf32> return %0 : vector<4xf32> @@ -425,7 +425,7 @@ func.func @shuffle_index_vector(%v0 : vector<1xindex>, %v1: vector<1xindex>) -> // CHECK-LABEL: func @shuffle // CHECK-SAME: %[[V0:.+]]: vector<3xf32>, %[[V1:.+]]: vector<3xf32> -// CHECK: spirv.VectorShuffle [3 : i32, 2 : i32, 5 : i32, 1 : i32] %[[V0]] : vector<3xf32>, %[[V1]] : vector<3xf32> -> vector<4xf32> +// CHECK: spirv.VectorShuffle [3 : i32, 2 : i32, 5 : i32, 1 : i32] %[[V0]], %[[V1]] : vector<3xf32>, vector<3xf32> -> vector<4xf32> func.func @shuffle(%v0 : vector<3xf32>, %v1: vector<3xf32>) -> vector<4xf32> { %shuffle = vector.shuffle %v0, %v1 [3, 2, 5, 1] : vector<3xf32>, vector<3xf32> return %shuffle : vector<4xf32> diff --git a/mlir/test/Dialect/SPIRV/IR/composite-ops.mlir b/mlir/test/Dialect/SPIRV/IR/composite-ops.mlir index 2891513..b10677f 100644 --- a/mlir/test/Dialect/SPIRV/IR/composite-ops.mlir +++ b/mlir/test/Dialect/SPIRV/IR/composite-ops.mlir @@ -338,8 +338,8 @@ func.func @vector_dynamic_insert(%val: f32, %vec: vector<4xf32>, %id : i32) -> v //===----------------------------------------------------------------------===// func.func @vector_shuffle(%vector1: vector<4xf32>, %vector2: vector<2xf32>) -> vector<3xf32> { - // CHECK: %{{.+}} = spirv.VectorShuffle [1 : i32, 3 : i32, -1 : i32] %{{.+}} : vector<4xf32>, %arg1 : vector<2xf32> -> vector<3xf32> - %0 = spirv.VectorShuffle [1: i32, 3: i32, 0xffffffff: i32] %vector1: vector<4xf32>, %vector2: vector<2xf32> -> vector<3xf32> + // CHECK: %{{.+}} = spirv.VectorShuffle [1 : i32, 3 : i32, -1 : i32] %{{.+}}, %arg1 : vector<4xf32>, vector<2xf32> -> vector<3xf32> + %0 = spirv.VectorShuffle [1: i32, 3: i32, 0xffffffff: i32] %vector1, %vector2 : vector<4xf32>, vector<2xf32> -> vector<3xf32> return %0: vector<3xf32> } @@ -347,7 
+347,7 @@ func.func @vector_shuffle(%vector1: vector<4xf32>, %vector2: vector<2xf32>) -> v func.func @vector_shuffle_extra_selector(%vector1: vector<4xf32>, %vector2: vector<2xf32>) -> vector<3xf32> { // expected-error @+1 {{result type element count (3) mismatch with the number of component selectors (4)}} - %0 = spirv.VectorShuffle [1: i32, 3: i32, 5: i32, 2: i32] %vector1: vector<4xf32>, %vector2: vector<2xf32> -> vector<3xf32> + %0 = spirv.VectorShuffle [1: i32, 3: i32, 5: i32, 2: i32] %vector1, %vector2 : vector<4xf32>, vector<2xf32> -> vector<3xf32> return %0: vector<3xf32> } @@ -355,6 +355,6 @@ func.func @vector_shuffle_extra_selector(%vector1: vector<4xf32>, %vector2: vect func.func @vector_shuffle_extra_selector(%vector1: vector<4xf32>, %vector2: vector<2xf32>) -> vector<3xf32> { // expected-error @+1 {{component selector 7 out of range: expected to be in [0, 6) or 0xffffffff}} - %0 = spirv.VectorShuffle [1: i32, 7: i32, 5: i32] %vector1: vector<4xf32>, %vector2: vector<2xf32> -> vector<3xf32> + %0 = spirv.VectorShuffle [1: i32, 7: i32, 5: i32] %vector1, %vector2 : vector<4xf32>, vector<2xf32> -> vector<3xf32> return %0: vector<3xf32> } diff --git a/mlir/test/Target/SPIRV/composite-op.mlir b/mlir/test/Target/SPIRV/composite-op.mlir index 5a31216..5f302fd 100644 --- a/mlir/test/Target/SPIRV/composite-op.mlir +++ b/mlir/test/Target/SPIRV/composite-op.mlir @@ -22,8 +22,8 @@ spirv.module Logical GLSL450 requires #spirv.vce { spirv.ReturnValue %0: vector<4xf32> } spirv.func @vector_shuffle(%vector1: vector<4xf32>, %vector2: vector<2xf32>) -> vector<3xf32> "None" { - // CHECK: %{{.+}} = spirv.VectorShuffle [1 : i32, 3 : i32, -1 : i32] %{{.+}} : vector<4xf32>, %arg1 : vector<2xf32> -> vector<3xf32> - %0 = spirv.VectorShuffle [1: i32, 3: i32, 0xffffffff: i32] %vector1: vector<4xf32>, %vector2: vector<2xf32> -> vector<3xf32> + // CHECK: %{{.+}} = spirv.VectorShuffle [1 : i32, 3 : i32, -1 : i32] %{{.+}}, %arg1 : vector<4xf32>, vector<2xf32> -> vector<3xf32> + %0 = spirv.VectorShuffle [1: i32, 3: i32, 0xffffffff: i32] %vector1, %vector2 : vector<4xf32>, vector<2xf32> -> vector<3xf32> spirv.ReturnValue %0: vector<3xf32> } } -- cgit v1.1 From 9365ed1e10e92c48ad3dbe4b257b0fdc045b74a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Thu, 16 Nov 2023 16:41:50 -0800 Subject: [flang][openacc] Add ability to link acc.declare_enter with acc.declare_exit ops (#72476) --- flang/lib/Lower/OpenACC.cpp | 45 ++++++++++++++-------- flang/test/Lower/OpenACC/HLFIR/acc-declare.f90 | 28 +++++++------- mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td | 8 +++- .../mlir/Dialect/OpenACC/OpenACCOpsTypes.td | 9 +++++ mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp | 10 +++-- 5 files changed, 64 insertions(+), 36 deletions(-) diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index e470154..8c6c222 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -163,7 +163,8 @@ static void createDeclareAllocFuncWithArg(mlir::OpBuilder &modBuilder, builder, loc, boxAddrOp.getResult(), asFortran, bounds, /*structured=*/false, /*implicit=*/false, clause, boxAddrOp.getType()); builder.create( - loc, mlir::ValueRange(entryOp.getAccPtr())); + loc, mlir::acc::DeclareTokenType::get(entryOp.getContext()), + mlir::ValueRange(entryOp.getAccPtr())); modBuilder.setInsertionPointAfter(registerFuncOp); builder.restoreInsertionPoint(crtInsPt); @@ -195,7 +196,7 @@ 
static void createDeclareDeallocFuncWithArg( /*structured=*/false, /*implicit=*/false, clause, boxAddrOp.getType()); builder.create( - loc, mlir::ValueRange(entryOp.getAccPtr())); + loc, mlir::Value{}, mlir::ValueRange(entryOp.getAccPtr())); mlir::Value varPtr; if constexpr (std::is_same_v || @@ -2762,7 +2763,13 @@ static void createDeclareGlobalOp(mlir::OpBuilder &modBuilder, EntryOp entryOp = createDataEntryOp( builder, loc, addrOp.getResTy(), asFortran, bounds, /*structured=*/false, implicit, clause, addrOp.getResTy().getType()); - builder.create(loc, mlir::ValueRange(entryOp.getAccPtr())); + if constexpr (std::is_same_v) + builder.create( + loc, mlir::acc::DeclareTokenType::get(entryOp.getContext()), + mlir::ValueRange(entryOp.getAccPtr())); + else + builder.create(loc, mlir::Value{}, + mlir::ValueRange(entryOp.getAccPtr())); mlir::Value varPtr; if constexpr (std::is_same_v) { builder.create(entryOp.getLoc(), entryOp.getAccPtr(), varPtr, @@ -2812,7 +2819,8 @@ static void createDeclareAllocFunc(mlir::OpBuilder &modBuilder, builder, loc, boxAddrOp.getResult(), asFortran, bounds, /*structured=*/false, /*implicit=*/false, clause, boxAddrOp.getType()); builder.create( - loc, mlir::ValueRange(entryOp.getAccPtr())); + loc, mlir::acc::DeclareTokenType::get(entryOp.getContext()), + mlir::ValueRange(entryOp.getAccPtr())); modBuilder.setInsertionPointAfter(registerFuncOp); } @@ -2850,7 +2858,7 @@ static void createDeclareDeallocFunc(mlir::OpBuilder &modBuilder, boxAddrOp.getType()); builder.create( - loc, mlir::ValueRange(entryOp.getAccPtr())); + loc, mlir::Value{}, mlir::ValueRange(entryOp.getAccPtr())); mlir::Value varPtr; if constexpr (std::is_same_v || @@ -3092,34 +3100,37 @@ genDeclareInFunction(Fortran::lower::AbstractConverter &converter, mlir::func::FuncOp funcOp = builder.getFunction(); auto ops = funcOp.getOps(); + mlir::Value declareToken; if (ops.empty()) { - builder.create(loc, dataClauseOperands); + declareToken = builder.create( + loc, mlir::acc::DeclareTokenType::get(builder.getContext()), + dataClauseOperands); } else { auto declareOp = *ops.begin(); auto newDeclareOp = builder.create( - loc, declareOp.getDataClauseOperands()); + loc, mlir::acc::DeclareTokenType::get(builder.getContext()), + declareOp.getDataClauseOperands()); newDeclareOp.getDataClauseOperandsMutable().append(dataClauseOperands); + declareToken = newDeclareOp.getToken(); declareOp.erase(); } openAccCtx.attachCleanup([&builder, loc, createEntryOperands, copyEntryOperands, copyoutEntryOperands, - deviceResidentEntryOperands]() { + deviceResidentEntryOperands, declareToken]() { llvm::SmallVector operands; operands.append(createEntryOperands); operands.append(deviceResidentEntryOperands); operands.append(copyEntryOperands); operands.append(copyoutEntryOperands); - if (!operands.empty()) { - mlir::func::FuncOp funcOp = builder.getFunction(); - auto ops = funcOp.getOps(); - if (ops.empty()) { - builder.create(loc, operands); - } else { - auto declareOp = *ops.begin(); - declareOp.getDataClauseOperandsMutable().append(operands); - } + mlir::func::FuncOp funcOp = builder.getFunction(); + auto ops = funcOp.getOps(); + if (ops.empty()) { + builder.create(loc, declareToken, operands); + } else { + auto declareOp = *ops.begin(); + declareOp.getDataClauseOperandsMutable().append(operands); } genDataExitOperations( diff --git a/flang/test/Lower/OpenACC/HLFIR/acc-declare.f90 b/flang/test/Lower/OpenACC/HLFIR/acc-declare.f90 index 489f81d..b0a78fb 100644 --- a/flang/test/Lower/OpenACC/HLFIR/acc-declare.f90 +++ 
b/flang/test/Lower/OpenACC/HLFIR/acc-declare.f90 @@ -24,11 +24,11 @@ module acc_declare ! ALL: %[[BOUND:.*]] = acc.bounds lowerbound(%{{.*}} : index) upperbound(%{{.*}} : index) extent(%{{.*}} : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index) ! FIR: %[[COPYIN:.*]] = acc.copyin varPtr(%[[DECL]] : !fir.ref>) bounds(%[[BOUND]]) -> !fir.ref> {dataClause = #acc, name = "a"} ! HLFIR: %[[COPYIN:.*]] = acc.copyin varPtr(%[[DECL]]#1 : !fir.ref>) bounds(%[[BOUND]]) -> !fir.ref> {dataClause = #acc, name = "a"} -! ALL: acc.declare_enter dataOperands(%[[COPYIN]] : !fir.ref>) +! ALL: %[[TOKEN:.*]] = acc.declare_enter dataOperands(%[[COPYIN]] : !fir.ref>) ! ALL: %{{.*}}:2 = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%{{.*}} = %{{.*}}) -> (index, i32) { ! ALL: } -! ALL: acc.declare_exit dataOperands(%[[COPYIN]] : !fir.ref>) +! ALL: acc.declare_exit token(%[[TOKEN]]) dataOperands(%[[COPYIN]] : !fir.ref>) ! FIR: acc.copyout accPtr(%[[COPYIN]] : !fir.ref>) bounds(%[[BOUND]]) to varPtr(%[[DECL]] : !fir.ref>) {dataClause = #acc, name = "a"} ! HLFIR: acc.copyout accPtr(%[[COPYIN]] : !fir.ref>) bounds(%[[BOUND]]) to varPtr(%[[DECL]]#1 : !fir.ref>) {dataClause = #acc, name = "a"} @@ -51,11 +51,11 @@ module acc_declare ! ALL: %[[BOUND:.*]] = acc.bounds lowerbound(%{{.*}} : index) upperbound(%{{.*}} : index) extent(%{{.*}} : index) stride(%[[C1]] : index) startIdx(%[[C1]] : index) ! FIR: %[[CREATE:.*]] = acc.create varPtr(%[[DECL]] : !fir.ref>) bounds(%[[BOUND]]) -> !fir.ref> {name = "a"} ! HLFIR: %[[CREATE:.*]] = acc.create varPtr(%[[DECL]]#1 : !fir.ref>) bounds(%[[BOUND]]) -> !fir.ref> {name = "a"} -! ALL: acc.declare_enter dataOperands(%[[CREATE]] : !fir.ref>) +! ALL: %[[TOKEN:.*]] = acc.declare_enter dataOperands(%[[CREATE]] : !fir.ref>) ! ALL: %{{.*}}:2 = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%{{.*}} = %{{.*}}) -> (index, i32) { ! ALL: } -! ALL: acc.declare_exit dataOperands(%[[CREATE]] : !fir.ref>) +! ALL: acc.declare_exit token(%[[TOKEN]]) dataOperands(%[[CREATE]] : !fir.ref>) ! ALL: acc.delete accPtr(%[[CREATE]] : !fir.ref>) bounds(%[[BOUND]]) {dataClause = #acc, name = "a"} ! ALL: return @@ -119,10 +119,10 @@ module acc_declare ! HLFIR: %[[ADECL:.*]]:2 = hlfir.declare %[[A]](%{{.*}}) {acc.declare = #acc.declare, uniq_name = "_QMacc_declareFacc_declare_copyoutEa"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) ! FIR: %[[CREATE:.*]] = acc.create varPtr(%[[ADECL]] : !fir.ref>) bounds(%{{.*}}) -> !fir.ref> {dataClause = #acc, name = "a"} ! HLFIR: %[[CREATE:.*]] = acc.create varPtr(%[[ADECL]]#1 : !fir.ref>) bounds(%{{.*}}) -> !fir.ref> {dataClause = #acc, name = "a"} -! ALL: acc.declare_enter dataOperands(%[[CREATE]] : !fir.ref>) +! ALL: %[[TOKEN:.*]] = acc.declare_enter dataOperands(%[[CREATE]] : !fir.ref>) ! ALL: %{{.*}}:2 = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%arg{{.*}} = %{{.*}}) -> (index, i32) -! ALL: acc.declare_exit dataOperands(%[[CREATE]] : !fir.ref>) +! ALL: acc.declare_exit token(%[[TOKEN]]) dataOperands(%[[CREATE]] : !fir.ref>) ! FIR: acc.copyout accPtr(%[[CREATE]] : !fir.ref>) bounds(%{{.*}}) to varPtr(%[[ADECL]] : !fir.ref>) {name = "a"} ! HLFIR: acc.copyout accPtr(%[[CREATE]] : !fir.ref>) bounds(%{{.*}}) to varPtr(%[[ADECL]]#1 : !fir.ref>) {name = "a"} ! ALL: return @@ -178,9 +178,9 @@ module acc_declare ! HLFIR: %[[DECL:.*]]:2 = hlfir.declare %[[ARG0]](%{{.*}}) {acc.declare = #acc.declare, uniq_name = "_QMacc_declareFacc_declare_device_residentEa"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) ! 
FIR: %[[DEVICERES:.*]] = acc.declare_device_resident varPtr(%[[DECL]] : !fir.ref>) bounds(%{{.*}}) -> !fir.ref> {name = "a"} ! HLFIR: %[[DEVICERES:.*]] = acc.declare_device_resident varPtr(%[[DECL]]#1 : !fir.ref>) bounds(%{{.*}}) -> !fir.ref> {name = "a"} -! ALL: acc.declare_enter dataOperands(%[[DEVICERES]] : !fir.ref>) +! ALL: %[[TOKEN:.*]] = acc.declare_enter dataOperands(%[[DEVICERES]] : !fir.ref>) ! ALL: %{{.*}}:2 = fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} iter_args(%arg{{.*}} = %{{.*}}) -> (index, i32) -! ALL: acc.declare_exit dataOperands(%[[DEVICERES]] : !fir.ref>) +! ALL: acc.declare_exit token(%[[TOKEN]]) dataOperands(%[[DEVICERES]] : !fir.ref>) ! ALL: acc.delete accPtr(%[[DEVICERES]] : !fir.ref>) bounds(%{{.*}}) {dataClause = #acc, name = "a"} subroutine acc_declare_device_resident2() @@ -195,8 +195,8 @@ module acc_declare ! HLFIR: %[[DECL:.*]]:2 = hlfir.declare %[[ALLOCA]](%{{.*}}) {acc.declare = #acc.declare, uniq_name = "_QMacc_declareFacc_declare_device_resident2Edataparam"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) ! FIR: %[[DEVICERES:.*]] = acc.declare_device_resident varPtr(%[[DECL]] : !fir.ref>) bounds(%{{.*}}) -> !fir.ref> {name = "dataparam"} ! HLFIR: %[[DEVICERES:.*]] = acc.declare_device_resident varPtr(%[[DECL]]#1 : !fir.ref>) bounds(%{{.*}}) -> !fir.ref> {name = "dataparam"} -! ALL: acc.declare_enter dataOperands(%[[DEVICERES]] : !fir.ref>) -! ALL: acc.declare_exit dataOperands(%[[DEVICERES]] : !fir.ref>) +! ALL: %[[TOKEN:.*]] = acc.declare_enter dataOperands(%[[DEVICERES]] : !fir.ref>) +! ALL: acc.declare_exit token(%[[TOKEN]]) dataOperands(%[[DEVICERES]] : !fir.ref>) ! ALL: acc.delete accPtr(%[[DEVICERES]] : !fir.ref>) bounds(%{{.*}}) {dataClause = #acc, name = "dataparam"} subroutine acc_declare_link2() @@ -234,10 +234,10 @@ module acc_declare ! ALL-LABEL: func.func @_QMacc_declarePacc_declare_in_func() -> f32 { ! HLFIR: %[[DEVICE_RESIDENT:.*]] = acc.declare_device_resident varPtr(%{{.*}}#1 : !fir.ref>) bounds(%{{.*}}) -> !fir.ref> {name = "a"} -! HLFIR: acc.declare_enter dataOperands(%[[DEVICE_RESIDENT]] : !fir.ref>) +! HLFIR: %[[TOKEN:.*]] = acc.declare_enter dataOperands(%[[DEVICE_RESIDENT]] : !fir.ref>) ! HLFIR: %[[LOAD:.*]] = fir.load %{{.*}}#1 : !fir.ref -! HLFIR: acc.declare_exit dataOperands(%[[DEVICE_RESIDENT]] : !fir.ref>) +! HLFIR: acc.declare_exit token(%[[TOKEN]]) dataOperands(%[[DEVICE_RESIDENT]] : !fir.ref>) ! HLFIR: acc.delete accPtr(%[[DEVICE_RESIDENT]] : !fir.ref>) bounds(%6) {dataClause = #acc, name = "a"} ! HLFIR: return %[[LOAD]] : f32 ! ALL: } @@ -254,10 +254,10 @@ module acc_declare ! HLFIR: %[[ALLOCA_A:.*]] = fir.alloca !fir.array<1024xf32> {bindc_name = "a", uniq_name = "_QMacc_declareFacc_declare_in_func2Ea"} ! HLFIR: %[[DECL_A:.*]]:2 = hlfir.declare %[[ALLOCA_A]](%{{.*}}) {acc.declare = #acc.declare, uniq_name = "_QMacc_declareFacc_declare_in_func2Ea"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) ! HLFIR: %[[CREATE:.*]] = acc.create varPtr(%[[DECL_A]]#1 : !fir.ref>) bounds(%7) -> !fir.ref> {name = "a"} -! HLFIR: acc.declare_enter dataOperands(%[[CREATE]] : !fir.ref>) +! HLFIR: %[[TOKEN:.*]] = acc.declare_enter dataOperands(%[[CREATE]] : !fir.ref>) ! HLFIR: cf.br ^bb1 ! HLFIR: ^bb1: -! HLFIR: acc.declare_exit dataOperands(%[[CREATE]] : !fir.ref>) +! HLFIR: acc.declare_exit token(%[[TOKEN]]) dataOperands(%[[CREATE]] : !fir.ref>) ! HLFIR: acc.delete accPtr(%[[CREATE]] : !fir.ref>) bounds(%7) {dataClause = #acc, name = "a"} ! ALL: return %{{.*}} : f32 ! 
ALL: } diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td index d0b52a0..391e77e 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td @@ -1430,6 +1430,7 @@ def OpenACC_DeclareEnterOp : OpenACC_Op<"declare_enter", []> { }]; let arguments = (ins Variadic:$dataClauseOperands); + let results = (outs OpenACC_DeclareTokenType:$token); let assemblyFormat = [{ oilist( @@ -1441,7 +1442,7 @@ def OpenACC_DeclareEnterOp : OpenACC_Op<"declare_enter", []> { let hasVerifier = 1; } -def OpenACC_DeclareExitOp : OpenACC_Op<"declare_exit", []> { +def OpenACC_DeclareExitOp : OpenACC_Op<"declare_exit", [AttrSizedOperandSegments]> { let summary = "declare directive - exit from implicit data region"; let description = [{ @@ -1458,10 +1459,13 @@ def OpenACC_DeclareExitOp : OpenACC_Op<"declare_exit", []> { ``` }]; - let arguments = (ins Variadic:$dataClauseOperands); + let arguments = (ins + Optional:$token, + Variadic:$dataClauseOperands); let assemblyFormat = [{ oilist( + `token` `(` $token `)` | `dataOperands` `(` $dataClauseOperands `:` type($dataClauseOperands) `)` ) attr-dict-with-keyword diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td index 4a930ad..92ea71a7 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOpsTypes.td @@ -24,4 +24,13 @@ def OpenACC_DataBoundsType : OpenACC_Type<"DataBounds", "data_bounds_ty"> { let summary = "Type for representing acc data clause bounds information"; } +def OpenACC_DeclareTokenType : OpenACC_Type<"DeclareToken", "declare_token"> { + let summary = "declare token type"; + let description = [{ + `acc.declare_token` is a type returned by a `declare_enter` operation and + can be passed to a `declare_exit` operation to represent an implicit + data region. + }]; +} + #endif // OPENACC_OPS_TYPES diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp index 6e5df70..08e83ca 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -1087,9 +1087,10 @@ LogicalResult AtomicCaptureOp::verifyRegions() { return verifyRegionsCommon(); } //===----------------------------------------------------------------------===// template -static LogicalResult checkDeclareOperands(Op &op, - const mlir::ValueRange &operands) { - if (operands.empty()) +static LogicalResult +checkDeclareOperands(Op &op, const mlir::ValueRange &operands, + bool requireAtLeastOneOperand = true) { + if (operands.empty() && requireAtLeastOneOperand) return emitError( op->getLoc(), "at least one operand must appear on the declare operation"); @@ -1151,6 +1152,9 @@ LogicalResult acc::DeclareEnterOp::verify() { //===----------------------------------------------------------------------===// LogicalResult acc::DeclareExitOp::verify() { + if (getToken()) + return checkDeclareOperands(*this, this->getDataClauseOperands(), + /*requireAtLeastOneOperand=*/false); return checkDeclareOperands(*this, this->getDataClauseOperands()); } -- cgit v1.1 From d97981c98a70ebeaa8901f34921b0a69a068ff5d Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 16 Nov 2023 16:45:29 -0800 Subject: [ORC-RT] Add missing cstdint include. This should have been included in b2bbe8cc1c7. 
Adding it should fix the bot failures in https://lab.llvm.org/buildbot/#/builders/85/builds/20288 --- compiler-rt/lib/orc/stl_extras.h | 1 + 1 file changed, 1 insertion(+) diff --git a/compiler-rt/lib/orc/stl_extras.h b/compiler-rt/lib/orc/stl_extras.h index 1eef565..80a6cd1 100644 --- a/compiler-rt/lib/orc/stl_extras.h +++ b/compiler-rt/lib/orc/stl_extras.h @@ -13,6 +13,7 @@ #ifndef ORC_RT_STL_EXTRAS_H #define ORC_RT_STL_EXTRAS_H +#include #include #include -- cgit v1.1 From 83cf0dc98234bbd8cb0d0959baa570477a8daf92 Mon Sep 17 00:00:00 2001 From: Aart Bik <39774503+aartbik@users.noreply.github.com> Date: Thu, 16 Nov 2023 17:17:41 -0800 Subject: [mlir][sparse] implement direct IR alloc/empty/new for non-permutations (#72585) This change implements the correct *level* sizes set up for the direct IR codegen fields in the sparse storage scheme. This brings libgen and codegen together again. This is step 3 out of 3 to make sparse_tensor.new work for BSR --- .../mlir/ExecutionEngine/SparseTensor/File.h | 2 +- .../Transforms/SparseTensorCodegen.cpp | 135 +++++++++++---------- mlir/test/Dialect/SparseTensor/codegen.mlir | 94 +++++++------- .../codegen_buffer_initialization.mlir | 6 +- .../Dialect/SparseTensor/CPU/block.mlir | 9 +- .../SparseTensor/GPU/CUDA/sparse-sddmm-lib.mlir | 3 +- 6 files changed, 128 insertions(+), 121 deletions(-) diff --git a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h index 85bbfe0..6b4a174 100644 --- a/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h +++ b/mlir/include/mlir/ExecutionEngine/SparseTensor/File.h @@ -1,4 +1,4 @@ -//===- File.h - Reading sparse tensors from files --------------*- C++ -*-===// +//===- File.h - Reading sparse tensors from files ---------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp index cfc8eb1..9f41db7 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorCodegen.cpp @@ -159,41 +159,46 @@ static Value createAllocation(OpBuilder &builder, Location loc, return buffer; } +/// Creates the dim sizes array, filling in from dynamic sizes. +static void createDimSizes(OpBuilder &builder, Location loc, + SparseTensorType stt, ValueRange dynSizes, + /*out*/ SmallVectorImpl &dimSizesValues) { + const Dimension dimRank = stt.getDimRank(); + dimSizesValues.clear(); + dimSizesValues.reserve(dimRank); + unsigned i = 0; + for (const Size sz : stt.getDimShape()) + dimSizesValues.push_back(ShapedType::isDynamic(sz) + ? dynSizes[i++] + : constantIndex(builder, loc, sz)); +} + /// Creates allocation for each field in sparse tensor type. Note that /// for all dynamic memrefs in the sparse tensor stroage layout, the /// memory size is really the capacity of the "vector", while the actual /// size resides in the sizes array. static void createAllocFields(OpBuilder &builder, Location loc, - SparseTensorType stt, ValueRange dynSizes, - bool enableInit, SmallVectorImpl &fields, - Value sizeHint) { - // Build original sizes. 
- assert((dynSizes.size() == static_cast(stt.getNumDynamicDims())) && - "Got wrong number of dynamic sizes"); - const Dimension dimRank = stt.getDimRank(); - SmallVector dimSizes; - dimSizes.reserve(dimRank); - unsigned i = 0; // cumulative index into `dynSizes`. - for (const Size sh : stt.getDimShape()) - dimSizes.push_back(ShapedType::isDynamic(sh) - ? dynSizes[i++] - : constantIndex(builder, loc, sh)); - + SparseTensorType stt, bool enableInit, + Value sizeHint, + SmallVectorImpl &lvlSizesValues, + /*out*/ SmallVectorImpl &fields) { + Level lvlRank = stt.getLvlRank(); // Set up some heuristic sizes. We try to set the initial // size based on available information. Otherwise we just // initialize a few elements to start the reallocation chain. // TODO: refine this Value posHeuristic, crdHeuristic, valHeuristic; if (stt.isAllDense()) { - valHeuristic = dimSizes[0]; - for (const Value sz : ArrayRef{dimSizes}.drop_front()) - valHeuristic = builder.create(loc, valHeuristic, sz); + valHeuristic = lvlSizesValues[0]; + for (Level lvl = 1; lvl < lvlRank; lvl++) + valHeuristic = + builder.create(loc, valHeuristic, lvlSizesValues[lvl]); } else if (sizeHint) { if (getCOOStart(stt.getEncoding()) == 0) { posHeuristic = constantIndex(builder, loc, 2); crdHeuristic = builder.create( - loc, constantIndex(builder, loc, dimRank), sizeHint); // AOS - } else if (dimRank == 2 && stt.isDenseLvl(0) && stt.isCompressedLvl(1)) { + loc, constantIndex(builder, loc, lvlRank), sizeHint); // AOS + } else if (lvlRank == 2 && stt.isDenseLvl(0) && stt.isCompressedLvl(1)) { posHeuristic = builder.create( loc, sizeHint, constantIndex(builder, loc, 1)); crdHeuristic = sizeHint; @@ -205,7 +210,6 @@ static void createAllocFields(OpBuilder &builder, Location loc, posHeuristic = crdHeuristic = valHeuristic = constantIndex(builder, loc, 16); } - // Initializes all fields. An initial storage specifier and allocated // positions/coordinates/values memrefs (with heuristic capacity). foreachFieldAndTypeInSparseTensor( @@ -237,16 +241,13 @@ static void createAllocFields(OpBuilder &builder, Location loc, // Returns true to continue the iteration. return true; }); - // Initialize the storage scheme to an empty tensor. Sets the lvlSizes // and gives all position fields an initial zero entry, so that it is // easier to maintain the "linear + 1" length property. MutSparseTensorDescriptor desc(stt, fields); Value posZero = constantZero(builder, loc, stt.getPosType()); for (Level lvl = 0, lvlRank = stt.getLvlRank(); lvl < lvlRank; lvl++) { - // FIXME: `toOrigDim` is deprecated. - desc.setLvlSize(builder, loc, lvl, - dimSizes[toOrigDim(stt.getEncoding(), lvl)]); + desc.setLvlSize(builder, loc, lvl, lvlSizesValues[lvl]); const auto dlt = stt.getLvlType(lvl); if (isCompressedDLT(dlt) || isLooseCompressedDLT(dlt)) createPushback(builder, loc, desc, SparseTensorFieldKind::PosMemRef, lvl, @@ -371,8 +372,8 @@ static void genEndInsert(OpBuilder &builder, Location loc, SparseTensorDescriptor desc) { const SparseTensorType stt(desc.getRankedTensorType()); const Level lvlRank = stt.getLvlRank(); - for (Level l = 0; l < lvlRank; l++) { - const auto dlt = stt.getLvlType(l); + for (Level lvl = 0; lvl < lvlRank; lvl++) { + const auto dlt = stt.getLvlType(lvl); if (isCompressedDLT(dlt)) { // Compressed dimensions need a position cleanup for all entries // that were not visited during the insertion pass. 
@@ -380,10 +381,10 @@ static void genEndInsert(OpBuilder &builder, Location loc, // TODO: avoid cleanup and keep compressed scheme consistent at all // times? // - if (l > 0) { + if (lvl > 0) { Type posType = stt.getPosType(); - Value posMemRef = desc.getPosMemRef(l); - Value hi = desc.getPosMemSize(builder, loc, l); + Value posMemRef = desc.getPosMemRef(lvl); + Value hi = desc.getPosMemSize(builder, loc, lvl); Value zero = constantIndex(builder, loc, 0); Value one = constantIndex(builder, loc, 1); // Vector of only one, but needed by createFor's prototype. @@ -729,7 +730,6 @@ public: }; /// Sparse codegen rule for the alloc operator. -/// TODO(springerm): remove when bufferization.alloc_tensor is gone class SparseTensorAllocConverter : public OpConversionPattern { public: @@ -746,6 +746,8 @@ public: if (!resType.hasEncoding()) return failure(); Location loc = op.getLoc(); + + // Deal with copy. if (op.getCopy()) { auto desc = getDescriptorFromTensorTuple(adaptor.getCopy()); SmallVector fields; @@ -766,17 +768,22 @@ public: return success(); } + // Construct the dim/lvl sizes and the (unused) dim2lvl/lvl2dim buffers. + SmallVector dimSizesValues; + SmallVector lvlSizesValues; + Value dimSizesBuffer; + Value dim2lvlBuffer; + Value lvl2dimBuffer; + createDimSizes(rewriter, loc, resType, adaptor.getDynamicSizes(), + dimSizesValues); + genMapBuffers(rewriter, loc, resType, dimSizesValues, dimSizesBuffer, + lvlSizesValues, dim2lvlBuffer, lvl2dimBuffer); + // Construct allocation for each field. Value sizeHint = op.getSizeHint(); - ValueRange dynSizes = adaptor.getDynamicSizes(); - const size_t found = dynSizes.size(); - const int64_t expected = resType.getNumDynamicDims(); - if (found != static_cast(expected)) - return rewriter.notifyMatchFailure(op, - "Got wrong number of dynamic sizes"); SmallVector fields; - createAllocFields(rewriter, loc, resType, dynSizes, - enableBufferInitialization, fields, sizeHint); + createAllocFields(rewriter, loc, resType, enableBufferInitialization, + sizeHint, lvlSizesValues, fields); // Replace operation with resulting memrefs. rewriter.replaceOp(op, genTuple(rewriter, loc, resType, fields)); @@ -788,7 +795,6 @@ private: }; /// Sparse codegen rule for the empty tensor operator. -/// TODO(springerm): remove when bufferization.alloc_tensor is gone class SparseTensorEmptyConverter : public OpConversionPattern { public: using OpConversionPattern::OpConversionPattern; @@ -803,19 +809,24 @@ public: const auto resType = getSparseTensorType(op); if (!resType.hasEncoding()) return failure(); + Location loc = op.getLoc(); + + // Construct the dim/lvl sizes and the (unused) dim2lvl/lvl2dim buffers. + SmallVector dimSizesValues; + SmallVector lvlSizesValues; + Value dimSizesBuffer; + Value dim2lvlBuffer; + Value lvl2dimBuffer; + createDimSizes(rewriter, loc, resType, adaptor.getDynamicSizes(), + dimSizesValues); + genMapBuffers(rewriter, loc, resType, dimSizesValues, dimSizesBuffer, + lvlSizesValues, dim2lvlBuffer, lvl2dimBuffer); // Construct allocation for each field. 
- Location loc = op.getLoc(); Value sizeHint; // none - const ValueRange dynSizes = adaptor.getDynamicSizes(); - const size_t found = dynSizes.size(); - const int64_t expected = resType.getNumDynamicDims(); - if (found != static_cast(expected)) - return rewriter.notifyMatchFailure(op, - "Got wrong number of dynamic sizes"); SmallVector fields; - createAllocFields(rewriter, loc, resType, dynSizes, - enableBufferInitialization, fields, sizeHint); + createAllocFields(rewriter, loc, resType, enableBufferInitialization, + sizeHint, lvlSizesValues, fields); // Replace operation with resulting memrefs. rewriter.replaceOp(op, genTuple(rewriter, loc, resType, fields)); @@ -1461,10 +1472,10 @@ struct SparseNewConverter : public OpConversionPattern { // if (! %isSorted) sparse_tensor.sort_coo(%nse, %coordinates, %values) // update storage specifier // @delSparseTensorReader(%reader) - SmallVector dimShapesValues; + SmallVector dimSizesValues; Value dimSizesBuffer; Value reader = genReader(rewriter, loc, dstTp, adaptor.getOperands()[0], - dimShapesValues, dimSizesBuffer); + dimSizesValues, dimSizesBuffer); // Get the number of stored entries. const Type indexTp = rewriter.getIndexType(); @@ -1472,25 +1483,19 @@ struct SparseNewConverter : public OpConversionPattern { {indexTp}, {reader}, EmitCInterface::Off) .getResult(0); - // Construct allocation for each field. - SmallVector dynSizes; - if (dstTp.hasDynamicDimShape()) { - for (const auto &d : llvm::enumerate(dstTp.getDimShape())) - if (ShapedType::isDynamic(d.value())) - dynSizes.push_back(rewriter.create( - loc, dimSizesBuffer, constantIndex(rewriter, loc, d.index()))); - } - SmallVector fields; - createAllocFields(rewriter, loc, dstTp, dynSizes, /*enableInit=*/false, - fields, nse); - - // Now construct the lvl sizes and the dim2lvl/lvl2dim buffers. + // Construct the lvl sizes and the dim2lvl/lvl2dim buffers. SmallVector lvlSizesValues; Value dim2lvlBuffer; Value lvl2dimBuffer; - genMapBuffers(rewriter, loc, dstTp, dimShapesValues, dimSizesBuffer, + genMapBuffers(rewriter, loc, dstTp, dimSizesValues, dimSizesBuffer, lvlSizesValues, dim2lvlBuffer, lvl2dimBuffer); + // Construct allocation for each field. + Value sizeHint = nse; + SmallVector fields; + createAllocFields(rewriter, loc, dstTp, /*enableInit=*/false, sizeHint, + lvlSizesValues, fields); + // Read the COO tensor data. 
MutSparseTensorDescriptor desc(dstTp, fields); Value xs = desc.getAOSMemRef(); diff --git a/mlir/test/Dialect/SparseTensor/codegen.mlir b/mlir/test/Dialect/SparseTensor/codegen.mlir index 12d77ec..e63595b 100644 --- a/mlir/test/Dialect/SparseTensor/codegen.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen.mlir @@ -252,7 +252,7 @@ func.func @sparse_values_coo(%arg0: tensor) -> memref { } -// CHECK-LABEL: func.func @sparse_indices_coo( +// CHECK-LABEL: func.func @sparse_indices_coo( // CHECK-SAME: %[[A0:.*0]]: memref, // CHECK-SAME: %[[A1:.*1]]: memref, // CHECK-SAME: %[[A2:.*2]]: memref, @@ -270,7 +270,7 @@ func.func @sparse_indices_coo(%arg0: tensor) -> memref> } -// CHECK-LABEL: func.func @sparse_indices_buffer_coo( +// CHECK-LABEL: func.func @sparse_indices_buffer_coo( // CHECK-SAME: %[[A0:.*0]]: memref, // CHECK-SAME: %[[A1:.*1]]: memref, // CHECK-SAME: %[[A2:.*2]]: memref, @@ -676,26 +676,26 @@ func.func @sparse_convert_element_type(%arg0: tensor<32xf32, #SparseVector>) -> // CHECK: memref.store %[[VAL_4]], %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<2xindex> // CHECK: %[[VAL_8:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_7]], %[[VAL_2]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr // CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_8]]) : (!llvm.ptr) -> memref -// CHECK: %[[VAL_10:.*]] = call @getSparseTensorReaderNSE(%[[VAL_8]]) : (!llvm.ptr) -> index -// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref -// CHECK: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_13:.*]] = arith.muli %[[VAL_10]], %[[VAL_5]] : index -// CHECK: %[[VAL_14:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref -// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_13]]) : memref -// CHECK: %[[VAL_17:.*]] = memref.alloc(%[[VAL_10]]) : memref -// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.init -// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] lvl_sz at 0 with %[[VAL_11]] -// CHECK: %[[VAL_20:.*]] = sparse_tensor.storage_specifier.get %[[VAL_19]] pos_mem_sz at 0 -// CHECK: %[[VAL_21:.*]], %[[VAL_22:.*]] = sparse_tensor.push_back %[[VAL_20]], %[[VAL_15]], %[[VAL_4]] -// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_19]] pos_mem_sz at 0 with %[[VAL_22]] -// CHECK: %[[VAL_24:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] lvl_sz at 1 with %[[VAL_12]] -// CHECK: %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.push_back %[[VAL_22]], %[[VAL_21]], %[[VAL_4]], %[[VAL_3]] -// CHECK: %[[VAL_27:.*]] = sparse_tensor.storage_specifier.set %[[VAL_24]] pos_mem_sz at 0 with %[[VAL_26]] -// CHECK: %[[VAL_28:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_4]], %[[VAL_28]]{{\[}}%[[VAL_4]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_3]], %[[VAL_28]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK-DAG: %[[VAL_10:.*]] = call @getSparseTensorReaderNSE(%[[VAL_8]]) : (!llvm.ptr) -> index +// CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_4]]] : memref +// CHECK-DAG: %[[VAL_12:.*]] = memref.load %[[VAL_9]]{{\[}}%[[VAL_3]]] : memref +// CHECK-DAG: %[[VAL_13:.*]] = arith.muli %[[VAL_10]], %[[VAL_5]] : index +// CHECK-DAG: %[[VAL_14:.*]] = memref.alloc() : memref<2xindex> +// CHECK-DAG: %[[VAL_15:.*]] = memref.cast %[[VAL_14]] : memref<2xindex> to memref +// CHECK-DAG: %[[VAL_16:.*]] = memref.alloc(%[[VAL_13]]) : 
memref +// CHECK-DAG: %[[VAL_17:.*]] = memref.alloc(%[[VAL_10]]) : memref +// CHECK-DAG: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.init +// CHECK-DAG: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] lvl_sz at 0 with %[[VAL_11]] +// CHECK-DAG: %[[VAL_20:.*]] = sparse_tensor.storage_specifier.get %[[VAL_19]] pos_mem_sz at 0 +// CHECK-DAG: %[[VAL_21:.*]], %[[VAL_22:.*]] = sparse_tensor.push_back %[[VAL_20]], %[[VAL_15]], %[[VAL_4]] +// CHECK-DAG: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_19]] pos_mem_sz at 0 with %[[VAL_22]] +// CHECK-DAG: %[[VAL_24:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] lvl_sz at 1 with %[[VAL_12]] +// CHECK-DAG: %[[VAL_25:.*]], %[[VAL_26:.*]] = sparse_tensor.push_back %[[VAL_22]], %[[VAL_21]], %[[VAL_4]], %[[VAL_3]] +// CHECK-DAG: %[[VAL_27:.*]] = sparse_tensor.storage_specifier.set %[[VAL_24]] pos_mem_sz at 0 with %[[VAL_26]] +// CHECK-DAG: %[[VAL_28:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[VAL_29:.*]] = memref.cast %[[VAL_28]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[VAL_4]], %[[VAL_28]]{{\[}}%[[VAL_4]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[VAL_3]], %[[VAL_28]]{{\[}}%[[VAL_3]]] : memref<2xindex> // CHECK: %[[VAL_30:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_8]], %[[VAL_29]], %[[VAL_29]], %[[VAL_16]], %[[VAL_17]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 // CHECK: %[[VAL_31:.*]] = arith.cmpi eq, %[[VAL_30]], %[[VAL_1]] : i1 // CHECK: scf.if %[[VAL_31]] { @@ -722,31 +722,31 @@ func.func @sparse_new_coo(%arg0: !llvm.ptr) -> tensor { // CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<2xindex> // CHECK: memref.store %[[VAL_3]], %[[VAL_5]]{{\[}}%[[VAL_2]]] : memref<2xindex> // CHECK: %[[VAL_7:.*]] = call @createCheckedSparseTensorReader(%[[A0]], %[[VAL_6]], %[[VAL_1]]) : (!llvm.ptr, memref, i32) -> !llvm.ptr -// CHECK: %[[VAL_8:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_7]]) : (!llvm.ptr) -> memref -// CHECK: %[[VAL_9:.*]] = call @getSparseTensorReaderNSE(%[[VAL_7]]) : (!llvm.ptr) -> index -// CHECK: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref -// CHECK: %[[VAL_11:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref -// CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index -// CHECK: %[[VAL_13:.*]] = memref.alloc() : memref<2xindex> -// CHECK: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<2xindex> to memref -// CHECK: %[[VAL_15:.*]] = memref.alloc(%[[VAL_12]]) : memref -// CHECK: %[[VAL_16:.*]] = memref.alloc(%[[VAL_9]]) : memref -// CHECK: %[[VAL_17:.*]] = sparse_tensor.storage_specifier.init -// CHECK: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.set %[[VAL_17]] lvl_sz at 0 with %[[VAL_11]] -// CHECK: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.get %[[VAL_18]] pos_mem_sz at 0 -// CHECK: %[[VAL_20:.*]], %[[VAL_21:.*]] = sparse_tensor.push_back %[[VAL_19]], %[[VAL_14]], %[[VAL_3]] -// CHECK: %[[VAL_22:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] pos_mem_sz at 0 with %[[VAL_21]] -// CHECK: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_22]] lvl_sz at 1 with %[[VAL_10]] -// CHECK: %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.push_back %[[VAL_21]], %[[VAL_20]], %[[VAL_3]], %[[VAL_2]] -// CHECK: %[[VAL_26:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] pos_mem_sz at 0 with %[[VAL_25]] -// CHECK: %[[VAL_27:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_28:.*]] = memref.cast %[[VAL_27]] : memref<2xindex> to memref -// CHECK: 
memref.store %[[VAL_2]], %[[VAL_27]]{{\[}}%[[VAL_3]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_3]], %[[VAL_27]]{{\[}}%[[VAL_2]]] : memref<2xindex> -// CHECK: %[[VAL_29:.*]] = memref.alloca() : memref<2xindex> -// CHECK: %[[VAL_30:.*]] = memref.cast %[[VAL_29]] : memref<2xindex> to memref -// CHECK: memref.store %[[VAL_2]], %[[VAL_29]]{{\[}}%[[VAL_3]]] : memref<2xindex> -// CHECK: memref.store %[[VAL_3]], %[[VAL_29]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK-DAG: %[[VAL_8:.*]] = call @getSparseTensorReaderDimSizes(%[[VAL_7]]) : (!llvm.ptr) -> memref +// CHECK-DAG: %[[VAL_9:.*]] = call @getSparseTensorReaderNSE(%[[VAL_7]]) : (!llvm.ptr) -> index +// CHECK-DAG: %[[VAL_10:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref +// CHECK-DAG: %[[VAL_11:.*]] = memref.load %[[VAL_8]]{{\[}}%[[VAL_2]]] : memref +// CHECK-DAG: %[[VAL_12:.*]] = arith.muli %[[VAL_9]], %[[VAL_4]] : index +// CHECK-DAG: %[[VAL_13:.*]] = memref.alloc() : memref<2xindex> +// CHECK-DAG: %[[VAL_14:.*]] = memref.cast %[[VAL_13]] : memref<2xindex> to memref +// CHECK-DAG: %[[VAL_15:.*]] = memref.alloc(%[[VAL_12]]) : memref +// CHECK-DAG: %[[VAL_16:.*]] = memref.alloc(%[[VAL_9]]) : memref +// CHECK-DAG: %[[VAL_17:.*]] = sparse_tensor.storage_specifier.init +// CHECK-DAG: %[[VAL_18:.*]] = sparse_tensor.storage_specifier.set %[[VAL_17]] lvl_sz at 0 with %[[VAL_11]] +// CHECK-DAG: %[[VAL_19:.*]] = sparse_tensor.storage_specifier.get %[[VAL_18]] pos_mem_sz at 0 +// CHECK-DAG: %[[VAL_20:.*]], %[[VAL_21:.*]] = sparse_tensor.push_back %[[VAL_19]], %[[VAL_14]], %[[VAL_3]] +// CHECK-DAG: %[[VAL_22:.*]] = sparse_tensor.storage_specifier.set %[[VAL_18]] pos_mem_sz at 0 with %[[VAL_21]] +// CHECK-DAG: %[[VAL_23:.*]] = sparse_tensor.storage_specifier.set %[[VAL_22]] lvl_sz at 1 with %[[VAL_10]] +// CHECK-DAG: %[[VAL_24:.*]], %[[VAL_25:.*]] = sparse_tensor.push_back %[[VAL_21]], %[[VAL_20]], %[[VAL_3]], %[[VAL_2]] +// CHECK-DAG: %[[VAL_26:.*]] = sparse_tensor.storage_specifier.set %[[VAL_23]] pos_mem_sz at 0 with %[[VAL_25]] +// CHECK-DAG: %[[VAL_27:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[VAL_28:.*]] = memref.cast %[[VAL_27]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[VAL_2]], %[[VAL_27]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[VAL_3]], %[[VAL_27]]{{\[}}%[[VAL_2]]] : memref<2xindex> +// CHECK-DAG: %[[VAL_29:.*]] = memref.alloca() : memref<2xindex> +// CHECK-DAG: %[[VAL_30:.*]] = memref.cast %[[VAL_29]] : memref<2xindex> to memref +// CHECK-DAG: memref.store %[[VAL_2]], %[[VAL_29]]{{\[}}%[[VAL_3]]] : memref<2xindex> +// CHECK-DAG: memref.store %[[VAL_3]], %[[VAL_29]]{{\[}}%[[VAL_2]]] : memref<2xindex> // CHECK: %[[VAL_31:.*]] = call @getSparseTensorReaderReadToBuffers0F32(%[[VAL_7]], %[[VAL_28]], %[[VAL_30]], %[[VAL_15]], %[[VAL_16]]) : (!llvm.ptr, memref, memref, memref, memref) -> i1 // CHECK: memref.store %[[VAL_9]], %[[VAL_24]]{{\[}}%[[VAL_2]]] : memref // CHECK: %[[VAL_32:.*]] = sparse_tensor.storage_specifier.set %[[VAL_26]] crd_mem_sz at 0 with %[[VAL_12]] diff --git a/mlir/test/Dialect/SparseTensor/codegen_buffer_initialization.mlir b/mlir/test/Dialect/SparseTensor/codegen_buffer_initialization.mlir index 14a8f0e..e6d9700 100644 --- a/mlir/test/Dialect/SparseTensor/codegen_buffer_initialization.mlir +++ b/mlir/test/Dialect/SparseTensor/codegen_buffer_initialization.mlir @@ -4,9 +4,9 @@ // CHECK-LABEL: func.func @empty_sparse_vector( // CHECK-SAME: %[[VAL_0:.*]]: index) -> (memref, memref, memref, !sparse_tensor.storage_specifier -// CHECK: %[[VAL_1:.*]] = 
arith.constant 1 : index -// CHECK: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f64 -// CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[VAL_1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[VAL_2:.*]] = arith.constant 0.000000e+00 : f64 +// CHECK-DAG: %[[VAL_3:.*]] = arith.constant 0 : index // CHECK: %[[VAL_4:.*]] = memref.alloc() : memref<16xindex> // CHECK: %[[VAL_5:.*]] = memref.cast %[[VAL_4]] : memref<16xindex> to memref // CHECK: linalg.fill ins(%[[VAL_3]] : index) outs(%[[VAL_4]] : memref<16xindex>) diff --git a/mlir/test/Integration/Dialect/SparseTensor/CPU/block.mlir b/mlir/test/Integration/Dialect/SparseTensor/CPU/block.mlir index d92165e..6468c4b 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/CPU/block.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/CPU/block.mlir @@ -3,7 +3,7 @@ // // Set-up that's shared across all tests in this directory. In principle, this // config could be moved to lit.local.cfg. However, there are downstream users that -// do not use these LIT config files. Hence why this is kept inline. +// do not use these LIT config files. Hence why this is kept inline. // // DEFINE: %{sparsifier_opts} = enable-runtime-library=true // DEFINE: %{sparsifier_opts_sve} = enable-arm-sve=true %{sparsifier_opts} @@ -20,10 +20,13 @@ // REDEFINE: %{env} = TENSOR0="%mlir_src_dir/test/Integration/data/block.mtx" // RUN: %{compile} | env %{env} %{run} | FileCheck %s // -// TODO: enable! // Do the same run, but now with direct IR generation. // REDEFINE: %{sparsifier_opts} = enable-runtime-library=false -// R_UN: %{compile} | env %{env} %{run} | FileCheck %s +// RUN: %{compile} | env %{env} %{run} | FileCheck %s +// +// Do the same run, but now with direct IR generation and vectorization. +// REDEFINE: %{sparsifier_opts} = enable-runtime-library=false enable-buffer-initialization=true vl=2 reassociate-fp-reductions=true enable-index-optimizations=true +// RUN: %{compile} | env %{env} %{run} | FileCheck %s !Filename = !llvm.ptr diff --git a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sddmm-lib.mlir b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sddmm-lib.mlir index fb0da0c..db5c154 100644 --- a/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sddmm-lib.mlir +++ b/mlir/test/Integration/Dialect/SparseTensor/GPU/CUDA/sparse-sddmm-lib.mlir @@ -16,8 +16,7 @@ // // without RT lib: // -// TODO: make this work -// R_U_N: %{compile} enable-runtime-library=false" | %{run} +// RUN: %{compile} enable-runtime-library=false" | %{run} !Filename = !llvm.ptr -- cgit v1.1 From c06700bd75fcff8187ec70b3d34d274554f93a81 Mon Sep 17 00:00:00 2001 From: Shraiysh Date: Thu, 16 Nov 2023 19:17:56 -0600 Subject: Revert "[flang][OpenMP] Add semantic check for declare target" (#72592) Reverts llvm/llvm-project#71861 --- flang/lib/Lower/OpenMP.cpp | 15 -- flang/lib/Parser/openmp-parsers.cpp | 2 - flang/lib/Semantics/check-omp-structure.cpp | 51 ++---- flang/lib/Semantics/check-omp-structure.h | 2 - flang/lib/Semantics/resolve-directives.cpp | 3 - .../test/Lower/OpenMP/FIR/declare-target-data.f90 | 16 -- .../OpenMP/FIR/declare-target-func-and-subr.f90 | 68 -------- ...are-target-implicit-func-and-subr-cap-enter.f90 | 192 --------------------- flang/test/Lower/OpenMP/declare-target-data.f90 | 16 -- .../Lower/OpenMP/declare-target-func-and-subr.f90 | 68 -------- ...are-target-implicit-func-and-subr-cap-enter.f90 | 192 --------------------- .../OpenMP/declare-target-implicit-tarop-cap.f90 | 14 -- 
flang/test/Lower/OpenMP/function-filtering-2.f90 | 8 - flang/test/Lower/OpenMP/function-filtering.f90 | 18 -- .../Parser/OpenMP/declare_target-device_type.f90 | 27 +-- .../Semantics/OpenMP/declarative-directive.f90 | 12 +- flang/test/Semantics/OpenMP/declare-target01.f90 | 48 ------ flang/test/Semantics/OpenMP/declare-target02.f90 | 54 ------ flang/test/Semantics/OpenMP/declare-target06.f90 | 5 - flang/test/Semantics/OpenMP/requires04.f90 | 5 +- flang/test/Semantics/OpenMP/requires05.f90 | 2 - llvm/include/llvm/Frontend/OpenMP/OMP.td | 6 +- 22 files changed, 21 insertions(+), 803 deletions(-) delete mode 100644 flang/test/Lower/OpenMP/FIR/declare-target-implicit-func-and-subr-cap-enter.f90 delete mode 100644 flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index 6657f46..00f1602 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -590,8 +590,6 @@ public: bool processSectionsReduction(mlir::Location currentLocation) const; bool processTo(llvm::SmallVectorImpl &result) const; bool - processEnter(llvm::SmallVectorImpl &result) const; - bool processUseDeviceAddr(llvm::SmallVectorImpl &operands, llvm::SmallVectorImpl &useDeviceTypes, llvm::SmallVectorImpl &useDeviceLocs, @@ -1853,18 +1851,6 @@ bool ClauseProcessor::processTo( }); } -bool ClauseProcessor::processEnter( - llvm::SmallVectorImpl &result) const { - return findRepeatableClause( - [&](const ClauseTy::Enter *enterClause, - const Fortran::parser::CharBlock &) { - // Case: declare target to(func, var1, var2)... - gatherFuncAndVarSyms(enterClause->v, - mlir::omp::DeclareTargetCaptureClause::enter, - result); - }); -} - bool ClauseProcessor::processUseDeviceAddr( llvm::SmallVectorImpl &operands, llvm::SmallVectorImpl &useDeviceTypes, @@ -2806,7 +2792,6 @@ static mlir::omp::DeclareTargetDeviceType getDeclareTargetInfo( ClauseProcessor cp(converter, *clauseList); cp.processTo(symbolAndClause); - cp.processEnter(symbolAndClause); cp.processLink(symbolAndClause); cp.processDeviceType(deviceType); cp.processTODO( diff --git a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index bba1be2..0271f69 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -259,8 +259,6 @@ TYPE_PARSER( parenthesized("STATIC" >> maybe("," >> scalarIntExpr)))) || "DYNAMIC_ALLOCATORS" >> construct(construct()) || - "ENTER" >> construct(construct( - parenthesized(Parser{}))) || "FINAL" >> construct(construct( parenthesized(scalarLogicalExpr))) || "FULL" >> construct(construct()) || diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 4dbbf12..0b1a581 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -1165,31 +1165,13 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Allocate &x) { } } -void OmpStructureChecker::Enter(const parser::OmpDeclareTargetWithClause &x) { - SetClauseSets(llvm::omp::Directive::OMPD_declare_target); -} - -void OmpStructureChecker::Leave(const parser::OmpDeclareTargetWithClause &x) { - if (x.v.v.size() > 0) { - const parser::OmpClause *enterClause = - FindClause(llvm::omp::Clause::OMPC_enter); - const parser::OmpClause *toClause = FindClause(llvm::omp::Clause::OMPC_to); - const parser::OmpClause *linkClause = - FindClause(llvm::omp::Clause::OMPC_link); - if (!enterClause && !toClause && !linkClause) { - context_.Say(x.source, - "If the DECLARE 
TARGET directive has a clause, it must contain at lease one ENTER clause or LINK clause"_err_en_US); - } - if (toClause) { - context_.Say(toClause->source, - "The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead."_warn_en_US); - } - } -} - void OmpStructureChecker::Enter(const parser::OpenMPDeclareTargetConstruct &x) { const auto &dir{std::get(x.t)}; PushContext(dir.source, llvm::omp::Directive::OMPD_declare_target); + const auto &spec{std::get(x.t)}; + if (std::holds_alternative(spec.u)) { + SetClauseSets(llvm::omp::Directive::OMPD_declare_target); + } } void OmpStructureChecker::Enter(const parser::OmpDeclareTargetWithList &x) { @@ -1263,8 +1245,7 @@ void OmpStructureChecker::Leave(const parser::OpenMPDeclareTargetConstruct &x) { CheckThreadprivateOrDeclareTargetVar(*objectList); } else if (const auto *clauseList{ parser::Unwrap(spec.u)}) { - bool toClauseFound{false}, deviceTypeClauseFound{false}, - enterClauseFound{false}; + bool toClauseFound{false}, deviceTypeClauseFound{false}; for (const auto &clause : clauseList->v) { common::visit( common::visitors{ @@ -1279,12 +1260,6 @@ void OmpStructureChecker::Leave(const parser::OpenMPDeclareTargetConstruct &x) { CheckIsVarPartOfAnotherVar(dir.source, linkClause.v); CheckThreadprivateOrDeclareTargetVar(linkClause.v); }, - [&](const parser::OmpClause::Enter &enterClause) { - enterClauseFound = true; - CheckSymbolNames(dir.source, enterClause.v); - CheckIsVarPartOfAnotherVar(dir.source, enterClause.v); - CheckThreadprivateOrDeclareTargetVar(enterClause.v); - }, [&](const parser::OmpClause::DeviceType &deviceTypeClause) { deviceTypeClauseFound = true; if (deviceTypeClause.v.v != @@ -1298,7 +1273,7 @@ void OmpStructureChecker::Leave(const parser::OpenMPDeclareTargetConstruct &x) { }, clause.u); - if ((toClauseFound || enterClauseFound) && !deviceTypeClauseFound) { + if (toClauseFound && !deviceTypeClauseFound) { deviceConstructFound_ = true; } } @@ -2253,7 +2228,6 @@ CHECK_SIMPLE_CLAUSE(CancellationConstructType, OMPC_cancellation_construct_type) CHECK_SIMPLE_CLAUSE(Doacross, OMPC_doacross) CHECK_SIMPLE_CLAUSE(OmpxAttribute, OMPC_ompx_attribute) CHECK_SIMPLE_CLAUSE(OmpxBare, OMPC_ompx_bare) -CHECK_SIMPLE_CLAUSE(Enter, OMPC_enter) CHECK_REQ_SCALAR_INT_CLAUSE(Grainsize, OMPC_grainsize) CHECK_REQ_SCALAR_INT_CLAUSE(NumTasks, OMPC_num_tasks) @@ -3255,13 +3229,12 @@ const parser::OmpObjectList *OmpStructureChecker::GetOmpObjectList( const parser::OmpClause &clause) { // Clauses with OmpObjectList as its data member - using MemberObjectListClauses = - std::tuple; + using MemberObjectListClauses = std::tuple; // Clauses with OmpObjectList in the tuple using TupleObjectListClauses = diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index 90e5c9f..d35602c 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -80,8 +80,6 @@ public: void Enter(const parser::OpenMPDeclareTargetConstruct &); void Leave(const parser::OpenMPDeclareTargetConstruct &); void Enter(const parser::OmpDeclareTargetWithList &); - void Enter(const parser::OmpDeclareTargetWithClause &); - void Leave(const parser::OmpDeclareTargetWithClause &); void Enter(const parser::OpenMPExecutableAllocate &); void Leave(const parser::OpenMPExecutableAllocate &); void Enter(const parser::OpenMPAllocatorsConstruct &); diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 882ecad..bbb105e 100644 --- 
a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -1744,9 +1744,6 @@ bool OmpAttributeVisitor::Pre(const parser::OpenMPDeclareTargetConstruct &x) { } else if (const auto *linkClause{ std::get_if(&clause.u)}) { ResolveOmpObjectList(linkClause->v, Symbol::Flag::OmpDeclareTarget); - } else if (const auto *enterClause{ - std::get_if(&clause.u)}) { - ResolveOmpObjectList(enterClause->v, Symbol::Flag::OmpDeclareTarget); } } } diff --git a/flang/test/Lower/OpenMP/FIR/declare-target-data.f90 b/flang/test/Lower/OpenMP/FIR/declare-target-data.f90 index bb3bbc8..e57d928 100644 --- a/flang/test/Lower/OpenMP/FIR/declare-target-data.f90 +++ b/flang/test/Lower/OpenMP/FIR/declare-target-data.f90 @@ -32,10 +32,6 @@ INTEGER, POINTER :: pt2 => pt2_tar INTEGER :: data_int_to = 5 !$omp declare target to(data_int_to) -!CHECK-DAG: fir.global @_QMtest_0Edata_int_enter {omp.declare_target = #omp.declaretarget} : i32 -INTEGER :: data_int_enter = 5 -!$omp declare target enter(data_int_enter) - !CHECK-DAG: fir.global @_QMtest_0Edata_int_clauseless {omp.declare_target = #omp.declaretarget} : i32 INTEGER :: data_int_clauseless = 1 !$omp declare target(data_int_clauseless) @@ -46,12 +42,6 @@ REAL :: data_extended_to_1 = 2 REAL :: data_extended_to_2 = 3 !$omp declare target to(data_extended_to_1, data_extended_to_2) -!CHECK-DAG: fir.global @_QMtest_0Edata_extended_enter_1 {omp.declare_target = #omp.declaretarget} : f32 -!CHECK-DAG: fir.global @_QMtest_0Edata_extended_enter_2 {omp.declare_target = #omp.declaretarget} : f32 -REAL :: data_extended_enter_1 = 2 -REAL :: data_extended_enter_2 = 3 -!$omp declare target enter(data_extended_enter_1, data_extended_enter_2) - !CHECK-DAG: fir.global @_QMtest_0Edata_extended_link_1 {omp.declare_target = #omp.declaretarget} : f32 !CHECK-DAG: fir.global @_QMtest_0Edata_extended_link_2 {omp.declare_target = #omp.declaretarget} : f32 REAL :: data_extended_link_1 = 2 @@ -79,10 +69,4 @@ PROGRAM commons REAL :: two_to = 2 COMMON /numbers_to/ one_to, two_to !$omp declare target to(/numbers_to/) - - !CHECK-DAG: fir.global @numbers_enter_ {omp.declare_target = #omp.declaretarget} : tuple { - REAL :: one_enter = 1 - REAL :: two_enter = 2 - COMMON /numbers_enter/ one_enter, two_enter - !$omp declare target enter(/numbers_enter/) END diff --git a/flang/test/Lower/OpenMP/FIR/declare-target-func-and-subr.f90 b/flang/test/Lower/OpenMP/FIR/declare-target-func-and-subr.f90 index 36d4d7d..26741c6 100644 --- a/flang/test/Lower/OpenMP/FIR/declare-target-func-and-subr.f90 +++ b/flang/test/Lower/OpenMP/FIR/declare-target-func-and-subr.f90 @@ -13,14 +13,6 @@ FUNCTION FUNC_T_DEVICE() RESULT(I) I = 1 END FUNCTION FUNC_T_DEVICE -! DEVICE-LABEL: func.func @_QPfunc_enter_device() -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} -FUNCTION FUNC_ENTER_DEVICE() RESULT(I) -!$omp declare target enter(FUNC_ENTER_DEVICE) device_type(nohost) - INTEGER :: I - I = 1 -END FUNCTION FUNC_ENTER_DEVICE - ! HOST-LABEL: func.func @_QPfunc_t_host() ! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} FUNCTION FUNC_T_HOST() RESULT(I) @@ -29,14 +21,6 @@ FUNCTION FUNC_T_HOST() RESULT(I) I = 1 END FUNCTION FUNC_T_HOST -! HOST-LABEL: func.func @_QPfunc_enter_host() -! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} -FUNCTION FUNC_ENTER_HOST() RESULT(I) -!$omp declare target enter(FUNC_ENTER_HOST) device_type(host) - INTEGER :: I - I = 1 -END FUNCTION FUNC_ENTER_HOST - ! ALL-LABEL: func.func @_QPfunc_t_any() ! 
ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} FUNCTION FUNC_T_ANY() RESULT(I) @@ -45,14 +29,6 @@ FUNCTION FUNC_T_ANY() RESULT(I) I = 1 END FUNCTION FUNC_T_ANY -! ALL-LABEL: func.func @_QPfunc_enter_any() -! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} -FUNCTION FUNC_ENTER_ANY() RESULT(I) -!$omp declare target enter(FUNC_ENTER_ANY) device_type(any) - INTEGER :: I - I = 1 -END FUNCTION FUNC_ENTER_ANY - ! ALL-LABEL: func.func @_QPfunc_default_t_any() ! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} FUNCTION FUNC_DEFAULT_T_ANY() RESULT(I) @@ -61,14 +37,6 @@ FUNCTION FUNC_DEFAULT_T_ANY() RESULT(I) I = 1 END FUNCTION FUNC_DEFAULT_T_ANY -! ALL-LABEL: func.func @_QPfunc_default_enter_any() -! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} -FUNCTION FUNC_DEFAULT_ENTER_ANY() RESULT(I) -!$omp declare target enter(FUNC_DEFAULT_ENTER_ANY) - INTEGER :: I - I = 1 -END FUNCTION FUNC_DEFAULT_ENTER_ANY - ! ALL-LABEL: func.func @_QPfunc_default_any() ! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} FUNCTION FUNC_DEFAULT_ANY() RESULT(I) @@ -97,48 +65,24 @@ SUBROUTINE SUBR_T_DEVICE() !$omp declare target to(SUBR_T_DEVICE) device_type(nohost) END -! DEVICE-LABEL: func.func @_QPsubr_enter_device() -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} -SUBROUTINE SUBR_ENTER_DEVICE() -!$omp declare target enter(SUBR_ENTER_DEVICE) device_type(nohost) -END - ! HOST-LABEL: func.func @_QPsubr_t_host() ! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} SUBROUTINE SUBR_T_HOST() !$omp declare target to(SUBR_T_HOST) device_type(host) END -! HOST-LABEL: func.func @_QPsubr_enter_host() -! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} -SUBROUTINE SUBR_ENTER_HOST() -!$omp declare target enter(SUBR_ENTER_HOST) device_type(host) -END - ! ALL-LABEL: func.func @_QPsubr_t_any() ! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} SUBROUTINE SUBR_T_ANY() !$omp declare target to(SUBR_T_ANY) device_type(any) END -! ALL-LABEL: func.func @_QPsubr_enter_any() -! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} -SUBROUTINE SUBR_ENTER_ANY() -!$omp declare target enter(SUBR_ENTER_ANY) device_type(any) -END - ! ALL-LABEL: func.func @_QPsubr_default_t_any() ! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} SUBROUTINE SUBR_DEFAULT_T_ANY() !$omp declare target to(SUBR_DEFAULT_T_ANY) END -! ALL-LABEL: func.func @_QPsubr_default_enter_any() -! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} -SUBROUTINE SUBR_DEFAULT_ENTER_ANY() -!$omp declare target enter(SUBR_DEFAULT_ENTER_ANY) -END - ! ALL-LABEL: func.func @_QPsubr_default_any() ! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} SUBROUTINE SUBR_DEFAULT_ANY() @@ -164,15 +108,3 @@ RECURSIVE FUNCTION RECURSIVE_DECLARE_TARGET(INCREMENT) RESULT(K) K = RECURSIVE_DECLARE_TARGET(INCREMENT + 1) END IF END FUNCTION RECURSIVE_DECLARE_TARGET - -! DEVICE-LABEL: func.func @_QPrecursive_declare_target_enter -! 
DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} -RECURSIVE FUNCTION RECURSIVE_DECLARE_TARGET_ENTER(INCREMENT) RESULT(K) -!$omp declare target enter(RECURSIVE_DECLARE_TARGET_ENTER) device_type(nohost) - INTEGER :: INCREMENT, K - IF (INCREMENT == 10) THEN - K = INCREMENT - ELSE - K = RECURSIVE_DECLARE_TARGET_ENTER(INCREMENT + 1) - END IF -END FUNCTION RECURSIVE_DECLARE_TARGET_ENTER diff --git a/flang/test/Lower/OpenMP/FIR/declare-target-implicit-func-and-subr-cap-enter.f90 b/flang/test/Lower/OpenMP/FIR/declare-target-implicit-func-and-subr-cap-enter.f90 deleted file mode 100644 index 8e88d1b0..0000000 --- a/flang/test/Lower/OpenMP/FIR/declare-target-implicit-func-and-subr-cap-enter.f90 +++ /dev/null @@ -1,192 +0,0 @@ -!RUN: %flang_fc1 -emit-fir -fopenmp %s -o - | FileCheck %s -!RUN: %flang_fc1 -emit-fir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE -!RUN: bbc -emit-fir -fopenmp %s -o - | FileCheck %s -!RUN: bbc -emit-fir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE - -! CHECK-LABEL: func.func @_QPimplicitly_captured_twice -! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function implicitly_captured_twice() result(k) - integer :: i - i = 10 - k = i -end function implicitly_captured_twice - -! CHECK-LABEL: func.func @_QPtarget_function_twice_host -! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function target_function_twice_host() result(i) -!$omp declare target enter(target_function_twice_host) device_type(host) - integer :: i - i = implicitly_captured_twice() -end function target_function_twice_host - -! DEVICE-LABEL: func.func @_QPtarget_function_twice_device -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function target_function_twice_device() result(i) -!$omp declare target enter(target_function_twice_device) device_type(nohost) - integer :: i - i = implicitly_captured_twice() -end function target_function_twice_device - -!! ----- - -! DEVICE-LABEL: func.func @_QPimplicitly_captured_nest -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function implicitly_captured_nest() result(k) - integer :: i - i = 10 - k = i -end function implicitly_captured_nest - -! DEVICE-LABEL: func.func @_QPimplicitly_captured_one -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function implicitly_captured_two() result(k) - integer :: i - i = 10 - k = i -end function implicitly_captured_two - -! DEVICE-LABEL: func.func @_QPtarget_function_test -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function target_function_test() result(j) -!$omp declare target enter(target_function_test) device_type(nohost) - integer :: i, j - i = implicitly_captured_one() - j = implicitly_captured_two() + i -end function target_function_test - -!! ----- - -! CHECK-LABEL: func.func @_QPimplicitly_captured_nest_twice -! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function implicitly_captured_nest_twice() result(k) - integer :: i - i = 10 - k = i -end function implicitly_captured_nest_twice - -! CHECK-LABEL: func.func @_QPimplicitly_captured_one_twice -! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function implicitly_captured_one_twice() result(k) - k = implicitly_captured_nest_twice() -end function implicitly_captured_one_twice - -! 
CHECK-LABEL: func.func @_QPimplicitly_captured_two_twice -! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function implicitly_captured_two_twice() result(k) - integer :: i - i = 10 - k = i -end function implicitly_captured_two_twice - -! DEVICE-LABEL: func.func @_QPtarget_function_test_device -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function target_function_test_device() result(j) - !$omp declare target enter(target_function_test_device) device_type(nohost) - integer :: i, j - i = implicitly_captured_one_twice() - j = implicitly_captured_two_twice() + i -end function target_function_test_device - -! CHECK-LABEL: func.func @_QPtarget_function_test_host -! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function target_function_test_host() result(j) - !$omp declare target enter(target_function_test_host) device_type(host) - integer :: i, j - i = implicitly_captured_one_twice() - j = implicitly_captured_two_twice() + i -end function target_function_test_host - -!! ----- - -! DEVICE-LABEL: func.func @_QPimplicitly_captured_with_dev_type_recursive -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -recursive function implicitly_captured_with_dev_type_recursive(increment) result(k) -!$omp declare target enter(implicitly_captured_with_dev_type_recursive) device_type(host) - integer :: increment, k - if (increment == 10) then - k = increment - else - k = implicitly_captured_with_dev_type_recursive(increment + 1) - end if -end function implicitly_captured_with_dev_type_recursive - -! DEVICE-LABEL: func.func @_QPtarget_function_with_dev_type_recurse -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function target_function_with_dev_type_recurse() result(i) -!$omp declare target enter(target_function_with_dev_type_recurse) device_type(nohost) - integer :: i - i = implicitly_captured_with_dev_type_recursive(0) -end function target_function_with_dev_type_recurse - -!! ----- - -module test_module -contains -! CHECK-LABEL: func.func @_QMtest_modulePimplicitly_captured_nest_twice -! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} - function implicitly_captured_nest_twice() result(i) - integer :: i - i = 10 - end function implicitly_captured_nest_twice - -! CHECK-LABEL: func.func @_QMtest_modulePimplicitly_captured_one_twice -! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} - function implicitly_captured_one_twice() result(k) - !$omp declare target enter(implicitly_captured_one_twice) device_type(host) - k = implicitly_captured_nest_twice() - end function implicitly_captured_one_twice - -! DEVICE-LABEL: func.func @_QMtest_modulePimplicitly_captured_two_twice -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} - function implicitly_captured_two_twice() result(y) - integer :: y - y = 5 - end function implicitly_captured_two_twice - -! DEVICE-LABEL: func.func @_QMtest_modulePtarget_function_test_device -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} - function target_function_test_device() result(j) - !$omp declare target enter(target_function_test_device) device_type(nohost) - integer :: i, j - i = implicitly_captured_one_twice() - j = implicitly_captured_two_twice() + i - end function target_function_test_device -end module test_module - -!! 
----- - -program mb - interface - subroutine caller_recursive - !$omp declare target enter(caller_recursive) device_type(nohost) - end subroutine - - recursive subroutine implicitly_captured_recursive(increment) - integer :: increment - end subroutine - end interface -end program - -! DEVICE-LABEL: func.func @_QPimplicitly_captured_recursive -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -recursive subroutine implicitly_captured_recursive(increment) - integer :: increment - if (increment == 10) then - return - else - call implicitly_captured_recursive(increment + 1) - end if -end subroutine - -! DEVICE-LABEL: func.func @_QPcaller_recursive -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -subroutine caller_recursive -!$omp declare target enter(caller_recursive) device_type(nohost) - call implicitly_captured_recursive(0) -end subroutine diff --git a/flang/test/Lower/OpenMP/declare-target-data.f90 b/flang/test/Lower/OpenMP/declare-target-data.f90 index 4568810..e74f847 100644 --- a/flang/test/Lower/OpenMP/declare-target-data.f90 +++ b/flang/test/Lower/OpenMP/declare-target-data.f90 @@ -32,10 +32,6 @@ INTEGER, POINTER :: pt2 => pt2_tar INTEGER :: data_int_to = 5 !$omp declare target to(data_int_to) -!CHECK-DAG: fir.global @_QMtest_0Edata_int_enter {omp.declare_target = #omp.declaretarget} : i32 -INTEGER :: data_int_enter = 5 -!$omp declare target enter(data_int_enter) - !CHECK-DAG: fir.global @_QMtest_0Edata_int_clauseless {omp.declare_target = #omp.declaretarget} : i32 INTEGER :: data_int_clauseless = 1 !$omp declare target(data_int_clauseless) @@ -46,12 +42,6 @@ REAL :: data_extended_to_1 = 2 REAL :: data_extended_to_2 = 3 !$omp declare target to(data_extended_to_1, data_extended_to_2) -!CHECK-DAG: fir.global @_QMtest_0Edata_extended_enter_1 {omp.declare_target = #omp.declaretarget} : f32 -!CHECK-DAG: fir.global @_QMtest_0Edata_extended_enter_2 {omp.declare_target = #omp.declaretarget} : f32 -REAL :: data_extended_enter_1 = 2 -REAL :: data_extended_enter_2 = 3 -!$omp declare target enter(data_extended_enter_1, data_extended_enter_2) - !CHECK-DAG: fir.global @_QMtest_0Edata_extended_link_1 {omp.declare_target = #omp.declaretarget} : f32 !CHECK-DAG: fir.global @_QMtest_0Edata_extended_link_2 {omp.declare_target = #omp.declaretarget} : f32 REAL :: data_extended_link_1 = 2 @@ -79,10 +69,4 @@ PROGRAM commons REAL :: two_to = 2 COMMON /numbers_to/ one_to, two_to !$omp declare target to(/numbers_to/) - - !CHECK-DAG: fir.global @numbers_enter_ {omp.declare_target = #omp.declaretarget} : tuple { - REAL :: one_enter = 1 - REAL :: two_enter = 2 - COMMON /numbers_enter/ one_enter, two_enter - !$omp declare target enter(/numbers_enter/) END diff --git a/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 b/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 index 3d2c406..a5cdfca8 100644 --- a/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 +++ b/flang/test/Lower/OpenMP/declare-target-func-and-subr.f90 @@ -13,14 +13,6 @@ FUNCTION FUNC_T_DEVICE() RESULT(I) I = 1 END FUNCTION FUNC_T_DEVICE -! DEVICE-LABEL: func.func @_QPfunc_enter_device() -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} -FUNCTION FUNC_ENTER_DEVICE() RESULT(I) -!$omp declare target enter(FUNC_ENTER_DEVICE) device_type(nohost) - INTEGER :: I - I = 1 -END FUNCTION FUNC_ENTER_DEVICE - ! HOST-LABEL: func.func @_QPfunc_t_host() ! 
HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} FUNCTION FUNC_T_HOST() RESULT(I) @@ -29,14 +21,6 @@ FUNCTION FUNC_T_HOST() RESULT(I) I = 1 END FUNCTION FUNC_T_HOST -! HOST-LABEL: func.func @_QPfunc_enter_host() -! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} -FUNCTION FUNC_ENTER_HOST() RESULT(I) -!$omp declare target enter(FUNC_ENTER_HOST) device_type(host) - INTEGER :: I - I = 1 -END FUNCTION FUNC_ENTER_HOST - ! ALL-LABEL: func.func @_QPfunc_t_any() ! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} FUNCTION FUNC_T_ANY() RESULT(I) @@ -45,14 +29,6 @@ FUNCTION FUNC_T_ANY() RESULT(I) I = 1 END FUNCTION FUNC_T_ANY -! ALL-LABEL: func.func @_QPfunc_enter_any() -! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} -FUNCTION FUNC_ENTER_ANY() RESULT(I) -!$omp declare target enter(FUNC_ENTER_ANY) device_type(any) - INTEGER :: I - I = 1 -END FUNCTION FUNC_ENTER_ANY - ! ALL-LABEL: func.func @_QPfunc_default_t_any() ! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} FUNCTION FUNC_DEFAULT_T_ANY() RESULT(I) @@ -61,14 +37,6 @@ FUNCTION FUNC_DEFAULT_T_ANY() RESULT(I) I = 1 END FUNCTION FUNC_DEFAULT_T_ANY -! ALL-LABEL: func.func @_QPfunc_default_enter_any() -! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} -FUNCTION FUNC_DEFAULT_ENTER_ANY() RESULT(I) -!$omp declare target enter(FUNC_DEFAULT_ENTER_ANY) - INTEGER :: I - I = 1 -END FUNCTION FUNC_DEFAULT_ENTER_ANY - ! ALL-LABEL: func.func @_QPfunc_default_any() ! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} FUNCTION FUNC_DEFAULT_ANY() RESULT(I) @@ -97,48 +65,24 @@ SUBROUTINE SUBR_T_DEVICE() !$omp declare target to(SUBR_T_DEVICE) device_type(nohost) END -! DEVICE-LABEL: func.func @_QPsubr_enter_device() -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} -SUBROUTINE SUBR_ENTER_DEVICE() -!$omp declare target enter(SUBR_ENTER_DEVICE) device_type(nohost) -END - ! HOST-LABEL: func.func @_QPsubr_t_host() ! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} SUBROUTINE SUBR_T_HOST() !$omp declare target to(SUBR_T_HOST) device_type(host) END -! HOST-LABEL: func.func @_QPsubr_enter_host() -! HOST-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} -SUBROUTINE SUBR_ENTER_HOST() -!$omp declare target enter(SUBR_ENTER_HOST) device_type(host) -END - ! ALL-LABEL: func.func @_QPsubr_t_any() ! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} SUBROUTINE SUBR_T_ANY() !$omp declare target to(SUBR_T_ANY) device_type(any) END -! ALL-LABEL: func.func @_QPsubr_enter_any() -! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} -SUBROUTINE SUBR_ENTER_ANY() -!$omp declare target enter(SUBR_ENTER_ANY) device_type(any) -END - ! ALL-LABEL: func.func @_QPsubr_default_t_any() ! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} SUBROUTINE SUBR_DEFAULT_T_ANY() !$omp declare target to(SUBR_DEFAULT_T_ANY) END -! ALL-LABEL: func.func @_QPsubr_default_enter_any() -! ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} -SUBROUTINE SUBR_DEFAULT_ENTER_ANY() -!$omp declare target enter(SUBR_DEFAULT_ENTER_ANY) -END - ! ALL-LABEL: func.func @_QPsubr_default_any() ! 
ALL-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} SUBROUTINE SUBR_DEFAULT_ANY() @@ -164,15 +108,3 @@ RECURSIVE FUNCTION RECURSIVE_DECLARE_TARGET(INCREMENT) RESULT(K) K = RECURSIVE_DECLARE_TARGET(INCREMENT + 1) END IF END FUNCTION RECURSIVE_DECLARE_TARGET - -! DEVICE-LABEL: func.func @_QPrecursive_declare_target_enter -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}} -RECURSIVE FUNCTION RECURSIVE_DECLARE_TARGET_ENTER(INCREMENT) RESULT(K) -!$omp declare target enter(RECURSIVE_DECLARE_TARGET_ENTER) device_type(nohost) - INTEGER :: INCREMENT, K - IF (INCREMENT == 10) THEN - K = INCREMENT - ELSE - K = RECURSIVE_DECLARE_TARGET_ENTER(INCREMENT + 1) - END IF -END FUNCTION RECURSIVE_DECLARE_TARGET_ENTER diff --git a/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 b/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 deleted file mode 100644 index ed718a4..0000000 --- a/flang/test/Lower/OpenMP/declare-target-implicit-func-and-subr-cap-enter.f90 +++ /dev/null @@ -1,192 +0,0 @@ -!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s -!RUN: %flang_fc1 -emit-hlfir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE -!RUN: bbc -emit-hlfir -fopenmp %s -o - | FileCheck %s -!RUN: bbc -emit-hlfir -fopenmp -fopenmp-is-target-device %s -o - | FileCheck %s --check-prefix=DEVICE - -! CHECK-LABEL: func.func @_QPimplicitly_captured_twice -! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function implicitly_captured_twice() result(k) - integer :: i - i = 10 - k = i -end function implicitly_captured_twice - -! CHECK-LABEL: func.func @_QPtarget_function_twice_host -! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function target_function_twice_host() result(i) -!$omp declare target enter(target_function_twice_host) device_type(host) - integer :: i - i = implicitly_captured_twice() -end function target_function_twice_host - -! DEVICE-LABEL: func.func @_QPtarget_function_twice_device -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function target_function_twice_device() result(i) -!$omp declare target enter(target_function_twice_device) device_type(nohost) - integer :: i - i = implicitly_captured_twice() -end function target_function_twice_device - -!! ----- - -! DEVICE-LABEL: func.func @_QPimplicitly_captured_nest -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function implicitly_captured_nest() result(k) - integer :: i - i = 10 - k = i -end function implicitly_captured_nest - -! DEVICE-LABEL: func.func @_QPimplicitly_captured_one -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function implicitly_captured_two() result(k) - integer :: i - i = 10 - k = i -end function implicitly_captured_two - -! DEVICE-LABEL: func.func @_QPtarget_function_test -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function target_function_test() result(j) -!$omp declare target enter(target_function_test) device_type(nohost) - integer :: i, j - i = implicitly_captured_one() - j = implicitly_captured_two() + i -end function target_function_test - -!! ----- - -! CHECK-LABEL: func.func @_QPimplicitly_captured_nest_twice -! 
CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function implicitly_captured_nest_twice() result(k) - integer :: i - i = 10 - k = i -end function implicitly_captured_nest_twice - -! CHECK-LABEL: func.func @_QPimplicitly_captured_one_twice -! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function implicitly_captured_one_twice() result(k) - k = implicitly_captured_nest_twice() -end function implicitly_captured_one_twice - -! CHECK-LABEL: func.func @_QPimplicitly_captured_two_twice -! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function implicitly_captured_two_twice() result(k) - integer :: i - i = 10 - k = i -end function implicitly_captured_two_twice - -! DEVICE-LABEL: func.func @_QPtarget_function_test_device -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function target_function_test_device() result(j) - !$omp declare target enter(target_function_test_device) device_type(nohost) - integer :: i, j - i = implicitly_captured_one_twice() - j = implicitly_captured_two_twice() + i -end function target_function_test_device - -! CHECK-LABEL: func.func @_QPtarget_function_test_host -! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function target_function_test_host() result(j) - !$omp declare target enter(target_function_test_host) device_type(host) - integer :: i, j - i = implicitly_captured_one_twice() - j = implicitly_captured_two_twice() + i -end function target_function_test_host - -!! ----- - -! DEVICE-LABEL: func.func @_QPimplicitly_captured_with_dev_type_recursive -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -recursive function implicitly_captured_with_dev_type_recursive(increment) result(k) -!$omp declare target enter(implicitly_captured_with_dev_type_recursive) device_type(host) - integer :: increment, k - if (increment == 10) then - k = increment - else - k = implicitly_captured_with_dev_type_recursive(increment + 1) - end if -end function implicitly_captured_with_dev_type_recursive - -! DEVICE-LABEL: func.func @_QPtarget_function_with_dev_type_recurse -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function target_function_with_dev_type_recurse() result(i) -!$omp declare target enter(target_function_with_dev_type_recurse) device_type(nohost) - integer :: i - i = implicitly_captured_with_dev_type_recursive(0) -end function target_function_with_dev_type_recurse - -!! ----- - -module test_module -contains -! CHECK-LABEL: func.func @_QMtest_modulePimplicitly_captured_nest_twice -! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} - function implicitly_captured_nest_twice() result(i) - integer :: i - i = 10 - end function implicitly_captured_nest_twice - -! CHECK-LABEL: func.func @_QMtest_modulePimplicitly_captured_one_twice -! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} - function implicitly_captured_one_twice() result(k) - !$omp declare target enter(implicitly_captured_one_twice) device_type(host) - k = implicitly_captured_nest_twice() - end function implicitly_captured_one_twice - -! DEVICE-LABEL: func.func @_QMtest_modulePimplicitly_captured_two_twice -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} - function implicitly_captured_two_twice() result(y) - integer :: y - y = 5 - end function implicitly_captured_two_twice - -! 
DEVICE-LABEL: func.func @_QMtest_modulePtarget_function_test_device -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} - function target_function_test_device() result(j) - !$omp declare target enter(target_function_test_device) device_type(nohost) - integer :: i, j - i = implicitly_captured_one_twice() - j = implicitly_captured_two_twice() + i - end function target_function_test_device -end module test_module - -!! ----- - -program mb - interface - subroutine caller_recursive - !$omp declare target enter(caller_recursive) device_type(nohost) - end subroutine - - recursive subroutine implicitly_captured_recursive(increment) - integer :: increment - end subroutine - end interface -end program - -! DEVICE-LABEL: func.func @_QPimplicitly_captured_recursive -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -recursive subroutine implicitly_captured_recursive(increment) - integer :: increment - if (increment == 10) then - return - else - call implicitly_captured_recursive(increment + 1) - end if -end subroutine - -! DEVICE-LABEL: func.func @_QPcaller_recursive -! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -subroutine caller_recursive -!$omp declare target enter(caller_recursive) device_type(nohost) - call implicitly_captured_recursive(0) -end subroutine diff --git a/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 b/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 index 7d1ae06..0f54860 100644 --- a/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 +++ b/flang/test/Lower/OpenMP/declare-target-implicit-tarop-cap.f90 @@ -34,20 +34,6 @@ function implicitly_captured_one_twice() result(k) k = implicitly_captured_nest_twice() end function implicitly_captured_one_twice -! CHECK-LABEL: func.func @_QPimplicitly_captured_nest_twice_enter -! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function implicitly_captured_nest_twice_enter() result(i) - integer :: i - i = 10 -end function implicitly_captured_nest_twice_enter - -! CHECK-LABEL: func.func @_QPimplicitly_captured_one_twice_enter -! CHECK-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} -function implicitly_captured_one_twice_enter() result(k) -!$omp declare target enter(implicitly_captured_one_twice_enter) device_type(host) - k = implicitly_captured_nest_twice_enter() -end function implicitly_captured_one_twice_enter - ! DEVICE-LABEL: func.func @_QPimplicitly_captured_two_twice ! DEVICE-SAME: {{.*}}attributes {omp.declare_target = #omp.declaretarget{{.*}}} function implicitly_captured_two_twice() result(y) diff --git a/flang/test/Lower/OpenMP/function-filtering-2.f90 b/flang/test/Lower/OpenMP/function-filtering-2.f90 index e1d0f72..17cd0d4 100644 --- a/flang/test/Lower/OpenMP/function-filtering-2.f90 +++ b/flang/test/Lower/OpenMP/function-filtering-2.f90 @@ -19,14 +19,6 @@ subroutine declaretarget() call implicit_invocation() end subroutine declaretarget -! MLIR: func.func @{{.*}}declaretarget_enter() attributes {omp.declare_target = #omp.declaretarget} -! MLIR: return -! LLVM: define {{.*}} @{{.*}}declaretarget_enter{{.*}}( -subroutine declaretarget_enter() -!$omp declare target enter(declaretarget_enter) device_type(nohost) - call implicit_invocation() -end subroutine declaretarget_enter - ! MLIR: func.func @{{.*}}no_declaretarget() attributes {omp.declare_target = #omp.declaretarget} ! MLIR: return ! 
LLVM: define {{.*}} @{{.*}}no_declaretarget{{.*}}( diff --git a/flang/test/Lower/OpenMP/function-filtering.f90 b/flang/test/Lower/OpenMP/function-filtering.f90 index c473b99..45d8c2e2 100644 --- a/flang/test/Lower/OpenMP/function-filtering.f90 +++ b/flang/test/Lower/OpenMP/function-filtering.f90 @@ -19,16 +19,6 @@ function device_fn() result(x) x = 10 end function device_fn -! MLIR-ALL: func.func @{{.*}}device_fn_enter( -! MLIR-ALL: return - -! LLVM-ALL: define {{.*}} @{{.*}}device_fn_enter{{.*}}( -function device_fn_enter() result(x) - !$omp declare target enter(device_fn_enter) device_type(nohost) - integer :: x - x = 10 -end function device_fn_enter - ! MLIR-HOST: func.func @{{.*}}host_fn( ! MLIR-HOST: return ! MLIR-DEVICE-NOT: func.func {{.*}}host_fn( @@ -41,14 +31,6 @@ function host_fn() result(x) x = 10 end function host_fn -! LLVM-HOST: define {{.*}} @{{.*}}host_fn_enter{{.*}}( -! LLVM-DEVICE-NOT: {{.*}} @{{.*}}host_fn_enter{{.*}}( -function host_fn_enter() result(x) - !$omp declare target enter(host_fn_enter) device_type(host) - integer :: x - x = 10 -end function host_fn_enter - ! MLIR-ALL: func.func @{{.*}}target_subr( ! MLIR-ALL: return diff --git a/flang/test/Parser/OpenMP/declare_target-device_type.f90 b/flang/test/Parser/OpenMP/declare_target-device_type.f90 index 0b4f75e..2f6b68a 100644 --- a/flang/test/Parser/OpenMP/declare_target-device_type.f90 +++ b/flang/test/Parser/OpenMP/declare_target-device_type.f90 @@ -2,27 +2,12 @@ ! RUN: %flang_fc1 -fdebug-dump-parse-tree -fopenmp %s | FileCheck --check-prefix="PARSE-TREE" %s subroutine openmp_declare_target - integer, save :: x, y - !CHECK: !$omp declare target device_type(host) enter(x) - !$omp declare target device_type(host) enter(x) - !CHECK: !$omp declare target device_type(nohost) enter(x) - !$omp declare target device_type(nohost) enter(x) - !CHECK: !$omp declare target device_type(any) enter(x) - !$omp declare target device_type(any) enter(x) - - !CHECK: !$omp declare target device_type(host) to(x) - !$omp declare target device_type(host) to(x) - !CHECK: !$omp declare target device_type(nohost) to(x) - !$omp declare target device_type(nohost) to(x) - !CHECK: !$omp declare target device_type(any) to(x) - !$omp declare target device_type(any) to(x) - - !CHECK: !$omp declare target device_type(host) enter(y) to(x) - !$omp declare target device_type(host) enter(y) to(x) - !CHECK: !$omp declare target device_type(nohost) enter(y) to(x) - !$omp declare target device_type(nohost) enter(y) to(x) - !CHECK: !$omp declare target device_type(any) enter(y) to(x) - !$omp declare target device_type(any) enter(y) to(x) + !CHECK: !$omp declare target device_type(host) + !$omp declare target device_type(host) + !CHECK: !$omp declare target device_type(nohost) + !$omp declare target device_type(nohost) + !CHECK: !$omp declare target device_type(any) + !$omp declare target device_type(any) integer :: a(1024), i !CHECK: do do i = 1, 1024 diff --git a/flang/test/Semantics/OpenMP/declarative-directive.f90 b/flang/test/Semantics/OpenMP/declarative-directive.f90 index 4d10dc2..e48a682 100644 --- a/flang/test/Semantics/OpenMP/declarative-directive.f90 +++ b/flang/test/Semantics/OpenMP/declarative-directive.f90 @@ -61,22 +61,12 @@ contains !WARNING: The entity with PARAMETER attribute is used in a DECLARE TARGET directive !WARNING: The entity with PARAMETER attribute is used in a DECLARE TARGET directive !$omp declare target (foo, N, M) - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. 
!ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly !$omp declare target to(Q, S) link(R) - !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly - !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly - !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly - !$omp declare target enter(Q, S) link(R) - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. - !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly - !ERROR: MAP clause is not allowed on the DECLARE TARGET directive - !$omp declare target to(Q) map(from:Q) - !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly !ERROR: MAP clause is not allowed on the DECLARE TARGET directive - !$omp declare target enter(Q) map(from:Q) + !$omp declare target map(from:Q) integer, parameter :: N=10000, M=1024 integer :: i real :: Q(N, N), R(N,M), S(M,M) diff --git a/flang/test/Semantics/OpenMP/declare-target01.f90 b/flang/test/Semantics/OpenMP/declare-target01.f90 index 3168781..862b4c5 100644 --- a/flang/test/Semantics/OpenMP/declare-target01.f90 +++ b/flang/test/Semantics/OpenMP/declare-target01.f90 @@ -49,89 +49,41 @@ module declare_target01 !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive !$omp declare target (y%KIND) - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (my_var) - !$omp declare target enter (my_var) - - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (my_var) device_type(host) - - !$omp declare target enter (my_var) device_type(host) !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (my_var%t_i) !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive - !$omp declare target enter (my_var%t_i) - - !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. 
!$omp declare target to (my_var%t_arr) - !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive - !$omp declare target enter (my_var%t_arr) - !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (my_var%kind_param) !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive - !$omp declare target enter (my_var%kind_param) - - !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (my_var%len_param) - !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive - !$omp declare target enter (my_var%len_param) - - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (arr) - !$omp declare target enter (arr) - - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (arr) device_type(nohost) - !$omp declare target enter (arr) device_type(nohost) - !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (arr(1)) !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive - !$omp declare target enter (arr(1)) - - !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (arr(1:2)) - !ERROR: A variable that is part of another variable (as an array or structure element) cannot appear on the DECLARE TARGET directive - !$omp declare target enter (arr(1:2)) - !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (x%KIND) !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive - !$omp declare target enter (x%KIND) - - !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (w%LEN) !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive - !$omp declare target enter (w%LEN) - - !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. 
!$omp declare target to (y%KIND) - !ERROR: A type parameter inquiry cannot appear on the DECLARE TARGET directive - !$omp declare target enter (y%KIND) - !$omp declare target link (my_var2) !$omp declare target link (my_var2) device_type(any) diff --git a/flang/test/Semantics/OpenMP/declare-target02.f90 b/flang/test/Semantics/OpenMP/declare-target02.f90 index 8166e10..57a2776 100644 --- a/flang/test/Semantics/OpenMP/declare-target02.f90 +++ b/flang/test/Semantics/OpenMP/declare-target02.f90 @@ -16,23 +16,13 @@ program declare_target02 !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target (a1) - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (arr1_to) - !$omp declare target enter (arr1_to) - - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (blk1_to) - !$omp declare target enter (blk1_to) - - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target to (a1_to) - !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block - !$omp declare target enter (a1_to) - !$omp declare target link (arr1_link) !$omp declare target link (blk1_link) @@ -44,27 +34,19 @@ program declare_target02 !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement !$omp declare target (eq_a) - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement !$omp declare target to (eq_a) !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement - !$omp declare target enter (eq_a) - - !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement !$omp declare target link (eq_b) !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement !$omp declare target (eq_c) - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement !$omp declare target to (eq_c) !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement - !$omp declare target enter (eq_c) - - !ERROR: A variable in a DECLARE TARGET directive cannot appear in an EQUIVALENCE statement !$omp declare target link (eq_d) equivalence(eq_c, eq_d) @@ -87,32 +69,17 @@ contains !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target (a3) - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly !$omp declare target to (arr2_to) - !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly - !$omp declare target enter (arr2_to) - - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. 
!$omp declare target to (arr3_to) - !$omp declare target enter (arr3_to) - - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target to (a2_to) !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block - !$omp declare target enter (a2_to) - - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. - !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target to (a3_to) - !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block - !$omp declare target enter (a3_to) - !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly !$omp declare target link (arr2_link) @@ -137,22 +104,13 @@ module mod4 !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target (a4) - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (arr4_to) - !$omp declare target enter (arr4_to) - - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to (blk4_to) - !$omp declare target enter (blk4_to) - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target to (a4_to) - !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block - !$omp declare target enter (a4_to) - !$omp declare target link (arr4_link) !$omp declare target link (blk4_link) @@ -174,27 +132,15 @@ subroutine func5() !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target (a5) - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly !$omp declare target to (arr5_to) - - !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly - !$omp declare target enter (arr5_to) - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly !$omp declare target to (blk5_to) - !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly - !$omp declare target enter (blk5_to) - - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. 
!ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block !$omp declare target to (a5_to) - !ERROR: A variable in a DECLARE TARGET directive cannot be an element of a common block - !$omp declare target enter (a5_to) - !ERROR: A variable that appears in a DECLARE TARGET directive must be declared in the scope of a module or have the SAVE attribute, either explicitly or implicitly !$omp declare target link (arr5_link) diff --git a/flang/test/Semantics/OpenMP/declare-target06.f90 b/flang/test/Semantics/OpenMP/declare-target06.f90 index a1c55d3..a2d8263 100644 --- a/flang/test/Semantics/OpenMP/declare-target06.f90 +++ b/flang/test/Semantics/OpenMP/declare-target06.f90 @@ -15,14 +15,9 @@ module test_0 !$omp declare target link(no_implicit_materialization_2) !ERROR: The given DECLARE TARGET directive clause has an invalid argument -!WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !ERROR: No explicit type declared for 'no_implicit_materialization_3' !$omp declare target to(no_implicit_materialization_3) -!ERROR: The given DECLARE TARGET directive clause has an invalid argument -!ERROR: No explicit type declared for 'no_implicit_materialization_3' -!$omp declare target enter(no_implicit_materialization_3) - INTEGER :: data_int = 10 !$omp declare target(data_int) end module test_0 diff --git a/flang/test/Semantics/OpenMP/requires04.f90 b/flang/test/Semantics/OpenMP/requires04.f90 index bb4101c..ae1c2c0 100644 --- a/flang/test/Semantics/OpenMP/requires04.f90 +++ b/flang/test/Semantics/OpenMP/requires04.f90 @@ -5,10 +5,7 @@ ! device constructs, such as declare target with device_type=nohost|any. subroutine f - integer, save :: x - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. - !$omp declare target to(x) device_type(nohost) - !$omp declare target enter(x) device_type(nohost) + !$omp declare target device_type(nohost) end subroutine f subroutine g diff --git a/flang/test/Semantics/OpenMP/requires05.f90 b/flang/test/Semantics/OpenMP/requires05.f90 index dd27e38..9281f8b 100644 --- a/flang/test/Semantics/OpenMP/requires05.f90 +++ b/flang/test/Semantics/OpenMP/requires05.f90 @@ -5,9 +5,7 @@ ! device constructs, such as declare target with 'to' clause and no device_type. subroutine f - !WARNING: The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead. !$omp declare target to(f) - !$omp declare target enter(f) end subroutine f subroutine g diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 8bfacd8..f8b3b0c 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -289,9 +289,6 @@ def OMPC_To : Clause<"to"> { let clangClass = "OMPToClause"; let flangClass = "OmpObjectList"; } -def OMPC_Enter : Clause<"enter"> { - let flangClass = "OmpObjectList"; -} def OMPC_From : Clause<"from"> { let clangClass = "OMPFromClause"; let flangClass = "OmpObjectList"; @@ -1127,8 +1124,7 @@ def OMP_DeclareTarget : Directive<"declare target"> { let allowedClauses = [ VersionedClause, VersionedClause, - VersionedClause, - VersionedClause + VersionedClause ]; let allowedOnceClauses = [ VersionedClause -- cgit v1.1 From 72b97630bc91efdf7671b08629b08c7b1c371a0b Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 16 Nov 2023 17:17:59 -0800 Subject: [SLP][NFC]Fix comparison of integers of different signs warning, NFC. 
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index d7967175..fca96491 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -8564,7 +8564,7 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const { // profitable for the vectorization, we can skip it, if the cost threshold is // default. The cost of vectorized PHI nodes is almost always 0 + the cost of // gathers/buildvectors. - constexpr unsigned Limit = 4; + constexpr int Limit = 4; if (!ForReduction && !SLPCostThreshold.getNumOccurrences() && !VectorizableTree.empty() && all_of(VectorizableTree, [&](const std::unique_ptr &TE) { -- cgit v1.1 From 09ac2ec3ad368f82e6b0814a6427b2cb93591ef6 Mon Sep 17 00:00:00 2001 From: Tacet <4922191+AdvenamTacet@users.noreply.github.com> Date: Fri, 17 Nov 2023 03:20:12 +0100 Subject: Remove deprecated warning from cmake files (#72595) `LIBCXXABI_SYSROOT`, `LIBCXXABI_TARGET_TRIPLE` and `LIBCXXABI_GCC_TOOLCHAIN` are not supported anymore. Based on the comment, the warning should be removed after branching for LLVM 15. --- libcxxabi/CMakeLists.txt | 5 ----- libunwind/CMakeLists.txt | 5 ----- 2 files changed, 10 deletions(-) diff --git a/libcxxabi/CMakeLists.txt b/libcxxabi/CMakeLists.txt index 6fd4f02..efe830b 100644 --- a/libcxxabi/CMakeLists.txt +++ b/libcxxabi/CMakeLists.txt @@ -88,11 +88,6 @@ else() set(LIBCXXABI_GENERATED_INCLUDE_DIR "${CMAKE_BINARY_DIR}/include/c++/v1") endif() -# TODO: Remove this after branching for LLVM 15 -if(LIBCXXABI_SYSROOT OR LIBCXXABI_TARGET_TRIPLE OR LIBCXXABI_GCC_TOOLCHAIN) - message(WARNING "LIBCXXABI_SYSROOT, LIBCXXABI_TARGET_TRIPLE and LIBCXXABI_GCC_TOOLCHAIN are not supported anymore, please use the native CMake equivalents instead") -endif() - set(LIBCXXABI_LIBCXX_LIBRARY_PATH "" CACHE PATH "The path to libc++ library.") set(LIBCXXABI_LIBRARY_VERSION "1.0" CACHE STRING "Version of libc++abi. This will be reflected in the name of the shared \ diff --git a/libunwind/CMakeLists.txt b/libunwind/CMakeLists.txt index 84f8ce2..248e888 100644 --- a/libunwind/CMakeLists.txt +++ b/libunwind/CMakeLists.txt @@ -63,11 +63,6 @@ cmake_dependent_option(LIBUNWIND_INSTALL_SHARED_LIBRARY "Install the shared libunwind library." ON "LIBUNWIND_ENABLE_SHARED;LIBUNWIND_INSTALL_LIBRARY" OFF) -# TODO: Remove this after branching for LLVM 15 -if(LIBUNWIND_SYSROOT OR LIBUNWIND_TARGET_TRIPLE OR LIBUNWIND_GCC_TOOLCHAIN) - message(WARNING "LIBUNWIND_SYSROOT, LIBUNWIND_TARGET_TRIPLE and LIBUNWIND_GCC_TOOLCHAIN are not supported anymore, please use the native CMake equivalents instead") -endif() - if(MINGW) set(LIBUNWIND_DEFAULT_TEST_CONFIG "llvm-libunwind-mingw.cfg.in") elseif (LIBUNWIND_ENABLE_SHARED) -- cgit v1.1 From 00da5eb86ed0b86002b0947643f7da72faa4fd42 Mon Sep 17 00:00:00 2001 From: Tacet <4922191+AdvenamTacet@users.noreply.github.com> Date: Fri, 17 Nov 2023 03:23:07 +0100 Subject: [NFC] Remove outdated comment (#72591) Special-casing has been already removed. --- libcxx/include/deque | 1 - 1 file changed, 1 deletion(-) diff --git a/libcxx/include/deque b/libcxx/include/deque index d2ddce3..1438f1e 100644 --- a/libcxx/include/deque +++ b/libcxx/include/deque @@ -967,7 +967,6 @@ public: // For more details, see the "Using libc++" documentation page or // the documentation for __sanitizer_annotate_contiguous_container. 
#if !defined(_LIBCPP_HAS_NO_ASAN) - // TODO LLVM18: Remove the special-casing _LIBCPP_HIDE_FROM_ABI void __annotate_double_ended_contiguous_container( const void* __beg, const void* __end, -- cgit v1.1 From b034da7dad150a54661557cc3f712948b1e474e4 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 16 Nov 2023 22:00:37 -0500 Subject: [libc++][NFC] Fix include guard for simd helper header --- libcxx/test/std/experimental/simd/test_utils.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libcxx/test/std/experimental/simd/test_utils.h b/libcxx/test/std/experimental/simd/test_utils.h index a478a43..65ffc73 100644 --- a/libcxx/test/std/experimental/simd/test_utils.h +++ b/libcxx/test/std/experimental/simd/test_utils.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef TEST_UTIL_H -#define TEST_UTIL_H +#ifndef LIBCXX_TEST_STD_EXPERIMENTAL_SIMD_TEST_UTILS_H +#define LIBCXX_TEST_STD_EXPERIMENTAL_SIMD_TEST_UTILS_H #include #include @@ -78,4 +78,4 @@ void assert_simd_mask_values_equal(const ex::simd_mask& origin_mask, assert(origin_mask[i] == expected_value[i]); } -#endif // TEST_UTIL_H +#endif // LIBCXX_TEST_STD_EXPERIMENTAL_SIMD_TEST_UTILS_H -- cgit v1.1 From ae7fb21b5ad434e920fe394587ac13c899d8ee84 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 16 Nov 2023 20:24:14 -0800 Subject: [ELF] Make some InputSection/InputFile member functions const. NFC --- lld/ELF/InputFiles.cpp | 10 +++++----- lld/ELF/InputFiles.h | 7 ++++--- lld/ELF/InputSection.cpp | 10 ++++++---- lld/ELF/InputSection.h | 10 +++++----- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 4b4d7d6..e83cca3 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -348,7 +348,7 @@ static std::string createFileLineMsg(StringRef path, unsigned line) { template static std::string getSrcMsgAux(ObjFile &file, const Symbol &sym, - InputSectionBase &sec, uint64_t offset) { + const InputSectionBase &sec, uint64_t offset) { // In DWARF, functions and variables are stored to different places. // First, look up a function for a given offset. if (std::optional info = file.getDILineInfo(&sec, offset)) @@ -363,7 +363,7 @@ static std::string getSrcMsgAux(ObjFile &file, const Symbol &sym, return std::string(file.sourceFile); } -std::string InputFile::getSrcMsg(const Symbol &sym, InputSectionBase &sec, +std::string InputFile::getSrcMsg(const Symbol &sym, const InputSectionBase &sec, uint64_t offset) { if (kind() != ObjKind) return ""; @@ -474,8 +474,8 @@ ObjFile::getVariableLoc(StringRef name) { // Returns source line information for a given offset // using DWARF debug info. template -std::optional ObjFile::getDILineInfo(InputSectionBase *s, - uint64_t offset) { +std::optional +ObjFile::getDILineInfo(const InputSectionBase *s, uint64_t offset) { // Detect SectionIndex for specified section. 
uint64_t sectionIndex = object::SectionedAddress::UndefSection; ArrayRef sections = s->file->getSections(); @@ -1822,7 +1822,7 @@ template void ObjFile::parseLazy() { } } -bool InputFile::shouldExtractForCommon(StringRef name) { +bool InputFile::shouldExtractForCommon(StringRef name) const { if (isa(this)) return isBitcodeNonCommonDef(mb, name, archiveName); diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index 41c2ba4..ab98d78 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -101,7 +101,7 @@ public: // Check if a non-common symbol should be extracted to override a common // definition. - bool shouldExtractForCommon(StringRef name); + bool shouldExtractForCommon(StringRef name) const; // .got2 in the current file. This is used by PPC32 -fPIC/-fPIE to compute // offsets in PLT call stubs. @@ -133,7 +133,7 @@ public: // True if this is an argument for --just-symbols. Usually false. bool justSymbols = false; - std::string getSrcMsg(const Symbol &sym, InputSectionBase &sec, + std::string getSrcMsg(const Symbol &sym, const InputSectionBase &sec, uint64_t offset); // On PPC64 we need to keep track of which files contain small code model @@ -255,7 +255,8 @@ public: return getSymbol(symIndex); } - std::optional getDILineInfo(InputSectionBase *, uint64_t); + std::optional getDILineInfo(const InputSectionBase *, + uint64_t); std::optional> getVariableLoc(StringRef name); diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index e4ce050..e6942a9 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -242,7 +242,8 @@ InputSection *InputSectionBase::getLinkOrderDep() const { } // Find a symbol that encloses a given location. -Defined *InputSectionBase::getEnclosingSymbol(uint64_t offset, uint8_t type) { +Defined *InputSectionBase::getEnclosingSymbol(uint64_t offset, + uint8_t type) const { for (Symbol *b : file->getSymbols()) if (Defined *d = dyn_cast(b)) if (d->section == this && d->value <= offset && @@ -252,7 +253,7 @@ Defined *InputSectionBase::getEnclosingSymbol(uint64_t offset, uint8_t type) { } // Returns an object file location string. Used to construct an error message. -std::string InputSectionBase::getLocation(uint64_t offset) { +std::string InputSectionBase::getLocation(uint64_t offset) const { std::string secAndOffset = (name + "+0x" + Twine::utohexstr(offset) + ")").str(); @@ -273,7 +274,8 @@ std::string InputSectionBase::getLocation(uint64_t offset) { // foo.c:42 (/home/alice/possibly/very/long/path/foo.c:42) // // Returns an empty string if there's no way to get line info. -std::string InputSectionBase::getSrcMsg(const Symbol &sym, uint64_t offset) { +std::string InputSectionBase::getSrcMsg(const Symbol &sym, + uint64_t offset) const { return file->getSrcMsg(sym, *this, offset); } @@ -286,7 +288,7 @@ std::string InputSectionBase::getSrcMsg(const Symbol &sym, uint64_t offset) { // or // // path/to/foo.o:(function bar) in archive path/to/bar.a -std::string InputSectionBase::getObjMsg(uint64_t off) { +std::string InputSectionBase::getObjMsg(uint64_t off) const { std::string filename = std::string(file->getName()); std::string archive; diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h index 7570901..fbaea57 100644 --- a/lld/ELF/InputSection.h +++ b/lld/ELF/InputSection.h @@ -191,15 +191,15 @@ public: // Get a symbol that encloses this offset from within the section. If type is // not zero, return a symbol with the specified type. 
- Defined *getEnclosingSymbol(uint64_t offset, uint8_t type = 0); - Defined *getEnclosingFunction(uint64_t offset) { + Defined *getEnclosingSymbol(uint64_t offset, uint8_t type = 0) const; + Defined *getEnclosingFunction(uint64_t offset) const { return getEnclosingSymbol(offset, llvm::ELF::STT_FUNC); } // Returns a source location string. Used to construct an error message. - std::string getLocation(uint64_t offset); - std::string getSrcMsg(const Symbol &sym, uint64_t offset); - std::string getObjMsg(uint64_t offset); + std::string getLocation(uint64_t offset) const; + std::string getSrcMsg(const Symbol &sym, uint64_t offset) const; + std::string getObjMsg(uint64_t offset) const; // Each section knows how to relocate itself. These functions apply // relocations, assuming that Buf points to this section's copy in -- cgit v1.1 From 5353d3f509814a44093a61c2725fdfe7273aa25a Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Thu, 16 Nov 2023 21:20:30 -0800 Subject: Remove unused LoopInfo from InlineSpiller and SpillPlacement (NFC) (#71874) --- llvm/lib/CodeGen/CalcSpillWeights.cpp | 5 ++--- llvm/lib/CodeGen/InlineSpiller.cpp | 9 ++------- llvm/lib/CodeGen/SpillPlacement.cpp | 4 ---- llvm/lib/CodeGen/SpillPlacement.h | 2 -- 4 files changed, 4 insertions(+), 16 deletions(-) diff --git a/llvm/lib/CodeGen/CalcSpillWeights.cpp b/llvm/lib/CodeGen/CalcSpillWeights.cpp index bf11492..f3cb7fa 100644 --- a/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -163,8 +163,6 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); MachineBasicBlock *MBB = nullptr; - MachineLoop *Loop = nullptr; - bool IsExiting = false; float TotalWeight = 0; unsigned NumInstr = 0; // Number of instructions using LI SmallPtrSet Visited; @@ -221,6 +219,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, } }; + bool IsExiting = false; std::set CopyHints; DenseMap Hint; for (MachineRegisterInfo::reg_instr_nodbg_iterator @@ -262,7 +261,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &LI, SlotIndex *Start, // Get loop info for mi. if (MI->getParent() != MBB) { MBB = MI->getParent(); - Loop = Loops.getLoopFor(MBB); + const MachineLoop *Loop = Loops.getLoopFor(MBB); IsExiting = Loop ? 
Loop->isLoopExiting(MBB) : false; } diff --git a/llvm/lib/CodeGen/InlineSpiller.cpp b/llvm/lib/CodeGen/InlineSpiller.cpp index 2740265..213dc1e 100644 --- a/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/llvm/lib/CodeGen/InlineSpiller.cpp @@ -33,7 +33,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBundle.h" -#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" @@ -86,7 +85,6 @@ class HoistSpillHelper : private LiveRangeEdit::Delegate { LiveIntervals &LIS; LiveStacks &LSS; MachineDominatorTree &MDT; - MachineLoopInfo &Loops; VirtRegMap &VRM; MachineRegisterInfo &MRI; const TargetInstrInfo &TII; @@ -138,8 +136,7 @@ public: VirtRegMap &vrm) : MF(mf), LIS(pass.getAnalysis()), LSS(pass.getAnalysis()), - MDT(pass.getAnalysis()), - Loops(pass.getAnalysis()), VRM(vrm), + MDT(pass.getAnalysis()), VRM(vrm), MRI(mf.getRegInfo()), TII(*mf.getSubtarget().getInstrInfo()), TRI(*mf.getSubtarget().getRegisterInfo()), MBFI(pass.getAnalysis()), @@ -157,7 +154,6 @@ class InlineSpiller : public Spiller { LiveIntervals &LIS; LiveStacks &LSS; MachineDominatorTree &MDT; - MachineLoopInfo &Loops; VirtRegMap &VRM; MachineRegisterInfo &MRI; const TargetInstrInfo &TII; @@ -197,8 +193,7 @@ public: VirtRegAuxInfo &VRAI) : MF(MF), LIS(Pass.getAnalysis()), LSS(Pass.getAnalysis()), - MDT(Pass.getAnalysis()), - Loops(Pass.getAnalysis()), VRM(VRM), + MDT(Pass.getAnalysis()), VRM(VRM), MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()), TRI(*MF.getSubtarget().getRegisterInfo()), MBFI(Pass.getAnalysis()), diff --git a/llvm/lib/CodeGen/SpillPlacement.cpp b/llvm/lib/CodeGen/SpillPlacement.cpp index 6e74e51..cdb8099 100644 --- a/llvm/lib/CodeGen/SpillPlacement.cpp +++ b/llvm/lib/CodeGen/SpillPlacement.cpp @@ -32,7 +32,6 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" @@ -52,7 +51,6 @@ char &llvm::SpillPlacementID = SpillPlacement::ID; INITIALIZE_PASS_BEGIN(SpillPlacement, DEBUG_TYPE, "Spill Code Placement Analysis", true, true) INITIALIZE_PASS_DEPENDENCY(EdgeBundles) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_END(SpillPlacement, DEBUG_TYPE, "Spill Code Placement Analysis", true, true) @@ -60,7 +58,6 @@ void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); AU.addRequired(); AU.addRequiredTransitive(); - AU.addRequiredTransitive(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -195,7 +192,6 @@ struct SpillPlacement::Node { bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { MF = &mf; bundles = &getAnalysis(); - loops = &getAnalysis(); assert(!nodes && "Leaking node array"); nodes = new Node[bundles->getNumBundles()]; diff --git a/llvm/lib/CodeGen/SpillPlacement.h b/llvm/lib/CodeGen/SpillPlacement.h index 2a298c7..5fd9b08 100644 --- a/llvm/lib/CodeGen/SpillPlacement.h +++ b/llvm/lib/CodeGen/SpillPlacement.h @@ -38,13 +38,11 @@ class BitVector; class EdgeBundles; class MachineBlockFrequencyInfo; class MachineFunction; -class MachineLoopInfo; class SpillPlacement : public MachineFunctionPass { struct Node; const MachineFunction *MF = nullptr; const EdgeBundles *bundles = nullptr; - const MachineLoopInfo *loops = nullptr; const MachineBlockFrequencyInfo 
*MBFI = nullptr; Node *nodes = nullptr; -- cgit v1.1 From 865c1fda6f3e258b47a4f7992bb19ca2e9edcfa1 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Fri, 17 Nov 2023 14:02:47 +0800 Subject: [InstCombine] Preserve NSW flags for neg instructions (#72548) Alive2: https://alive2.llvm.org/ce/z/F9HG3M This missed optimization is discovered with the help of https://github.com/AliveToolkit/alive2/pull/962. --- llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 4 ++-- llvm/test/Transforms/InstCombine/div.ll | 16 ++++++++-------- .../InstCombine/sdiv-exact-by-negative-power-of-two.ll | 6 +++--- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index d6fac88..4015672 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -1433,7 +1433,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) { // sdiv Op0, (sext i1 X) --> -Op0 (because if X is 0, the op is undefined) if (match(Op1, m_AllOnes()) || (match(Op1, m_SExt(m_Value(X))) && X->getType()->isIntOrIntVectorTy(1))) - return BinaryOperator::CreateNeg(Op0); + return BinaryOperator::CreateNSWNeg(Op0); // X / INT_MIN --> X == INT_MIN if (match(Op1, m_SignMask())) @@ -1456,7 +1456,7 @@ Instruction *InstCombinerImpl::visitSDiv(BinaryOperator &I) { Constant *NegPow2C = ConstantExpr::getNeg(cast(Op1)); Constant *C = ConstantExpr::getExactLogBase2(NegPow2C); Value *Ashr = Builder.CreateAShr(Op0, C, I.getName() + ".neg", true); - return BinaryOperator::CreateNeg(Ashr); + return BinaryOperator::CreateNSWNeg(Ashr); } } diff --git a/llvm/test/Transforms/InstCombine/div.ll b/llvm/test/Transforms/InstCombine/div.ll index 844b357..cd17d10 100644 --- a/llvm/test/Transforms/InstCombine/div.ll +++ b/llvm/test/Transforms/InstCombine/div.ll @@ -22,7 +22,7 @@ define i32 @test2(i32 %A) { define i32 @sdiv_by_minus1(i32 %A) { ; CHECK-LABEL: @sdiv_by_minus1( -; CHECK-NEXT: [[B:%.*]] = sub i32 0, [[A:%.*]] +; CHECK-NEXT: [[B:%.*]] = sub nsw i32 0, [[A:%.*]] ; CHECK-NEXT: ret i32 [[B]] ; %B = sdiv i32 %A, -1 @@ -31,7 +31,7 @@ define i32 @sdiv_by_minus1(i32 %A) { define <2 x i64> @sdiv_by_minus1_vec(<2 x i64> %x) { ; CHECK-LABEL: @sdiv_by_minus1_vec( -; CHECK-NEXT: [[DIV:%.*]] = sub <2 x i64> zeroinitializer, [[X:%.*]] +; CHECK-NEXT: [[DIV:%.*]] = sub nsw <2 x i64> zeroinitializer, [[X:%.*]] ; CHECK-NEXT: ret <2 x i64> [[DIV]] ; %div = sdiv <2 x i64> %x, @@ -48,7 +48,7 @@ define <2 x i64> @sdiv_by_minus1_vec_poison_elt(<2 x i64> %x) { define i32 @sdiv_by_sext_minus1(i1 %x, i32 %y) { ; CHECK-LABEL: @sdiv_by_sext_minus1( -; CHECK-NEXT: [[DIV:%.*]] = sub i32 0, [[Y:%.*]] +; CHECK-NEXT: [[DIV:%.*]] = sub nsw i32 0, [[Y:%.*]] ; CHECK-NEXT: ret i32 [[DIV]] ; %sext = sext i1 %x to i32 @@ -58,7 +58,7 @@ define i32 @sdiv_by_sext_minus1(i1 %x, i32 %y) { define <2 x i32> @sdiv_by_sext_minus1_vec(<2 x i1> %x, <2 x i32> %y) { ; CHECK-LABEL: @sdiv_by_sext_minus1_vec( -; CHECK-NEXT: [[DIV:%.*]] = sub <2 x i32> zeroinitializer, [[Y:%.*]] +; CHECK-NEXT: [[DIV:%.*]] = sub nsw <2 x i32> zeroinitializer, [[Y:%.*]] ; CHECK-NEXT: ret <2 x i32> [[DIV]] ; %sext = sext <2 x i1> %x to <2 x i32> @@ -1308,8 +1308,8 @@ define i32 @udiv_select_of_constants_divisor(i1 %b, i32 %x) { define i1 @sdiv_one_icmpeq_one(i32 %x) { ; CHECK-LABEL: @sdiv_one_icmpeq_one( ; CHECK-NEXT: [[X_FR:%.*]] = freeze i32 [[X:%.*]] -; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[X_FR]], 1 -; CHECK-NEXT: ret i1 
[[B]] +; CHECK-NEXT: [[B1:%.*]] = icmp eq i32 [[X_FR]], 1 +; CHECK-NEXT: ret i1 [[B1]] ; %A = sdiv i32 1, %x %B = icmp eq i32 %A, 1 @@ -1319,8 +1319,8 @@ define i1 @sdiv_one_icmpeq_one(i32 %x) { define i1 @sdiv_one_icmpeq_negone(i32 %x) { ; CHECK-LABEL: @sdiv_one_icmpeq_negone( ; CHECK-NEXT: [[X_FR:%.*]] = freeze i32 [[X:%.*]] -; CHECK-NEXT: [[B:%.*]] = icmp eq i32 [[X_FR]], -1 -; CHECK-NEXT: ret i1 [[B]] +; CHECK-NEXT: [[B1:%.*]] = icmp eq i32 [[X_FR]], -1 +; CHECK-NEXT: ret i1 [[B1]] ; %A = sdiv i32 1, %x %B = icmp eq i32 %A, -1 diff --git a/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll b/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll index 81f22bb..3451c5e 100644 --- a/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll +++ b/llvm/test/Transforms/InstCombine/sdiv-exact-by-negative-power-of-two.ll @@ -38,7 +38,7 @@ define <2 x i8> @t2_vec_splat(<2 x i8> %x) { define <2 x i8> @t3_vec(<2 x i8> %x) { ; CHECK-LABEL: @t3_vec( ; CHECK-NEXT: [[DIV_NEG:%.*]] = ashr exact <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[DIV:%.*]] = sub <2 x i8> zeroinitializer, [[DIV_NEG]] +; CHECK-NEXT: [[DIV:%.*]] = sub nsw <2 x i8> zeroinitializer, [[DIV_NEG]] ; CHECK-NEXT: ret <2 x i8> [[DIV]] ; %div = sdiv exact <2 x i8> %x, @@ -76,8 +76,8 @@ define i8 @prove_exact_with_high_mask(i8 %x, i8 %y) { define i8 @prove_exact_with_high_mask_limit(i8 %x, i8 %y) { ; CHECK-LABEL: @prove_exact_with_high_mask_limit( -; CHECK-NEXT: [[A:%.*]] = ashr i8 [[X:%.*]], 5 -; CHECK-NEXT: [[D:%.*]] = sub nsw i8 0, [[A]] +; CHECK-NEXT: [[D_NEG:%.*]] = ashr i8 [[X:%.*]], 5 +; CHECK-NEXT: [[D:%.*]] = sub nsw i8 0, [[D_NEG]] ; CHECK-NEXT: ret i8 [[D]] ; %a = and i8 %x, -32 -- cgit v1.1 From 894a38753e8c4cfef7a1dae17a76b405208b2708 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Timm=20B=C3=A4der?= Date: Thu, 16 Nov 2023 18:04:15 +0100 Subject: [clang][Interp][NFC] Properly implement IntegralAP::from(IntegralAP) This used to just pass on the given parameter, but we need to respect the given bit width. --- clang/lib/AST/Interp/IntegralAP.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/clang/lib/AST/Interp/IntegralAP.h b/clang/lib/AST/Interp/IntegralAP.h index bd665959c..9019f32 100644 --- a/clang/lib/AST/Interp/IntegralAP.h +++ b/clang/lib/AST/Interp/IntegralAP.h @@ -102,7 +102,12 @@ public: template static IntegralAP from(IntegralAP V, unsigned NumBits = 0) { - return IntegralAP(V.V); + if (NumBits == 0) + NumBits = V.bitWidth(); + + if constexpr (InputSigned) + return IntegralAP(V.V.sextOrTrunc(NumBits)); + return IntegralAP(V.V.zextOrTrunc(NumBits)); } template -- cgit v1.1 From 3defe8facc55431c040f964802588473e2d4452b Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Fri, 17 Nov 2023 08:29:13 +0100 Subject: [clang][Interp] Implement __builtin_bitreverse (#71687) Since the return value of this function is slightly more involved than the void/bool/int/size_t return values we've seen so far, also refactor this. 
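As a rough illustration of what now folds in a constant context, mirroring the values in the test added below (this is only a sketch, not part of the patch):

    // Reversing the bit order of a byte: 0b0000'0001 -> 0b1000'0000.
    static_assert(__builtin_bitreverse8(0x01) == 0x80, "");
    static_assert(__builtin_bitreverse32(0x12345678) == 0x1E6A2C48, "");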
--- clang/lib/AST/Interp/InterpBuiltin.cpp | 207 +++++++++++++++++++--------- clang/test/AST/Interp/builtin-functions.cpp | 7 + 2 files changed, 147 insertions(+), 67 deletions(-) diff --git a/clang/lib/AST/Interp/InterpBuiltin.cpp b/clang/lib/AST/Interp/InterpBuiltin.cpp index 8c5efe2..bb3e135 100644 --- a/clang/lib/AST/Interp/InterpBuiltin.cpp +++ b/clang/lib/AST/Interp/InterpBuiltin.cpp @@ -59,13 +59,54 @@ static void pushInt(InterpState &S, int32_t Val) { llvm_unreachable("Int isn't 16 or 32 bit?"); } -static bool retInt(InterpState &S, CodePtr OpPC, APValue &Result) { - PrimType IntType = getIntPrimType(S); - if (IntType == PT_Sint32) - return Ret(S, OpPC, Result); - else if (IntType == PT_Sint16) - return Ret(S, OpPC, Result); - llvm_unreachable("Int isn't 16 or 32 bit?"); +static void pushAPSInt(InterpState &S, const APSInt &Val) { + bool Signed = Val.isSigned(); + + if (Signed) { + switch (Val.getBitWidth()) { + case 64: + S.Stk.push>( + Integral<64, true>::from(Val.getSExtValue())); + break; + case 32: + S.Stk.push>( + Integral<32, true>::from(Val.getSExtValue())); + break; + case 16: + S.Stk.push>( + Integral<16, true>::from(Val.getSExtValue())); + break; + case 8: + S.Stk.push>( + Integral<8, true>::from(Val.getSExtValue())); + break; + default: + llvm_unreachable("Invalid integer bitwidth"); + } + return; + } + + // Unsigned. + switch (Val.getBitWidth()) { + case 64: + S.Stk.push>( + Integral<64, false>::from(Val.getZExtValue())); + break; + case 32: + S.Stk.push>( + Integral<32, false>::from(Val.getZExtValue())); + break; + case 16: + S.Stk.push>( + Integral<16, false>::from(Val.getZExtValue())); + break; + case 8: + S.Stk.push>( + Integral<8, false>::from(Val.getZExtValue())); + break; + default: + llvm_unreachable("Invalid integer bitwidth"); + } } static void pushSizeT(InterpState &S, uint64_t Val) { @@ -87,20 +128,29 @@ static void pushSizeT(InterpState &S, uint64_t Val) { } } -static bool retSizeT(InterpState &S, CodePtr OpPC, APValue &Result) { - const TargetInfo &TI = S.getCtx().getTargetInfo(); - unsigned SizeTWidth = TI.getTypeWidth(TI.getSizeType()); - - switch (SizeTWidth) { - case 64: - return Ret(S, OpPC, Result); - case 32: - return Ret(S, OpPC, Result); - case 16: - return Ret(S, OpPC, Result); +static bool retPrimValue(InterpState &S, CodePtr OpPC, APValue &Result, + std::optional &T) { + if (!T) + return RetVoid(S, OpPC, Result); + +#define RET_CASE(X) \ + case X: \ + return Ret(S, OpPC, Result); + switch (*T) { + RET_CASE(PT_Float); + RET_CASE(PT_Bool); + RET_CASE(PT_Sint8); + RET_CASE(PT_Uint8); + RET_CASE(PT_Sint16); + RET_CASE(PT_Uint16); + RET_CASE(PT_Sint32); + RET_CASE(PT_Uint32); + RET_CASE(PT_Sint64); + RET_CASE(PT_Uint64); + default: + llvm_unreachable("Unsupported return type for builtin function"); } - - llvm_unreachable("size_t isn't 64 or 32 bit?"); +#undef RET_CASE } static bool interp__builtin_strcmp(InterpState &S, CodePtr OpPC, @@ -457,40 +507,55 @@ static bool interp__builtin_clrsb(InterpState &S, CodePtr OpPC, return true; } +static bool interp__builtin_bitreverse(InterpState &S, CodePtr OpPC, + const InterpFrame *Frame, + const Function *Func, + const CallExpr *Call) { + PrimType ArgT = *S.getContext().classify(Call->getArg(0)->getType()); + APSInt Val = peekToAPSInt(S.Stk, ArgT); + pushAPSInt(S, APSInt(Val.reverseBits(), /*IsUnsigned=*/true)); + return true; +} + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F, const CallExpr *Call) { InterpFrame *Frame = S.Current; APValue Dummy; + QualType ReturnType = 
Call->getCallReturnType(S.getCtx()); + std::optional ReturnT = S.getContext().classify(ReturnType); + // If classify failed, we assume void. + assert(ReturnT || ReturnType->isVoidType()); + switch (F->getBuiltinID()) { case Builtin::BI__builtin_is_constant_evaluated: S.Stk.push(Boolean::from(S.inConstantContext())); - return Ret(S, OpPC, Dummy); + break; case Builtin::BI__builtin_assume: - return RetVoid(S, OpPC, Dummy); + break; case Builtin::BI__builtin_strcmp: - if (interp__builtin_strcmp(S, OpPC, Frame)) - return retInt(S, OpPC, Dummy); + if (!interp__builtin_strcmp(S, OpPC, Frame)) + return false; break; case Builtin::BI__builtin_strlen: - if (interp__builtin_strlen(S, OpPC, Frame)) - return retSizeT(S, OpPC, Dummy); + if (!interp__builtin_strlen(S, OpPC, Frame)) + return false; break; case Builtin::BI__builtin_nan: case Builtin::BI__builtin_nanf: case Builtin::BI__builtin_nanl: case Builtin::BI__builtin_nanf16: case Builtin::BI__builtin_nanf128: - if (interp__builtin_nan(S, OpPC, Frame, F, /*Signaling=*/false)) - return Ret(S, OpPC, Dummy); + if (!interp__builtin_nan(S, OpPC, Frame, F, /*Signaling=*/false)) + return false; break; case Builtin::BI__builtin_nans: case Builtin::BI__builtin_nansf: case Builtin::BI__builtin_nansl: case Builtin::BI__builtin_nansf16: case Builtin::BI__builtin_nansf128: - if (interp__builtin_nan(S, OpPC, Frame, F, /*Signaling=*/true)) - return Ret(S, OpPC, Dummy); + if (!interp__builtin_nan(S, OpPC, Frame, F, /*Signaling=*/true)) + return false; break; case Builtin::BI__builtin_huge_val: @@ -503,15 +568,15 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F, case Builtin::BI__builtin_infl: case Builtin::BI__builtin_inff16: case Builtin::BI__builtin_inff128: - if (interp__builtin_inf(S, OpPC, Frame, F)) - return Ret(S, OpPC, Dummy); + if (!interp__builtin_inf(S, OpPC, Frame, F)) + return false; break; case Builtin::BI__builtin_copysign: case Builtin::BI__builtin_copysignf: case Builtin::BI__builtin_copysignl: case Builtin::BI__builtin_copysignf128: - if (interp__builtin_copysign(S, OpPC, Frame, F)) - return Ret(S, OpPC, Dummy); + if (!interp__builtin_copysign(S, OpPC, Frame, F)) + return false; break; case Builtin::BI__builtin_fmin: @@ -519,8 +584,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F, case Builtin::BI__builtin_fminl: case Builtin::BI__builtin_fminf16: case Builtin::BI__builtin_fminf128: - if (interp__builtin_fmin(S, OpPC, Frame, F)) - return Ret(S, OpPC, Dummy); + if (!interp__builtin_fmin(S, OpPC, Frame, F)) + return false; break; case Builtin::BI__builtin_fmax: @@ -528,60 +593,60 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F, case Builtin::BI__builtin_fmaxl: case Builtin::BI__builtin_fmaxf16: case Builtin::BI__builtin_fmaxf128: - if (interp__builtin_fmax(S, OpPC, Frame, F)) - return Ret(S, OpPC, Dummy); + if (!interp__builtin_fmax(S, OpPC, Frame, F)) + return false; break; case Builtin::BI__builtin_isnan: - if (interp__builtin_isnan(S, OpPC, Frame, F)) - return retInt(S, OpPC, Dummy); + if (!interp__builtin_isnan(S, OpPC, Frame, F)) + return false; break; case Builtin::BI__builtin_issignaling: - if (interp__builtin_issignaling(S, OpPC, Frame, F)) - return retInt(S, OpPC, Dummy); + if (!interp__builtin_issignaling(S, OpPC, Frame, F)) + return false; break; case Builtin::BI__builtin_isinf: - if (interp__builtin_isinf(S, OpPC, Frame, F, /*Sign=*/false)) - return retInt(S, OpPC, Dummy); + if (!interp__builtin_isinf(S, OpPC, Frame, F, /*Sign=*/false)) + return false; break; 
case Builtin::BI__builtin_isinf_sign: - if (interp__builtin_isinf(S, OpPC, Frame, F, /*Sign=*/true)) - return retInt(S, OpPC, Dummy); + if (!interp__builtin_isinf(S, OpPC, Frame, F, /*Sign=*/true)) + return false; break; case Builtin::BI__builtin_isfinite: - if (interp__builtin_isfinite(S, OpPC, Frame, F)) - return retInt(S, OpPC, Dummy); + if (!interp__builtin_isfinite(S, OpPC, Frame, F)) + return false; break; case Builtin::BI__builtin_isnormal: - if (interp__builtin_isnormal(S, OpPC, Frame, F)) - return retInt(S, OpPC, Dummy); + if (!interp__builtin_isnormal(S, OpPC, Frame, F)) + return false; break; case Builtin::BI__builtin_issubnormal: - if (interp__builtin_issubnormal(S, OpPC, Frame, F)) - return retInt(S, OpPC, Dummy); + if (!interp__builtin_issubnormal(S, OpPC, Frame, F)) + return false; break; case Builtin::BI__builtin_iszero: - if (interp__builtin_iszero(S, OpPC, Frame, F)) - return retInt(S, OpPC, Dummy); + if (!interp__builtin_iszero(S, OpPC, Frame, F)) + return false; break; case Builtin::BI__builtin_isfpclass: - if (interp__builtin_isfpclass(S, OpPC, Frame, F, Call)) - return retInt(S, OpPC, Dummy); + if (!interp__builtin_isfpclass(S, OpPC, Frame, F, Call)) + return false; break; case Builtin::BI__builtin_fpclassify: - if (interp__builtin_fpclassify(S, OpPC, Frame, F)) - return retInt(S, OpPC, Dummy); + if (!interp__builtin_fpclassify(S, OpPC, Frame, F)) + return false; break; case Builtin::BI__builtin_fabs: case Builtin::BI__builtin_fabsf: case Builtin::BI__builtin_fabsl: case Builtin::BI__builtin_fabsf128: - if (interp__builtin_fabs(S, OpPC, Frame, F)) - return Ret(S, OpPC, Dummy); + if (!interp__builtin_fabs(S, OpPC, Frame, F)) + return false; break; case Builtin::BI__builtin_popcount: @@ -590,29 +655,37 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const Function *F, case Builtin::BI__popcnt16: // Microsoft variants of popcount case Builtin::BI__popcnt: case Builtin::BI__popcnt64: - if (interp__builtin_popcount(S, OpPC, Frame, F, Call)) - return retInt(S, OpPC, Dummy); + if (!interp__builtin_popcount(S, OpPC, Frame, F, Call)) + return false; break; case Builtin::BI__builtin_parity: case Builtin::BI__builtin_parityl: case Builtin::BI__builtin_parityll: - if (interp__builtin_parity(S, OpPC, Frame, F, Call)) - return retInt(S, OpPC, Dummy); + if (!interp__builtin_parity(S, OpPC, Frame, F, Call)) + return false; break; case Builtin::BI__builtin_clrsb: case Builtin::BI__builtin_clrsbl: case Builtin::BI__builtin_clrsbll: - if (interp__builtin_clrsb(S, OpPC, Frame, F, Call)) - return retInt(S, OpPC, Dummy); + if (!interp__builtin_clrsb(S, OpPC, Frame, F, Call)) + return false; + break; + + case Builtin::BI__builtin_bitreverse8: + case Builtin::BI__builtin_bitreverse16: + case Builtin::BI__builtin_bitreverse32: + case Builtin::BI__builtin_bitreverse64: + if (!interp__builtin_bitreverse(S, OpPC, Frame, F, Call)) + return false; break; default: return false; } - return false; + return retPrimValue(S, OpPC, Dummy, ReturnT); } bool InterpretOffsetOf(InterpState &S, CodePtr OpPC, const OffsetOfExpr *E, diff --git a/clang/test/AST/Interp/builtin-functions.cpp b/clang/test/AST/Interp/builtin-functions.cpp index acc2dc3..0726dab 100644 --- a/clang/test/AST/Interp/builtin-functions.cpp +++ b/clang/test/AST/Interp/builtin-functions.cpp @@ -324,3 +324,10 @@ namespace clrsb { char clrsb11[__builtin_clrsb(0xf) == BITSIZE(int) - 5 ? 1 : -1]; char clrsb12[__builtin_clrsb(~0x1f) == BITSIZE(int) - 6 ? 
1 : -1]; } + +namespace bitreverse { + char bitreverse1[__builtin_bitreverse8(0x01) == 0x80 ? 1 : -1]; + char bitreverse2[__builtin_bitreverse16(0x3C48) == 0x123C ? 1 : -1]; + char bitreverse3[__builtin_bitreverse32(0x12345678) == 0x1E6A2C48 ? 1 : -1]; + char bitreverse4[__builtin_bitreverse64(0x0123456789ABCDEFULL) == 0xF7B3D591E6A2C480 ? 1 : -1]; +} -- cgit v1.1 From ea316625d1c984d63610a580b138c800115bfd86 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Fri, 17 Nov 2023 08:31:25 +0100 Subject: [clang] Add bitint classification for __builtin_classify_type (#72036) See #71911 --- clang/docs/ReleaseNotes.rst | 2 ++ clang/lib/AST/ExprConstant.cpp | 7 ++++++- clang/test/Sema/builtin-classify-type.c | 5 ++++- clang/test/SemaCXX/builtin-classify-type.cpp | 5 ++++- 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 31ebe89..739831d 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -219,6 +219,8 @@ Non-comprehensive list of changes in this release determined at runtime. * The ``__datasizeof`` keyword has been added. It is similar to ``sizeof`` except that it returns the size of a type ignoring tail padding. +* ``__builtin_classify_type()`` now classifies ``_BitInt`` values as the return value ``18``, + to match GCC 14's behavior. New Compiler Flags ------------------ diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 373972e..4fb444e 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11520,6 +11520,9 @@ enum class GCCTypeClass { // decay to pointer. (Prior to version 6 it was only used in C++ mode). // GCC reserves 15 for strings, but actually uses 5 (pointer) for string // literals. + // Lang = 16, + // OpaqueType = 17, + BitInt = 18 }; /// EvaluateBuiltinClassifyType - Evaluate __builtin_classify_type the same way @@ -11652,11 +11655,13 @@ EvaluateBuiltinClassifyType(QualType T, const LangOptions &LangOpts) { case Type::ObjCInterface: case Type::ObjCObjectPointer: case Type::Pipe: - case Type::BitInt: // GCC classifies vectors as None. We follow its lead and classify all // other types that don't fit into the regular classification the same way. return GCCTypeClass::None; + case Type::BitInt: + return GCCTypeClass::BitInt; + case Type::LValueReference: case Type::RValueReference: llvm_unreachable("invalid type for expression"); diff --git a/clang/test/Sema/builtin-classify-type.c b/clang/test/Sema/builtin-classify-type.c index a222ac8..9a4de34 100644 --- a/clang/test/Sema/builtin-classify-type.c +++ b/clang/test/Sema/builtin-classify-type.c @@ -11,7 +11,8 @@ enum gcc_type_class { function_type_class, method_type_class, record_type_class, union_type_class, array_type_class, string_type_class, - lang_type_class + lang_type_class, opaque_type_class, + bitint_type_class }; void foo(void) { @@ -45,6 +46,7 @@ void foo(void) { vint32_t3 vt5; typedef _BitInt(64) vint64_t3 __attribute__((vector_size(16))); vint64_t3 vt6; + _BitInt(16) bitint; _Atomic int atomic_i; _Atomic double atomic_d; @@ -70,6 +72,7 @@ void foo(void) { int a17[__builtin_classify_type(atomic_d) == real_type_class ? 1 : -1]; int a18[__builtin_classify_type(complex_i) == complex_type_class ? 1 : -1]; int a19[__builtin_classify_type(complex_d) == complex_type_class ? 1 : -1]; + int a20[__builtin_classify_type(bitint) == bitint_type_class ? 
1 : -1]; } extern int (^p)(void); diff --git a/clang/test/SemaCXX/builtin-classify-type.cpp b/clang/test/SemaCXX/builtin-classify-type.cpp index ebc8142..ed54309 100644 --- a/clang/test/SemaCXX/builtin-classify-type.cpp +++ b/clang/test/SemaCXX/builtin-classify-type.cpp @@ -11,7 +11,8 @@ enum gcc_type_class { function_type_class, method_type_class, record_type_class, union_type_class, array_type_class, string_type_class, - lang_type_class + lang_type_class, opaque_type_class, + bitint_type_class }; class cl { @@ -42,6 +43,7 @@ void foo() { _Atomic double atomic_d; _Complex int complex_i; _Complex double complex_d; + _BitInt(32) bitint; int a1[__builtin_classify_type(f()) == void_type_class ? 1 : -1]; int a2[__builtin_classify_type(i) == integer_type_class ? 1 : -1]; @@ -65,5 +67,6 @@ void foo() { int a20[__builtin_classify_type(atomic_d) == real_type_class ? 1 : -1]; int a21[__builtin_classify_type(complex_i) == complex_type_class ? 1 : -1]; int a22[__builtin_classify_type(complex_d) == complex_type_class ? 1 : -1]; + int a23[__builtin_classify_type(bitint) == bitint_type_class ? 1 : -1]; } -- cgit v1.1 From f653f6d57a8703b5b098d5cdbae40715ef3677fa Mon Sep 17 00:00:00 2001 From: Maksim Panchenko Date: Thu, 16 Nov 2023 23:36:19 -0800 Subject: [BOLT][NFC] Delete unused declarations (#72596) --- bolt/lib/Core/BinaryFunction.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 19c8a7b..e81d58e 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -54,11 +54,9 @@ namespace opts { extern cl::OptionCategory BoltCategory; extern cl::OptionCategory BoltOptCategory; -extern cl::OptionCategory BoltRelocCategory; extern cl::opt EnableBAT; extern cl::opt Instrument; -extern cl::opt KeepNops; extern cl::opt StrictMode; extern cl::opt UpdateDebugSections; extern cl::opt Verbosity; -- cgit v1.1 From d8ab8b95baa43fbc1dfedf83ca2e5ef7cbed74ee Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 16 Nov 2023 23:41:11 -0800 Subject: [Docs][llvm-exegesis] Fix minor issues in llvm-exegesis docs --- llvm/docs/CommandGuide/llvm-exegesis.rst | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/llvm/docs/CommandGuide/llvm-exegesis.rst b/llvm/docs/CommandGuide/llvm-exegesis.rst index f44f3a7..874bae8 100644 --- a/llvm/docs/CommandGuide/llvm-exegesis.rst +++ b/llvm/docs/CommandGuide/llvm-exegesis.rst @@ -37,14 +37,14 @@ SUPPORTED PLATFORMS only), MIPS, and PowerPC (PowerPC64LE only) on Linux for benchmarking. Not all benchmarking functionality is guaranteed to work on every platform. :program:`llvm-exegesis` also has a separate analysis mode that is supported -on every platform on which LLVM is. +on every platform that LLVM is. SNIPPET ANNOTATIONS ------------------- :program:`llvm-exegesis` supports benchmarking arbitrary snippets of assembly. However, benchmarking these snippets often requires some setup so that they -can execute properly. :program:`llvm-exegesis` has two annotations and some +can execute properly. :program:`llvm-exegesis` has four annotations and some additional utilities to help with setup so that snippets can be benchmarked properly. @@ -69,10 +69,11 @@ properly. specifying memory definitions that can later be mapped into the execution process of a snippet with the `LLVM-EXEGESIS-MEM-MAP` annotation. Each value is named using the `` argument so that it can be referenced - later within a map annotation. 
The size is specified in bytes the the value - is taken in hexadecimal. If the size of the value is less than the specified - size, the value will be repeated until it fills the entire section of memory. - Using this annotation requires using the subprocess execution mode. + later within a map annotation. The size is specified in a decimal number of + bytes and the value is given in hexadecimal. If the size of the value is less + than the specified size, the value will be repeated until it fills the entire + section of memory. Using this annotation requires using the subprocess execution + mode. * `LLVM-EXEGESIS-MEM-MAP
` - This annotation allows for mapping previously defined memory definitions into the execution context of a process. The value name refers to a previously defined memory definition and -- cgit v1.1 From 47b8763f8a814c0e755e154516537d8deb57e4b0 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 17 Nov 2023 00:26:26 -0800 Subject: [ELF][test] gitBitcodeMachineKind: test EM_ARM --- lld/test/ELF/lto/arm.ll | 59 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 lld/test/ELF/lto/arm.ll diff --git a/lld/test/ELF/lto/arm.ll b/lld/test/ELF/lto/arm.ll new file mode 100644 index 0000000..00d0865 --- /dev/null +++ b/lld/test/ELF/lto/arm.ll @@ -0,0 +1,59 @@ +; REQUIRES: arm +;; Test we can infer the e_machine value EM_ARM from a bitcode file. + +; RUN: split-file %s %t + +; RUN: llvm-as %t/arm.ll -o %t/arm.o +; RUN: ld.lld %t/arm.o -o %t/arm +; RUN: llvm-readobj -h %t/arm | FileCheck %s --check-prefix=ARM +; RUN: llvm-as %t/armeb.ll -o %t/armeb.o +; RUN: not ld.lld %t/armeb.o -o %t/armeb + +; RUN: llvm-as %t/thumb.ll -o %t/thumb.o +; RUN: ld.lld %t/thumb.o -o %t/thumb +; RUN: llvm-readobj -h %t/thumb | FileCheck %s --check-prefix=THUMB +; RUN: llvm-as %t/thumbeb.ll -o %t/thumbeb.o +; RUN: not ld.lld %t/thumbeb.o -o %t/thumbeb + +; ARM: Class: 32-bit +; ARM: DataEncoding: LittleEndian +; ARM: Machine: EM_ARM ( +; ARMEB: Class: 32-bit +; ARMEB: DataEncoding: BigEndian +; ARMEB: Machine: EM_ARM ( + +; THUMB: Class: 32-bit +; THUMB: DataEncoding: LittleEndian +; THUMB: Machine: EM_ARM ( + +;--- arm.ll +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv7-linux-musleabi" + +define void @_start() { + ret void +} + +;--- thumb.ll +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8m.base-none-unknown-gnueabi" + +define void @_start() { + ret void +} + +;--- armeb.ll +target datalayout = "E-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armebv7-unknown-linux-musleabi" + +define void @_start() { + ret void +} + +;--- thumbeb.ll +target datalayout = "E-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbebv8m.base-none-unknown-gnueabi" + +define void @_start() { + ret void +} -- cgit v1.1 From de176d8c5496d6cf20e82aface98e102c593dbe2 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 17 Nov 2023 09:34:24 +0100 Subject: [SCEV][LV] Invalidate LCSSA exit phis more thoroughly (#69909) This an alternative to #69886. The basic problem is that SCEV can look through trivial LCSSA phis. When the phi node later becomes non-trivial, we do invalidate it, but this doesn't catch uses that are not covered by the IR use-def walk, such as those in BECounts. Fix this by adding a special invalidation method for LCSSA phis, which will also invalidate all the SCEVUnknowns/SCEVAddRecExprs used by the LCSSA phi node and defined in the loop. We should probably also use this invalidation method in other places that add predecessors to exit blocks, such as loop unrolling and loop peeling. Fixes #69097. Fixes #66616. Fixes #63970. 
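For other transforms that add predecessors to loop exit blocks (e.g. unrolling or peeling, as noted above), the intended usage is roughly the following sketch, mirroring the LoopVectorize change in this patch (variable names are illustrative):

    // Invalidate the loop's LCSSA phis before wiring in the new
    // predecessor, so SCEV drops expressions that looked through
    // them while they were still trivial.
    SmallVector<BasicBlock *, 4> ExitBlocks;
    L->getExitBlocks(ExitBlocks);
    for (BasicBlock *Exit : ExitBlocks)
      for (PHINode &PN : Exit->phis())
        SE.forgetLcssaPhiWithNewPredecessor(L, &PN);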
--- llvm/include/llvm/Analysis/ScalarEvolution.h | 4 ++ llvm/lib/Analysis/ScalarEvolution.cpp | 38 ++++++++++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 2 +- llvm/test/Transforms/LoopVectorize/pr66616.ll | 95 +++++++++++++++++++++++++ 4 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/LoopVectorize/pr66616.ll diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 234c05f..4f1237c 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -943,6 +943,10 @@ public: /// def-use chain linking it to a loop. void forgetValue(Value *V); + /// Forget LCSSA phi node V of loop L to which a new predecessor was added, + /// such that it may no longer be trivial. + void forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V); + /// Called when the client has changed the disposition of values in /// this loop. /// diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 8b3fc8d..9670e53 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -8410,6 +8410,44 @@ void ScalarEvolution::forgetValue(Value *V) { forgetMemoizedResults(ToForget); } +void ScalarEvolution::forgetLcssaPhiWithNewPredecessor(Loop *L, PHINode *V) { + if (!isSCEVable(V->getType())) + return; + + // If SCEV looked through a trivial LCSSA phi node, we might have SCEV's + // directly using a SCEVUnknown/SCEVAddRec defined in the loop. After an + // extra predecessor is added, this is no longer valid. Find all Unknowns and + // AddRecs defined in the loop and invalidate any SCEV's making use of them. + if (const SCEV *S = getExistingSCEV(V)) { + struct InvalidationRootCollector { + Loop *L; + SmallVector Roots; + + InvalidationRootCollector(Loop *L) : L(L) {} + + bool follow(const SCEV *S) { + if (auto *SU = dyn_cast(S)) { + if (auto *I = dyn_cast(SU->getValue())) + if (L->contains(I)) + Roots.push_back(S); + } else if (auto *AddRec = dyn_cast(S)) { + if (L->contains(AddRec->getLoop())) + Roots.push_back(S); + } + return true; + } + bool isDone() const { return false; } + }; + + InvalidationRootCollector C(L); + visitAll(S, C); + forgetMemoizedResults(C.Roots); + } + + // Also perform the normal invalidation. 
+ forgetValue(V); +} + void ScalarEvolution::forgetLoopDispositions() { LoopDispositions.clear(); } void ScalarEvolution::forgetBlockAndLoopDispositions(Value *V) { diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0a10f0d6..64093a0 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3599,7 +3599,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, OrigLoop->getExitBlocks(ExitBlocks); for (BasicBlock *Exit : ExitBlocks) for (PHINode &PN : Exit->phis()) - PSE.getSE()->forgetValue(&PN); + PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN); VPBasicBlock *LatchVPBB = Plan.getVectorLoopRegion()->getExitingBasicBlock(); Loop *VectorLoop = LI->getLoopFor(State.CFG.VPBB2IRBB[LatchVPBB]); diff --git a/llvm/test/Transforms/LoopVectorize/pr66616.ll b/llvm/test/Transforms/LoopVectorize/pr66616.ll new file mode 100644 index 0000000..2fb7f88 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/pr66616.ll @@ -0,0 +1,95 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3 +; RUN: opt -passes="print,loop-vectorize" --verify-scev -S < %s -force-vector-width=4 2>/dev/null | FileCheck %s + +; Make sure users of SCEVUnknowns from the scalar loop are invalidated. + +define void @pr66616(ptr %ptr) { +; CHECK-LABEL: define void @pr66616( +; CHECK-SAME: ptr [[PTR:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[PTR]], align 4 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 +; CHECK-NEXT: br i1 true, label [[PREHEADER:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: br label [[LOOP_1:%.*]] +; CHECK: loop.1: +; CHECK-NEXT: [[IV_1:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[LOOP_1]] ] +; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[PTR]], align 4 +; CHECK-NEXT: [[ADD3:%.*]] = add i32 [[LOAD]], 1 +; CHECK-NEXT: [[INC]] = add i8 [[IV_1]], 1 +; CHECK-NEXT: [[COND1:%.*]] = icmp eq i8 [[INC]], 0 +; CHECK-NEXT: br i1 [[COND1]], label [[PREHEADER]], label [[LOOP_1]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: preheader: +; CHECK-NEXT: [[ADD3_LCSSA:%.*]] = phi i32 [ [[ADD3]], [[LOOP_1]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[TMP4:%.*]] = sub i32 0, [[ADD3_LCSSA]] +; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64 +; CHECK-NEXT: [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP6]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH2:%.*]], label 
[[VECTOR_PH3:%.*]] +; CHECK: vector.ph3: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP6]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP6]], [[N_MOD_VF]] +; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[ADD3_LCSSA]], [[DOTCAST]] +; CHECK-NEXT: [[IND_END5:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[N_VEC]] +; CHECK-NEXT: br label [[VECTOR_BODY7:%.*]] +; CHECK: vector.body7: +; CHECK-NEXT: [[INDEX8:%.*]] = phi i64 [ 0, [[VECTOR_PH3]] ], [ [[INDEX_NEXT9:%.*]], [[VECTOR_BODY7]] ] +; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX8]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT9]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK1:%.*]], label [[VECTOR_BODY7]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: middle.block1: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP6]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH2]] +; CHECK: scalar.ph2: +; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK1]] ], [ [[ADD3_LCSSA]], [[PREHEADER]] ] +; CHECK-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[IND_END5]], [[MIDDLE_BLOCK1]] ], [ [[PTR]], [[PREHEADER]] ] +; CHECK-NEXT: br label [[LOOP_2:%.*]] +; CHECK: loop.2: +; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[IV_2_I:%.*]], [[LOOP_2]] ], [ [[BC_RESUME_VAL4]], [[SCALAR_PH2]] ] +; CHECK-NEXT: [[IV_3:%.*]] = phi ptr [ [[IV_3_I:%.*]], [[LOOP_2]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH2]] ] +; CHECK-NEXT: [[IV_2_I]] = add i32 [[IV_2]], 1 +; CHECK-NEXT: [[IV_3_I]] = getelementptr i8, ptr [[IV_3]], i64 1 +; CHECK-NEXT: [[COND2:%.*]] = icmp eq i32 [[IV_2]], 0 +; CHECK-NEXT: br i1 [[COND2]], label [[EXIT]], label [[LOOP_2]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop.1 + +loop.1: + %iv.1 = phi i8 [ 0, %entry ], [ %inc, %loop.1 ] + %load = load i32, ptr %ptr, align 4 + %add3 = add i32 %load, 1 + %inc = add i8 %iv.1, 1 + %cond1 = icmp eq i8 %inc, 0 + br i1 %cond1, label %preheader, label %loop.1 + +preheader: + br label %loop.2 + +loop.2: + %iv.2 = phi i32 [ %iv.2.i, %loop.2 ], [ %add3, %preheader ] + %iv.3 = phi ptr [ %iv.3.i, %loop.2 ], [ %ptr, %preheader ] + %iv.2.i = add i32 %iv.2, 1 + %iv.3.i = getelementptr i8, ptr %iv.3, i64 1 + %cond2 = icmp eq i32 %iv.2, 0 + br i1 %cond2, label %exit, label %loop.2 + +exit: + ret void +} -- cgit v1.1 From fd2d5add437b2b324492b1bc29374699fe7b614b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?kadir=20=C3=A7etinkaya?= Date: Fri, 17 Nov 2023 10:11:21 +0100 Subject: [include-cleaner] Make sure exports of stdlib also works for physical files (#72246) This was creating discrepancy in cases where we might have a physical file entry (e.g. because we followed a source location from a stdlib file) and tried to find its exporters. --- clang-tools-extra/include-cleaner/lib/Record.cpp | 18 ++++-------------- .../include-cleaner/unittests/RecordTest.cpp | 2 ++ 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/clang-tools-extra/include-cleaner/lib/Record.cpp b/clang-tools-extra/include-cleaner/lib/Record.cpp index 7a8e10a..6e00ff9 100644 --- a/clang-tools-extra/include-cleaner/lib/Record.cpp +++ b/clang-tools-extra/include-cleaner/lib/Record.cpp @@ -240,20 +240,10 @@ public: // Make sure current include is covered by the export pragma. 
if ((Top.Block && HashLine > Top.SeenAtLine) || Top.SeenAtLine == HashLine) { - if (IncludedHeader) { - switch (IncludedHeader->kind()) { - case Header::Physical: - Out->IWYUExportBy[IncludedHeader->physical().getUniqueID()] - .push_back(Top.Path); - break; - case Header::Standard: - Out->StdIWYUExportBy[IncludedHeader->standard()].push_back(Top.Path); - break; - case Header::Verbatim: - assert(false && "unexpected Verbatim header"); - break; - } - } + if (IncludedFile) + Out->IWYUExportBy[IncludedFile->getUniqueID()].push_back(Top.Path); + if (IncludedHeader && IncludedHeader->kind() == Header::Standard) + Out->StdIWYUExportBy[IncludedHeader->standard()].push_back(Top.Path); // main-file #include with export pragma should never be removed. if (Top.SeenAtFile == SM.getMainFileID() && IncludedFile) Out->ShouldKeep.insert(IncludedFile->getUniqueID()); diff --git a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp index 3685073..dfefa66 100644 --- a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp @@ -452,6 +452,8 @@ TEST_F(PragmaIncludeTest, IWYUExportForStandardHeaders) { auto &FM = Processed.fileManager(); EXPECT_THAT(PI.getExporters(*tooling::stdlib::Header::named(""), FM), testing::UnorderedElementsAre(FileNamed("export.h"))); + EXPECT_THAT(PI.getExporters(llvm::cantFail(FM.getFileRef("string")), FM), + testing::UnorderedElementsAre(FileNamed("export.h"))); } TEST_F(PragmaIncludeTest, IWYUExportBlock) { -- cgit v1.1 From 76a441a6efa5b7e73d96a3d67512493f3873b1cb Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 17 Nov 2023 01:13:38 -0800 Subject: [MC] Fix compression header size check in ELF writer This is #66888 with a test. For MC we only use a zstd test, as zlib has a lot of versions/forks with different speed/size tradeoff, which would make the test more brittle. If clang/test/Misc/cc1as-compress.s turns out to be brittle, we could make the string longer. --- clang/test/Misc/cc1as-compress.s | 2 +- llvm/lib/MC/ELFObjectWriter.cpp | 2 +- llvm/test/MC/ELF/compress-debug-sections-zstd.s | 9 ++++++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/clang/test/Misc/cc1as-compress.s b/clang/test/Misc/cc1as-compress.s index 54563c3..4c81c5c 100644 --- a/clang/test/Misc/cc1as-compress.s +++ b/clang/test/Misc/cc1as-compress.s @@ -12,4 +12,4 @@ // ZSTD: 0000 02000000 00000000 .section .debug_str,"MS",@progbits,1 -.asciz "perfectly compressable data sample *****************************************" +.asciz "perfectly compressable data sample ******************************************" diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp index 8490fef..e4d18d8 100644 --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -843,7 +843,7 @@ bool ELFWriter::maybeWriteCompression( uint32_t ChType, uint64_t Size, SmallVectorImpl &CompressedContents, Align Alignment) { uint64_t HdrSize = - is64Bit() ? sizeof(ELF::Elf32_Chdr) : sizeof(ELF::Elf64_Chdr); + is64Bit() ? sizeof(ELF::Elf64_Chdr) : sizeof(ELF::Elf32_Chdr); if (Size <= HdrSize + CompressedContents.size()) return false; // Platform specific header is followed by compressed data. 
diff --git a/llvm/test/MC/ELF/compress-debug-sections-zstd.s b/llvm/test/MC/ELF/compress-debug-sections-zstd.s index 01ae21a..a05c501 100644 --- a/llvm/test/MC/ELF/compress-debug-sections-zstd.s +++ b/llvm/test/MC/ELF/compress-debug-sections-zstd.s @@ -10,7 +10,8 @@ # SEC: .debug_line PROGBITS [[#%x,]] [[#%x,]] [[#%x,]] 00 C 0 0 8 # SEC: .debug_abbrev PROGBITS [[#%x,]] [[#%x,]] [[#%x,]] 00 0 0 1 # SEC: .debug_info PROGBITS [[#%x,]] [[#%x,]] [[#%x,]] 00 0 0 1 -# SEC: .debug_str PROGBITS [[#%x,]] [[#%x,]] [[#%x,]] 01 MSC 0 0 8 +## .debug_str is uncompressed becuase sizeof(Elf64_Chdr)+len(compressed) >= len(uncompressed). +# SEC: .debug_str PROGBITS [[#%x,]] [[#%x,]] [[#%x,]] 01 MS 0 0 1 # SEC: .debug_frame PROGBITS [[#%x,]] [[#%x,]] [[#%x,]] 00 C 0 0 8 # CHECK: Contents of section .debug_line @@ -23,6 +24,12 @@ # RUN: llvm-objcopy --decompress-debug-sections %t %t.decom # RUN: cmp %t.uncom %t.decom +## .debug_str is compressed becuase sizeof(Elf32_Chdr)+len(compressed) < len(uncompressed). +# RUN: llvm-mc -filetype=obj -triple=i386 --defsym I386=1 -compress-debug-sections=zstd --defsym LONG=1 %s -o %t1 +# RUN: llvm-readelf -S %t1 | FileCheck %s --check-prefix=SEC1 + +# SEC1: .debug_str PROGBITS [[#%x,]] [[#%x,]] [[#%x,]] 01 MSC 0 0 4 + ## Don't compress a section not named .debug_*. .section .nonalloc,"",@progbits .rept 50 -- cgit v1.1 From c4fd1fd6d4e0d21b2315fadecbcdc0892f3c6925 Mon Sep 17 00:00:00 2001 From: Marius Brehler Date: Fri, 17 Nov 2023 10:22:15 +0100 Subject: [mlir][emitc] Rename `call` op to `call_opaque` (#72494) This renames the `emitc.call` op to `emitc.call_opaque` as the existing call op does not refer to the callee by symbol. The rename allows to introduce a new call op alongside with a future `emitc.func` op to model and facilitate functions and function calls. --- mlir/docs/Dialects/emitc.md | 8 ++++---- mlir/include/mlir/Dialect/EmitC/IR/EmitC.td | 18 +++++++++-------- mlir/lib/Dialect/EmitC/IR/EmitC.cpp | 2 +- mlir/lib/Target/Cpp/TranslateToCpp.cpp | 26 +++++++++++++----------- mlir/test/Conversion/SCFToEmitC/if.mlir | 28 +++++++++++++------------- mlir/test/Dialect/EmitC/attrs.mlir | 4 ++-- mlir/test/Dialect/EmitC/invalid_ops.mlir | 24 +++++++++++----------- mlir/test/Dialect/EmitC/ops.mlir | 18 ++++++++--------- mlir/test/Dialect/EmitC/types.mlir | 26 ++++++++++++------------ mlir/test/Target/Cpp/attrs.mlir | 4 ++-- mlir/test/Target/Cpp/call.mlir | 18 ++++++++--------- mlir/test/Target/Cpp/common-cpp.mlir | 28 +++++++++++++------------- mlir/test/Target/Cpp/control_flow.mlir | 4 ++-- mlir/test/Target/Cpp/for.mlir | 10 ++++----- mlir/test/Target/Cpp/if.mlir | 14 ++++++------- mlir/test/Target/Cpp/literal_call_operand.mlir | 4 ++-- mlir/test/Target/Cpp/types.mlir | 24 +++++++++++----------- 17 files changed, 132 insertions(+), 128 deletions(-) diff --git a/mlir/docs/Dialects/emitc.md b/mlir/docs/Dialects/emitc.md index 03b8561..809a046 100644 --- a/mlir/docs/Dialects/emitc.md +++ b/mlir/docs/Dialects/emitc.md @@ -3,12 +3,12 @@ ops. Those can be translated to C/C++ via the Cpp emitter. The following convention is followed: -* If template arguments are passed to an `emitc.call` operation, C++ is +* If template arguments are passed to an `emitc.call_opaque` operation, C++ is generated. * If tensors are used, C++ is generated. -* If multiple return values are used within in a functions or an `emitc.call` - operation, C++11 is required. 
-* If floating-point type template arguments are passed to an `emitc.call` +* If multiple return values are used within in a functions or an + `emitc.call_opaque` operation, C++11 is required. +* If floating-point type template arguments are passed to an `emitc.call_opaque` operation, C++20 is required. * Else the generated code is compatible with C99. diff --git a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td index 2edeb6f..e09c6329 100644 --- a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td +++ b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td @@ -92,11 +92,12 @@ def EmitC_ApplyOp : EmitC_Op<"apply", []> { let hasVerifier = 1; } -def EmitC_CallOp : EmitC_Op<"call", []> { - let summary = "Call operation"; +def EmitC_CallOpaqueOp : EmitC_Op<"call_opaque", []> { + let summary = "Opaque call operation"; let description = [{ - The `call` operation represents a C++ function call. The call allows - specifying order of operands and attributes in the call as follows: + The `call_opaque` operation represents a C++ function call. The callee + can be an arbitrary non-empty string. The call allows specifying order + of operands and attributes in the call as follows: - integer value of index type refers to an operand; - attribute which will get lowered to constant value in call; @@ -105,10 +106,10 @@ def EmitC_CallOp : EmitC_Op<"call", []> { ```mlir // Custom form defining a call to `foo()`. - %0 = emitc.call "foo" () : () -> i32 + %0 = emitc.call_opaque "foo" () : () -> i32 // Generic form of the same operation. - %0 = "emitc.call"() {callee = "foo"} : () -> i32 + %0 = "emitc.call_opaque"() {callee = "foo"} : () -> i32 ``` }]; let arguments = (ins @@ -454,7 +455,8 @@ def EmitC_VariableOp : EmitC_Op<"variable", []> { %1 = "emitc.variable"() {value = 0 : i32} : () -> i32 %2 = emitc.apply "&"(%0) : (i32) -> !emitc.ptr %3 = emitc.apply "&"(%1) : (i32) -> !emitc.ptr - emitc.call "write"(%2, %3) : (!emitc.ptr, !emitc.ptr) -> () + emitc.call_opaque "write"(%2, %3) + : (!emitc.ptr, !emitc.ptr) -> () ``` }]; @@ -477,7 +479,7 @@ def EmitC_AssignOp : EmitC_Op<"assign", []> { ```mlir // Integer variable %0 = "emitc.variable"(){value = 42 : i32} : () -> i32 - %1 = emitc.call "foo"() : () -> (i32) + %1 = emitc.call_opaque "foo"() : () -> (i32) // Assign emitted as `... = ...;` "emitc.assign"(%0, %1) : (i32, i32) -> () diff --git a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp index d06381b..e8ea4da 100644 --- a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp +++ b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp @@ -131,7 +131,7 @@ bool CastOp::areCastCompatible(TypeRange inputs, TypeRange outputs) { // CallOp //===----------------------------------------------------------------------===// -LogicalResult emitc::CallOp::verify() { +LogicalResult emitc::CallOpaqueOp::verify() { // Callee must not be empty. 
if (getCallee().empty()) return emitOpError("callee must not be empty"); diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp index 291624c..1b4ec9e 100644 --- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp +++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp @@ -412,13 +412,14 @@ static LogicalResult printOperation(CppEmitter &emitter, func::CallOp callOp) { return success(); } -static LogicalResult printOperation(CppEmitter &emitter, emitc::CallOp callOp) { +static LogicalResult printOperation(CppEmitter &emitter, + emitc::CallOpaqueOp callOpaqueOp) { raw_ostream &os = emitter.ostream(); - Operation &op = *callOp.getOperation(); + Operation &op = *callOpaqueOp.getOperation(); if (failed(emitter.emitAssignPrefix(op))) return failure(); - os << callOp.getCallee(); + os << callOpaqueOp.getCallee(); auto emitArgs = [&](Attribute attr) -> LogicalResult { if (auto t = dyn_cast(attr)) { @@ -441,10 +442,10 @@ static LogicalResult printOperation(CppEmitter &emitter, emitc::CallOp callOp) { return success(); }; - if (callOp.getTemplateArgs()) { + if (callOpaqueOp.getTemplateArgs()) { os << "<"; - if (failed( - interleaveCommaWithError(*callOp.getTemplateArgs(), os, emitArgs))) + if (failed(interleaveCommaWithError(*callOpaqueOp.getTemplateArgs(), os, + emitArgs))) return failure(); os << ">"; } @@ -452,8 +453,8 @@ static LogicalResult printOperation(CppEmitter &emitter, emitc::CallOp callOp) { os << "("; LogicalResult emittedArgs = - callOp.getArgs() - ? interleaveCommaWithError(*callOp.getArgs(), os, emitArgs) + callOpaqueOp.getArgs() + ? interleaveCommaWithError(*callOpaqueOp.getArgs(), os, emitArgs) : emitter.emitOperands(op); if (failed(emittedArgs)) return failure(); @@ -949,10 +950,11 @@ LogicalResult CppEmitter::emitOperation(Operation &op, bool trailingSemicolon) { .Case( [&](auto op) { return printOperation(*this, op); }) // EmitC ops. - .Case( + .Case( [&](auto op) { return printOperation(*this, op); }) // Func ops. 
.Case( diff --git a/mlir/test/Conversion/SCFToEmitC/if.mlir b/mlir/test/Conversion/SCFToEmitC/if.mlir index e34fd6a..afc9abc 100644 --- a/mlir/test/Conversion/SCFToEmitC/if.mlir +++ b/mlir/test/Conversion/SCFToEmitC/if.mlir @@ -2,7 +2,7 @@ func.func @test_if(%arg0: i1, %arg1: f32) { scf.if %arg0 { - %0 = emitc.call "func_const"(%arg1) : (f32) -> i32 + %0 = emitc.call_opaque "func_const"(%arg1) : (f32) -> i32 } return } @@ -10,7 +10,7 @@ func.func @test_if(%arg0: i1, %arg1: f32) { // CHECK-SAME: %[[VAL_0:.*]]: i1, // CHECK-SAME: %[[VAL_1:.*]]: f32) { // CHECK-NEXT: emitc.if %[[VAL_0]] { -// CHECK-NEXT: %[[VAL_2:.*]] = emitc.call "func_const"(%[[VAL_1]]) : (f32) -> i32 +// CHECK-NEXT: %[[VAL_2:.*]] = emitc.call_opaque "func_const"(%[[VAL_1]]) : (f32) -> i32 // CHECK-NEXT: } // CHECK-NEXT: return // CHECK-NEXT: } @@ -18,9 +18,9 @@ func.func @test_if(%arg0: i1, %arg1: f32) { func.func @test_if_else(%arg0: i1, %arg1: f32) { scf.if %arg0 { - %0 = emitc.call "func_true"(%arg1) : (f32) -> i32 + %0 = emitc.call_opaque "func_true"(%arg1) : (f32) -> i32 } else { - %0 = emitc.call "func_false"(%arg1) : (f32) -> i32 + %0 = emitc.call_opaque "func_false"(%arg1) : (f32) -> i32 } return } @@ -28,9 +28,9 @@ func.func @test_if_else(%arg0: i1, %arg1: f32) { // CHECK-SAME: %[[VAL_0:.*]]: i1, // CHECK-SAME: %[[VAL_1:.*]]: f32) { // CHECK-NEXT: emitc.if %[[VAL_0]] { -// CHECK-NEXT: %[[VAL_2:.*]] = emitc.call "func_true"(%[[VAL_1]]) : (f32) -> i32 +// CHECK-NEXT: %[[VAL_2:.*]] = emitc.call_opaque "func_true"(%[[VAL_1]]) : (f32) -> i32 // CHECK-NEXT: } else { -// CHECK-NEXT: %[[VAL_3:.*]] = emitc.call "func_false"(%[[VAL_1]]) : (f32) -> i32 +// CHECK-NEXT: %[[VAL_3:.*]] = emitc.call_opaque "func_false"(%[[VAL_1]]) : (f32) -> i32 // CHECK-NEXT: } // CHECK-NEXT: return // CHECK-NEXT: } @@ -39,12 +39,12 @@ func.func @test_if_else(%arg0: i1, %arg1: f32) { func.func @test_if_yield(%arg0: i1, %arg1: f32) { %0 = arith.constant 0 : i8 %x, %y = scf.if %arg0 -> (i32, f64) { - %1 = emitc.call "func_true_1"(%arg1) : (f32) -> i32 - %2 = emitc.call "func_true_2"(%arg1) : (f32) -> f64 + %1 = emitc.call_opaque "func_true_1"(%arg1) : (f32) -> i32 + %2 = emitc.call_opaque "func_true_2"(%arg1) : (f32) -> f64 scf.yield %1, %2 : i32, f64 } else { - %1 = emitc.call "func_false_1"(%arg1) : (f32) -> i32 - %2 = emitc.call "func_false_2"(%arg1) : (f32) -> f64 + %1 = emitc.call_opaque "func_false_1"(%arg1) : (f32) -> i32 + %2 = emitc.call_opaque "func_false_2"(%arg1) : (f32) -> f64 scf.yield %1, %2 : i32, f64 } return @@ -56,13 +56,13 @@ func.func @test_if_yield(%arg0: i1, %arg1: f32) { // CHECK-NEXT: %[[VAL_3:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> i32 // CHECK-NEXT: %[[VAL_4:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> f64 // CHECK-NEXT: emitc.if %[[VAL_0]] { -// CHECK-NEXT: %[[VAL_5:.*]] = emitc.call "func_true_1"(%[[VAL_1]]) : (f32) -> i32 -// CHECK-NEXT: %[[VAL_6:.*]] = emitc.call "func_true_2"(%[[VAL_1]]) : (f32) -> f64 +// CHECK-NEXT: %[[VAL_5:.*]] = emitc.call_opaque "func_true_1"(%[[VAL_1]]) : (f32) -> i32 +// CHECK-NEXT: %[[VAL_6:.*]] = emitc.call_opaque "func_true_2"(%[[VAL_1]]) : (f32) -> f64 // CHECK-NEXT: emitc.assign %[[VAL_5]] : i32 to %[[VAL_3]] : i32 // CHECK-NEXT: emitc.assign %[[VAL_6]] : f64 to %[[VAL_4]] : f64 // CHECK-NEXT: } else { -// CHECK-NEXT: %[[VAL_7:.*]] = emitc.call "func_false_1"(%[[VAL_1]]) : (f32) -> i32 -// CHECK-NEXT: %[[VAL_8:.*]] = emitc.call "func_false_2"(%[[VAL_1]]) : (f32) -> f64 +// CHECK-NEXT: %[[VAL_7:.*]] = emitc.call_opaque "func_false_1"(%[[VAL_1]]) 
: (f32) -> i32 +// CHECK-NEXT: %[[VAL_8:.*]] = emitc.call_opaque "func_false_2"(%[[VAL_1]]) : (f32) -> f64 // CHECK-NEXT: emitc.assign %[[VAL_7]] : i32 to %[[VAL_3]] : i32 // CHECK-NEXT: emitc.assign %[[VAL_8]] : f64 to %[[VAL_4]] : f64 // CHECK-NEXT: } diff --git a/mlir/test/Dialect/EmitC/attrs.mlir b/mlir/test/Dialect/EmitC/attrs.mlir index 8bf1962..11251b8 100644 --- a/mlir/test/Dialect/EmitC/attrs.mlir +++ b/mlir/test/Dialect/EmitC/attrs.mlir @@ -5,8 +5,8 @@ // CHECK-LABEL: func @opaque_attrs() { func.func @opaque_attrs() { // CHECK-NEXT: #emitc.opaque<"attr"> - emitc.call "f"() {args = [#emitc.opaque<"attr">]} : () -> () + emitc.call_opaque "f"() {args = [#emitc.opaque<"attr">]} : () -> () // CHECK-NEXT: #emitc.opaque<"\22quoted_attr\22"> - emitc.call "f"() {args = [#emitc.opaque<"\"quoted_attr\"">]} : () -> () + emitc.call_opaque "f"() {args = [#emitc.opaque<"\"quoted_attr\"">]} : () -> () return } diff --git a/mlir/test/Dialect/EmitC/invalid_ops.mlir b/mlir/test/Dialect/EmitC/invalid_ops.mlir index 53d88ad..49efb96 100644 --- a/mlir/test/Dialect/EmitC/invalid_ops.mlir +++ b/mlir/test/Dialect/EmitC/invalid_ops.mlir @@ -25,48 +25,48 @@ func.func @empty_constant() { // ----- func.func @index_args_out_of_range_1() { - // expected-error @+1 {{'emitc.call' op index argument is out of range}} - emitc.call "test" () {args = [0 : index]} : () -> () + // expected-error @+1 {{'emitc.call_opaque' op index argument is out of range}} + emitc.call_opaque "test" () {args = [0 : index]} : () -> () return } // ----- func.func @index_args_out_of_range_2(%arg : i32) { - // expected-error @+1 {{'emitc.call' op index argument is out of range}} - emitc.call "test" (%arg, %arg) {args = [2 : index]} : (i32, i32) -> () + // expected-error @+1 {{'emitc.call_opaque' op index argument is out of range}} + emitc.call_opaque "test" (%arg, %arg) {args = [2 : index]} : (i32, i32) -> () return } // ----- func.func @empty_callee() { - // expected-error @+1 {{'emitc.call' op callee must not be empty}} - emitc.call "" () : () -> () + // expected-error @+1 {{'emitc.call_opaque' op callee must not be empty}} + emitc.call_opaque "" () : () -> () return } // ----- func.func @nonetype_arg(%arg : i32) { - // expected-error @+1 {{'emitc.call' op array argument has no type}} - emitc.call "nonetype_arg"(%arg) {args = [0 : index, [0, 1, 2]]} : (i32) -> i32 + // expected-error @+1 {{'emitc.call_opaque' op array argument has no type}} + emitc.call_opaque "nonetype_arg"(%arg) {args = [0 : index, [0, 1, 2]]} : (i32) -> i32 return } // ----- func.func @array_template_arg(%arg : i32) { - // expected-error @+1 {{'emitc.call' op template argument has invalid type}} - emitc.call "nonetype_template_arg"(%arg) {template_args = [[0, 1, 2]]} : (i32) -> i32 + // expected-error @+1 {{'emitc.call_opaque' op template argument has invalid type}} + emitc.call_opaque "nonetype_template_arg"(%arg) {template_args = [[0, 1, 2]]} : (i32) -> i32 return } // ----- func.func @dense_template_argument(%arg : i32) { - // expected-error @+1 {{'emitc.call' op template argument has invalid type}} - emitc.call "dense_template_argument"(%arg) {template_args = [dense<[1.0, 1.0]> : tensor<2xf32>]} : (i32) -> i32 + // expected-error @+1 {{'emitc.call_opaque' op template argument has invalid type}} + emitc.call_opaque "dense_template_argument"(%arg) {template_args = [dense<[1.0, 1.0]> : tensor<2xf32>]} : (i32) -> i32 return } diff --git a/mlir/test/Dialect/EmitC/ops.mlir b/mlir/test/Dialect/EmitC/ops.mlir index 6c83986..b3a24c2 100644 --- 
a/mlir/test/Dialect/EmitC/ops.mlir +++ b/mlir/test/Dialect/EmitC/ops.mlir @@ -8,8 +8,8 @@ emitc.include "test.h" // CHECK-LABEL: func @f(%{{.*}}: i32, %{{.*}}: !emitc.opaque<"int32_t">) { func.func @f(%arg0: i32, %f: !emitc.opaque<"int32_t">) { - %1 = "emitc.call"() {callee = "blah"} : () -> i64 - emitc.call "foo" (%1) {args = [ + %1 = "emitc.call_opaque"() {callee = "blah"} : () -> i64 + emitc.call_opaque "foo" (%1) {args = [ 0 : index, dense<[0, 1]> : tensor<2xi32>, 0 : index ]} : (i64) -> () return @@ -100,14 +100,14 @@ func.func @cmp(%arg0 : i32, %arg1 : f32, %arg2 : i64, %arg3 : f64, %arg4 : !emit func.func @test_if(%arg0: i1, %arg1: f32) { emitc.if %arg0 { - %0 = emitc.call "func_const"(%arg1) : (f32) -> i32 + %0 = emitc.call_opaque "func_const"(%arg1) : (f32) -> i32 } return } func.func @test_if_explicit_yield(%arg0: i1, %arg1: f32) { emitc.if %arg0 { - %0 = emitc.call "func_const"(%arg1) : (f32) -> i32 + %0 = emitc.call_opaque "func_const"(%arg1) : (f32) -> i32 emitc.yield } return @@ -115,9 +115,9 @@ func.func @test_if_explicit_yield(%arg0: i1, %arg1: f32) { func.func @test_if_else(%arg0: i1, %arg1: f32) { emitc.if %arg0 { - %0 = emitc.call "func_true"(%arg1) : (f32) -> i32 + %0 = emitc.call_opaque "func_true"(%arg1) : (f32) -> i32 } else { - %0 = emitc.call "func_false"(%arg1) : (f32) -> i32 + %0 = emitc.call_opaque "func_false"(%arg1) : (f32) -> i32 } return } @@ -130,14 +130,14 @@ func.func @test_assign(%arg1: f32) { func.func @test_for(%arg0 : index, %arg1 : index, %arg2 : index) { emitc.for %i0 = %arg0 to %arg1 step %arg2 { - %0 = emitc.call "func_const"(%i0) : (index) -> i32 + %0 = emitc.call_opaque "func_const"(%i0) : (index) -> i32 } return } func.func @test_for_explicit_yield(%arg0 : index, %arg1 : index, %arg2 : index) { emitc.for %i0 = %arg0 to %arg1 step %arg2 { - %0 = emitc.call "func_const"(%i0) : (index) -> i32 + %0 = emitc.call_opaque "func_const"(%i0) : (index) -> i32 emitc.yield } return @@ -145,7 +145,7 @@ func.func @test_for_explicit_yield(%arg0 : index, %arg1 : index, %arg2 : index) func.func @test_for_not_index_induction(%arg0 : i16, %arg1 : i16, %arg2 : i16) { emitc.for %i0 = %arg0 to %arg1 step %arg2 : i16 { - %0 = emitc.call "func_const"(%i0) : (i16) -> i32 + %0 = emitc.call_opaque "func_const"(%i0) : (i16) -> i32 } return } diff --git a/mlir/test/Dialect/EmitC/types.mlir b/mlir/test/Dialect/EmitC/types.mlir index 3d68629..26d6f43 100644 --- a/mlir/test/Dialect/EmitC/types.mlir +++ b/mlir/test/Dialect/EmitC/types.mlir @@ -5,17 +5,17 @@ // CHECK-LABEL: func @opaque_types() { func.func @opaque_types() { // CHECK-NEXT: !emitc.opaque<"int"> - emitc.call "f"() {template_args = [!emitc>]} : () -> () + emitc.call_opaque "f"() {template_args = [!emitc>]} : () -> () // CHECK-NEXT: !emitc.opaque<"byte"> - emitc.call "f"() {template_args = [!emitc>]} : () -> () + emitc.call_opaque "f"() {template_args = [!emitc>]} : () -> () // CHECK-NEXT: !emitc.opaque<"unsigned"> - emitc.call "f"() {template_args = [!emitc>]} : () -> () + emitc.call_opaque "f"() {template_args = [!emitc>]} : () -> () // CHECK-NEXT: !emitc.opaque<"status_t"> - emitc.call "f"() {template_args = [!emitc>]} : () -> () + emitc.call_opaque "f"() {template_args = [!emitc>]} : () -> () // CHECK-NEXT: !emitc.opaque<"std::vector"> - emitc.call "f"() {template_args = [!emitc.opaque<"std::vector">]} : () -> () + emitc.call_opaque "f"() {template_args = [!emitc.opaque<"std::vector">]} : () -> () // CHECK-NEXT: !emitc.opaque<"SmallVector"> - emitc.call "f"() {template_args = [!emitc.opaque<"SmallVector">]} : () 
-> () + emitc.call_opaque "f"() {template_args = [!emitc.opaque<"SmallVector">]} : () -> () return } @@ -23,19 +23,19 @@ func.func @opaque_types() { // CHECK-LABEL: func @pointer_types() { func.func @pointer_types() { // CHECK-NEXT: !emitc.ptr - emitc.call "f"() {template_args = [!emitc.ptr]} : () -> () + emitc.call_opaque "f"() {template_args = [!emitc.ptr]} : () -> () // CHECK-NEXT: !emitc.ptr - emitc.call "f"() {template_args = [!emitc.ptr]} : () -> () + emitc.call_opaque "f"() {template_args = [!emitc.ptr]} : () -> () // CHECK-NEXT: !emitc.ptr - emitc.call "f"() {template_args = [!emitc.ptr]} : () -> () + emitc.call_opaque "f"() {template_args = [!emitc.ptr]} : () -> () // CHECK-NEXT: !emitc.ptr - emitc.call "f"() {template_args = [!emitc.ptr]} : () -> () + emitc.call_opaque "f"() {template_args = [!emitc.ptr]} : () -> () // CHECK-NEXT: !emitc.ptr - %0 = emitc.call "f"() : () -> (!emitc.ptr) + %0 = emitc.call_opaque "f"() : () -> (!emitc.ptr) // CHECK-NEXT: (!emitc.ptr) -> !emitc.ptr> - %1 = emitc.call "f"(%0) : (!emitc.ptr) -> (!emitc.ptr>) + %1 = emitc.call_opaque "f"(%0) : (!emitc.ptr) -> (!emitc.ptr>) // CHECK-NEXT: !emitc.ptr> - emitc.call "f"() {template_args = [!emitc.ptr>]} : () -> () + emitc.call_opaque "f"() {template_args = [!emitc.ptr>]} : () -> () return } diff --git a/mlir/test/Target/Cpp/attrs.mlir b/mlir/test/Target/Cpp/attrs.mlir index 4e5bdfd..0a42570 100644 --- a/mlir/test/Target/Cpp/attrs.mlir +++ b/mlir/test/Target/Cpp/attrs.mlir @@ -3,8 +3,8 @@ // CHECK-LABEL: void opaque_attrs() { func.func @opaque_attrs() { // CHECK-NEXT: f(OPAQUE_ENUM_VALUE); - emitc.call "f"() {args = [#emitc.opaque<"OPAQUE_ENUM_VALUE">]} : () -> () + emitc.call_opaque "f"() {args = [#emitc.opaque<"OPAQUE_ENUM_VALUE">]} : () -> () // CHECK-NEXT: f("some string"); - emitc.call "f"() {args = [#emitc.opaque<"\"some string\"">]} : () -> () + emitc.call_opaque "f"() {args = [#emitc.opaque<"\"some string\"">]} : () -> () return } diff --git a/mlir/test/Target/Cpp/call.mlir b/mlir/test/Target/Cpp/call.mlir index 3bd4b4b..2bcdc87 100644 --- a/mlir/test/Target/Cpp/call.mlir +++ b/mlir/test/Target/Cpp/call.mlir @@ -1,34 +1,34 @@ // RUN: mlir-translate -mlir-to-cpp %s | FileCheck %s -check-prefix=CPP-DEFAULT // RUN: mlir-translate -mlir-to-cpp -declare-variables-at-top %s | FileCheck %s -check-prefix=CPP-DECLTOP -func.func @emitc_call() { - %0 = emitc.call "func_a" () : () -> i32 - %1 = emitc.call "func_b" () : () -> i32 +func.func @emitc_call_opaque() { + %0 = emitc.call_opaque "func_a" () : () -> i32 + %1 = emitc.call_opaque "func_b" () : () -> i32 return } -// CPP-DEFAULT: void emitc_call() { +// CPP-DEFAULT: void emitc_call_opaque() { // CPP-DEFAULT-NEXT: int32_t [[V0:[^ ]*]] = func_a(); // CPP-DEFAULT-NEXT: int32_t [[V1:[^ ]*]] = func_b(); -// CPP-DECLTOP: void emitc_call() { +// CPP-DECLTOP: void emitc_call_opaque() { // CPP-DECLTOP-NEXT: int32_t [[V0:[^ ]*]]; // CPP-DECLTOP-NEXT: int32_t [[V1:[^ ]*]]; // CPP-DECLTOP-NEXT: [[V0:]] = func_a(); // CPP-DECLTOP-NEXT: [[V1:]] = func_b(); -func.func @emitc_call_two_results() { +func.func @emitc_call_opaque_two_results() { %0 = arith.constant 0 : index - %1:2 = emitc.call "two_results" () : () -> (i32, i32) + %1:2 = emitc.call_opaque "two_results" () : () -> (i32, i32) return } -// CPP-DEFAULT: void emitc_call_two_results() { +// CPP-DEFAULT: void emitc_call_opaque_two_results() { // CPP-DEFAULT-NEXT: size_t [[V1:[^ ]*]] = 0; // CPP-DEFAULT-NEXT: int32_t [[V2:[^ ]*]]; // CPP-DEFAULT-NEXT: int32_t [[V3:[^ ]*]]; // CPP-DEFAULT-NEXT: std::tie([[V2]], [[V3]]) 
= two_results(); -// CPP-DECLTOP: void emitc_call_two_results() { +// CPP-DECLTOP: void emitc_call_opaque_two_results() { // CPP-DECLTOP-NEXT: size_t [[V1:[^ ]*]]; // CPP-DECLTOP-NEXT: int32_t [[V2:[^ ]*]]; // CPP-DECLTOP-NEXT: int32_t [[V3:[^ ]*]]; diff --git a/mlir/test/Target/Cpp/common-cpp.mlir b/mlir/test/Target/Cpp/common-cpp.mlir index 252f5e2..b537e70 100644 --- a/mlir/test/Target/Cpp/common-cpp.mlir +++ b/mlir/test/Target/Cpp/common-cpp.mlir @@ -8,13 +8,13 @@ emitc.include <"myheader.h"> // CHECK: void test_foo_print() { func.func @test_foo_print() { // CHECK: [[V1:[^ ]*]] = foo::constant({0, 1}); - %0 = emitc.call "foo::constant"() {args = [dense<[0, 1]> : tensor<2xi32>]} : () -> (i32) + %0 = emitc.call_opaque "foo::constant"() {args = [dense<[0, 1]> : tensor<2xi32>]} : () -> (i32) // CHECK: [[V2:[^ ]*]] = foo::op_and_attr({0, 1}, [[V1]]); - %1 = emitc.call "foo::op_and_attr"(%0) {args = [dense<[0, 1]> : tensor<2xi32>, 0 : index]} : (i32) -> (i32) + %1 = emitc.call_opaque "foo::op_and_attr"(%0) {args = [dense<[0, 1]> : tensor<2xi32>, 0 : index]} : (i32) -> (i32) // CHECK: [[V3:[^ ]*]] = foo::op_and_attr([[V2]], {0, 1}); - %2 = emitc.call "foo::op_and_attr"(%1) {args = [0 : index, dense<[0, 1]> : tensor<2xi32>]} : (i32) -> (i32) + %2 = emitc.call_opaque "foo::op_and_attr"(%1) {args = [0 : index, dense<[0, 1]> : tensor<2xi32>]} : (i32) -> (i32) // CHECK: foo::print([[V3]]); - emitc.call "foo::print"(%2): (i32) -> () + emitc.call_opaque "foo::print"(%2): (i32) -> () return } @@ -27,7 +27,7 @@ func.func @test_single_return(%arg0 : i32) -> i32 { // CHECK: std::tuple test_multiple_return() func.func @test_multiple_return() -> (i32, i32) { // CHECK: std::tie([[V3:.*]], [[V4:.*]]) = foo::blah(); - %0:2 = emitc.call "foo::blah"() : () -> (i32, i32) + %0:2 = emitc.call_opaque "foo::blah"() : () -> (i32, i32) // CHECK: [[V5:[^ ]*]] = test_single_return([[V3]]); %1 = call @test_single_return(%0#0) : (i32) -> i32 // CHECK: return std::make_tuple([[V5]], [[V4]]); @@ -37,48 +37,48 @@ func.func @test_multiple_return() -> (i32, i32) { // CHECK: test_float func.func @test_float() { // CHECK: foo::constant({(float)0.0e+00, (float)1.000000000e+00}) - %0 = emitc.call "foo::constant"() {args = [dense<[0.000000e+00, 1.000000e+00]> : tensor<2xf32>]} : () -> f32 + %0 = emitc.call_opaque "foo::constant"() {args = [dense<[0.000000e+00, 1.000000e+00]> : tensor<2xf32>]} : () -> f32 return } // CHECK: test_uint func.func @test_uint() { // CHECK: uint32_t - %0 = emitc.call "foo::constant"() {args = [dense<[0, 1]> : tensor<2xui32>]} : () -> ui32 + %0 = emitc.call_opaque "foo::constant"() {args = [dense<[0, 1]> : tensor<2xui32>]} : () -> ui32 // CHECK: uint64_t - %1 = emitc.call "foo::constant"() {args = [dense<[0, 1]> : tensor<2xui64>]} : () -> ui64 + %1 = emitc.call_opaque "foo::constant"() {args = [dense<[0, 1]> : tensor<2xui64>]} : () -> ui64 return } // CHECK: int64_t test_plus_int(int64_t [[V1]]) func.func @test_plus_int(%arg0 : i64) -> i64 { // CHECK: mhlo::add([[V1]], [[V1]]) - %0 = emitc.call "mhlo::add"(%arg0, %arg0) {args = [0 : index, 1 : index]} : (i64, i64) -> i64 + %0 = emitc.call_opaque "mhlo::add"(%arg0, %arg0) {args = [0 : index, 1 : index]} : (i64, i64) -> i64 return %0 : i64 } // CHECK: Tensor mixed_types(Tensor [[V1]]) func.func @mixed_types(%arg0: tensor<2xf64>) -> tensor<2xf32> { // CHECK: foo::mixed_types([[V1]]); - %0 = emitc.call "foo::mixed_types"(%arg0) {args = [0 : index]} : (tensor<2xf64>) -> tensor<2xf32> + %0 = emitc.call_opaque "foo::mixed_types"(%arg0) {args = [0 : index]} : 
(tensor<2xf64>) -> tensor<2xf32> return %0 : tensor<2xf32> } // CHECK: Tensor mhlo_convert(Tensor [[V1]]) func.func @mhlo_convert(%arg0: tensor) -> tensor { // CHECK: mhlo::convert([[V1]]); - %0 = emitc.call "mhlo::convert"(%arg0) {args = [0 : index]} : (tensor) -> tensor + %0 = emitc.call_opaque "mhlo::convert"(%arg0) {args = [0 : index]} : (tensor) -> tensor return %0 : tensor } // CHECK: status_t opaque_types(bool [[V1:[^ ]*]], char [[V2:[^ ]*]]) { func.func @opaque_types(%arg0: !emitc.opaque<"bool">, %arg1: !emitc.opaque<"char">) -> !emitc.opaque<"status_t"> { // CHECK: int [[V3:[^ ]*]] = a([[V1]], [[V2]]); - %0 = emitc.call "a"(%arg0, %arg1) : (!emitc.opaque<"bool">, !emitc.opaque<"char">) -> (!emitc.opaque<"int">) + %0 = emitc.call_opaque "a"(%arg0, %arg1) : (!emitc.opaque<"bool">, !emitc.opaque<"char">) -> (!emitc.opaque<"int">) // CHECK: char [[V4:[^ ]*]] = b([[V3]]); - %1 = emitc.call "b"(%0): (!emitc.opaque<"int">) -> (!emitc.opaque<"char">) + %1 = emitc.call_opaque "b"(%0): (!emitc.opaque<"int">) -> (!emitc.opaque<"char">) // CHECK: status_t [[V5:[^ ]*]] = c([[V3]], [[V4]]); - %2 = emitc.call "c"(%0, %1): (!emitc.opaque<"int">, !emitc.opaque<"char">) -> (!emitc.opaque<"status_t">) + %2 = emitc.call_opaque "c"(%0, %1): (!emitc.opaque<"int">, !emitc.opaque<"char">) -> (!emitc.opaque<"status_t">) return %2 : !emitc.opaque<"status_t"> } diff --git a/mlir/test/Target/Cpp/control_flow.mlir b/mlir/test/Target/Cpp/control_flow.mlir index 474fa95..436543f 100644 --- a/mlir/test/Target/Cpp/control_flow.mlir +++ b/mlir/test/Target/Cpp/control_flow.mlir @@ -8,12 +8,12 @@ func.func @simple(i64, i1) -> i64 { ^bb1: cf.br ^bb3(%a: i64) ^bb2: - %b = emitc.call "add"(%a, %a) : (i64, i64) -> i64 + %b = emitc.call_opaque "add"(%a, %a) : (i64, i64) -> i64 cf.br ^bb3(%b: i64) ^bb3(%c: i64): cf.br ^bb4(%c, %a : i64, i64) ^bb4(%d : i64, %e : i64): - %0 = emitc.call "add"(%d, %e) : (i64, i64) -> i64 + %0 = emitc.call_opaque "add"(%d, %e) : (i64, i64) -> i64 return %0 : i64 } // CPP-DECLTOP: int64_t simple(int64_t [[A:[^ ]*]], bool [[COND:[^ ]*]]) { diff --git a/mlir/test/Target/Cpp/for.mlir b/mlir/test/Target/Cpp/for.mlir index c02c8b1..90504b1 100644 --- a/mlir/test/Target/Cpp/for.mlir +++ b/mlir/test/Target/Cpp/for.mlir @@ -3,7 +3,7 @@ func.func @test_for(%arg0 : index, %arg1 : index, %arg2 : index) { emitc.for %i0 = %arg0 to %arg1 step %arg2 { - %0 = emitc.call "f"() : () -> i32 + %0 = emitc.call_opaque "f"() : () -> i32 } return } @@ -35,8 +35,8 @@ func.func @test_for_yield() { emitc.assign %s0 : i32 to %2 : i32 emitc.assign %p0 : f32 to %3 : f32 emitc.for %iter = %start to %stop step %step { - %sn = emitc.call "add"(%2, %iter) : (i32, index) -> i32 - %pn = emitc.call "mul"(%3, %iter) : (f32, index) -> f32 + %sn = emitc.call_opaque "add"(%2, %iter) : (i32, index) -> i32 + %pn = emitc.call_opaque "mul"(%3, %iter) : (f32, index) -> f32 emitc.assign %sn : i32 to %2 : i32 emitc.assign %pn : f32 to %3 : f32 emitc.yield @@ -116,8 +116,8 @@ func.func @test_for_yield_2() { emitc.assign %s0 : i32 to %2 : i32 emitc.assign %p0 : f32 to %3 : f32 emitc.for %iter = %start to %stop step %step { - %sn = emitc.call "add"(%2, %iter) : (i32, index) -> i32 - %pn = emitc.call "mul"(%3, %iter) : (f32, index) -> f32 + %sn = emitc.call_opaque "add"(%2, %iter) : (i32, index) -> i32 + %pn = emitc.call_opaque "mul"(%3, %iter) : (f32, index) -> f32 emitc.assign %sn : i32 to %2 : i32 emitc.assign %pn : f32 to %3 : f32 emitc.yield diff --git a/mlir/test/Target/Cpp/if.mlir b/mlir/test/Target/Cpp/if.mlir index beff218..743f8ad 
100644 --- a/mlir/test/Target/Cpp/if.mlir +++ b/mlir/test/Target/Cpp/if.mlir @@ -3,7 +3,7 @@ func.func @test_if(%arg0: i1, %arg1: f32) { emitc.if %arg0 { - %0 = emitc.call "func_const"(%arg1) : (f32) -> i32 + %0 = emitc.call_opaque "func_const"(%arg1) : (f32) -> i32 } return } @@ -23,9 +23,9 @@ func.func @test_if(%arg0: i1, %arg1: f32) { func.func @test_if_else(%arg0: i1, %arg1: f32) { emitc.if %arg0 { - %0 = emitc.call "func_true"(%arg1) : (f32) -> i32 + %0 = emitc.call_opaque "func_true"(%arg1) : (f32) -> i32 } else { - %0 = emitc.call "func_false"(%arg1) : (f32) -> i32 + %0 = emitc.call_opaque "func_false"(%arg1) : (f32) -> i32 } return } @@ -53,13 +53,13 @@ func.func @test_if_yield(%arg0: i1, %arg1: f32) { %x = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> i32 %y = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> f64 emitc.if %arg0 { - %1 = emitc.call "func_true_1"(%arg1) : (f32) -> i32 - %2 = emitc.call "func_true_2"(%arg1) : (f32) -> f64 + %1 = emitc.call_opaque "func_true_1"(%arg1) : (f32) -> i32 + %2 = emitc.call_opaque "func_true_2"(%arg1) : (f32) -> f64 emitc.assign %1 : i32 to %x : i32 emitc.assign %2 : f64 to %y : f64 } else { - %1 = emitc.call "func_false_1"(%arg1) : (f32) -> i32 - %2 = emitc.call "func_false_2"(%arg1) : (f32) -> f64 + %1 = emitc.call_opaque "func_false_1"(%arg1) : (f32) -> i32 + %2 = emitc.call_opaque "func_false_2"(%arg1) : (f32) -> f64 emitc.assign %1 : i32 to %x : i32 emitc.assign %2 : f64 to %y : f64 } diff --git a/mlir/test/Target/Cpp/literal_call_operand.mlir b/mlir/test/Target/Cpp/literal_call_operand.mlir index 428b66b..00adb441 100644 --- a/mlir/test/Target/Cpp/literal_call_operand.mlir +++ b/mlir/test/Target/Cpp/literal_call_operand.mlir @@ -3,7 +3,7 @@ func.func @emitc_call_operand() { %p0 = emitc.literal "M_PI" : f32 - %1 = emitc.call "foo"(%p0) : (f32) -> f32 + %1 = emitc.call_opaque "foo"(%p0) : (f32) -> f32 return } // CPP-DEFAULT: void emitc_call_operand() { @@ -15,7 +15,7 @@ func.func @emitc_call_operand() { func.func @emitc_call_operand_arg() { %p0 = emitc.literal "M_PI" : f32 - %1 = emitc.call "bar"(%p0) {args = [42 : i32, 0 : index]} : (f32) -> f32 + %1 = emitc.call_opaque "bar"(%p0) {args = [42 : i32, 0 : index]} : (f32) -> f32 return } // CPP-DEFAULT: void emitc_call_operand_arg() { diff --git a/mlir/test/Target/Cpp/types.mlir b/mlir/test/Target/Cpp/types.mlir index 8b4cecf..0585b27 100644 --- a/mlir/test/Target/Cpp/types.mlir +++ b/mlir/test/Target/Cpp/types.mlir @@ -3,15 +3,15 @@ // CHECK-LABEL: void opaque_types() { func.func @opaque_types() { // CHECK-NEXT: f(); - emitc.call "f"() {template_args = [!emitc>]} : () -> () + emitc.call_opaque "f"() {template_args = [!emitc>]} : () -> () // CHECK-NEXT: f(); - emitc.call "f"() {template_args = [!emitc>]} : () -> () + emitc.call_opaque "f"() {template_args = [!emitc>]} : () -> () // CHECK-NEXT: f(); - emitc.call "f"() {template_args = [!emitc>]} : () -> () + emitc.call_opaque "f"() {template_args = [!emitc>]} : () -> () // CHECK-NEXT: f(); - emitc.call "f"() {template_args = [!emitc>]} : () -> () + emitc.call_opaque "f"() {template_args = [!emitc>]} : () -> () // CHECK-NEXT: f>(); - emitc.call "f"() {template_args = [!emitc.opaque<"std::vector">]} : () -> () + emitc.call_opaque "f"() {template_args = [!emitc.opaque<"std::vector">]} : () -> () return } @@ -19,19 +19,19 @@ func.func @opaque_types() { // CHECK-LABEL: void ptr_types() { func.func @ptr_types() { // CHECK-NEXT: f(); - emitc.call "f"() {template_args = [!emitc.ptr]} : () -> () + emitc.call_opaque "f"() 
{template_args = [!emitc.ptr]} : () -> () // CHECK-NEXT: f(); - emitc.call "f"() {template_args = [!emitc.ptr]} : () -> () + emitc.call_opaque "f"() {template_args = [!emitc.ptr]} : () -> () // CHECK-NEXT: f(); - emitc.call "f"() {template_args = [!emitc.ptr]} : () -> () + emitc.call_opaque "f"() {template_args = [!emitc.ptr]} : () -> () // CHECK-NEXT: f(); - emitc.call "f"() {template_args = [!emitc.ptr]} : () -> () + emitc.call_opaque "f"() {template_args = [!emitc.ptr]} : () -> () // CHECK-NEXT: int32_t* [[V0:[^ ]*]] = f(); - %0 = emitc.call "f"() : () -> (!emitc.ptr) + %0 = emitc.call_opaque "f"() : () -> (!emitc.ptr) // CHECK-NEXT: int32_t** [[V1:[^ ]*]] = f([[V0:[^ ]*]]); - %1 = emitc.call "f"(%0) : (!emitc.ptr) -> (!emitc.ptr>) + %1 = emitc.call_opaque "f"(%0) : (!emitc.ptr) -> (!emitc.ptr>) // CHECK-NEXT: f(); - emitc.call "f"() {template_args = [!emitc.ptr>]} : () -> () + emitc.call_opaque "f"() {template_args = [!emitc.ptr>]} : () -> () return } -- cgit v1.1 From 8775232c49cde288c429e27dd0f4f492fbb0cefa Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 17 Nov 2023 10:35:53 +0100 Subject: [ValueTracking] Remove handling of KnownBits assumptions with invert For all practical purposes, we only care about comparisons with constant RHS in this code. In that case, an invert will be canonicalized into the constant and it will be handled by other cases. Given the complete lack of test coverage, I'm removing this code. --- llvm/lib/Analysis/AssumptionCache.cpp | 8 +----- llvm/lib/Analysis/ValueTracking.cpp | 53 ----------------------------------- 2 files changed, 1 insertion(+), 60 deletions(-) diff --git a/llvm/lib/Analysis/AssumptionCache.cpp b/llvm/lib/Analysis/AssumptionCache.cpp index bbcf25e..81b2667 100644 --- a/llvm/lib/Analysis/AssumptionCache.cpp +++ b/llvm/lib/Analysis/AssumptionCache.cpp @@ -94,13 +94,7 @@ findAffectedValues(CallBase *CI, TargetTransformInfo *TTI, if (Pred == ICmpInst::ICMP_EQ) { // For equality comparisons, we handle the case of bit inversion. auto AddAffectedFromEq = [&AddAffected](Value *V) { - Value *A; - if (match(V, m_Not(m_Value(A)))) { - AddAffected(A); - V = A; - } - - Value *B; + Value *A, *B; // (A & B) or (A | B) or (A ^ B). if (match(V, m_BitwiseLogic(m_Value(A), m_Value(B)))) { AddAffected(A); diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 5f5d7e07..e25aa9c 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -662,16 +662,6 @@ static void computeKnownBitsFromCmp(const Value *V, const ICmpInst *Cmp, // known bits from the RHS to V. Known.Zero |= RHSKnown.Zero & MaskKnown.One; Known.One |= RHSKnown.One & MaskKnown.One; - // assume(~(v & b) = a) - } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_And(m_V, m_Value(B))), - m_Value(A)))) { - KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC); - KnownBits MaskKnown = computeKnownBits(B, Depth + 1, QueryNoAC); - - // For those bits in the mask that are known to be one, we can propagate - // inverted known bits from the RHS to V. - Known.Zero |= RHSKnown.One & MaskKnown.One; - Known.One |= RHSKnown.Zero & MaskKnown.One; // assume(v | b = a) } else if (match(Cmp, m_c_ICmp(Pred, m_c_Or(m_V, m_Value(B)), m_Value(A)))) { @@ -682,16 +672,6 @@ static void computeKnownBitsFromCmp(const Value *V, const ICmpInst *Cmp, // bits from the RHS to V. 
Known.Zero |= RHSKnown.Zero & BKnown.Zero; Known.One |= RHSKnown.One & BKnown.Zero; - // assume(~(v | b) = a) - } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Or(m_V, m_Value(B))), - m_Value(A)))) { - KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC); - KnownBits BKnown = computeKnownBits(B, Depth + 1, QueryNoAC); - - // For those bits in B that are known to be zero, we can propagate - // inverted known bits from the RHS to V. - Known.Zero |= RHSKnown.One & BKnown.Zero; - Known.One |= RHSKnown.Zero & BKnown.Zero; // assume(v ^ b = a) } else if (match(Cmp, m_c_ICmp(Pred, m_c_Xor(m_V, m_Value(B)), m_Value(A)))) { @@ -705,19 +685,6 @@ static void computeKnownBitsFromCmp(const Value *V, const ICmpInst *Cmp, Known.One |= RHSKnown.One & BKnown.Zero; Known.Zero |= RHSKnown.One & BKnown.One; Known.One |= RHSKnown.Zero & BKnown.One; - // assume(~(v ^ b) = a) - } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_c_Xor(m_V, m_Value(B))), - m_Value(A)))) { - KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC); - KnownBits BKnown = computeKnownBits(B, Depth + 1, QueryNoAC); - - // For those bits in B that are known to be zero, we can propagate - // inverted known bits from the RHS to V. For those bits in B that are - // known to be one, we can propagate known bits from the RHS to V. - Known.Zero |= RHSKnown.One & BKnown.Zero; - Known.One |= RHSKnown.Zero & BKnown.Zero; - Known.Zero |= RHSKnown.Zero & BKnown.One; - Known.One |= RHSKnown.One & BKnown.One; // assume(v << c = a) } else if (match(Cmp, m_c_ICmp(Pred, m_Shl(m_V, m_ConstantInt(C)), m_Value(A))) && @@ -729,17 +696,6 @@ static void computeKnownBitsFromCmp(const Value *V, const ICmpInst *Cmp, RHSKnown.Zero.lshrInPlace(C); RHSKnown.One.lshrInPlace(C); Known = Known.unionWith(RHSKnown); - // assume(~(v << c) = a) - } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shl(m_V, m_ConstantInt(C))), - m_Value(A))) && - C < BitWidth) { - KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC); - // For those bits in RHS that are known, we can propagate them inverted - // to known bits in V shifted to the right by C. - RHSKnown.One.lshrInPlace(C); - Known.Zero |= RHSKnown.One; - RHSKnown.Zero.lshrInPlace(C); - Known.One |= RHSKnown.Zero; // assume(v >> c = a) } else if (match(Cmp, m_c_ICmp(Pred, m_Shr(m_V, m_ConstantInt(C)), m_Value(A))) && @@ -749,15 +705,6 @@ static void computeKnownBitsFromCmp(const Value *V, const ICmpInst *Cmp, // bits in V shifted to the right by C. Known.Zero |= RHSKnown.Zero << C; Known.One |= RHSKnown.One << C; - // assume(~(v >> c) = a) - } else if (match(Cmp, m_c_ICmp(Pred, m_Not(m_Shr(m_V, m_ConstantInt(C))), - m_Value(A))) && - C < BitWidth) { - KnownBits RHSKnown = computeKnownBits(A, Depth + 1, QueryNoAC); - // For those bits in RHS that are known, we can propagate them inverted - // to known bits in V shifted to the right by C. - Known.Zero |= RHSKnown.One << C; - Known.One |= RHSKnown.Zero << C; } break; case ICmpInst::ICMP_NE: { -- cgit v1.1 From b4c1421466bd1a8b024d557b259eb4406073be8d Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Fri, 17 Nov 2023 09:56:07 +0000 Subject: [mlir] Apply ClangTidy fix Remove redundant return. 
--- mlir/lib/Analysis/Presburger/IntegerRelation.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp index 6b26e09..3724df5 100644 --- a/mlir/lib/Analysis/Presburger/IntegerRelation.cpp +++ b/mlir/lib/Analysis/Presburger/IntegerRelation.cpp @@ -1351,7 +1351,6 @@ void IntegerRelation::simplify() { changed |= removeDuplicateConstraints(); } // Current set is not empty. - return; } /// Removes local variables using equalities. Each equality is checked if it -- cgit v1.1 From 1c05fe350064aa3a1784bb09829a07d501842d97 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 17 Nov 2023 09:57:05 +0000 Subject: [InstCombine] Pass InstCombineOptions instead of separate flags (NFC). (#72566) This makes it simpler to pass additional flags/options in the future. --- .../Transforms/InstCombine/InstructionCombining.cpp | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 463a7b5..5859f58 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -4367,8 +4367,7 @@ static bool combineInstructionsOverFunction( Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA, AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI, DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, - ProfileSummaryInfo *PSI, unsigned MaxIterations, bool VerifyFixpoint, - LoopInfo *LI) { + ProfileSummaryInfo *PSI, LoopInfo *LI, const InstCombineOptions &Opts) { auto &DL = F.getParent()->getDataLayout(); /// Builder - This is an IRBuilder that automatically inserts new @@ -4394,8 +4393,8 @@ static bool combineInstructionsOverFunction( while (true) { ++Iteration; - if (Iteration > MaxIterations && !VerifyFixpoint) { - LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << MaxIterations + if (Iteration > Opts.MaxIterations && !Opts.VerifyFixpoint) { + LLVM_DEBUG(dbgs() << "\n\n[IC] Iteration limit #" << Opts.MaxIterations << " on " << F.getName() << " reached; stopping without verifying fixpoint\n"); break; @@ -4414,10 +4413,10 @@ static bool combineInstructionsOverFunction( break; MadeIRChange = true; - if (Iteration > MaxIterations) { + if (Iteration > Opts.MaxIterations) { report_fatal_error( "Instruction Combining did not reach a fixpoint after " + - Twine(MaxIterations) + " iterations"); + Twine(Opts.MaxIterations) + " iterations"); } } @@ -4468,8 +4467,7 @@ PreservedAnalyses InstCombinePass::run(Function &F, &AM.getResult(F) : nullptr; if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE, - BFI, PSI, Options.MaxIterations, - Options.VerifyFixpoint, LI)) + BFI, PSI, LI, Options)) // No changes, all analyses are preserved. 
return PreservedAnalyses::all(); @@ -4518,9 +4516,7 @@ bool InstructionCombiningPass::runOnFunction(Function &F) { nullptr; return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, TTI, DT, ORE, - BFI, PSI, - InstCombineDefaultMaxIterations, - /*VerifyFixpoint */ false, LI); + BFI, PSI, LI, InstCombineOptions()); } char InstructionCombiningPass::ID = 0; -- cgit v1.1 From 58253dcbcdfafb1fb1fb5ffc43d6f11a31f35e2a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 17 Nov 2023 09:26:09 +0000 Subject: [X86] getTargetConstantBitsFromNode - bail if we're loading from a constant vector element type larger than the target value size This can be improved upon by just truncating the constant value, but the crash needs to be addressed first. Fixes #72539 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 ++ llvm/test/CodeGen/X86/pr72539.ll | 27 +++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 llvm/test/CodeGen/X86/pr72539.ll diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7f9d971..cf66707 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -4664,6 +4664,8 @@ static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits(); unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; + if ((SizeInBits % SrcEltSizeInBits) != 0) + return false; APInt UndefSrcElts(NumSrcElts, 0); SmallVector SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0)); diff --git a/llvm/test/CodeGen/X86/pr72539.ll b/llvm/test/CodeGen/X86/pr72539.ll new file mode 100644 index 0000000..fb4c98dca --- /dev/null +++ b/llvm/test/CodeGen/X86/pr72539.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX +; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX + +define void @PR72539(<8 x i32> %insertelement){ +; SSE-LABEL: PR72539: +; SSE: # %bb.0: +; SSE-NEXT: xorb $7, 0 +; SSE-NEXT: retq +; +; AVX-LABEL: PR72539: +; AVX: # %bb.0: +; AVX-NEXT: movzbl {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %eax +; AVX-NEXT: xorb %al, 0 +; AVX-NEXT: retq + %load671 = load i8, ptr addrspace(1) null, align 1 + %shufflevector = shufflevector <8 x i32> %insertelement, <8 x i32> zeroinitializer, <8 x i32> zeroinitializer + %xor68 = xor <8 x i32> %shufflevector, + %call69 = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> %xor68) + %trunc70 = trunc i32 %call69 to i8 + %xor71 = xor i8 %load671, %trunc70 + store i8 %xor71, ptr addrspace(1) null, align 1 + ret void +} +declare i32 @llvm.vector.reduce.xor.v8i32(<8 x i32>) -- cgit v1.1 From 915f6c3d6a4377e2672a95c656374d71df62e95a Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Fri, 17 Nov 2023 11:10:15 +0100 Subject: [flang][RFC] Adding a design document for assumed-rank objects (#71959) This patch adds a document describing assumed-rank objects and the related features as well as how they will be implemented in Flang. 
---
 flang/docs/AssumedRank.md | 688 ++++++++++++++++++++++++++++++++++++++++++++++
 flang/docs/index.md       |   1 +
 2 files changed, 689 insertions(+)
 create mode 100644 flang/docs/AssumedRank.md

diff --git a/flang/docs/AssumedRank.md b/flang/docs/AssumedRank.md
new file mode 100644
index 0000000..c5d2c3e
--- /dev/null
+++ b/flang/docs/AssumedRank.md
@@ -0,0 +1,688 @@
+
+# Assumed-Rank Objects
+
+An assumed-rank dummy data object is a dummy argument that takes its rank from
+its effective argument. It is a dummy argument, or the associated entity of a
+SELECT RANK construct in the `RANK DEFAULT` block. Its rank is not known at
+compile time. The rank can be anything from 0 (scalar) to the maximum allowed
+rank in Fortran (currently 15 according to Fortran 2018 standard section 5.4.6
+point 1).
+
+This document summarizes the contexts where assumed-rank objects can appear,
+and then describes how they are implemented and lowered to HLFIR and FIR. All
+section references are made to the Fortran 2018 standard.
+
+## Fortran Standard References
+
+Here is a list of sections and constraints from the Fortran standard involving
+assumed-ranks.
+
+- 7.3.2.2 TYPE
+  - C711
+- 7.5.6.1 FINAL statement
+  - C789
+- 8.5.7 CONTIGUOUS attribute
+  - C830
+- 8.5.8 DIMENSION attribute
+- 8.5.8.7 Assumed-rank entity
+  - C837
+  - C838
+  - C839
+- 11.1.10 SELECT RANK
+- 15.5.2.13 Restrictions on entities associated with dummy arguments
+  - 1 (3) (b) and (c)
+  - 1 (4) (b) and (c)
+- 15.5.2.4 Ordinary dummy variables - point 17
+- 18 Interoperability with C
+  - 18.3.6 point 2 (5)
+
+### Summary of the constraints:
+
+Assumed-rank can:
+- be pointers, allocatables (or have neither of those attributes).
+- be monomorphic or polymorphic (both `TYPE(*)` and `CLASS(*)`)
+- have all the attributes, except VALUE and CODIMENSION (C837). Notably, they
+  can have the CONTIGUOUS or OPTIONAL attributes (C830).
+- appear as an actual argument of an assumed-rank dummy (C838)
+- appear as the selector of SELECT RANK (C838)
+- appear as the argument of C_LOC and C_SIZEOF from ISO_C_BINDING (C838)
+- appear as the first argument of inquiry intrinsic functions (C838). These
+  inquiry functions, listed in table 16.1, are detailed in the "Assumed-rank
+  features" section below.
+- appear in BIND(C) and non-BIND(C) interfaces (18.1 point 3)
+- be finalized on entry as INTENT(OUT) under some conditions that prevent the
+  assumed-rank from being associated with an assumed-size.
+- be associated with any kind of scalars and arrays, including assumed-size.
+
+Assumed-rank cannot:
+- be coarrays (C837)
+- have the VALUE attribute (C837)
+- be something that is not a named variable (they cannot be the result of a
+  function or a component reference)
+- appear in a designator other than the cases listed above (C838). Notably, they
+  cannot be directly addressed, they cannot be used in elemental operations or
+  transformational intrinsics, they cannot be used in IO, they cannot be
+  assigned to....
+- be finalized on entry as INTENT(OUT) if it could be associated with an
+  assumed-size (C839).
+- be used in a reference to a procedure without an explicit interface
+  (15.4.2.2 point 3 (c)).
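+
+A minimal sketch of what these rules allow follows; the procedure and variable
+names are purely illustrative:
+
+```Fortran
+subroutine sketch(x)
+  real :: x(..)          ! assumed-rank dummy: the rank is only known at runtime
+  select rank (y => x)
+  rank (0)               ! y is a scalar in this block
+    print *, y
+  rank (1)               ! y is a rank-one array in this block
+    print *, size(y)
+  rank default           ! y is still assumed-rank here; only inquiries are allowed
+    print *, rank(y)
+  end select
+end subroutine
+! Given an explicit interface, it may be referenced with actual arguments of
+! any rank, e.g.:
+!   real :: s, a(10), b(2,3)
+!   call sketch(s); call sketch(a); call sketch(b)
+```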
+ +With regard to aliasing, assumed-rank dummy objects follow the same rules as +for assumed shapes, with the addition of 15.5.2.13 (c) which adds a rule when +the actual is a scalar (adding that TARGET assumed-rank may alias if the actual +argument is a scalar even if they have the CONTIGUOUS attribute, while it is OK +to assume that CONTIGUOUS TARGET assumed shape do not alias with other +dummies). + +--- + +## Assumed-Rank Representations in Flang + +### Representation in Semantics +In semantics (there is no concept of assumed-rank expression needed in +`evaluate::Expr`). Such symbols have either `semantics::ObjectEntityDetails` ( +dummy data objects) with a `semantics::ArraySpec` that encodes the +"assumed-rank-shape" (can be tested with IsAssumedRank()), or they have +`semantics::AssocEntityDetails` (associated entity in the RANK DEFAULT case). + +Inside a select rank, a `semantics::Symbol` is created for the associated +entity with `semantics::AssocEntityDetails` that points to the the selector +and holds the rank outside of the RANK DEFAULT case. + +Assumed-rank dummies are also represented in the +`evaluate::characteristics::TypeAndShape` (with the AssumedRank attribute) to +represent assumed-rank in procedure characteristics. + +### Runtime Representation of Assumed-Ranks +Assumed-ranks are implemented as CFI_cdesc_t (18.5.3) with the addition of an +f18 specific addendum when required for the type. This is the usual f18 +descriptor, and no changes is required to represent assumed-ranks in this data +structure. In fact, there is no difference between the runtime descriptor +created for an assumed shape and the runtime descriptor created when the +corresponding entity is passed as an assumed-rank. + +This means that any descriptor can be passed to an assumed-rank dummy (with +care to ensure that the POINTER/ALLOCATABLE attribute match the dummy argument +attributes as usual). Notably, any runtime interface that takes descriptor +arguments of any ranks already work with assumed-rank entities without any +changes or special cases. + +This also implies that the runtime cannot tell that an entity is an +assumed-rank based on its descriptor, but there seems to be not need for this +so far ("rank based" dispatching for user defined assignments and IO is not +possible with assumed-ranks, and finalization is possible, but there is no need +for the runtime to distinguish between finalization of an assumed-rank and +finalization of other entities: only the runtime rank matters). + +The only difference and difficulty is that descriptor storage size of +assumed-rank cannot be precisely known at compile time, and this impacts the +way descriptor copies are generated in inline code. The size can still be +maximized using the maximum rank, which the runtime code already does when +creating temporary descriptor in many cases. Inline code also needs care if it +needs to access the descriptor addendum (like the type descriptor), since its +offset will not be a compile time constant as usual. + +Note that an alternative to maximizing the allocation of assumed-rank temporary +descriptor could be to use automatic allocation based on the rank of the input +descriptor, but this would make stack allocation analysis more complex (tools +will likely not have the Fortran knowledge that this allocation size is bounded +for instance) while the stack "over" allocation is likely reasonable (24 bytes +per dimension). Hence the selection of the simple approach using static size +allocation to the maximum rank. 
+ +### Representation in FIR and HLFIR +SSA values for assumed-rank entities have an MLIR type containing a +`!fir.array<*xT>` sequence type wrapped in a `!fir.box` or `!fir.class` type +(additionally wrapped in a `!fir.ref` type for pointers and allocatables). + +Examples: +`INTEGER :: x(..)` -> `!fir.box>` +`CLASS(*) :: x(..)` -> `!fir.class>` +`TYPE(*) :: x(..)` -> `!fir.box>` +`REAL, ALLOCATABLE :: x(..)` -> `!fir.ref>>>` +`TYPE(t), POINTER :: x(..)` -> `!fir.ref>>>>` + +All these FIR types are implemented as the address of a CFI_cdesc_t in code +generation. + +There is no need to allow assumed-rank "expression" in HLFIR (hlfir.expr) since +assumed-rank cannot appear in expressions (except as the actual argument to an +assumed-rank dummy). Assumed-rank are variables. Also, since they cannot have +the VALUE attribute, there is no need to use the hlfir.as_expr + +hlfir.associate idiom to make copies for them. + +FIR/HLFIR operation where assumed-rank may appear: +- as `hlfir.declare` and `fir.declare` operand and result. +- as `fir.convert` operand and/or result. +- as `fir.load` operand and result (POINTER and ALLOCATABLE dereference). +- as a block argument (dummy argument). +- as `fir.rebox_assumed_rank` operand/result (new operation to change some + fields of assumed-rank descriptors). +- as `fir.box_rank` operand (rank inquiry). +- as `fir.box_dim` operand (brutal user inquiry about the bounds of an + assumed-rank in a compile time constant dimension). +- as `fir.box_addr` operand (to get the base address in inlined code for + C_LOC). +- as `fir.box_elesize` operand (to implement LEN and STORAGE_SIZE). +- as `fir.absent` result (passing absent actual to OPTIONAL assumed-rank dummy) +- as `fir.is_present` operand (PRESENT inquiry) +- as `hlfir.copy_in` and `hlfir.copy_out` operand and result (copy in and + copy-out of assumed-rank) +- as `fir.alloca` type and result (when creating an assumed-rank POINTER dummy + from a non POINTER dummy). +- as `fir.store` operands (same case as `fir.alloca`). + +FIR/HLFIR Operations that should not need to accept assumed-ranks but where it +could still be relevant: +- `fir.box_tdesc` and `fir.box_typecode` (polymorphic assumed-rank cannot + appear in a SELECT TYPE directly without using a SELECT RANK). Given the + CFI_cdesc_t structure, no change would be needed for `fir.box_typecode` to + support assumed-ranks, but `fir.box_tdesc` would require change since the + position of the type descriptor pointer depends on the rank. +- as `fir.allocmem` / `fir.global` result (assumed-ranks are never local/global + entities). +- as `fir.embox` result (When creating descriptor for an explicit shape, the + descriptor can be created with the entity rank, and then casted via +`fir.convert`). + +It is not expected for any other FIR or HLFIR operations to handle assumed-rank +SSA values. + +#### Summary of the impact in FIR +One new operation is needed, `fir.rebox_assumed_rank`, the rational being that +fir.rebox codegen is already quite complex and not all the aspects of fir.rebox +matters for assumed-ranks (only simple field changes are required with +assumed-ranks). Also, this operation will be allowed to take an operand in +memory to avoid expensive fir.load of pointer/allocatable inputs. The operation +will also allow creating rank-one assumed-size descriptor from an input +assumed-rank descriptor to cover the SELECT RANK `RANK(*)` case. 
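+
+For illustration, one situation where such simple descriptor field changes are
+needed is passing an assumed-rank actual to a BIND(C) assumed-rank dummy, whose
+descriptor lower bounds must be zero (the names below are illustrative only):
+
+```Fortran
+subroutine pass_to_c(x)
+  real :: x(..)
+  interface
+    subroutine c_bar(x) bind(c)
+      real :: x(..)
+    end subroutine
+  end interface
+  ! The incoming descriptor cannot be forwarded as-is: a copy with zero lower
+  ! bounds must be made for the BIND(C) dummy. This is the kind of simple
+  ! field rewrite the proposed fir.rebox_assumed_rank operation is meant to do.
+  call c_bar(x)
+end subroutine
+```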
+ +It is proposed that the FIR descriptor inquiry operation (fir.box_addr, +fir.box_rank, fir.box_dim, fir.box_elesize at least) be allowed to take +fir.ref arguments (allocatable and pointer descriptors) directly +instead of generating a fir.load first. A conditional "read" effect will be +added in such case. Again, the purpose is to avoid generating descriptor copies +for the sole purpose of satisfying the SSA IR constraints. This change will +likely benefit the non assumed-rank case too (even though LLVM is quite good at +removing pointless descriptor copies in these cases). + +It will be ensured that all the operation listed above accept assumed-rank +operands (both the verifiers and coedgen). The codegen of `fir.load`, +`fir.alloca`, `fir.store`, `hlfir.copy_in` and `hlfir.copy_out` will need +special handling for assumed-ranks. + +### Representation in LLVM IR + +Assumed-rank descriptor types are lowered to the LLVM type of a CFI_cdesc_t +descriptor with no dimension array field and no addendum. That way, any inline +code attempt to directly access dimensions and addendum with constant offset +will be invalid for more safety, but it will still be easy to generate LLVM GEP +to address the first descriptor fields in LLVM (to get the base address, rank, +type code and attributes). + +`!fir.box>` -> `!llvm.struct<(ptr, i64, i32, i8, i8, i8, i8>` + +## Assumed-rank Features + +This section list the different Fortran features where assumed-rank objects are +involved and describes the related implementation design. + +### Assumed-rank in procedure references +Assumed-rank arguments are implemented as being the address of a CFI_cdesc_t. + +When passing an actual argument to an assumed-rank dummy, the following points +need special attention and are further described below: +- Copy-in/copy-out when required +- Creation of forwarding of the assumed-rank dummy descriptor (including when + the actual is an assumed-size). +- Finalization, deallocation, and initialization of INTENT(OUT) assumed-rank + dummy. + +OPTIONAL assumed-ranks are implemented like other non assumed-rank OPTIONAL +objects passed by descriptor: an absent assumed-rank is represented by a null +pointer to a CFI_cdesc_t. + +The passing interface for assumed-rank described above and below is compliant +by default with the BIND(C) case, except for the assumed-rank dummy descriptor +lower bounds, which are only set to zeros in BIND(C) interface because it +implies in most of the cases to create a new descriptor. + +VALUE is forbidden for assumed-rank dummies, so there is nothing to be done for +it (although since copy-in/copy-out is possible, the compiler must anyway deal +with creating assumed-rank copies, so it would likely not be an issue to relax +this constraint). + +#### Copy-in and Copy out +Copy-in and copy-out is required when passing an actual that is not contiguous +to a non POINTER CONTIGUOUS assumed-rank. + +When the actual argument is ranked, the copy-in/copy-out can be performed on +the ranked actual argument where the dynamic type has been aligned with the +dummy type if needed (passing CLASS(T) to TYPE(T)) as illustrated below. + +```Fortran +module m +type t + integer :: i +end type +contains +subroutine foo(x) + class(t) :: x(:) + interface +  subroutine bar(x) +    import :: t +    type(t), contiguous :: x(..) +  end subroutine + end interface + ! 
copy-in and copy-out is required around bar
+ call bar(x)
+end
+end module
+```
+
+When the actual is also an assumed-rank, the same copy-in/copy-out need may
+arise, and the `hlfir.copy_in` and `hlfir.copy_out` operations are also used to
+cover this case. The `hlfir.copy_in` operation is implemented using the
+`IsContiguous` runtime (can be used as-is) and the `AssignTemporary` runtime.
+
+The difference with the ranked case is that more care is needed to create the
+output descriptor passed to `AssignTemporary`: it must be allocated to the
+maximum rank with the same type as the input descriptor and only the descriptor
+fields prior to the array dimensions will be initialized to those of an
+unallocated descriptor prior to the runtime call (`AssignTemporary` copies the
+addendum if needed).
+
+```Fortran
+subroutine foo2(x)
+ class(t) :: x(..)
+ interface
+  subroutine bar(x)
+    import :: t
+    type(t), contiguous :: x(..)
+  end subroutine
+ end interface
+ ! copy-in and copy-out is required around bar
+ call bar(x)
+end
+```
+#### Creating the descriptor for assumed-rank dummies
+
+There are four cases to distinguish:
+1. Actual does not have a descriptor (and is therefore ranked)
+2. Actual has a descriptor that can be forwarded for the dummy
+3. Actual has a ranked descriptor that cannot be forwarded for the dummy
+4. Actual has an assumed-rank descriptor that cannot be forwarded for the dummy
+
+For the first case, a descriptor will be created for the dummy with `fir.embox`
+as if the dummy had the rank of the actual argument. This is the same logic as
+when dealing with assumed shape or INTENT(IN) POINTER dummy arguments, except
+that an extra cast to the assumed-rank descriptor type is added (no-op at
+runtime). Care must be taken to set the final dimension extent to -1 in the
+descriptor created for an assumed-size actual argument. Note that the
+descriptor created for an assumed-size still has the rank of the assumed-size;
+a rank-one descriptor will be created for it if needed in a RANK(*) block
+(nothing says that an assumed-size should be passed as a rank-one array in
+15.5.2.4 point 17).
+
+For the second case, a cast is added to the assumed-rank descriptor type if it
+is not one already and the descriptor is forwarded.
+
+For the third case, a new ranked descriptor with the dummy attribute/lower
+bounds is created from the actual argument descriptor with `fir.rebox` as it is
+done when passing to an assumed shape dummy, and a cast to the assumed-rank
+descriptor type is added.
+
+The last case is the same as the third one, except that the descriptor
+manipulation is more complex since the storage size of the descriptor is
+unknown. `fir.rebox` codegen is already quite complex since it deals with
+creating descriptors for descriptor-based array sections and pointer remapping.
+Both of those are meaningless in this case where the output descriptor is the
+same as the input one, except for the lower bounds, attribute, and derived type
+pointer field that may need to be changed to match the values describing the
+dummy. A simpler `fir.rebox_assumed_rank` operation is added for this use case.
+Notably, this operation can take fir.ref inputs to avoid creating an
+expensive and useless fir.load of POINTER/ALLOCATABLE descriptors.
+
+Fortran requires the compiler to fall in the 3rd and 4th cases and create a
+descriptor temporary for the dummy far more often than one would think and hope.
An
+annex section below discusses cases that force the compiler to create a new
+descriptor for the dummy even if the actual already has a descriptor. These are
+the same situations as with non assumed-rank arguments, but when passing an
+assumed-rank to an assumed-rank dummy, the cost of this extra copy is higher.
+
+#### Intent(out) assumed-rank finalization, deallocation, initialization
+
+The standard prevents INTENT(OUT) assumed-ranks requiring finalization from
+being associated with assumed-size arrays (C839) because there would be no way
+to finalize such entities. But INTENT(OUT) finalization is still possible if the
+actual is not an assumed-size and not a nonpointer nonallocatable assumed-rank.
+
+Flang therefore needs to implement finalization, deallocation and
+initialization of INTENT(OUT) as usual. Non pointer non allocatable INTENT(OUT)
+finalization is done via a call to the `Destroy` runtime API, which takes a
+descriptor and can be directly used with an assumed-rank descriptor with no
+change. The initialization is done via a call to the `Initialize` runtime API,
+which takes a descriptor and can also be used directly with an assumed-rank
+descriptor. Conditional deallocation of an INTENT(OUT) allocatable is done via
+an inline allocation status check and either an inline deallocate for intrinsic
+types, or a runtime call to `Deallocate` for the other cases. For
+assumed-ranks, the runtime call is always used regardless of the type to avoid
+inline descriptor manipulations. The `Deallocate` runtime API also works with
+assumed-rank descriptors with no changes (like any runtime API taking
+descriptors of any rank).
+
+```Fortran
+subroutine foo(x)
+ class(*), allocatable :: x(..)
+ interface
+  subroutine bar(x)
+    class(*), intent(out) :: x(..)
+  end subroutine
+ end interface
+ ! x may require finalization and initialization on bar entry.
+ call bar(x)
+end
+subroutine bar(x)
+  class(*), intent(out) :: x(..)
+end subroutine
+```
+### Select Rank
+
+Select rank is implemented with a rank inquiry (and a last extent inquiry for
+`RANK(*)`), followed by a jump to the related block where, for the `RANK(cst)`
+cases, the selector descriptor is cast to a descriptor with the associated
+entity rank for the current block. In the `RANK DEFAULT` case, the input
+descriptor is kept with no cast, and in the `RANK(*)` case, a rank-one
+descriptor is created with the same dynamic type as the input.
+These new descriptor values are mapped to the associated entity symbol and
+lowering proceeds as usual. This is very similar to how Select Type is
+implemented. The `RANK(*)` case is a bit odd: it detects assumed-ranks
+associated with assumed-size arrays regardless of the rank, and takes
+precedence over any rank based matching.
+
+Note that `-1` is a magic extent number that encodes that a descriptor describes
+an entity that is an assumed-size (user specified extents of explicit shape
+arrays are always normalized to zero when negative, so `-1` is a safe value to
+identify a descriptor created for an assumed-size). It is actually well
+specified for BIND(C) (18.5.2 point 1) and is always used as such in flang
+descriptors.
+
+The implementation of SELECT RANK is done as follows:
+- Read the rank `r` in the descriptor
+- If there is a `RANK(*)`, read the extent in dimension `r`. If it is `-1`,
+  jump to the `RANK(*)` block. Otherwise, continue to the steps below.
+- For each `RANK(constant)` case, compare `constant` to `r`. Stop at the first
+  match and jump to the related block.
+  The order of the comparisons does not matter (there cannot be more than one
+  match).
+- Jump to the `RANK DEFAULT` block if any. Otherwise jump to the end of the
+  construct.
+
+Each case block jumps to the end of the construct when it is done. As opposed
+to SELECT TYPE, no clean-up should be needed at the construct level since the
+select-rank selector is a named entity and cannot be a temporary with the
+lifetime of the construct.
+
+Except for the `RANK(*)` case, the branching logic is implemented in FIR with a
+`fir.select_case` operating on the rank.
+
+Example:
+
+```Fortran
+subroutine test(x)
+ interface
+  subroutine assumed_size(x)
+    real :: x(*)
+  end subroutine
+  subroutine scalar(x)
+    real :: x
+  end subroutine
+  subroutine rank_one(x)
+    real :: x(:)
+  end subroutine
+  subroutine many_dim_array(x)
+    real :: x(..)
+  end subroutine
+ end interface
+
+ real :: x(..)
+ select rank (y => x)
+ rank(*)
+   call assumed_size(y)
+ rank(0)
+   call scalar(y)
+ rank(1)
+   call rank_one(y)
+ rank default
+   call many_dim_array(y)
+ end select
+end subroutine
+```
+
+Pseudo FIR for the example (some converts and SSA constant creations are not
+shown for clarity):
+
+```
+func.func @_QPtest(%arg0: !fir.box>) {
+  %x:2 = hlfir.declare %arg0 {uniq_name = "_QFtestEx"} : (!fir.box>) -> (!fir.box>, !fir.box>)
+  %r = fir.box_rank %x#1 : (!fir.box>) -> i32
+  %last_extent = fir.call @_FortranASizeDim(%x#1, %r, %sourcename, %sourceline)
+  %is_assumed_size = arith.cmpi eq %last_extent, %c-1: (i64, i64) -> i1
+  cf.cond_br %is_assumed_size, ^bb_assumed_size, ^bb_not_assumed_size
+^bb_assumed_size:
+  %r1_box = fir.rebox_assumed_rank %x#0 : (!fir.box>) -> !fir.box>
+  %addr = fir.box_addr %r1_box : (!fir.box>) -> !fir.ref>
+  fir.call @_QPassumed_size(%addr) (!fir.ref>) -> ()
+  cf.br ^bb_end
+^bb_not_assumed_size:
+  fir.select_case %r : i32 [#fir.point, %c0, ^bb_scalar, #fir.point, %c1, ^bb_rank1, unit, ^bb_default]
+^bb_scalar:
+  %scalar_cast = fir.convert %x#1 : (!fir.box>) -> !fir.box
+  %x_scalar = fir.box_addr %scalar_cast: (!fir.box) -> !fir.ref
+  fir.call @_QPscalar(%x_scalar) (!fir.ref) -> ()
+  cf.br ^bb_end
+^bb_rank1:
+  %rank1_cast = fir.convert %x#1 : (!fir.box>) -> !fir.box>
+  fir.call @_QPrank_one(%rank1_cast) (!fir.box>) -> ()
+  cf.br ^bb_end
+^bb_default:
+  fir.call @_QPmany_dim_array(%x#1) (!fir.box>) -> ()
+  cf.br ^bb_end
+^bb_end:
+  return
+}
+```
+
+### Inquiry intrinsic functions
+#### ALLOCATED and ASSOCIATED
+Implemented inline with `fir.box_addr` (reading the descriptor base address
+inline). Currently, FIR descriptor inquiries happen at the "descriptor value"
+level (requiring a fir.load of the POINTER or ALLOCATABLE !fir.ref>);
+to satisfy the SSA value semantics, the fir.load creates a copy of the
+underlying descriptor storage. With assumed-ranks, this copy will be
+"expensive" and harder to optimize out given that the descriptor storage size
+is not a compile-time constant. To avoid this extra cost, ALLOCATABLE and
+POINTER assumed-ranks will be cast to scalar descriptors before the `fir.load`.
+
+```Fortran
+real, allocatable :: x(..)
+print *, allocated(x)
+```
+
+```
+%1 = fir.convert %x : (!fir.ref>>>) -> !fir.ref>>
+%2 = fir.load %1 : !fir.ref>>
+%addr = fir.box_addr %2 : (!fir.box>) -> fir.ref
+# .... "addr != null" as usual
+```
+#### LEN and STORAGE_SIZE
+Implemented inline with `fir.box_elesize`, with the same approach as
+ALLOCATED/ASSOCIATED when dealing with the fir.box load for POINTERs and
+ALLOCATABLEs.
+
+```Fortran
+character(*) :: x(..)
+print *, len(x)
+```
+
+```
+%ele_size = fir.box_elesize %x : (!fir.box>>) -> i64
+# .... divide by character KIND byte size if needed as usual
+```
+#### PRESENT
+Implemented inline with `fir.is_present`, which ends up being implemented as a
+check that the descriptor address is not null, just like with OPTIONAL assumed
+shapes and OPTIONAL pointers and allocatables.
+
+```Fortran
+real, optional :: x(..)
+print *, present(x)
+```
+
+```
+%is_present = fir.is_present %x : (!fir.box>) -> i1
+```
+#### RANK
+Implemented inline with `fir.box_rank`, which simply reads the descriptor rank
+field.
+
+```Fortran
+real :: x(..)
+print *, rank(x)
+```
+
+```
+%rank = fir.box_rank %x : (!fir.box>) -> i32
+```
+#### SIZE
+The runtime can be used as it is done for assumed shapes. When DIM is present
+and is a constant, `fir.box_dim` can also be used, with the option to add a
+runtime check that DIM does not exceed the rank. Pointers and allocatables are
+dereferenced, which in FIR currently creates a descriptor copy that cannot be
+simplified like for the previous inquiries by inserting a cast before the
+fir.load (the dimension info must be correctly copied).
+
+#### LBOUND, SHAPE, and UBOUND
+When DIM is present, the runtime can be used as it is currently done with
+assumed shapes. When DIM is absent, the result is a rank-one array whose
+extent is the rank. The runtime has an entry for UBOUND that takes a descriptor
+and allocates the result as needed, so the same logic as for assumed shapes can
+be used.
+
+There is currently no such entry for LBOUND/SHAPE; it would likely be best to
+add one rather than to juggle with inline code. Pointer and allocatable
+dereferencing is handled as with SIZE.
+
+#### EXTENDS_TYPE_OF, SAME_TYPE_AS, and IS_CONTIGUOUS
+Using the runtime as it is done currently with assumed shapes. Pointer and
+allocatable dereferencing is handled as with SIZE.
+
+#### C_LOC from ISO_C_BINDING
+Implemented with `fir.box_addr`, as with the other C_LOC cases for entities
+that have descriptors.
+
+#### C_SIZE_OF from ISO_C_BINDING
+Implemented as STORAGE_SIZE * SIZE.
+
+#### Floating point inquiries and NEW_LINE
+BIT_SIZE, DIGITS, EPSILON, HUGE, KIND, MAXEXPONENT, MINEXPONENT, NEW_LINE,
+PRECISION, RADIX, RANGE, and TINY all accept assumed-ranks, but they are always
+constant folded by semantics based on the type, so lowering does not need to
+deal with them.
+
+#### Coarray inquiries
+Assumed-ranks cannot be coarrays (C837), but they can still technically appear
+in COSHAPE (which should obviously return zero). They cannot appear in LBOUND,
+LCOBOUND, UBOUND, and UCOBOUND forms that require the argument to be a coarray.
+
+## Annex 1 - Descriptor temporary for the dummy arguments
+
+When passing an actual argument that has a descriptor to a dummy that must be
+passed by descriptor, one could expect that the descriptor of the actual can
+just be forwarded to the dummy, but this is unfortunately not possible in quite
+a few cases. This is not specific to assumed-ranks, but since the cost of
+descriptor temporaries is higher for assumed-ranks, it is discussed here.
+
+Below are the reasons for which a new descriptor may be required:
+1. passing a POINTER to a non POINTER
+2. setting the descriptor CFI_cdesc_t `attribute` according to the dummy
+   POINTER/ALLOCATABLE attributes (18.3.6 point 4 for the BIND(C) case).
+3. setting the CFI_cdesc_t lower bounds to zero for a BIND(C) assumed
+   shape/rank dummy (18.5.3 point 3).
+4. setting the derived type pointer to the dummy dynamic type when passing a
+   CLASS() actual to a TYPE() dummy (see the sketch below).
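+
+As a small sketch of the 4th reason (assuming a derived type `t` visible to
+both routines, as in the earlier examples; the `foo4`/`bar4` names are
+illustrative), the polymorphic actual below already has a descriptor, but its
+derived type pointer describes the dynamic type, so a new descriptor must be
+created for the TYPE(t) dummy:
+
+```Fortran
+subroutine foo4(x)
+ class(t) :: x(..)
+ interface
+  subroutine bar4(x)
+    import :: t
+    type(t) :: x(..)
+  end subroutine
+ end interface
+ ! x's descriptor cannot be forwarded: a new descriptor with the
+ ! derived type information of TYPE(t) is created for the dummy.
+ call bar4(x)
+end
+```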
+
+Justification of 1.:
+When passing a POINTER to a non POINTER, the target of the pointer is passed,
+and nothing prevents the association status of the actual argument from
+changing during the call (e.g. if the POINTER is another argument of the call,
+or is a module variable, it may be re-associated in the call). These
+association status changes of the actual should not impact the dummy, so they
+must not share the same descriptor.
+
+Justification of 2.:
+In the BIND(C) case, this is required by 18.3.6 point 4. Outside of the BIND(C)
+case, this should still be done because any runtime call where the dummy
+descriptor is forwarded may misbehave if the ALLOCATABLE/POINTER attribute is
+not the one of the dummy (e.g. reallocation could be triggered instead of
+padding/trimming characters).
+
+Justification of 3:
+18.5.3 point 3.
+
+Justification of 4:
+If the descriptor derived type info pointer is not the one of the dummy dynamic
+type, many runtime calls like IO and assignment will misbehave when being
+provided the dummy descriptor.
+
+For points 2., 3., and 4., one could be tempted to change the descriptor fields
+before and after the call, but this is risky since this would assume nothing
+will access the actual argument descriptor during the call. And even without
+bringing in any potential asynchronous behavior of the OpenMP/OpenACC/CUDA
+Fortran extensions, the actual argument descriptor may be passed to the call as
+another argument, as a POINTER or ALLOCATABLE with "different" lower bounds
+(and it could also be accessed via host or use association in general).
+
+
+## Annex 2 - Assumed-Rank Objects and IGNORE_TKR
+
+It is possible to:
+- Set IGNORE_TKR(TK) on assumed-rank dummies (but TYPE(*) is better when
+  possible).
+- Pass an assumed-rank to an IGNORE_TKR(R) dummy that is not passed
+  by descriptor (explicit shape and assumed-size). Note that copy-in and
+  copy-out will be performed for the dummy.
+
+It is not possible to:
+- Set IGNORE_TKR(R) on an assumed-rank dummy.
+
+Example:
+
+```Fortran
+subroutine test(assumed_rank_actual)
+interface
+ subroutine assumed_size_dummy(x)
+  !dir$ ignore_tkr(tkr) x
+  integer :: x(*)
+ end subroutine
+ subroutine any_type_assumed_rank(x)
+  !dir$ ignore_tkr(tk) x
+  integer :: x(..)
+ end subroutine
+end interface
+ real :: assumed_rank_actual(..)
+ call assumed_size_dummy(assumed_rank_actual) !OK
+ call any_type_assumed_rank(assumed_rank_actual) !OK
+end subroutine
+```
+
+## Annex 3 - Test Plan
+
+The MPI_f08 module makes use of assumed-ranks (see
+https://www.mpi-forum.org/docs/mpi-3.1/mpi31-report.pdf). As such, compiling
+the MPI_f08 modules of MPI libraries and some applications that use MPI_f08
+will be a good test for the implementation of this feature.
diff --git a/flang/docs/index.md b/flang/docs/index.md
index 5c9b889..a619507 100644
--- a/flang/docs/index.md
+++ b/flang/docs/index.md
@@ -41,6 +41,7 @@ on how to get in touch with us and to learn more about the current status.
Aliasing AliasingAnalysisFIR ArrayComposition + AssumedRank BijectiveInternalNameUniquing Calls Character -- cgit v1.1 From a66085c84c829f1685a3d73f7f7ffa2ad1e9f81f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 17 Nov 2023 10:11:48 +0000 Subject: [X86] vec_fabs.ll - sort tests into 128/256/512-bit vector types --- llvm/test/CodeGen/X86/vec_fabs.ll | 171 ++++++++++++++++++++------------------ 1 file changed, 91 insertions(+), 80 deletions(-) diff --git a/llvm/test/CodeGen/X86/vec_fabs.ll b/llvm/test/CodeGen/X86/vec_fabs.ll index 8af067d..cc10689 100644 --- a/llvm/test/CodeGen/X86/vec_fabs.ll +++ b/llvm/test/CodeGen/X86/vec_fabs.ll @@ -10,6 +10,10 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X64,X64-AVX512FP16 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX512VLDQ +; +; 128-bit Vectors +; + define <2 x double> @fabs_v2f64(<2 x double> %p) { ; X86-AVX-LABEL: fabs_v2f64: ; X86-AVX: # %bb.0: @@ -92,6 +96,49 @@ define <4 x float> @fabs_v4f32(<4 x float> %p) { } declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p) +define <8 x half> @fabs_v8f16(ptr %p) { +; X86-AVX1-LABEL: fabs_v8f16: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl 4(%esp), [[ADDRREG:%.*]] +; X86-AVX1-NEXT: vmovaps ([[ADDRREG]]), %xmm0 +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: retl + +; X86-AVX2-LABEL: fabs_v8f16: +; X86-AVX2: # %bb.0: +; X86-AVX2-NEXT: movl 4(%esp), [[REG:%.*]] +; X86-AVX2-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; X86-AVX2-NEXT: vpand ([[REG]]), %xmm0, %xmm0 +; X86-AVX2-NEXT: retl + +; X64-AVX512VL-LABEL: fabs_v8f16: +; X64-AVX512VL: # %bb.0: +; X64-AVX512VL-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-AVX512VL-NEXT: vpand (%rdi), %xmm0, %xmm0 +; X64-AVX512VL-NEXT: retq + +; X64-AVX1-LABEL: fabs_v8f16: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: retq + +; X64-AVX2-LABEL: fabs_v8f16: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-AVX2-NEXT: vpand (%rdi), %xmm0, %xmm0 +; X64-AVX2-NEXT: retq + + %v = load <8 x half>, ptr %p, align 16 + %nnv = call <8 x half> @llvm.fabs.v8f16(<8 x half> %v) + ret <8 x half> %nnv +} +declare <8 x half> @llvm.fabs.v8f16(<8 x half> %p) + +; +; 256-bit Vectors +; + define <4 x double> @fabs_v4f64(<4 x double> %p) { ; X86-AVX1-LABEL: fabs_v4f64: ; X86-AVX1: # %bb.0: @@ -139,86 +186,6 @@ define <4 x double> @fabs_v4f64(<4 x double> %p) { } declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p) -define <8 x half> @fabs_v8f16(ptr %p) { -; X86-AVX1-LABEL: fabs_v8f16: -; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: movl 4(%esp), [[ADDRREG:%.*]] -; X86-AVX1-NEXT: vmovaps ([[ADDRREG]]), %xmm0 -; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 -; X86-AVX1-NEXT: retl - -; X86-AVX2-LABEL: fabs_v8f16: -; X86-AVX2: # %bb.0: -; X86-AVX2-NEXT: movl 4(%esp), [[REG:%.*]] -; X86-AVX2-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-AVX2-NEXT: vpand ([[REG]]), %xmm0, %xmm0 -; X86-AVX2-NEXT: retl - -; X64-AVX512VL-LABEL: fabs_v8f16: -; X64-AVX512VL: # %bb.0: -; X64-AVX512VL-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-AVX512VL-NEXT: vpand (%rdi), %xmm0, %xmm0 -; X64-AVX512VL-NEXT: retq - -; X64-AVX1-LABEL: fabs_v8f16: -; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 -; X64-AVX1-NEXT: vandps 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 -; X64-AVX1-NEXT: retq - -; X64-AVX2-LABEL: fabs_v8f16: -; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-AVX2-NEXT: vpand (%rdi), %xmm0, %xmm0 -; X64-AVX2-NEXT: retq - - %v = load <8 x half>, ptr %p, align 16 - %nnv = call <8 x half> @llvm.fabs.v8f16(<8 x half> %v) - ret <8 x half> %nnv -} -declare <8 x half> @llvm.fabs.v8f16(<8 x half> %p) - -define <16 x half> @fabs_v16f16(ptr %p) { -; X86-AVX512FP16-LABEL: fabs_v16f16: -; X86-AVX512FP16: # %bb.0: -; X86-AVX512FP16-NEXT: movl 4(%esp), [[REG:%.*]] -; X86-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, [[YMM:%ymm[0-9]+]] -; X86-AVX512FP16-NEXT: vpand ([[REG]]), [[YMM]], [[YMM]] -; X86-AVX512FP16-NEXT: retl - -; X64-AVX512FP16-LABEL: fabs_v16f16: -; X64-AVX512FP16: # %bb.0: -; X64-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), [[YMM:%ymm[0-9]+]] -; X64-AVX512FP16-NEXT: vpand (%rdi), [[YMM]], [[YMM]] -; X64-AVX512FP16-NEXT: retq -; - %v = load <16 x half>, ptr %p, align 32 - %nnv = call <16 x half> @llvm.fabs.v16f16(<16 x half> %v) - ret <16 x half> %nnv -} -declare <16 x half> @llvm.fabs.v16f16(<16 x half> %p) - -define <32 x half> @fabs_v32f16(ptr %p) { -; X86-AVX512FP16-LABEL: fabs_v32f16: -; X86-AVX512FP16: # %bb.0: -; X86-AVX512FP16-NEXT: movl 4(%esp), [[REG:%.*]] -; X86-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, [[ZMM:%zmm[0-9]+]] -; X86-AVX512FP16-NEXT: vpandq ([[REG]]), [[ZMM]], [[ZMM]] -; X86-AVX512FP16-NEXT: retl - -; X64-AVX512FP16-LABEL: fabs_v32f16: -; X64-AVX512FP16: # %bb.0: -; X64-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), [[ZMM:%zmm[0-9]+]] -; X64-AVX512FP16-NEXT: vpandq (%rdi), [[ZMM]], [[ZMM]] -; X64-AVX512FP16-NEXT: retq - - %v = load <32 x half>, ptr %p, align 64 - %nnv = call <32 x half> @llvm.fabs.v32f16(<32 x half> %v) - ret <32 x half> %nnv -} -declare <32 x half> @llvm.fabs.v32f16(<32 x half> %p) - - define <8 x float> @fabs_v8f32(<8 x float> %p) { ; X86-AVX1-LABEL: fabs_v8f32: ; X86-AVX1: # %bb.0: @@ -266,6 +233,30 @@ define <8 x float> @fabs_v8f32(<8 x float> %p) { } declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p) +define <16 x half> @fabs_v16f16(ptr %p) { +; X86-AVX512FP16-LABEL: fabs_v16f16: +; X86-AVX512FP16: # %bb.0: +; X86-AVX512FP16-NEXT: movl 4(%esp), [[REG:%.*]] +; X86-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, [[YMM:%ymm[0-9]+]] +; X86-AVX512FP16-NEXT: vpand ([[REG]]), [[YMM]], [[YMM]] +; X86-AVX512FP16-NEXT: retl + +; X64-AVX512FP16-LABEL: fabs_v16f16: +; X64-AVX512FP16: # %bb.0: +; X64-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), [[YMM:%ymm[0-9]+]] +; X64-AVX512FP16-NEXT: vpand (%rdi), [[YMM]], [[YMM]] +; X64-AVX512FP16-NEXT: retq +; + %v = load <16 x half>, ptr %p, align 32 + %nnv = call <16 x half> @llvm.fabs.v16f16(<16 x half> %v) + ret <16 x half> %nnv +} +declare <16 x half> @llvm.fabs.v16f16(<16 x half> %p) + +; +; 512-bit Vectors +; + define <8 x double> @fabs_v8f64(<8 x double> %p) { ; X86-AVX-LABEL: fabs_v8f64: ; X86-AVX: # %bb.0: @@ -344,6 +335,26 @@ define <16 x float> @fabs_v16f32(<16 x float> %p) { } declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p) +define <32 x half> @fabs_v32f16(ptr %p) { +; X86-AVX512FP16-LABEL: fabs_v32f16: +; X86-AVX512FP16: # %bb.0: +; X86-AVX512FP16-NEXT: movl 4(%esp), [[REG:%.*]] +; X86-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, [[ZMM:%zmm[0-9]+]] +; X86-AVX512FP16-NEXT: vpandq ([[REG]]), [[ZMM]], [[ZMM]] +; X86-AVX512FP16-NEXT: retl + +; X64-AVX512FP16-LABEL: 
fabs_v32f16: +; X64-AVX512FP16: # %bb.0: +; X64-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), [[ZMM:%zmm[0-9]+]] +; X64-AVX512FP16-NEXT: vpandq (%rdi), [[ZMM]], [[ZMM]] +; X64-AVX512FP16-NEXT: retq + + %v = load <32 x half>, ptr %p, align 64 + %nnv = call <32 x half> @llvm.fabs.v32f16(<32 x half> %v) + ret <32 x half> %nnv +} +declare <32 x half> @llvm.fabs.v32f16(<32 x half> %p) + ; PR20354: when generating code for a vector fabs op, ; make sure that we're only turning off the sign bit of each float value. ; No constant pool loads or vector ops are needed for the fabs of a -- cgit v1.1 From 0b0440613f94cced3b047f0105a7770b07662e20 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 17 Nov 2023 10:31:09 +0000 Subject: [X86] vec_fabs.ll - regenerate checks and add common AVX512 prefixes --- llvm/test/CodeGen/X86/vec_fabs.ll | 4251 ++++++++++++++++++++++++++++++++++++- 1 file changed, 4169 insertions(+), 82 deletions(-) diff --git a/llvm/test/CodeGen/X86/vec_fabs.ll b/llvm/test/CodeGen/X86/vec_fabs.ll index cc10689..f691cb7 100644 --- a/llvm/test/CodeGen/X86/vec_fabs.ll +++ b/llvm/test/CodeGen/X86/vec_fabs.ll @@ -1,14 +1,14 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX1 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X86,X86-AVX,X86-AVX2 -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX512VL -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X86,X86-AVX512FP16 -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX512VLDQ +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX512,X86-AVX512VL +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X86,X86-AVX512,X86-AVX512FP16 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X86,X86-AVX512,X86-AVX512VLDQ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64,X64-AVX,X64-AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX512VL -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X64,X64-AVX512FP16 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX512VLDQ +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX512,X64-AVX512VL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512fp16 | FileCheck %s --check-prefixes=X64,X64-AVX512,X64-AVX512FP16 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64,X64-AVX512,X64-AVX512VLDQ ; ; 128-bit Vectors @@ -25,6 +25,11 @@ define <2 x double> @fabs_v2f64(<2 x double> %p) { ; X86-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0 ; X86-AVX512VL-NEXT: retl ; +; X86-AVX512FP16-LABEL: fabs_v2f64: +; X86-AVX512FP16: # %bb.0: +; X86-AVX512FP16-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0 +; X86-AVX512FP16-NEXT: retl +; ; 
X86-AVX512VLDQ-LABEL: fabs_v2f64: ; X86-AVX512VLDQ: # %bb.0: ; X86-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0 @@ -40,6 +45,11 @@ define <2 x double> @fabs_v2f64(<2 x double> %p) { ; X64-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 ; X64-AVX512VL-NEXT: retq ; +; X64-AVX512FP16-LABEL: fabs_v2f64: +; X64-AVX512FP16: # %bb.0: +; X64-AVX512FP16-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 +; X64-AVX512FP16-NEXT: retq +; ; X64-AVX512VLDQ-LABEL: fabs_v2f64: ; X64-AVX512VLDQ: # %bb.0: ; X64-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0 @@ -66,6 +76,11 @@ define <4 x float> @fabs_v4f32(<4 x float> %p) { ; X86-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0 ; X86-AVX512VL-NEXT: retl ; +; X86-AVX512FP16-LABEL: fabs_v4f32: +; X86-AVX512FP16: # %bb.0: +; X86-AVX512FP16-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0 +; X86-AVX512FP16-NEXT: retl +; ; X86-AVX512VLDQ-LABEL: fabs_v4f32: ; X86-AVX512VLDQ: # %bb.0: ; X86-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0 @@ -87,6 +102,11 @@ define <4 x float> @fabs_v4f32(<4 x float> %p) { ; X64-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 ; X64-AVX512VL-NEXT: retq ; +; X64-AVX512FP16-LABEL: fabs_v4f32: +; X64-AVX512FP16: # %bb.0: +; X64-AVX512FP16-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 +; X64-AVX512FP16-NEXT: retq +; ; X64-AVX512VLDQ-LABEL: fabs_v4f32: ; X64-AVX512VLDQ: # %bb.0: ; X64-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 @@ -99,36 +119,42 @@ declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p) define <8 x half> @fabs_v8f16(ptr %p) { ; X86-AVX1-LABEL: fabs_v8f16: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: movl 4(%esp), [[ADDRREG:%.*]] -; X86-AVX1-NEXT: vmovaps ([[ADDRREG]]), %xmm0 +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX1-NEXT: vmovaps (%eax), %xmm0 ; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 ; X86-AVX1-NEXT: retl - +; ; X86-AVX2-LABEL: fabs_v8f16: ; X86-AVX2: # %bb.0: -; X86-AVX2-NEXT: movl 4(%esp), [[REG:%.*]] -; X86-AVX2-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 -; X86-AVX2-NEXT: vpand ([[REG]]), %xmm0, %xmm0 +; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX2-NEXT: vpbroadcastw {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN] +; X86-AVX2-NEXT: vpand (%eax), %xmm0, %xmm0 ; X86-AVX2-NEXT: retl - -; X64-AVX512VL-LABEL: fabs_v8f16: -; X64-AVX512VL: # %bb.0: -; X64-AVX512VL-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; X64-AVX512VL-NEXT: vpand (%rdi), %xmm0, %xmm0 -; X64-AVX512VL-NEXT: retq - +; +; X86-AVX512-LABEL: fabs_v8f16: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX512-NEXT: vpbroadcastw {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN] +; X86-AVX512-NEXT: vpand (%eax), %xmm0, %xmm0 +; X86-AVX512-NEXT: retl +; ; X64-AVX1-LABEL: fabs_v8f16: ; X64-AVX1: # %bb.0: ; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 ; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; X64-AVX1-NEXT: retq - +; ; X64-AVX2-LABEL: fabs_v8f16: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; X64-AVX2-NEXT: vpbroadcastw {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN] ; X64-AVX2-NEXT: vpand (%rdi), %xmm0, %xmm0 ; X64-AVX2-NEXT: retq - +; +; X64-AVX512-LABEL: fabs_v8f16: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vpbroadcastw {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN] +; X64-AVX512-NEXT: vpand 
(%rdi), %xmm0, %xmm0 +; X64-AVX512-NEXT: retq %v = load <8 x half>, ptr %p, align 16 %nnv = call <8 x half> @llvm.fabs.v8f16(<8 x half> %v) ret <8 x half> %nnv @@ -156,6 +182,11 @@ define <4 x double> @fabs_v4f64(<4 x double> %p) { ; X86-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm0, %ymm0 ; X86-AVX512VL-NEXT: retl ; +; X86-AVX512FP16-LABEL: fabs_v4f64: +; X86-AVX512FP16: # %bb.0: +; X86-AVX512FP16-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm0, %ymm0 +; X86-AVX512FP16-NEXT: retl +; ; X86-AVX512VLDQ-LABEL: fabs_v4f64: ; X86-AVX512VLDQ: # %bb.0: ; X86-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm0, %ymm0 @@ -177,6 +208,11 @@ define <4 x double> @fabs_v4f64(<4 x double> %p) { ; X64-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 ; X64-AVX512VL-NEXT: retq ; +; X64-AVX512FP16-LABEL: fabs_v4f64: +; X64-AVX512FP16: # %bb.0: +; X64-AVX512FP16-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 +; X64-AVX512FP16-NEXT: retq +; ; X64-AVX512VLDQ-LABEL: fabs_v4f64: ; X64-AVX512VLDQ: # %bb.0: ; X64-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 @@ -203,6 +239,11 @@ define <8 x float> @fabs_v8f32(<8 x float> %p) { ; X86-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0 ; X86-AVX512VL-NEXT: retl ; +; X86-AVX512FP16-LABEL: fabs_v8f32: +; X86-AVX512FP16: # %bb.0: +; X86-AVX512FP16-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0 +; X86-AVX512FP16-NEXT: retl +; ; X86-AVX512VLDQ-LABEL: fabs_v8f32: ; X86-AVX512VLDQ: # %bb.0: ; X86-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0 @@ -224,6 +265,11 @@ define <8 x float> @fabs_v8f32(<8 x float> %p) { ; X64-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 ; X64-AVX512VL-NEXT: retq ; +; X64-AVX512FP16-LABEL: fabs_v8f32: +; X64-AVX512FP16: # %bb.0: +; X64-AVX512FP16-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 +; X64-AVX512FP16-NEXT: retq +; ; X64-AVX512VLDQ-LABEL: fabs_v8f32: ; X64-AVX512VLDQ: # %bb.0: ; X64-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0 @@ -234,19 +280,1377 @@ define <8 x float> @fabs_v8f32(<8 x float> %p) { declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p) define <16 x half> @fabs_v16f16(ptr %p) { +; X86-AVX1-LABEL: fabs_v16f16: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: pushl %esi +; X86-AVX1-NEXT: .cfi_def_cfa_offset 8 +; X86-AVX1-NEXT: subl $308, %esp # imm = 0x134 +; X86-AVX1-NEXT: .cfi_def_cfa_offset 316 +; X86-AVX1-NEXT: .cfi_offset %esi, -8 +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-AVX1-NEXT: vmovdqa (%esi), %xmm0 +; X86-AVX1-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovaps 16(%esi), %xmm1 +; X86-AVX1-NEXT: vmovups %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = 
mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vbroadcastss 4(%esi), %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vbroadcastss 8(%esi), %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vbroadcastss 12(%esi), %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = 
mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vbroadcastss 20(%esi), %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vbroadcastss 
24(%esi), %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vbroadcastss 28(%esi), %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm1, (%esp) +; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3] +; X86-AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3] +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload +; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[1],mem[1],xmm2[2],mem[2],xmm2[3],mem[3] +; X86-AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X86-AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; 
X86-AVX1-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3] +; X86-AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X86-AVX1-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; X86-AVX1-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X86-AVX1-NEXT: vpunpckldq {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; X86-AVX1-NEXT: vpunpcklqdq {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0] +; X86-AVX1-NEXT: vinsertf128 $1, {{[-0-9]+}}(%e{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; X86-AVX1-NEXT: addl $308, %esp # imm = 0x134 +; X86-AVX1-NEXT: .cfi_def_cfa_offset 8 +; X86-AVX1-NEXT: popl %esi +; X86-AVX1-NEXT: .cfi_def_cfa_offset 4 +; X86-AVX1-NEXT: retl +; +; X86-AVX2-LABEL: fabs_v16f16: +; X86-AVX2: # %bb.0: +; X86-AVX2-NEXT: pushl %esi +; X86-AVX2-NEXT: .cfi_def_cfa_offset 8 +; X86-AVX2-NEXT: subl $372, %esp # imm = 0x174 +; X86-AVX2-NEXT: .cfi_def_cfa_offset 380 +; X86-AVX2-NEXT: .cfi_offset %esi, -8 +; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-AVX2-NEXT: vmovdqa (%esi), %xmm0 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovaps 16(%esi), %xmm1 +; X86-AVX2-NEXT: vmovups %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN] +; X86-AVX2-NEXT: vmovups %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovups 
{{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovd %xmm0, (%esp) +; X86-AVX2-NEXT: vpinsrw $0, 4(%esi), %xmm0, %xmm0 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovd %xmm0, (%esp) +; X86-AVX2-NEXT: vpinsrw $0, 20(%esi), %xmm0, %xmm0 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps 
{{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovd %xmm0, (%esp) +; X86-AVX2-NEXT: vpinsrw $0, 8(%esi), %xmm0, %xmm0 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovd %xmm0, (%esp) +; X86-AVX2-NEXT: vpinsrw $0, 24(%esi), %xmm0, %xmm0 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} 
xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovd %xmm0, (%esp) +; X86-AVX2-NEXT: vpinsrw $0, 12(%esi), %xmm0, %xmm0 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovd %xmm0, (%esp) +; X86-AVX2-NEXT: vpinsrw $0, 28(%esi), %xmm0, %xmm0 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vpand {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm1, (%esp) +; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; X86-AVX2-NEXT: 
vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload +; X86-AVX2-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3] +; X86-AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload +; X86-AVX2-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3] +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload +; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 16-byte Folded Reload +; X86-AVX2-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[1],mem[1],xmm2[2],mem[2],xmm2[3],mem[3] +; X86-AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; X86-AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5] +; X86-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) # 32-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload +; X86-AVX2-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3] +; X86-AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; X86-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) # 32-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vzeroupper +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X86-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%e{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vpunpckldq {{[-0-9]+}}(%e{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload +; X86-AVX2-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; X86-AVX2-NEXT: vpunpcklqdq {{[-0-9]+}}(%e{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload +; X86-AVX2-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] +; X86-AVX2-NEXT: addl $372, %esp # imm = 0x174 +; X86-AVX2-NEXT: .cfi_def_cfa_offset 8 +; X86-AVX2-NEXT: popl %esi +; X86-AVX2-NEXT: .cfi_def_cfa_offset 4 +; X86-AVX2-NEXT: retl +; +; X86-AVX512VL-LABEL: fabs_v16f16: +; X86-AVX512VL: # %bb.0: +; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX512VL-NEXT: movzwl 28(%eax), %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm0 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm1 +; X86-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN] +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vmovd %xmm1, %ecx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm3 +; X86-AVX512VL-NEXT: vmovdqa (%eax), %xmm1 +; X86-AVX512VL-NEXT: vmovdqa 16(%eax), %xmm2 +; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm4 = 
xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VL-NEXT: vpextrw $0, %xmm4, %ecx +; X86-AVX512VL-NEXT: movzwl %cx, %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm4 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X86-AVX512VL-NEXT: vmovd %xmm4, %ecx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm4 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] +; X86-AVX512VL-NEXT: movzwl 12(%eax), %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm4 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X86-AVX512VL-NEXT: vmovd %xmm4, %ecx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm4 +; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm5 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VL-NEXT: vpextrw $0, %xmm5, %ecx +; X86-AVX512VL-NEXT: movzwl %cx, %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm5 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vmovd %xmm5, %ecx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3] +; X86-AVX512VL-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3 +; X86-AVX512VL-NEXT: movzwl 24(%eax), %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm4 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X86-AVX512VL-NEXT: vmovd %xmm4, %ecx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm4 +; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm5 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VL-NEXT: vpextrw $0, %xmm5, %ecx +; X86-AVX512VL-NEXT: movzwl %cx, %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm5 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vmovd %xmm5, %ecx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3] +; X86-AVX512VL-NEXT: movzwl 8(%eax), %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm5 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vmovd %xmm5, %ecx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5 +; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm6 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VL-NEXT: vpextrw $0, %xmm6, %ecx +; X86-AVX512VL-NEXT: movzwl %cx, %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm6 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm6, %xmm6 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm6, %xmm6 +; X86-AVX512VL-NEXT: vmovd %xmm6, %ecx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm6 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3] +; X86-AVX512VL-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4 +; X86-AVX512VL-NEXT: vpunpckldq {{.*#+}} ymm3 = ymm4[0],ymm3[0],ymm4[1],ymm3[1],ymm4[4],ymm3[4],ymm4[5],ymm3[5] +; X86-AVX512VL-NEXT: 
movzwl 20(%eax), %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm4 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X86-AVX512VL-NEXT: vmovd %xmm4, %ecx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm4 +; X86-AVX512VL-NEXT: vpsrlq $48, %xmm2, %xmm5 +; X86-AVX512VL-NEXT: vpextrw $0, %xmm5, %ecx +; X86-AVX512VL-NEXT: movzwl %cx, %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm5 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vmovd %xmm5, %ecx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3] +; X86-AVX512VL-NEXT: movzwl 4(%eax), %eax +; X86-AVX512VL-NEXT: vmovd %eax, %xmm5 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vmovd %xmm5, %eax +; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 +; X86-AVX512VL-NEXT: vpsrlq $48, %xmm1, %xmm6 +; X86-AVX512VL-NEXT: vpextrw $0, %xmm6, %eax +; X86-AVX512VL-NEXT: movzwl %ax, %eax +; X86-AVX512VL-NEXT: vmovd %eax, %xmm6 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm6, %xmm6 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm6, %xmm6 +; X86-AVX512VL-NEXT: vmovd %xmm6, %eax +; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3] +; X86-AVX512VL-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4 +; X86-AVX512VL-NEXT: vpextrw $0, %xmm2, %eax +; X86-AVX512VL-NEXT: movzwl %ax, %eax +; X86-AVX512VL-NEXT: vmovd %eax, %xmm5 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vmovd %xmm5, %eax +; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 +; X86-AVX512VL-NEXT: vpsrld $16, %xmm2, %xmm2 +; X86-AVX512VL-NEXT: vpextrw $0, %xmm2, %eax +; X86-AVX512VL-NEXT: movzwl %ax, %eax +; X86-AVX512VL-NEXT: vmovd %eax, %xmm2 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm2, %xmm2 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm2 +; X86-AVX512VL-NEXT: vmovd %xmm2, %eax +; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm2 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3] +; X86-AVX512VL-NEXT: vpextrw $0, %xmm1, %eax +; X86-AVX512VL-NEXT: movzwl %ax, %eax +; X86-AVX512VL-NEXT: vmovd %eax, %xmm5 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vmovd %xmm5, %eax +; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 +; X86-AVX512VL-NEXT: vpsrld $16, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vpextrw $0, %xmm1, %eax +; X86-AVX512VL-NEXT: movzwl %ax, %eax +; X86-AVX512VL-NEXT: vmovd %eax, %xmm1 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm0 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; X86-AVX512VL-NEXT: vmovd %xmm0, %eax +; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3] +; X86-AVX512VL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; X86-AVX512VL-NEXT: 
vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm4[0],ymm0[1],ymm4[1],ymm0[4],ymm4[4],ymm0[5],ymm4[5] +; X86-AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[2],ymm3[2] +; X86-AVX512VL-NEXT: retl +; ; X86-AVX512FP16-LABEL: fabs_v16f16: ; X86-AVX512FP16: # %bb.0: -; X86-AVX512FP16-NEXT: movl 4(%esp), [[REG:%.*]] -; X86-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, [[YMM:%ymm[0-9]+]] -; X86-AVX512FP16-NEXT: vpand ([[REG]]), [[YMM]], [[YMM]] -; X86-AVX512FP16-NEXT: retl - +; X86-AVX512FP16-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX512FP16-NEXT: vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN] +; X86-AVX512FP16-NEXT: vpand (%eax), %ymm0, %ymm0 +; X86-AVX512FP16-NEXT: retl +; +; X86-AVX512VLDQ-LABEL: fabs_v16f16: +; X86-AVX512VLDQ: # %bb.0: +; X86-AVX512VLDQ-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX512VLDQ-NEXT: movzwl 28(%eax), %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm0 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm0, %xmm1 +; X86-AVX512VLDQ-NEXT: vpbroadcastd {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN] +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %ecx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm3 +; X86-AVX512VLDQ-NEXT: vmovdqa (%eax), %xmm1 +; X86-AVX512VLDQ-NEXT: vmovdqa 16(%eax), %xmm2 +; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm4 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm4, %ecx +; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm4 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X86-AVX512VLDQ-NEXT: vmovd %xmm4, %ecx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm4 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] +; X86-AVX512VLDQ-NEXT: movzwl 12(%eax), %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm4 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X86-AVX512VLDQ-NEXT: vmovd %xmm4, %ecx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm4 +; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm5 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm5, %ecx +; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm5 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X86-AVX512VLDQ-NEXT: vmovd %xmm5, %ecx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3] +; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3 +; X86-AVX512VLDQ-NEXT: movzwl 24(%eax), %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm4 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X86-AVX512VLDQ-NEXT: vmovd %xmm4, %ecx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm4 +; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm5 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm5, %ecx +; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx +; 
X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm5 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X86-AVX512VLDQ-NEXT: vmovd %xmm5, %ecx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3] +; X86-AVX512VLDQ-NEXT: movzwl 8(%eax), %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm5 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X86-AVX512VLDQ-NEXT: vmovd %xmm5, %ecx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5 +; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm6 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm6, %ecx +; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm6 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm6, %xmm6 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm6, %xmm6 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm6, %xmm6 +; X86-AVX512VLDQ-NEXT: vmovd %xmm6, %ecx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm6 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3] +; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4 +; X86-AVX512VLDQ-NEXT: vpunpckldq {{.*#+}} ymm3 = ymm4[0],ymm3[0],ymm4[1],ymm3[1],ymm4[4],ymm3[4],ymm4[5],ymm3[5] +; X86-AVX512VLDQ-NEXT: movzwl 20(%eax), %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm4 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X86-AVX512VLDQ-NEXT: vmovd %xmm4, %ecx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm4 +; X86-AVX512VLDQ-NEXT: vpsrlq $48, %xmm2, %xmm5 +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm5, %ecx +; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm5 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X86-AVX512VLDQ-NEXT: vmovd %xmm5, %ecx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3] +; X86-AVX512VLDQ-NEXT: movzwl 4(%eax), %eax +; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm5 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X86-AVX512VLDQ-NEXT: vmovd %xmm5, %eax +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 +; X86-AVX512VLDQ-NEXT: vpsrlq $48, %xmm1, %xmm6 +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm6, %eax +; X86-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm6 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm6, %xmm6 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm6, %xmm6 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm6, %xmm6 +; X86-AVX512VLDQ-NEXT: vmovd %xmm6, %eax +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3] +; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4 +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm2, %eax +; X86-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm5 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5 +; 
X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X86-AVX512VLDQ-NEXT: vmovd %xmm5, %eax +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 +; X86-AVX512VLDQ-NEXT: vpsrld $16, %xmm2, %xmm2 +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm2, %eax +; X86-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm2 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm2, %xmm2 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm2, %xmm2 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm2, %xmm2 +; X86-AVX512VLDQ-NEXT: vmovd %xmm2, %eax +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm2 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3] +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %eax +; X86-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm5 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X86-AVX512VLDQ-NEXT: vmovd %xmm5, %eax +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 +; X86-AVX512VLDQ-NEXT: vpsrld $16, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %eax +; X86-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm0 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; X86-AVX512VLDQ-NEXT: vmovd %xmm0, %eax +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3] +; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; X86-AVX512VLDQ-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm4[0],ymm0[1],ymm4[1],ymm0[4],ymm4[4],ymm0[5],ymm4[5] +; X86-AVX512VLDQ-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[2],ymm3[2] +; X86-AVX512VLDQ-NEXT: retl +; +; X64-AVX1-LABEL: fabs_v16f16: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rbx +; X64-AVX1-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX1-NEXT: subq $80, %rsp +; X64-AVX1-NEXT: .cfi_def_cfa_offset 96 +; X64-AVX1-NEXT: .cfi_offset %rbx, -16 +; X64-AVX1-NEXT: movq %rdi, %rbx +; X64-AVX1-NEXT: vbroadcastss 28(%rdi), %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vmovaps (%rbx), %xmm0 +; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa 16(%rbx), %xmm0 +; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vbroadcastss 24(%rbx), %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: 
vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vbroadcastss 20(%rbx), %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; X64-AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; X64-AVX1-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0] +; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vbroadcastss 12(%rbx), %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vbroadcastss 8(%rbx), %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq 
__truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vbroadcastss 4(%rbx), %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; X64-AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; X64-AVX1-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0] +; X64-AVX1-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; X64-AVX1-NEXT: addq $80, %rsp +; X64-AVX1-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX1-NEXT: popq %rbx +; X64-AVX1-NEXT: .cfi_def_cfa_offset 8 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: fabs_v16f16: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: pushq %rbx +; X64-AVX2-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX2-NEXT: subq $128, %rsp +; X64-AVX2-NEXT: .cfi_def_cfa_offset 144 +; X64-AVX2-NEXT: .cfi_offset %rbx, -16 +; X64-AVX2-NEXT: movq %rdi, %rbx +; X64-AVX2-NEXT: vpinsrw $0, 28(%rdi), %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN] +; X64-AVX2-NEXT: vmovdqa %xmm1, (%rsp) # 16-byte Spill +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovaps (%rbx), %xmm0 +; X64-AVX2-NEXT: vmovaps 
%xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa 16(%rbx), %xmm0 +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vpinsrw $0, 12(%rbx), %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; X64-AVX2-NEXT: vpinsrw $0, 24(%rbx), %xmm0, %xmm0 +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vpinsrw $0, 8(%rbx), %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), 
%ymm0, %ymm0 # 32-byte Folded Reload +; X64-AVX2-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; X64-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; X64-AVX2-NEXT: vpinsrw $0, 20(%rbx), %xmm0, %xmm0 +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vpinsrw $0, 4(%rbx), %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; X64-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vandps (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vandps (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; 
X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload +; X64-AVX2-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; X64-AVX2-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload +; X64-AVX2-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] +; X64-AVX2-NEXT: addq $128, %rsp +; X64-AVX2-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX2-NEXT: popq %rbx +; X64-AVX2-NEXT: .cfi_def_cfa_offset 8 +; X64-AVX2-NEXT: retq +; +; X64-AVX512VL-LABEL: fabs_v16f16: +; X64-AVX512VL: # %bb.0: +; X64-AVX512VL-NEXT: movzwl 28(%rdi), %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm0 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm1 +; X64-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN] +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X64-AVX512VL-NEXT: vmovd %xmm1, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm3 +; X64-AVX512VL-NEXT: vmovdqa (%rdi), %xmm1 +; X64-AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm2 +; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm4 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VL-NEXT: vpextrw $0, %xmm4, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm4 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X64-AVX512VL-NEXT: vmovd %xmm4, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] +; X64-AVX512VL-NEXT: movzwl 12(%rdi), %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm4 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X64-AVX512VL-NEXT: vmovd %xmm4, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4 +; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm5 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VL-NEXT: vpextrw $0, %xmm5, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm5 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X64-AVX512VL-NEXT: vmovd %xmm5, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3] +; X64-AVX512VL-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3 +; X64-AVX512VL-NEXT: movzwl 24(%rdi), %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm4 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X64-AVX512VL-NEXT: vmovd %xmm4, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4 +; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm5 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VL-NEXT: vpextrw $0, %xmm5, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm5 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X64-AVX512VL-NEXT: vmovd %xmm5, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} 
xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3] +; X64-AVX512VL-NEXT: movzwl 8(%rdi), %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm5 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X64-AVX512VL-NEXT: vmovd %xmm5, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 +; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm6 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VL-NEXT: vpextrw $0, %xmm6, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm6 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm6, %xmm6 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm6, %xmm6 +; X64-AVX512VL-NEXT: vmovd %xmm6, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3] +; X64-AVX512VL-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4 +; X64-AVX512VL-NEXT: vpunpckldq {{.*#+}} ymm3 = ymm4[0],ymm3[0],ymm4[1],ymm3[1],ymm4[4],ymm3[4],ymm4[5],ymm3[5] +; X64-AVX512VL-NEXT: movzwl 20(%rdi), %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm4 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X64-AVX512VL-NEXT: vmovd %xmm4, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4 +; X64-AVX512VL-NEXT: vpsrlq $48, %xmm2, %xmm5 +; X64-AVX512VL-NEXT: vpextrw $0, %xmm5, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm5 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X64-AVX512VL-NEXT: vmovd %xmm5, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3] +; X64-AVX512VL-NEXT: movzwl 4(%rdi), %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm5 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X64-AVX512VL-NEXT: vmovd %xmm5, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 +; X64-AVX512VL-NEXT: vpsrlq $48, %xmm1, %xmm6 +; X64-AVX512VL-NEXT: vpextrw $0, %xmm6, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm6 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm6, %xmm6 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm6, %xmm6 +; X64-AVX512VL-NEXT: vmovd %xmm6, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3] +; X64-AVX512VL-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4 +; X64-AVX512VL-NEXT: vpextrw $0, %xmm2, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm5 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X64-AVX512VL-NEXT: vmovd %xmm5, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 +; X64-AVX512VL-NEXT: vpsrld $16, %xmm2, %xmm2 +; X64-AVX512VL-NEXT: vpextrw $0, %xmm2, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm2 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm2, %xmm2 +; X64-AVX512VL-NEXT: vcvtps2ph $4, 
%xmm2, %xmm2 +; X64-AVX512VL-NEXT: vmovd %xmm2, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm2 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3] +; X64-AVX512VL-NEXT: vpextrw $0, %xmm1, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm5 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X64-AVX512VL-NEXT: vmovd %xmm5, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 +; X64-AVX512VL-NEXT: vpsrld $16, %xmm1, %xmm1 +; X64-AVX512VL-NEXT: vpextrw $0, %xmm1, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm1 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm0 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; X64-AVX512VL-NEXT: vmovd %xmm0, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3] +; X64-AVX512VL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; X64-AVX512VL-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm4[0],ymm0[1],ymm4[1],ymm0[4],ymm4[4],ymm0[5],ymm4[5] +; X64-AVX512VL-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[2],ymm3[2] +; X64-AVX512VL-NEXT: retq +; ; X64-AVX512FP16-LABEL: fabs_v16f16: ; X64-AVX512FP16: # %bb.0: -; X64-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), [[YMM:%ymm[0-9]+]] -; X64-AVX512FP16-NEXT: vpand (%rdi), [[YMM]], [[YMM]] -; X64-AVX512FP16-NEXT: retq -; +; X64-AVX512FP16-NEXT: vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN] +; X64-AVX512FP16-NEXT: vpand (%rdi), %ymm0, %ymm0 +; X64-AVX512FP16-NEXT: retq +; +; X64-AVX512VLDQ-LABEL: fabs_v16f16: +; X64-AVX512VLDQ: # %bb.0: +; X64-AVX512VLDQ-NEXT: movzwl 28(%rdi), %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm0 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm0, %xmm1 +; X64-AVX512VLDQ-NEXT: vpbroadcastd {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN] +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X64-AVX512VLDQ-NEXT: vmovd %xmm1, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm3 +; X64-AVX512VLDQ-NEXT: vmovdqa (%rdi), %xmm1 +; X64-AVX512VLDQ-NEXT: vmovdqa 16(%rdi), %xmm2 +; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm4 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm4, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm4 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X64-AVX512VLDQ-NEXT: vmovd %xmm4, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4 +; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] +; X64-AVX512VLDQ-NEXT: movzwl 12(%rdi), %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm4 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X64-AVX512VLDQ-NEXT: vmovd %xmm4, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4 +; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm5 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm5, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, 
%eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm5 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X64-AVX512VLDQ-NEXT: vmovd %xmm5, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 +; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3] +; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3 +; X64-AVX512VLDQ-NEXT: movzwl 24(%rdi), %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm4 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X64-AVX512VLDQ-NEXT: vmovd %xmm4, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4 +; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm5 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm5, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm5 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X64-AVX512VLDQ-NEXT: vmovd %xmm5, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 +; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3] +; X64-AVX512VLDQ-NEXT: movzwl 8(%rdi), %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm5 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X64-AVX512VLDQ-NEXT: vmovd %xmm5, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 +; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm6 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm6, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm6 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm6, %xmm6 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm6, %xmm6 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm6, %xmm6 +; X64-AVX512VLDQ-NEXT: vmovd %xmm6, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6 +; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3] +; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4 +; X64-AVX512VLDQ-NEXT: vpunpckldq {{.*#+}} ymm3 = ymm4[0],ymm3[0],ymm4[1],ymm3[1],ymm4[4],ymm3[4],ymm4[5],ymm3[5] +; X64-AVX512VLDQ-NEXT: movzwl 20(%rdi), %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm4 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X64-AVX512VLDQ-NEXT: vmovd %xmm4, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4 +; X64-AVX512VLDQ-NEXT: vpsrlq $48, %xmm2, %xmm5 +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm5, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm5 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X64-AVX512VLDQ-NEXT: vmovd %xmm5, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 +; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3] +; X64-AVX512VLDQ-NEXT: movzwl 4(%rdi), %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm5 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5 +; 
X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X64-AVX512VLDQ-NEXT: vmovd %xmm5, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 +; X64-AVX512VLDQ-NEXT: vpsrlq $48, %xmm1, %xmm6 +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm6, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm6 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm6, %xmm6 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm6, %xmm6 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm6, %xmm6 +; X64-AVX512VLDQ-NEXT: vmovd %xmm6, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6 +; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3] +; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm4 +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm2, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm5 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X64-AVX512VLDQ-NEXT: vmovd %xmm5, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 +; X64-AVX512VLDQ-NEXT: vpsrld $16, %xmm2, %xmm2 +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm2, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm2 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm2, %xmm2 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm2, %xmm2 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm2, %xmm2 +; X64-AVX512VLDQ-NEXT: vmovd %xmm2, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm2 +; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3] +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm5 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X64-AVX512VLDQ-NEXT: vmovd %xmm5, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 +; X64-AVX512VLDQ-NEXT: vpsrld $16, %xmm1, %xmm1 +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm1 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm0 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; X64-AVX512VLDQ-NEXT: vmovd %xmm0, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm5[0],xmm0[0],xmm5[1],xmm0[1],xmm5[2],xmm0[2],xmm5[3],xmm0[3] +; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; X64-AVX512VLDQ-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm4[0],ymm0[1],ymm4[1],ymm0[4],ymm4[4],ymm0[5],ymm4[5] +; X64-AVX512VLDQ-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm3[0],ymm0[2],ymm3[2] +; X64-AVX512VLDQ-NEXT: retq %v = load <16 x half>, ptr %p, align 32 %nnv = call <16 x half> @llvm.fabs.v16f16(<16 x half> %v) ret <16 x half> %nnv @@ -270,6 +1674,11 @@ define <8 x double> @fabs_v8f64(<8 x double> %p) { ; X86-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm0, %zmm0 ; X86-AVX512VL-NEXT: retl ; +; X86-AVX512FP16-LABEL: fabs_v8f64: +; X86-AVX512FP16: # %bb.0: +; X86-AVX512FP16-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm0, %zmm0 +; X86-AVX512FP16-NEXT: retl +; ; X86-AVX512VLDQ-LABEL: fabs_v8f64: ; X86-AVX512VLDQ: # %bb.0: ; X86-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm0, %zmm0 @@ -287,6 +1696,11 @@ define <8 x 
double> @fabs_v8f64(<8 x double> %p) { ; X64-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 ; X64-AVX512VL-NEXT: retq ; +; X64-AVX512FP16-LABEL: fabs_v8f64: +; X64-AVX512FP16: # %bb.0: +; X64-AVX512FP16-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 +; X64-AVX512FP16-NEXT: retq +; ; X64-AVX512VLDQ-LABEL: fabs_v8f64: ; X64-AVX512VLDQ: # %bb.0: ; X64-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0 @@ -309,6 +1723,11 @@ define <16 x float> @fabs_v16f32(<16 x float> %p) { ; X86-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0 ; X86-AVX512VL-NEXT: retl ; +; X86-AVX512FP16-LABEL: fabs_v16f32: +; X86-AVX512FP16: # %bb.0: +; X86-AVX512FP16-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0 +; X86-AVX512FP16-NEXT: retl +; ; X86-AVX512VLDQ-LABEL: fabs_v16f32: ; X86-AVX512VLDQ: # %bb.0: ; X86-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0 @@ -326,6 +1745,11 @@ define <16 x float> @fabs_v16f32(<16 x float> %p) { ; X64-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 ; X64-AVX512VL-NEXT: retq ; +; X64-AVX512FP16-LABEL: fabs_v16f32: +; X64-AVX512FP16: # %bb.0: +; X64-AVX512FP16-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 +; X64-AVX512FP16-NEXT: retq +; ; X64-AVX512VLDQ-LABEL: fabs_v16f32: ; X64-AVX512VLDQ: # %bb.0: ; X64-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0 @@ -336,19 +1760,2704 @@ define <16 x float> @fabs_v16f32(<16 x float> %p) { declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p) define <32 x half> @fabs_v32f16(ptr %p) { +; X86-AVX1-LABEL: fabs_v32f16: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: pushl %esi +; X86-AVX1-NEXT: .cfi_def_cfa_offset 8 +; X86-AVX1-NEXT: subl $644, %esp # imm = 0x284 +; X86-AVX1-NEXT: .cfi_def_cfa_offset 652 +; X86-AVX1-NEXT: .cfi_offset %esi, -8 +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-AVX1-NEXT: vmovdqa 32(%esi), %xmm0 +; X86-AVX1-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vbroadcastss 36(%esi), %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = 
xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vbroadcastss 40(%esi), %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vbroadcastss 44(%esi), %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqa 48(%esi), %xmm0 +; X86-AVX1-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovups 
{{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vbroadcastss 52(%esi), %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vbroadcastss 56(%esi), %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 
+; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vbroadcastss 60(%esi), %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqa (%esi), %xmm1 +; X86-AVX1-NEXT: vmovdqu %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovaps 16(%esi), %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vpsrld $16, %xmm1, %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vbroadcastss 4(%esi), %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpsrldq 
{{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vbroadcastss 8(%esi), %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vbroadcastss 12(%esi), %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte 
Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vbroadcastss 20(%esi), %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vbroadcastss 24(%esi), %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; 
X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vbroadcastss 28(%esi), %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 +; X86-AVX1-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __extendhfsf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm0, (%esp) +; X86-AVX1-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0 +; X86-AVX1-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX1-NEXT: vmovss %xmm1, (%esp) +; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3] +; X86-AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3] +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload +; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[1],mem[1],xmm2[2],mem[2],xmm2[3],mem[3] +; X86-AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X86-AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3] +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload +; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[1],mem[1],xmm2[2],mem[2],xmm2[3],mem[3] +; X86-AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload +; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 16-byte Folded 
Reload +; X86-AVX1-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[1],mem[1],xmm2[2],mem[2],xmm2[3],mem[3] +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm3 # 16-byte Reload +; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm3, %xmm3 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm3 = xmm3[0],mem[0],xmm3[1],mem[1],xmm3[2],mem[2],xmm3[3],mem[3] +; X86-AVX1-NEXT: vpunpckldq {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] +; X86-AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] +; X86-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; X86-AVX1-NEXT: vmovups %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) # 32-byte Spill +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3] +; X86-AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3] +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload +; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[1],mem[1],xmm2[2],mem[2],xmm2[3],mem[3] +; X86-AVX1-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X86-AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; X86-AVX1-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3] +; X86-AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X86-AVX1-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX1-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; X86-AVX1-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX1-NEXT: vzeroupper +; X86-AVX1-NEXT: calll __truncsfhf2 +; X86-AVX1-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X86-AVX1-NEXT: vpunpckldq {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; X86-AVX1-NEXT: vpunpcklqdq {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0] +; X86-AVX1-NEXT: vinsertf128 $1, {{[-0-9]+}}(%e{{[sb]}}p), %ymm0, %ymm1 # 16-byte Folded Reload +; 
X86-AVX1-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 # 32-byte Reload +; X86-AVX1-NEXT: addl $644, %esp # imm = 0x284 +; X86-AVX1-NEXT: .cfi_def_cfa_offset 8 +; X86-AVX1-NEXT: popl %esi +; X86-AVX1-NEXT: .cfi_def_cfa_offset 4 +; X86-AVX1-NEXT: retl +; +; X86-AVX2-LABEL: fabs_v32f16: +; X86-AVX2: # %bb.0: +; X86-AVX2-NEXT: pushl %esi +; X86-AVX2-NEXT: .cfi_def_cfa_offset 8 +; X86-AVX2-NEXT: subl $708, %esp # imm = 0x2C4 +; X86-AVX2-NEXT: .cfi_def_cfa_offset 716 +; X86-AVX2-NEXT: .cfi_offset %esi, -8 +; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-AVX2-NEXT: vmovdqa 32(%esi), %xmm0 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqa 48(%esi), %xmm0 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN] +; X86-AVX2-NEXT: vmovups %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovd %xmm0, (%esp) +; X86-AVX2-NEXT: vpinsrw $0, 36(%esi), %xmm0, %xmm0 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: 
vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovd %xmm0, (%esp) +; X86-AVX2-NEXT: vpinsrw $0, 52(%esi), %xmm0, %xmm0 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovd %xmm0, (%esp) +; X86-AVX2-NEXT: vpinsrw $0, 40(%esi), %xmm0, %xmm0 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte 
Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovd %xmm0, (%esp) +; X86-AVX2-NEXT: vpinsrw $0, 56(%esi), %xmm0, %xmm0 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovd %xmm0, (%esp) +; X86-AVX2-NEXT: vpinsrw $0, 44(%esi), %xmm0, %xmm0 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups 
%xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovd %xmm0, (%esp) +; X86-AVX2-NEXT: vpinsrw $0, 60(%esi), %xmm0, %xmm0 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqa (%esi), %xmm1 +; X86-AVX2-NEXT: vmovdqu %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovaps 16(%esi), %xmm0 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vpsrld $16, %xmm1, %xmm0 +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), 
%xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovd %xmm0, (%esp) +; X86-AVX2-NEXT: vpinsrw $0, 4(%esi), %xmm0, %xmm0 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovd %xmm0, (%esp) +; X86-AVX2-NEXT: vpinsrw $0, 20(%esi), %xmm0, %xmm0 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps 
{{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovd %xmm0, (%esp) +; X86-AVX2-NEXT: vpinsrw $0, 8(%esi), %xmm0, %xmm0 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovd %xmm0, (%esp) +; X86-AVX2-NEXT: vpinsrw $0, 24(%esi), %xmm0, %xmm0 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovd %xmm0, (%esp) +; 
X86-AVX2-NEXT: vpinsrw $0, 12(%esi), %xmm0, %xmm0 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovd %xmm0, (%esp) +; X86-AVX2-NEXT: vpinsrw $0, 28(%esi), %xmm0, %xmm0 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0 +; X86-AVX2-NEXT: vpextrw $0, %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vandps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __extendhfsf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm0, (%esp) +; X86-AVX2-NEXT: fstps {{[0-9]+}}(%esp) +; X86-AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-AVX2-NEXT: vpand {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX2-NEXT: vmovss %xmm1, (%esp) +; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload +; X86-AVX2-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3] +; X86-AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), 
%xmm1, %xmm1 # 16-byte Folded Reload +; X86-AVX2-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3] +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload +; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 16-byte Folded Reload +; X86-AVX2-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[1],mem[1],xmm2[2],mem[2],xmm2[3],mem[3] +; X86-AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; X86-AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5] +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload +; X86-AVX2-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3] +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload +; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 16-byte Folded Reload +; X86-AVX2-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[1],mem[1],xmm2[2],mem[2],xmm2[3],mem[3] +; X86-AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload +; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 16-byte Folded Reload +; X86-AVX2-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[1],mem[1],xmm2[2],mem[2],xmm2[3],mem[3] +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm3 # 16-byte Reload +; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm3, %xmm3 # 16-byte Folded Reload +; X86-AVX2-NEXT: # xmm3 = xmm3[0],mem[0],xmm3[1],mem[1],xmm3[2],mem[2],xmm3[3],mem[3] +; X86-AVX2-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2 +; X86-AVX2-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm2[0],ymm1[0],ymm2[1],ymm1[1],ymm2[4],ymm1[4],ymm2[5],ymm1[5] +; X86-AVX2-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2] +; X86-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) # 32-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload +; X86-AVX2-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3] +; X86-AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload +; X86-AVX2-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3] +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload +; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm2, %xmm2 # 16-byte Folded Reload +; X86-AVX2-NEXT: # xmm2 = xmm2[0],mem[0],xmm2[1],mem[1],xmm2[2],mem[2],xmm2[3],mem[3] +; X86-AVX2-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1 +; X86-AVX2-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5] +; X86-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) # 32-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte 
Reload +; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm1, %xmm1 # 16-byte Folded Reload +; X86-AVX2-NEXT: # xmm1 = xmm1[0],mem[0],xmm1[1],mem[1],xmm1[2],mem[2],xmm1[3],mem[3] +; X86-AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; X86-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%e{{[sb]}}p) # 32-byte Spill +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; X86-AVX2-NEXT: vpunpcklwd {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] +; X86-AVX2-NEXT: vmovdqu %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX2-NEXT: vzeroupper +; X86-AVX2-NEXT: calll __truncsfhf2 +; X86-AVX2-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Reload +; X86-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X86-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%e{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; X86-AVX2-NEXT: vpunpckldq {{[-0-9]+}}(%e{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload +; X86-AVX2-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; X86-AVX2-NEXT: vpunpcklqdq {{[-0-9]+}}(%e{{[sb]}}p), %ymm0, %ymm1 # 32-byte Folded Reload +; X86-AVX2-NEXT: # ymm1 = ymm0[0],mem[0],ymm0[2],mem[2] +; X86-AVX2-NEXT: vmovups {{[-0-9]+}}(%e{{[sb]}}p), %ymm0 # 32-byte Reload +; X86-AVX2-NEXT: addl $708, %esp # imm = 0x2C4 +; X86-AVX2-NEXT: .cfi_def_cfa_offset 8 +; X86-AVX2-NEXT: popl %esi +; X86-AVX2-NEXT: .cfi_def_cfa_offset 4 +; X86-AVX2-NEXT: retl +; +; X86-AVX512VL-LABEL: fabs_v32f16: +; X86-AVX512VL: # %bb.0: +; X86-AVX512VL-NEXT: subl $128, %esp +; X86-AVX512VL-NEXT: .cfi_def_cfa_offset 132 +; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX512VL-NEXT: movzwl 60(%eax), %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm0 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm1 +; X86-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN] +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vmovd %xmm1, %ecx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1 +; X86-AVX512VL-NEXT: vmovdqa 48(%eax), %xmm3 +; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm2 = xmm3[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VL-NEXT: vmovdqa %xmm3, %xmm4 +; X86-AVX512VL-NEXT: vmovdqu %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX512VL-NEXT: vpextrw $0, %xmm2, %ecx +; X86-AVX512VL-NEXT: movzwl %cx, %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm2 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm2, %xmm2 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm2 +; X86-AVX512VL-NEXT: vmovd %xmm2, %ecx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm2 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; X86-AVX512VL-NEXT: movzwl 44(%eax), %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm2 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm2, %xmm2 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm2 +; X86-AVX512VL-NEXT: vmovd %xmm2, %ecx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm2 +; X86-AVX512VL-NEXT: vmovdqa 32(%eax), %xmm3 +; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm5 = xmm3[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VL-NEXT: vpextrw $0, %xmm5, %ecx +; X86-AVX512VL-NEXT: movzwl %cx, %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm5 +; 
X86-AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X86-AVX512VL-NEXT: vmovd %xmm5, %ecx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3] +; X86-AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm5 +; X86-AVX512VL-NEXT: movzwl 28(%eax), %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm1 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vmovd %xmm1, %ecx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm6 +; X86-AVX512VL-NEXT: vmovdqa (%eax), %xmm1 +; X86-AVX512VL-NEXT: vmovdqu %xmm1, (%esp) # 16-byte Spill +; X86-AVX512VL-NEXT: vmovdqa 16(%eax), %xmm2 +; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm7 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VL-NEXT: vmovdqu %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX512VL-NEXT: vpextrw $0, %xmm7, %ecx +; X86-AVX512VL-NEXT: movzwl %cx, %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm7 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X86-AVX512VL-NEXT: vmovd %xmm7, %ecx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm7 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3] +; X86-AVX512VL-NEXT: movzwl 12(%eax), %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm7 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X86-AVX512VL-NEXT: vmovd %xmm7, %ecx +; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm7 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VL-NEXT: vpextrw $0, %xmm7, %edx +; X86-AVX512VL-NEXT: movzwl %dx, %edx +; X86-AVX512VL-NEXT: vmovd %edx, %xmm7 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X86-AVX512VL-NEXT: vmovd %xmm7, %edx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm7 +; X86-AVX512VL-NEXT: vpinsrw $0, %edx, %xmm0, %xmm1 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm7[0],xmm1[0],xmm7[1],xmm1[1],xmm7[2],xmm1[2],xmm7[3],xmm1[3] +; X86-AVX512VL-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1 +; X86-AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1 +; X86-AVX512VL-NEXT: vmovdqu64 %zmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 64-byte Spill +; X86-AVX512VL-NEXT: movzwl 56(%eax), %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm1 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vmovd %xmm1, %ecx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1 +; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm6 = xmm4[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VL-NEXT: vpextrw $0, %xmm6, %ecx +; X86-AVX512VL-NEXT: movzwl %cx, %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm6 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm6, %xmm6 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm6, %xmm6 +; X86-AVX512VL-NEXT: vmovd %xmm6, %ecx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm6 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm6 = 
xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3] +; X86-AVX512VL-NEXT: movzwl 40(%eax), %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm1 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vmovd %xmm1, %ecx +; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm1 = xmm3[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VL-NEXT: vmovdqa %xmm3, %xmm4 +; X86-AVX512VL-NEXT: vmovdqu %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX512VL-NEXT: vpextrw $0, %xmm1, %edx +; X86-AVX512VL-NEXT: movzwl %dx, %edx +; X86-AVX512VL-NEXT: vmovd %edx, %xmm1 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vmovd %xmm1, %edx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1 +; X86-AVX512VL-NEXT: vpinsrw $0, %edx, %xmm0, %xmm7 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3] +; X86-AVX512VL-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm6 +; X86-AVX512VL-NEXT: movzwl 24(%eax), %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm1 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vmovd %xmm1, %ecx +; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm1 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VL-NEXT: vpextrw $0, %xmm1, %edx +; X86-AVX512VL-NEXT: movzwl %dx, %edx +; X86-AVX512VL-NEXT: vmovd %edx, %xmm1 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vmovd %xmm1, %edx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1 +; X86-AVX512VL-NEXT: vpinsrw $0, %edx, %xmm0, %xmm7 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3] +; X86-AVX512VL-NEXT: movzwl 8(%eax), %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm1 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vmovd %xmm1, %ecx +; X86-AVX512VL-NEXT: vmovdqu (%esp), %xmm2 # 16-byte Reload +; X86-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm1 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VL-NEXT: vpextrw $0, %xmm1, %edx +; X86-AVX512VL-NEXT: movzwl %dx, %edx +; X86-AVX512VL-NEXT: vmovd %edx, %xmm1 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vmovd %xmm1, %edx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1 +; X86-AVX512VL-NEXT: vpinsrw $0, %edx, %xmm0, %xmm5 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3] +; X86-AVX512VL-NEXT: vinserti128 $1, %xmm7, %ymm1, %ymm1 +; X86-AVX512VL-NEXT: vinserti64x4 $1, %ymm6, %zmm1, %zmm1 +; X86-AVX512VL-NEXT: vpunpckldq {{[-0-9]+}}(%e{{[sb]}}p), %zmm1, %zmm1 # 64-byte Folded Reload +; X86-AVX512VL-NEXT: # zmm1 = zmm1[0],mem[0],zmm1[1],mem[1],zmm1[4],mem[4],zmm1[5],mem[5],zmm1[8],mem[8],zmm1[9],mem[9],zmm1[12],mem[12],zmm1[13],mem[13] +; X86-AVX512VL-NEXT: vmovdqu64 %zmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 64-byte Spill +; X86-AVX512VL-NEXT: movzwl 52(%eax), %ecx +; X86-AVX512VL-NEXT: 
vmovd %ecx, %xmm1 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vmovd %xmm1, %ecx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1 +; X86-AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm3 # 16-byte Reload +; X86-AVX512VL-NEXT: vpsrlq $48, %xmm3, %xmm6 +; X86-AVX512VL-NEXT: vpextrw $0, %xmm6, %ecx +; X86-AVX512VL-NEXT: movzwl %cx, %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm6 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm6, %xmm6 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm6, %xmm6 +; X86-AVX512VL-NEXT: vmovd %xmm6, %ecx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm6 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3] +; X86-AVX512VL-NEXT: movzwl 36(%eax), %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm1 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vmovd %xmm1, %ecx +; X86-AVX512VL-NEXT: vpsrlq $48, %xmm4, %xmm1 +; X86-AVX512VL-NEXT: vpextrw $0, %xmm1, %edx +; X86-AVX512VL-NEXT: movzwl %dx, %edx +; X86-AVX512VL-NEXT: vmovd %edx, %xmm1 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vmovd %xmm1, %edx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1 +; X86-AVX512VL-NEXT: vpinsrw $0, %edx, %xmm0, %xmm7 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3] +; X86-AVX512VL-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm6 +; X86-AVX512VL-NEXT: movzwl 20(%eax), %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm1 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vmovd %xmm1, %ecx +; X86-AVX512VL-NEXT: vpsrlq $48, {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; X86-AVX512VL-NEXT: vpextrw $0, %xmm1, %edx +; X86-AVX512VL-NEXT: movzwl %dx, %edx +; X86-AVX512VL-NEXT: vmovd %edx, %xmm1 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vmovd %xmm1, %edx +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1 +; X86-AVX512VL-NEXT: vpinsrw $0, %edx, %xmm0, %xmm7 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3] +; X86-AVX512VL-NEXT: movzwl 4(%eax), %eax +; X86-AVX512VL-NEXT: vmovd %eax, %xmm1 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vmovd %xmm1, %eax +; X86-AVX512VL-NEXT: vpsrlq $48, %xmm2, %xmm1 +; X86-AVX512VL-NEXT: vpextrw $0, %xmm1, %ecx +; X86-AVX512VL-NEXT: movzwl %cx, %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm1 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vmovd %xmm1, %ecx +; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm1 +; X86-AVX512VL-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3] +; X86-AVX512VL-NEXT: vinserti128 $1, %xmm7, %ymm1, %ymm1 +; X86-AVX512VL-NEXT: 
vinserti64x4 $1, %ymm6, %zmm1, %zmm6 +; X86-AVX512VL-NEXT: vpextrw $0, %xmm3, %eax +; X86-AVX512VL-NEXT: movzwl %ax, %eax +; X86-AVX512VL-NEXT: vmovd %eax, %xmm1 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vmovd %xmm1, %eax +; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm1 +; X86-AVX512VL-NEXT: vpsrld $16, %xmm3, %xmm4 +; X86-AVX512VL-NEXT: vpextrw $0, %xmm4, %eax +; X86-AVX512VL-NEXT: movzwl %ax, %eax +; X86-AVX512VL-NEXT: vmovd %eax, %xmm4 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X86-AVX512VL-NEXT: vmovd %xmm4, %eax +; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] +; X86-AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload +; X86-AVX512VL-NEXT: vpextrw $0, %xmm2, %eax +; X86-AVX512VL-NEXT: movzwl %ax, %eax +; X86-AVX512VL-NEXT: vmovd %eax, %xmm1 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vmovd %xmm1, %eax +; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm1 +; X86-AVX512VL-NEXT: vpsrld $16, %xmm2, %xmm3 +; X86-AVX512VL-NEXT: vpextrw $0, %xmm3, %eax +; X86-AVX512VL-NEXT: movzwl %ax, %eax +; X86-AVX512VL-NEXT: vmovd %eax, %xmm3 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm3, %xmm3 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm3, %xmm3 +; X86-AVX512VL-NEXT: vmovd %xmm3, %eax +; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm3 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; X86-AVX512VL-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm3 +; X86-AVX512VL-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload +; X86-AVX512VL-NEXT: vpextrw $0, %xmm2, %eax +; X86-AVX512VL-NEXT: movzwl %ax, %eax +; X86-AVX512VL-NEXT: vmovd %eax, %xmm1 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vmovd %xmm1, %eax +; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm1 +; X86-AVX512VL-NEXT: vpsrld $16, %xmm2, %xmm2 +; X86-AVX512VL-NEXT: vpextrw $0, %xmm2, %eax +; X86-AVX512VL-NEXT: movzwl %ax, %eax +; X86-AVX512VL-NEXT: vmovd %eax, %xmm2 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm2, %xmm2 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm2 +; X86-AVX512VL-NEXT: vmovd %xmm2, %eax +; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm2 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; X86-AVX512VL-NEXT: vmovdqu (%esp), %xmm4 # 16-byte Reload +; X86-AVX512VL-NEXT: vpextrw $0, %xmm4, %eax +; X86-AVX512VL-NEXT: movzwl %ax, %eax +; X86-AVX512VL-NEXT: vmovd %eax, %xmm1 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vmovd %xmm1, %eax +; X86-AVX512VL-NEXT: vpsrld $16, %xmm4, %xmm1 +; X86-AVX512VL-NEXT: vpextrw $0, %xmm1, %ecx +; X86-AVX512VL-NEXT: movzwl %cx, %ecx +; X86-AVX512VL-NEXT: vmovd %ecx, %xmm1 +; X86-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm0 +; X86-AVX512VL-NEXT: 
vpinsrw $0, %eax, %xmm0, %xmm1 +; X86-AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; X86-AVX512VL-NEXT: vmovd %xmm0, %eax +; X86-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; X86-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X86-AVX512VL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; X86-AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 +; X86-AVX512VL-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm6[0],zmm0[1],zmm6[1],zmm0[4],zmm6[4],zmm0[5],zmm6[5],zmm0[8],zmm6[8],zmm0[9],zmm6[9],zmm0[12],zmm6[12],zmm0[13],zmm6[13] +; X86-AVX512VL-NEXT: vpunpcklqdq {{[-0-9]+}}(%e{{[sb]}}p), %zmm0, %zmm0 # 64-byte Folded Reload +; X86-AVX512VL-NEXT: # zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; X86-AVX512VL-NEXT: addl $128, %esp +; X86-AVX512VL-NEXT: .cfi_def_cfa_offset 4 +; X86-AVX512VL-NEXT: retl +; ; X86-AVX512FP16-LABEL: fabs_v32f16: ; X86-AVX512FP16: # %bb.0: -; X86-AVX512FP16-NEXT: movl 4(%esp), [[REG:%.*]] -; X86-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}, [[ZMM:%zmm[0-9]+]] -; X86-AVX512FP16-NEXT: vpandq ([[REG]]), [[ZMM]], [[ZMM]] -; X86-AVX512FP16-NEXT: retl - +; X86-AVX512FP16-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX512FP16-NEXT: vpbroadcastw {{.*#+}} zmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN] +; X86-AVX512FP16-NEXT: vpandq (%eax), %zmm0, %zmm0 +; X86-AVX512FP16-NEXT: retl +; +; X86-AVX512VLDQ-LABEL: fabs_v32f16: +; X86-AVX512VLDQ: # %bb.0: +; X86-AVX512VLDQ-NEXT: subl $128, %esp +; X86-AVX512VLDQ-NEXT: .cfi_def_cfa_offset 132 +; X86-AVX512VLDQ-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX512VLDQ-NEXT: movzwl 60(%eax), %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm0 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm0, %xmm1 +; X86-AVX512VLDQ-NEXT: vpbroadcastd {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN] +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %ecx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1 +; X86-AVX512VLDQ-NEXT: vmovdqa 48(%eax), %xmm3 +; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm2 = xmm3[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VLDQ-NEXT: vmovdqa %xmm3, %xmm4 +; X86-AVX512VLDQ-NEXT: vmovdqu %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm2, %ecx +; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm2 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm2, %xmm2 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm2, %xmm2 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm2, %xmm2 +; X86-AVX512VLDQ-NEXT: vmovd %xmm2, %ecx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm2 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; X86-AVX512VLDQ-NEXT: movzwl 44(%eax), %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm2 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm2, %xmm2 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm2, %xmm2 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm2, %xmm2 +; X86-AVX512VLDQ-NEXT: vmovd %xmm2, %ecx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm2 +; X86-AVX512VLDQ-NEXT: vmovdqa 32(%eax), %xmm3 +; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm5 = xmm3[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm5, %ecx +; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm5 +; 
X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm5, %xmm5 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm5, %xmm5 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm5, %xmm5 +; X86-AVX512VLDQ-NEXT: vmovd %xmm5, %ecx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1],xmm2[2],xmm5[2],xmm2[3],xmm5[3] +; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm5 +; X86-AVX512VLDQ-NEXT: movzwl 28(%eax), %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %ecx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm6 +; X86-AVX512VLDQ-NEXT: vmovdqa (%eax), %xmm1 +; X86-AVX512VLDQ-NEXT: vmovdqu %xmm1, (%esp) # 16-byte Spill +; X86-AVX512VLDQ-NEXT: vmovdqa 16(%eax), %xmm2 +; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm7 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VLDQ-NEXT: vmovdqu %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm7, %ecx +; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm7 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X86-AVX512VLDQ-NEXT: vmovd %xmm7, %ecx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm7 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3] +; X86-AVX512VLDQ-NEXT: movzwl 12(%eax), %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm7 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X86-AVX512VLDQ-NEXT: vmovd %xmm7, %ecx +; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm7 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm7, %edx +; X86-AVX512VLDQ-NEXT: movzwl %dx, %edx +; X86-AVX512VLDQ-NEXT: vmovd %edx, %xmm7 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X86-AVX512VLDQ-NEXT: vmovd %xmm7, %edx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm7 +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %edx, %xmm0, %xmm1 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm7[0],xmm1[0],xmm7[1],xmm1[1],xmm7[2],xmm1[2],xmm7[3],xmm1[3] +; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1 +; X86-AVX512VLDQ-NEXT: vinserti64x4 $1, %ymm5, %zmm1, %zmm1 +; X86-AVX512VLDQ-NEXT: vmovdqu64 %zmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 64-byte Spill +; X86-AVX512VLDQ-NEXT: movzwl 56(%eax), %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %ecx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1 +; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm6 = xmm4[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm6, %ecx +; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm6 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm6, %xmm6 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm6, %xmm6 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm6, %xmm6 +; X86-AVX512VLDQ-NEXT: vmovd %xmm6, %ecx +; 
X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm6 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3] +; X86-AVX512VLDQ-NEXT: movzwl 40(%eax), %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %ecx +; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm1 = xmm3[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VLDQ-NEXT: vmovdqa %xmm3, %xmm4 +; X86-AVX512VLDQ-NEXT: vmovdqu %xmm3, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %edx +; X86-AVX512VLDQ-NEXT: movzwl %dx, %edx +; X86-AVX512VLDQ-NEXT: vmovd %edx, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %edx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1 +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %edx, %xmm0, %xmm7 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3] +; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm6 +; X86-AVX512VLDQ-NEXT: movzwl 24(%eax), %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %ecx +; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm1 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %edx +; X86-AVX512VLDQ-NEXT: movzwl %dx, %edx +; X86-AVX512VLDQ-NEXT: vmovd %edx, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %edx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1 +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %edx, %xmm0, %xmm7 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3] +; X86-AVX512VLDQ-NEXT: movzwl 8(%eax), %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %ecx +; X86-AVX512VLDQ-NEXT: vmovdqu (%esp), %xmm2 # 16-byte Reload +; X86-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm1 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %edx +; X86-AVX512VLDQ-NEXT: movzwl %dx, %edx +; X86-AVX512VLDQ-NEXT: vmovd %edx, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %edx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1 +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %edx, %xmm0, %xmm5 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3] +; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm7, %ymm1, %ymm1 +; X86-AVX512VLDQ-NEXT: vinserti64x4 $1, %ymm6, %zmm1, %zmm1 +; X86-AVX512VLDQ-NEXT: vpunpckldq {{[-0-9]+}}(%e{{[sb]}}p), %zmm1, %zmm1 # 64-byte Folded Reload +; X86-AVX512VLDQ-NEXT: # zmm1 = 
zmm1[0],mem[0],zmm1[1],mem[1],zmm1[4],mem[4],zmm1[5],mem[5],zmm1[8],mem[8],zmm1[9],mem[9],zmm1[12],mem[12],zmm1[13],mem[13] +; X86-AVX512VLDQ-NEXT: vmovdqu64 %zmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 64-byte Spill +; X86-AVX512VLDQ-NEXT: movzwl 52(%eax), %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %ecx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1 +; X86-AVX512VLDQ-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm3 # 16-byte Reload +; X86-AVX512VLDQ-NEXT: vpsrlq $48, %xmm3, %xmm6 +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm6, %ecx +; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm6 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm6, %xmm6 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm6, %xmm6 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm6, %xmm6 +; X86-AVX512VLDQ-NEXT: vmovd %xmm6, %ecx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm6 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3] +; X86-AVX512VLDQ-NEXT: movzwl 36(%eax), %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %ecx +; X86-AVX512VLDQ-NEXT: vpsrlq $48, %xmm4, %xmm1 +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %edx +; X86-AVX512VLDQ-NEXT: movzwl %dx, %edx +; X86-AVX512VLDQ-NEXT: vmovd %edx, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %edx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1 +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %edx, %xmm0, %xmm7 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3] +; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm6 +; X86-AVX512VLDQ-NEXT: movzwl 20(%eax), %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %ecx +; X86-AVX512VLDQ-NEXT: vpsrlq $48, {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %edx +; X86-AVX512VLDQ-NEXT: movzwl %dx, %edx +; X86-AVX512VLDQ-NEXT: vmovd %edx, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %edx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm1 +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %edx, %xmm0, %xmm7 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm1[0],xmm7[0],xmm1[1],xmm7[1],xmm1[2],xmm7[2],xmm1[3],xmm7[3] +; X86-AVX512VLDQ-NEXT: movzwl 4(%eax), %eax +; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %eax +; X86-AVX512VLDQ-NEXT: vpsrlq $48, %xmm2, %xmm1 +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %ecx +; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, 
%xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %ecx +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm1 +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm5 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1],xmm1[2],xmm5[2],xmm1[3],xmm5[3] +; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm7, %ymm1, %ymm1 +; X86-AVX512VLDQ-NEXT: vinserti64x4 $1, %ymm6, %zmm1, %zmm6 +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm3, %eax +; X86-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %eax +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm1 +; X86-AVX512VLDQ-NEXT: vpsrld $16, %xmm3, %xmm4 +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm4, %eax +; X86-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm4 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X86-AVX512VLDQ-NEXT: vmovd %xmm4, %eax +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] +; X86-AVX512VLDQ-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm2, %eax +; X86-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %eax +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm1 +; X86-AVX512VLDQ-NEXT: vpsrld $16, %xmm2, %xmm3 +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm3, %eax +; X86-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm3 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm3, %xmm3 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm3, %xmm3 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm3, %xmm3 +; X86-AVX512VLDQ-NEXT: vmovd %xmm3, %eax +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm3 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm4, %ymm1, %ymm3 +; X86-AVX512VLDQ-NEXT: vmovdqu {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 16-byte Reload +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm2, %eax +; X86-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %eax +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm1 +; X86-AVX512VLDQ-NEXT: vpsrld $16, %xmm2, %xmm2 +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm2, %eax +; X86-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm2 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm2, %xmm2 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm2, %xmm2 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm2, %xmm2 +; X86-AVX512VLDQ-NEXT: vmovd %xmm2, %eax +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm2 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; X86-AVX512VLDQ-NEXT: vmovdqu (%esp), %xmm4 # 16-byte Reload +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm4, %eax +; 
X86-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X86-AVX512VLDQ-NEXT: vmovd %eax, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vmovd %xmm1, %eax +; X86-AVX512VLDQ-NEXT: vpsrld $16, %xmm4, %xmm1 +; X86-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %ecx +; X86-AVX512VLDQ-NEXT: movzwl %cx, %ecx +; X86-AVX512VLDQ-NEXT: vmovd %ecx, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1 +; X86-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm0 +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm1 +; X86-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; X86-AVX512VLDQ-NEXT: vmovd %xmm0, %eax +; X86-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; X86-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X86-AVX512VLDQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; X86-AVX512VLDQ-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 +; X86-AVX512VLDQ-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm6[0],zmm0[1],zmm6[1],zmm0[4],zmm6[4],zmm0[5],zmm6[5],zmm0[8],zmm6[8],zmm0[9],zmm6[9],zmm0[12],zmm6[12],zmm0[13],zmm6[13] +; X86-AVX512VLDQ-NEXT: vpunpcklqdq {{[-0-9]+}}(%e{{[sb]}}p), %zmm0, %zmm0 # 64-byte Folded Reload +; X86-AVX512VLDQ-NEXT: # zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6] +; X86-AVX512VLDQ-NEXT: addl $128, %esp +; X86-AVX512VLDQ-NEXT: .cfi_def_cfa_offset 4 +; X86-AVX512VLDQ-NEXT: retl +; +; X64-AVX1-LABEL: fabs_v32f16: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: pushq %rbx +; X64-AVX1-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX1-NEXT: subq $128, %rsp +; X64-AVX1-NEXT: .cfi_def_cfa_offset 144 +; X64-AVX1-NEXT: .cfi_offset %rbx, -16 +; X64-AVX1-NEXT: movq %rdi, %rbx +; X64-AVX1-NEXT: vbroadcastss 28(%rdi), %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; X64-AVX1-NEXT: vmovaps (%rbx), %xmm0 +; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa 16(%rbx), %xmm1 +; X64-AVX1-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vmovaps 32(%rbx), %xmm0 +; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vmovaps 48(%rbx), %xmm0 +; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; X64-AVX1-NEXT: vbroadcastss 24(%rbx), %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; 
X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; X64-AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; X64-AVX1-NEXT: vbroadcastss 20(%rbx), %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; X64-AVX1-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0] +; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vbroadcastss 12(%rbx), %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; X64-AVX1-NEXT: vbroadcastss 8(%rbx), %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = 
xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; X64-AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; X64-AVX1-NEXT: vbroadcastss 4(%rbx), %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; X64-AVX1-NEXT: vpunpcklqdq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0] +; X64-AVX1-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; X64-AVX1-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; X64-AVX1-NEXT: vbroadcastss 60(%rbx), %xmm0 +; X64-AVX1-NEXT: vzeroupper +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vbroadcastss 56(%rbx), %xmm0 +; 
X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vbroadcastss 52(%rbx), %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; X64-AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; X64-AVX1-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0] +; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vbroadcastss 44(%rbx), %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = 
xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vbroadcastss 40(%rbx), %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; X64-AVX1-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vbroadcastss 36(%rbx), %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill +; X64-AVX1-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __extendhfsf2@PLT +; X64-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-AVX1-NEXT: callq __truncsfhf2@PLT +; X64-AVX1-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX1-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX1-NEXT: vpunpckldq (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; X64-AVX1-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0] +; X64-AVX1-NEXT: vinsertf128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm1 # 16-byte Folded Reload +; X64-AVX1-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload +; X64-AVX1-NEXT: addq $128, %rsp +; X64-AVX1-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX1-NEXT: popq %rbx +; X64-AVX1-NEXT: .cfi_def_cfa_offset 8 +; X64-AVX1-NEXT: retq +; +; X64-AVX2-LABEL: fabs_v32f16: +; X64-AVX2: # %bb.0: +; X64-AVX2-NEXT: pushq %rbx +; X64-AVX2-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX2-NEXT: subq $192, %rsp +; X64-AVX2-NEXT: .cfi_def_cfa_offset 208 +; X64-AVX2-NEXT: .cfi_offset %rbx, -16 +; X64-AVX2-NEXT: movq %rdi, %rbx +; X64-AVX2-NEXT: vpinsrw $0, 28(%rdi), 
%xmm0, %xmm0 +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN] +; X64-AVX2-NEXT: vmovdqa %xmm1, (%rsp) # 16-byte Spill +; X64-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovaps (%rbx), %xmm0 +; X64-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa 16(%rbx), %xmm1 +; X64-AVX2-NEXT: vmovdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovaps 32(%rbx), %xmm0 +; X64-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovaps 48(%rbx), %xmm0 +; X64-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vpinsrw $0, 12(%rbx), %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; X64-AVX2-NEXT: vpinsrw $0, 24(%rbx), %xmm0, %xmm0 +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vpinsrw $0, 8(%rbx), %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), 
%xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload +; X64-AVX2-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; X64-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; X64-AVX2-NEXT: vpinsrw $0, 20(%rbx), %xmm0, %xmm0 +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vpinsrw $0, 4(%rbx), %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; X64-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vandps (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; 
X64-AVX2-NEXT: vandps (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload +; X64-AVX2-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; X64-AVX2-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload +; X64-AVX2-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[2],mem[2] +; X64-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; X64-AVX2-NEXT: vpinsrw $0, 60(%rbx), %xmm0, %xmm0 +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vpinsrw $0, 44(%rbx), %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; X64-AVX2-NEXT: vpinsrw $0, 56(%rbx), %xmm0, %xmm0 +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX2-NEXT: 
callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vpinsrw $0, 40(%rbx), %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload +; X64-AVX2-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; X64-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; X64-AVX2-NEXT: vpinsrw $0, 52(%rbx), %xmm0, %xmm0 +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vpinsrw $0, 36(%rbx), %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vpsrlq $48, %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: vmovdqu %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; X64-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vzeroupper +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vandps (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; 
X64-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX2-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vandps (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-AVX2-NEXT: vpsrld $16, %xmm0, %xmm0 +; X64-AVX2-NEXT: callq __extendhfsf2@PLT +; X64-AVX2-NEXT: vpand (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: callq __truncsfhf2@PLT +; X64-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-AVX2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-AVX2-NEXT: vinserti128 $1, {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 16-byte Folded Reload +; X64-AVX2-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm0 # 32-byte Folded Reload +; X64-AVX2-NEXT: # ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] +; X64-AVX2-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %ymm0, %ymm1 # 32-byte Folded Reload +; X64-AVX2-NEXT: # ymm1 = ymm0[0],mem[0],ymm0[2],mem[2] +; X64-AVX2-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload +; X64-AVX2-NEXT: addq $192, %rsp +; X64-AVX2-NEXT: .cfi_def_cfa_offset 16 +; X64-AVX2-NEXT: popq %rbx +; X64-AVX2-NEXT: .cfi_def_cfa_offset 8 +; X64-AVX2-NEXT: retq +; +; X64-AVX512VL-LABEL: fabs_v32f16: +; X64-AVX512VL: # %bb.0: +; X64-AVX512VL-NEXT: movzwl 60(%rdi), %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm0 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm1 +; X64-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN] +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X64-AVX512VL-NEXT: vmovd %xmm1, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 +; X64-AVX512VL-NEXT: vmovdqa (%rdi), %xmm1 +; X64-AVX512VL-NEXT: vmovdqa 16(%rdi), %xmm2 +; X64-AVX512VL-NEXT: vmovdqa 32(%rdi), %xmm3 +; X64-AVX512VL-NEXT: vmovdqa 48(%rdi), %xmm4 +; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm6 = xmm4[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VL-NEXT: vpextrw $0, %xmm6, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm6 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm6, %xmm6 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm6, %xmm6 +; X64-AVX512VL-NEXT: vmovd %xmm6, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3] +; X64-AVX512VL-NEXT: movzwl 44(%rdi), %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm6 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm6, %xmm6 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm6, %xmm6 +; X64-AVX512VL-NEXT: vmovd %xmm6, %eax +; X64-AVX512VL-NEXT: vpinsrw 
$0, %eax, %xmm0, %xmm6 +; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm7 = xmm3[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VL-NEXT: vpextrw $0, %xmm7, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm7 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vmovd %xmm7, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3] +; X64-AVX512VL-NEXT: vinserti128 $1, %xmm5, %ymm6, %ymm5 +; X64-AVX512VL-NEXT: movzwl 28(%rdi), %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm6 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm6, %xmm6 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm6, %xmm6 +; X64-AVX512VL-NEXT: vmovd %xmm6, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6 +; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm7 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VL-NEXT: vpextrw $0, %xmm7, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm7 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vmovd %xmm7, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3] +; X64-AVX512VL-NEXT: movzwl 12(%rdi), %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm7 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vmovd %xmm7, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7 +; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm8 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VL-NEXT: vpextrw $0, %xmm8, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm8 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm8, %xmm8 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm8, %xmm8 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm8, %xmm8 +; X64-AVX512VL-NEXT: vmovd %xmm8, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3] +; X64-AVX512VL-NEXT: vinserti128 $1, %xmm6, %ymm7, %ymm6 +; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm5, %zmm6, %zmm5 +; X64-AVX512VL-NEXT: movzwl 56(%rdi), %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm6 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm6, %xmm6 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm6, %xmm6 +; X64-AVX512VL-NEXT: vmovd %xmm6, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6 +; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm7 = xmm4[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VL-NEXT: vpextrw $0, %xmm7, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm7 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vmovd %xmm7, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3] +; 
X64-AVX512VL-NEXT: movzwl 40(%rdi), %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm7 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vmovd %xmm7, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7 +; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm8 = xmm3[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VL-NEXT: vpextrw $0, %xmm8, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm8 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm8, %xmm8 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm8, %xmm8 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm8, %xmm8 +; X64-AVX512VL-NEXT: vmovd %xmm8, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3] +; X64-AVX512VL-NEXT: vinserti128 $1, %xmm6, %ymm7, %ymm6 +; X64-AVX512VL-NEXT: movzwl 24(%rdi), %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm7 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vmovd %xmm7, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7 +; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm8 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VL-NEXT: vpextrw $0, %xmm8, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm8 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm8, %xmm8 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm8, %xmm8 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm8, %xmm8 +; X64-AVX512VL-NEXT: vmovd %xmm8, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3] +; X64-AVX512VL-NEXT: movzwl 8(%rdi), %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm8 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm8, %xmm8 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm8, %xmm8 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm8, %xmm8 +; X64-AVX512VL-NEXT: vmovd %xmm8, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8 +; X64-AVX512VL-NEXT: vpsrldq {{.*#+}} xmm9 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VL-NEXT: vpextrw $0, %xmm9, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm9 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm9, %xmm9 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm9, %xmm9 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm9, %xmm9 +; X64-AVX512VL-NEXT: vmovd %xmm9, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm9 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm8 = xmm8[0],xmm9[0],xmm8[1],xmm9[1],xmm8[2],xmm9[2],xmm8[3],xmm9[3] +; X64-AVX512VL-NEXT: vinserti128 $1, %xmm7, %ymm8, %ymm7 +; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm6, %zmm7, %zmm6 +; X64-AVX512VL-NEXT: vpunpckldq {{.*#+}} zmm5 = zmm6[0],zmm5[0],zmm6[1],zmm5[1],zmm6[4],zmm5[4],zmm6[5],zmm5[5],zmm6[8],zmm5[8],zmm6[9],zmm5[9],zmm6[12],zmm5[12],zmm6[13],zmm5[13] +; X64-AVX512VL-NEXT: movzwl 52(%rdi), %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm6 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm6, %xmm6 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm6, %xmm6 +; X64-AVX512VL-NEXT: vmovd %xmm6, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6 +; X64-AVX512VL-NEXT: vpsrlq $48, %xmm4, %xmm7 +; X64-AVX512VL-NEXT: vpextrw $0, %xmm7, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; 
X64-AVX512VL-NEXT: vmovd %eax, %xmm7 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vmovd %xmm7, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3] +; X64-AVX512VL-NEXT: movzwl 36(%rdi), %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm7 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vmovd %xmm7, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7 +; X64-AVX512VL-NEXT: vpsrlq $48, %xmm3, %xmm8 +; X64-AVX512VL-NEXT: vpextrw $0, %xmm8, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm8 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm8, %xmm8 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm8, %xmm8 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm8, %xmm8 +; X64-AVX512VL-NEXT: vmovd %xmm8, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3] +; X64-AVX512VL-NEXT: vinserti128 $1, %xmm6, %ymm7, %ymm6 +; X64-AVX512VL-NEXT: movzwl 20(%rdi), %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm7 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vmovd %xmm7, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7 +; X64-AVX512VL-NEXT: vpsrlq $48, %xmm2, %xmm8 +; X64-AVX512VL-NEXT: vpextrw $0, %xmm8, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm8 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm8, %xmm8 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm8, %xmm8 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm8, %xmm8 +; X64-AVX512VL-NEXT: vmovd %xmm8, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3] +; X64-AVX512VL-NEXT: movzwl 4(%rdi), %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm8 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm8, %xmm8 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm8, %xmm8 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm8, %xmm8 +; X64-AVX512VL-NEXT: vmovd %xmm8, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8 +; X64-AVX512VL-NEXT: vpsrlq $48, %xmm1, %xmm9 +; X64-AVX512VL-NEXT: vpextrw $0, %xmm9, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm9 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm9, %xmm9 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm9, %xmm9 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm9, %xmm9 +; X64-AVX512VL-NEXT: vmovd %xmm9, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm9 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm8 = xmm8[0],xmm9[0],xmm8[1],xmm9[1],xmm8[2],xmm9[2],xmm8[3],xmm9[3] +; X64-AVX512VL-NEXT: vinserti128 $1, %xmm7, %ymm8, %ymm7 +; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm6, %zmm7, %zmm6 +; X64-AVX512VL-NEXT: vpextrw $0, %xmm4, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm7 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vmovd %xmm7, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7 +; X64-AVX512VL-NEXT: vpsrld $16, %xmm4, %xmm4 +; X64-AVX512VL-NEXT: vpextrw $0, %xmm4, %eax +; X64-AVX512VL-NEXT: 
movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm4 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X64-AVX512VL-NEXT: vmovd %xmm4, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3] +; X64-AVX512VL-NEXT: vpextrw $0, %xmm3, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm7 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X64-AVX512VL-NEXT: vmovd %xmm7, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7 +; X64-AVX512VL-NEXT: vpsrld $16, %xmm3, %xmm3 +; X64-AVX512VL-NEXT: vpextrw $0, %xmm3, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm3 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm3, %xmm3 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm3, %xmm3 +; X64-AVX512VL-NEXT: vmovd %xmm3, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm3 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3] +; X64-AVX512VL-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 +; X64-AVX512VL-NEXT: vpextrw $0, %xmm2, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm4 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X64-AVX512VL-NEXT: vmovd %xmm4, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4 +; X64-AVX512VL-NEXT: vpsrld $16, %xmm2, %xmm2 +; X64-AVX512VL-NEXT: vpextrw $0, %xmm2, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm2 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm2, %xmm2 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm2 +; X64-AVX512VL-NEXT: vmovd %xmm2, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm2 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] +; X64-AVX512VL-NEXT: vpextrw $0, %xmm1, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm4 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X64-AVX512VL-NEXT: vmovd %xmm4, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4 +; X64-AVX512VL-NEXT: vpsrld $16, %xmm1, %xmm1 +; X64-AVX512VL-NEXT: vpextrw $0, %xmm1, %eax +; X64-AVX512VL-NEXT: movzwl %ax, %eax +; X64-AVX512VL-NEXT: vmovd %eax, %xmm1 +; X64-AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 +; X64-AVX512VL-NEXT: vpand %xmm0, %xmm1, %xmm0 +; X64-AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; X64-AVX512VL-NEXT: vmovd %xmm0, %eax +; X64-AVX512VL-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; X64-AVX512VL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] +; X64-AVX512VL-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; X64-AVX512VL-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 +; X64-AVX512VL-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm6[0],zmm0[1],zmm6[1],zmm0[4],zmm6[4],zmm0[5],zmm6[5],zmm0[8],zmm6[8],zmm0[9],zmm6[9],zmm0[12],zmm6[12],zmm0[13],zmm6[13] +; X64-AVX512VL-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm5[0],zmm0[2],zmm5[2],zmm0[4],zmm5[4],zmm0[6],zmm5[6] +; X64-AVX512VL-NEXT: retq +; ; 
X64-AVX512FP16-LABEL: fabs_v32f16: ; X64-AVX512FP16: # %bb.0: -; X64-AVX512FP16-NEXT: vpbroadcastw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), [[ZMM:%zmm[0-9]+]] -; X64-AVX512FP16-NEXT: vpandq (%rdi), [[ZMM]], [[ZMM]] -; X64-AVX512FP16-NEXT: retq - +; X64-AVX512FP16-NEXT: vpbroadcastw {{.*#+}} zmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN] +; X64-AVX512FP16-NEXT: vpandq (%rdi), %zmm0, %zmm0 +; X64-AVX512FP16-NEXT: retq +; +; X64-AVX512VLDQ-LABEL: fabs_v32f16: +; X64-AVX512VLDQ: # %bb.0: +; X64-AVX512VLDQ-NEXT: movzwl 60(%rdi), %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm0 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm0, %xmm1 +; X64-AVX512VLDQ-NEXT: vpbroadcastd {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN] +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm1 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; X64-AVX512VLDQ-NEXT: vmovd %xmm1, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm5 +; X64-AVX512VLDQ-NEXT: vmovdqa (%rdi), %xmm1 +; X64-AVX512VLDQ-NEXT: vmovdqa 16(%rdi), %xmm2 +; X64-AVX512VLDQ-NEXT: vmovdqa 32(%rdi), %xmm3 +; X64-AVX512VLDQ-NEXT: vmovdqa 48(%rdi), %xmm4 +; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm6 = xmm4[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm6, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm6 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm6, %xmm6 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm6, %xmm6 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm6, %xmm6 +; X64-AVX512VLDQ-NEXT: vmovd %xmm6, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6 +; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1],xmm5[2],xmm6[2],xmm5[3],xmm6[3] +; X64-AVX512VLDQ-NEXT: movzwl 44(%rdi), %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm6 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm6, %xmm6 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm6, %xmm6 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm6, %xmm6 +; X64-AVX512VLDQ-NEXT: vmovd %xmm6, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6 +; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm7 = xmm3[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm7, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm7 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vmovd %xmm7, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7 +; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3] +; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm5, %ymm6, %ymm5 +; X64-AVX512VLDQ-NEXT: movzwl 28(%rdi), %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm6 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm6, %xmm6 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm6, %xmm6 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm6, %xmm6 +; X64-AVX512VLDQ-NEXT: vmovd %xmm6, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6 +; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm7 = xmm2[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm7, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm7 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; 
X64-AVX512VLDQ-NEXT: vmovd %xmm7, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7 +; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3] +; X64-AVX512VLDQ-NEXT: movzwl 12(%rdi), %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm7 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vmovd %xmm7, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7 +; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm8 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm8, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm8 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm8, %xmm8 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm8, %xmm8 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm8, %xmm8 +; X64-AVX512VLDQ-NEXT: vmovd %xmm8, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8 +; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3] +; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm6, %ymm7, %ymm6 +; X64-AVX512VLDQ-NEXT: vinserti64x4 $1, %ymm5, %zmm6, %zmm5 +; X64-AVX512VLDQ-NEXT: movzwl 56(%rdi), %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm6 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm6, %xmm6 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm6, %xmm6 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm6, %xmm6 +; X64-AVX512VLDQ-NEXT: vmovd %xmm6, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6 +; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm7 = xmm4[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm7, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm7 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vmovd %xmm7, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7 +; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3] +; X64-AVX512VLDQ-NEXT: movzwl 40(%rdi), %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm7 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vmovd %xmm7, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7 +; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm8 = xmm3[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm8, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm8 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm8, %xmm8 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm8, %xmm8 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm8, %xmm8 +; X64-AVX512VLDQ-NEXT: vmovd %xmm8, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8 +; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3] +; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm6, %ymm7, %ymm6 +; X64-AVX512VLDQ-NEXT: movzwl 24(%rdi), %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm7 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vmovd %xmm7, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, 
%eax, %xmm0, %xmm7 +; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm8 = xmm2[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm8, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm8 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm8, %xmm8 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm8, %xmm8 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm8, %xmm8 +; X64-AVX512VLDQ-NEXT: vmovd %xmm8, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8 +; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3] +; X64-AVX512VLDQ-NEXT: movzwl 8(%rdi), %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm8 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm8, %xmm8 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm8, %xmm8 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm8, %xmm8 +; X64-AVX512VLDQ-NEXT: vmovd %xmm8, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8 +; X64-AVX512VLDQ-NEXT: vpsrldq {{.*#+}} xmm9 = xmm1[10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm9, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm9 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm9, %xmm9 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm9, %xmm9 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm9, %xmm9 +; X64-AVX512VLDQ-NEXT: vmovd %xmm9, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm9 +; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm8 = xmm8[0],xmm9[0],xmm8[1],xmm9[1],xmm8[2],xmm9[2],xmm8[3],xmm9[3] +; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm7, %ymm8, %ymm7 +; X64-AVX512VLDQ-NEXT: vinserti64x4 $1, %ymm6, %zmm7, %zmm6 +; X64-AVX512VLDQ-NEXT: vpunpckldq {{.*#+}} zmm5 = zmm6[0],zmm5[0],zmm6[1],zmm5[1],zmm6[4],zmm5[4],zmm6[5],zmm5[5],zmm6[8],zmm5[8],zmm6[9],zmm5[9],zmm6[12],zmm5[12],zmm6[13],zmm5[13] +; X64-AVX512VLDQ-NEXT: movzwl 52(%rdi), %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm6 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm6, %xmm6 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm6, %xmm6 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm6, %xmm6 +; X64-AVX512VLDQ-NEXT: vmovd %xmm6, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm6 +; X64-AVX512VLDQ-NEXT: vpsrlq $48, %xmm4, %xmm7 +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm7, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm7 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vmovd %xmm7, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7 +; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3] +; X64-AVX512VLDQ-NEXT: movzwl 36(%rdi), %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm7 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vmovd %xmm7, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7 +; X64-AVX512VLDQ-NEXT: vpsrlq $48, %xmm3, %xmm8 +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm8, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm8 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm8, %xmm8 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm8, %xmm8 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm8, %xmm8 +; X64-AVX512VLDQ-NEXT: vmovd %xmm8, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8 +; X64-AVX512VLDQ-NEXT: vpunpcklwd 
{{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3] +; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm6, %ymm7, %ymm6 +; X64-AVX512VLDQ-NEXT: movzwl 20(%rdi), %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm7 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vmovd %xmm7, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7 +; X64-AVX512VLDQ-NEXT: vpsrlq $48, %xmm2, %xmm8 +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm8, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm8 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm8, %xmm8 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm8, %xmm8 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm8, %xmm8 +; X64-AVX512VLDQ-NEXT: vmovd %xmm8, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8 +; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm7 = xmm7[0],xmm8[0],xmm7[1],xmm8[1],xmm7[2],xmm8[2],xmm7[3],xmm8[3] +; X64-AVX512VLDQ-NEXT: movzwl 4(%rdi), %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm8 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm8, %xmm8 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm8, %xmm8 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm8, %xmm8 +; X64-AVX512VLDQ-NEXT: vmovd %xmm8, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm8 +; X64-AVX512VLDQ-NEXT: vpsrlq $48, %xmm1, %xmm9 +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm9, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm9 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm9, %xmm9 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm9, %xmm9 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm9, %xmm9 +; X64-AVX512VLDQ-NEXT: vmovd %xmm9, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm9 +; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm8 = xmm8[0],xmm9[0],xmm8[1],xmm9[1],xmm8[2],xmm9[2],xmm8[3],xmm9[3] +; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm7, %ymm8, %ymm7 +; X64-AVX512VLDQ-NEXT: vinserti64x4 $1, %ymm6, %zmm7, %zmm6 +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm4, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm7 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vmovd %xmm7, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7 +; X64-AVX512VLDQ-NEXT: vpsrld $16, %xmm4, %xmm4 +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm4, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm4 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X64-AVX512VLDQ-NEXT: vmovd %xmm4, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4 +; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm7[0],xmm4[0],xmm7[1],xmm4[1],xmm7[2],xmm4[2],xmm7[3],xmm4[3] +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm3, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm7 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm7, %xmm7 +; X64-AVX512VLDQ-NEXT: vmovd %xmm7, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm7 +; X64-AVX512VLDQ-NEXT: vpsrld $16, %xmm3, %xmm3 +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm3, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm3 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm3, %xmm3 +; X64-AVX512VLDQ-NEXT: vpand 
%xmm0, %xmm3, %xmm3 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm3, %xmm3 +; X64-AVX512VLDQ-NEXT: vmovd %xmm3, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm3 +; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm3 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3] +; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm2, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm4 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X64-AVX512VLDQ-NEXT: vmovd %xmm4, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4 +; X64-AVX512VLDQ-NEXT: vpsrld $16, %xmm2, %xmm2 +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm2, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm2 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm2, %xmm2 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm2, %xmm2 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm2, %xmm2 +; X64-AVX512VLDQ-NEXT: vmovd %xmm2, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm2 +; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm2 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3] +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm4 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm4, %xmm4 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm4, %xmm4 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm4, %xmm4 +; X64-AVX512VLDQ-NEXT: vmovd %xmm4, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm4 +; X64-AVX512VLDQ-NEXT: vpsrld $16, %xmm1, %xmm1 +; X64-AVX512VLDQ-NEXT: vpextrw $0, %xmm1, %eax +; X64-AVX512VLDQ-NEXT: movzwl %ax, %eax +; X64-AVX512VLDQ-NEXT: vmovd %eax, %xmm1 +; X64-AVX512VLDQ-NEXT: vcvtph2ps %xmm1, %xmm1 +; X64-AVX512VLDQ-NEXT: vpand %xmm0, %xmm1, %xmm0 +; X64-AVX512VLDQ-NEXT: vcvtps2ph $4, %xmm0, %xmm0 +; X64-AVX512VLDQ-NEXT: vmovd %xmm0, %eax +; X64-AVX512VLDQ-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 +; X64-AVX512VLDQ-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] +; X64-AVX512VLDQ-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 +; X64-AVX512VLDQ-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 +; X64-AVX512VLDQ-NEXT: vpunpckldq {{.*#+}} zmm0 = zmm0[0],zmm6[0],zmm0[1],zmm6[1],zmm0[4],zmm6[4],zmm0[5],zmm6[5],zmm0[8],zmm6[8],zmm0[9],zmm6[9],zmm0[12],zmm6[12],zmm0[13],zmm6[13] +; X64-AVX512VLDQ-NEXT: vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm5[0],zmm0[2],zmm5[2],zmm0[4],zmm5[4],zmm0[6],zmm5[6] +; X64-AVX512VLDQ-NEXT: retq %v = load <32 x half>, ptr %p, align 64 %nnv = call <32 x half> @llvm.fabs.v32f16(<32 x half> %v) ret <32 x half> %nnv @@ -433,29 +4542,17 @@ define void @PR70947(ptr %src, ptr %dst) { ; X86-AVX2-NEXT: vzeroupper ; X86-AVX2-NEXT: retl ; -; X86-AVX512VL-LABEL: PR70947: -; X86-AVX512VL: # %bb.0: -; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-AVX512VL-NEXT: vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN] -; X86-AVX512VL-NEXT: vandps (%ecx), %ymm0, %ymm1 -; X86-AVX512VL-NEXT: vandps 32(%ecx), %xmm0, %xmm0 -; X86-AVX512VL-NEXT: vmovups %ymm1, (%eax) -; X86-AVX512VL-NEXT: vmovups %xmm0, 16(%eax) -; X86-AVX512VL-NEXT: vzeroupper -; X86-AVX512VL-NEXT: retl -; -; X86-AVX512VLDQ-LABEL: PR70947: -; X86-AVX512VLDQ: # %bb.0: -; X86-AVX512VLDQ-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX512VLDQ-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-AVX512VLDQ-NEXT: vbroadcastsd {{.*#+}} 
ymm0 = [NaN,NaN,NaN,NaN] -; X86-AVX512VLDQ-NEXT: vandps (%ecx), %ymm0, %ymm1 -; X86-AVX512VLDQ-NEXT: vandps 32(%ecx), %xmm0, %xmm0 -; X86-AVX512VLDQ-NEXT: vmovups %ymm1, (%eax) -; X86-AVX512VLDQ-NEXT: vmovups %xmm0, 16(%eax) -; X86-AVX512VLDQ-NEXT: vzeroupper -; X86-AVX512VLDQ-NEXT: retl +; X86-AVX512-LABEL: PR70947: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-AVX512-NEXT: vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN] +; X86-AVX512-NEXT: vandps (%ecx), %ymm0, %ymm1 +; X86-AVX512-NEXT: vandps 32(%ecx), %xmm0, %xmm0 +; X86-AVX512-NEXT: vmovups %ymm1, (%eax) +; X86-AVX512-NEXT: vmovups %xmm0, 16(%eax) +; X86-AVX512-NEXT: vzeroupper +; X86-AVX512-NEXT: retl ; ; X64-AVX1-LABEL: PR70947: ; X64-AVX1: # %bb.0: @@ -478,25 +4575,15 @@ define void @PR70947(ptr %src, ptr %dst) { ; X64-AVX2-NEXT: vzeroupper ; X64-AVX2-NEXT: retq ; -; X64-AVX512VL-LABEL: PR70947: -; X64-AVX512VL: # %bb.0: -; X64-AVX512VL-NEXT: vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN] -; X64-AVX512VL-NEXT: vandps (%rdi), %ymm0, %ymm1 -; X64-AVX512VL-NEXT: vandps 32(%rdi), %xmm0, %xmm0 -; X64-AVX512VL-NEXT: vmovups %ymm1, (%rsi) -; X64-AVX512VL-NEXT: vmovups %xmm0, 16(%rsi) -; X64-AVX512VL-NEXT: vzeroupper -; X64-AVX512VL-NEXT: retq -; -; X64-AVX512VLDQ-LABEL: PR70947: -; X64-AVX512VLDQ: # %bb.0: -; X64-AVX512VLDQ-NEXT: vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN] -; X64-AVX512VLDQ-NEXT: vandps (%rdi), %ymm0, %ymm1 -; X64-AVX512VLDQ-NEXT: vandps 32(%rdi), %xmm0, %xmm0 -; X64-AVX512VLDQ-NEXT: vmovups %ymm1, (%rsi) -; X64-AVX512VLDQ-NEXT: vmovups %xmm0, 16(%rsi) -; X64-AVX512VLDQ-NEXT: vzeroupper -; X64-AVX512VLDQ-NEXT: retq +; X64-AVX512-LABEL: PR70947: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vbroadcastsd {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN] +; X64-AVX512-NEXT: vandps (%rdi), %ymm0, %ymm1 +; X64-AVX512-NEXT: vandps 32(%rdi), %xmm0, %xmm0 +; X64-AVX512-NEXT: vmovups %ymm1, (%rsi) +; X64-AVX512-NEXT: vmovups %xmm0, 16(%rsi) +; X64-AVX512-NEXT: vzeroupper +; X64-AVX512-NEXT: retq %src4 = getelementptr inbounds double, ptr %src, i64 4 %dst4 = getelementptr inbounds i32, ptr %dst, i64 4 %ld0 = load <4 x double>, ptr %src, align 8 -- cgit v1.1 From 7aaa86b28ddc3deded6e357b27f2bbebb97a3864 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Fri, 17 Nov 2023 10:14:47 +0100 Subject: [include-cleaner] Add regression tests for outliving File&Source Manager --- .../include-cleaner/unittests/RecordTest.cpp | 30 ++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp index dfefa66..0f2ded5 100644 --- a/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp +++ b/clang-tools-extra/include-cleaner/unittests/RecordTest.cpp @@ -558,5 +558,35 @@ TEST_F(PragmaIncludeTest, ExportInUnnamedBuffer) { PI.getExporters(llvm::cantFail(FM->getFileRef("foo.h")), *FM), testing::ElementsAre(llvm::cantFail(FM->getFileRef("exporter.h")))); } + +TEST_F(PragmaIncludeTest, OutlivesFMAndSM) { + Inputs.Code = R"cpp( + #include "public.h" + )cpp"; + Inputs.ExtraFiles["public.h"] = R"cpp( + #include "private.h" + #include "private2.h" // IWYU pragma: export + )cpp"; + Inputs.ExtraFiles["private.h"] = R"cpp( + // IWYU pragma: private, include "public.h" + )cpp"; + Inputs.ExtraFiles["private2.h"] = R"cpp( + // IWYU pragma: private + )cpp"; + build(); // Fills up PI, file/source manager used is destroyed afterwards. 
+ Inputs.MakeAction = nullptr; // Don't populate PI anymore. + + // Now this build gives us a new File&Source Manager. + TestAST Processed = build(); + auto &FM = Processed.fileManager(); + auto PrivateFE = FM.getFile("private.h"); + assert(PrivateFE); + EXPECT_EQ(PI.getPublic(PrivateFE.get()), "\"public.h\""); + + auto Private2FE = FM.getFile("private2.h"); + assert(Private2FE); + EXPECT_THAT(PI.getExporters(Private2FE.get(), FM), + testing::ElementsAre(llvm::cantFail(FM.getFileRef("public.h")))); +} } // namespace } // namespace clang::include_cleaner -- cgit v1.1 From a67b85ef63c7ec29c2076294e3f7c7f923144a53 Mon Sep 17 00:00:00 2001 From: Matheus Izvekov Date: Fri, 17 Nov 2023 11:39:20 +0100 Subject: Add llvm-dlltool to the toolchain list (#72563) This adds dlltool to the list of tools which don't get excluded from installation when LLVM_INSTALL_TOOLCHAIN_ONLY is set. The most important effect here is that this tool will now be included in the official Windows release. While llvm-lib reuses the dlltool machinery internally and has many of the same capabilities, it does not expose the functionality controlled by the '-k' flag, which is currently the only way to create import libraries for i386 with stdcall symbols from a module definition alone. We avoid changing the llvm-lib tool, since it is designed to emulate LIB.EXE from the MSVC toolchain, and as this functionality is not supported there, we would have had to introduce an LLVM extension flag in order to support it. See https://reviews.llvm.org/D36548 for reference on the rationale for the dlltool '-k' flag. --- llvm/cmake/modules/AddLLVM.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index 05e4d6c..0fa5215 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -1307,6 +1307,7 @@ if(NOT LLVM_TOOLCHAIN_TOOLS) llvm-ar llvm-cov llvm-cxxfilt + llvm-dlltool llvm-dwp llvm-ranlib llvm-lib -- cgit v1.1 From ec42d547eba5c0ad0bddbecc8902d35383968e78 Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Fri, 17 Nov 2023 12:42:32 +0100 Subject: [lld][COFF][NFC] Factor out exception table sorting. (#72518) This is a preparation for ARM64EC support, which needs to sort both ARM and x86_64 tables separately. --- lld/COFF/Writer.cpp | 70 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp index 0477c09..6dbfa8e 100644 --- a/lld/COFF/Writer.cpp +++ b/lld/COFF/Writer.cpp @@ -247,7 +247,8 @@ private: void writeBuildId(); void writePEChecksum(); void sortSections(); - void sortExceptionTable(); + template void sortExceptionTable(Chunk *first, Chunk *last); + void sortExceptionTables(); void sortCRTSectionChunks(std::vector &chunks); void addSyntheticIdata(); void sortBySectionOrder(std::vector &chunks); @@ -751,7 +752,7 @@ void Writer::run() { } writeSections(); prepareLoadConfig(); - sortExceptionTable(); + sortExceptionTables(); // Fix up the alignment in the TLS Directory's characteristic field, // if a specific alignment value is needed @@ -2164,41 +2165,52 @@ void Writer::writeBuildId() { } // Sort .pdata section contents according to PE/COFF spec 5.5. -void Writer::sortExceptionTable() { - if (!firstPdata) - return; - llvm::TimeTraceScope timeScope("Sort exception table"); +template +void Writer::sortExceptionTable(Chunk *first, Chunk *last) { // We assume .pdata contains function table entries only.
auto bufAddr = [&](Chunk *c) { OutputSection *os = ctx.getOutputSection(c); return buffer->getBufferStart() + os->getFileOff() + c->getRVA() - os->getRVA(); }; - uint8_t *begin = bufAddr(firstPdata); - uint8_t *end = bufAddr(lastPdata) + lastPdata->getSize(); - if (ctx.config.machine == AMD64) { - struct Entry { ulittle32_t begin, end, unwind; }; - if ((end - begin) % sizeof(Entry) != 0) { - fatal("unexpected .pdata size: " + Twine(end - begin) + - " is not a multiple of " + Twine(sizeof(Entry))); - } - parallelSort( - MutableArrayRef((Entry *)begin, (Entry *)end), - [](const Entry &a, const Entry &b) { return a.begin < b.begin; }); - return; + uint8_t *begin = bufAddr(first); + uint8_t *end = bufAddr(last) + last->getSize(); + if ((end - begin) % sizeof(T) != 0) { + fatal("unexpected .pdata size: " + Twine(end - begin) + + " is not a multiple of " + Twine(sizeof(T))); } - if (ctx.config.machine == ARMNT || ctx.config.machine == ARM64) { - struct Entry { ulittle32_t begin, unwind; }; - if ((end - begin) % sizeof(Entry) != 0) { - fatal("unexpected .pdata size: " + Twine(end - begin) + - " is not a multiple of " + Twine(sizeof(Entry))); - } - parallelSort( - MutableArrayRef((Entry *)begin, (Entry *)end), - [](const Entry &a, const Entry &b) { return a.begin < b.begin; }); - return; + + parallelSort(MutableArrayRef(reinterpret_cast(begin), + reinterpret_cast(end)), + [](const T &a, const T &b) { return a.begin < b.begin; }); +} + +// Sort .pdata section contents according to PE/COFF spec 5.5. +void Writer::sortExceptionTables() { + llvm::TimeTraceScope timeScope("Sort exception table"); + + struct EntryX64 { + ulittle32_t begin, end, unwind; + }; + struct EntryArm { + ulittle32_t begin, unwind; + }; + + switch (ctx.config.machine) { + case AMD64: + if (firstPdata) + sortExceptionTable(firstPdata, lastPdata); + break; + case ARMNT: + case ARM64: + if (firstPdata) + sortExceptionTable(firstPdata, lastPdata); + break; + default: + if (firstPdata) + lld::errs() << "warning: don't know how to handle .pdata.\n"; + break; } - lld::errs() << "warning: don't know how to handle .pdata.\n"; } // The CRT section contains, among other things, the array of function -- cgit v1.1
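Editor's note: the patch above folds the two per-architecture copies of the .pdata sort into one helper templated over the entry layout, which is what lets a later ARM64EC change sort the x86_64 and ARM tables of the same image independently. The following is a minimal standalone sketch of that idea, not lld code: it uses plain uint32_t and std::sort in place of LLVM's ulittle32_t and parallelSort, and the sample RVA values are invented for illustration. It sorts both the AMD64 three-field entries and the ARM/ARM64 two-field entries by their begin RVA, as PE/COFF spec 5.5 requires.

    // Standalone sketch only -- simplified from the lld/COFF/Writer.cpp change above.
    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // PE/COFF function-table entry layouts (stored as little-endian RVAs in the real section).
    struct EntryX64 { uint32_t begin, end, unwind; }; // AMD64
    struct EntryArm { uint32_t begin, unwind; };      // ARMNT / ARM64

    // Mirrors the templated helper: sort entries of either layout by begin RVA.
    template <typename T> void sortExceptionTable(std::vector<T> &table) {
      std::sort(table.begin(), table.end(),
                [](const T &a, const T &b) { return a.begin < b.begin; });
    }

    int main() {
      // Hypothetical, unsorted .pdata contents for two machine types.
      std::vector<EntryX64> x64Pdata = {{0x3000, 0x3040, 0x6000},
                                        {0x1000, 0x1020, 0x6010},
                                        {0x2000, 0x2080, 0x6020}};
      std::vector<EntryArm> armPdata = {{0x4200, 0x7000}, {0x4100, 0x7010}};

      sortExceptionTable(x64Pdata); // the same helper handles both layouts
      sortExceptionTable(armPdata);

      for (const auto &e : x64Pdata)
        std::printf("x64 begin=%#x end=%#x unwind=%#x\n", unsigned(e.begin),
                    unsigned(e.end), unsigned(e.unwind));
      for (const auto &e : armPdata)
        std::printf("arm begin=%#x unwind=%#x\n", unsigned(e.begin),
                    unsigned(e.unwind));
      return 0;
    }

In the real linker the entries live in the mapped output buffer rather than a std::vector, so the patch builds a MutableArrayRef over the section bytes and checks that the byte count is a multiple of the entry size before sorting.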